diff --git a/src/discord/monitor/exec-approvals.ts b/src/discord/monitor/exec-approvals.ts index 09ea174790e..19fef714d8b 100644 --- a/src/discord/monitor/exec-approvals.ts +++ b/src/discord/monitor/exec-approvals.ts @@ -24,7 +24,7 @@ import type { import { logDebug, logError } from "../../logger.js"; import { normalizeAccountId, resolveAgentIdFromSessionKey } from "../../routing/session-key.js"; import type { RuntimeEnv } from "../../runtime.js"; -import { compileSafeRegex } from "../../security/safe-regex.js"; +import { compileSafeRegex, testRegexWithBoundedInput } from "../../security/safe-regex.js"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES, @@ -34,8 +34,6 @@ import { createDiscordClient, stripUndefinedFields } from "../send.shared.js"; import { DiscordUiContainer } from "../ui.js"; const EXEC_APPROVAL_KEY = "execapproval"; -const SESSION_FILTER_REGEX_MAX_INPUT = 2048; - export type { ExecApprovalRequest, ExecApprovalResolved }; /** Extract Discord channel ID from a session key like "agent:main:discord:channel:123456789" */ @@ -368,28 +366,12 @@ export class DiscordExecApprovalHandler { if (!session) { return false; } - const head = session.slice(0, SESSION_FILTER_REGEX_MAX_INPUT); - const tail = - session.length > SESSION_FILTER_REGEX_MAX_INPUT - ? session.slice(-SESSION_FILTER_REGEX_MAX_INPUT) - : ""; const matches = config.sessionFilter.some((p) => { if (session.includes(p)) { return true; } const regex = compileSafeRegex(p); - if (!regex) { - return false; - } - regex.lastIndex = 0; - if (regex.test(head)) { - return true; - } - if (tail) { - regex.lastIndex = 0; - return regex.test(tail); - } - return false; + return regex ? testRegexWithBoundedInput(regex, session) : false; }); if (!matches) { return false; diff --git a/src/infra/exec-approval-forwarder.ts b/src/infra/exec-approval-forwarder.ts index 72017a36982..296a6aa6e49 100644 --- a/src/infra/exec-approval-forwarder.ts +++ b/src/infra/exec-approval-forwarder.ts @@ -7,7 +7,7 @@ import type { } from "../config/types.approvals.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { normalizeAccountId, parseAgentSessionKey } from "../routing/session-key.js"; -import { compileSafeRegex } from "../security/safe-regex.js"; +import { compileSafeRegex, testRegexWithBoundedInput } from "../security/safe-regex.js"; import { isDeliverableMessageChannel, normalizeMessageChannel, @@ -22,8 +22,6 @@ import { deliverOutboundPayloads } from "./outbound/deliver.js"; import { resolveSessionDeliveryTarget } from "./outbound/targets.js"; const log = createSubsystemLogger("gateway/exec-approvals"); -const SESSION_FILTER_REGEX_MAX_INPUT = 2048; - export type { ExecApprovalRequest, ExecApprovalResolved }; type ForwardTarget = ExecApprovalForwardTarget & { source: "session" | "target" }; @@ -57,28 +55,12 @@ function normalizeMode(mode?: ExecApprovalForwardingConfig["mode"]) { } function matchSessionFilter(sessionKey: string, patterns: string[]): boolean { - const head = sessionKey.slice(0, SESSION_FILTER_REGEX_MAX_INPUT); - const tail = - sessionKey.length > SESSION_FILTER_REGEX_MAX_INPUT - ? sessionKey.slice(-SESSION_FILTER_REGEX_MAX_INPUT) - : ""; return patterns.some((pattern) => { if (sessionKey.includes(pattern)) { return true; } const regex = compileSafeRegex(pattern); - if (!regex) { - return false; - } - regex.lastIndex = 0; - if (regex.test(head)) { - return true; - } - if (tail) { - regex.lastIndex = 0; - return regex.test(tail); - } - return false; + return regex ? testRegexWithBoundedInput(regex, sessionKey) : false; }); } diff --git a/src/security/safe-regex.test.ts b/src/security/safe-regex.test.ts index fe6f1042770..460149ad8ce 100644 --- a/src/security/safe-regex.test.ts +++ b/src/security/safe-regex.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { compileSafeRegex, hasNestedRepetition } from "./safe-regex.js"; +import { compileSafeRegex, hasNestedRepetition, testRegexWithBoundedInput } from "./safe-regex.js"; describe("safe regex", () => { it("flags nested repetition patterns", () => { @@ -27,4 +27,16 @@ describe("safe regex", () => { expect(re).toBeInstanceOf(RegExp); expect("TOKEN=abcd1234".replace(re as RegExp, "***")).toBe("***"); }); + + it("checks bounded regex windows for long inputs", () => { + expect( + testRegexWithBoundedInput(/^agent:main:discord:/, `agent:main:discord:${"x".repeat(5000)}`), + ).toBe(true); + expect(testRegexWithBoundedInput(/discord:tail$/, `${"x".repeat(5000)}discord:tail`)).toBe( + true, + ); + expect(testRegexWithBoundedInput(/discord:tail$/, `${"x".repeat(5000)}telegram:tail`)).toBe( + false, + ); + }); }); diff --git a/src/security/safe-regex.ts b/src/security/safe-regex.ts index 04c9be61a95..ffa34509130 100644 --- a/src/security/safe-regex.ts +++ b/src/security/safe-regex.ts @@ -21,7 +21,15 @@ type ParseFrame = { altMaxLength: number | null; }; +type PatternToken = + | { kind: "simple-token" } + | { kind: "group-open" } + | { kind: "group-close" } + | { kind: "alternation" } + | { kind: "quantifier"; quantifier: QuantifierRead }; + const SAFE_REGEX_CACHE_MAX = 256; +const SAFE_REGEX_TEST_WINDOW = 2048; const safeRegexCache = new Map(); function createParseFrame(): ParseFrame { @@ -60,134 +68,6 @@ function recordAlternative(frame: ParseFrame): void { frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength); } -export function hasNestedRepetition(source: string): boolean { - // Conservative parser: reject patterns where a repeated token/group is repeated again. - const frames: ParseFrame[] = [createParseFrame()]; - let inCharClass = false; - - const emitToken = (token: TokenState) => { - const frame = frames[frames.length - 1]; - frame.lastToken = token; - if (token.containsRepetition) { - frame.containsRepetition = true; - } - frame.branchMinLength = addLength(frame.branchMinLength, token.minLength); - frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength); - }; - - const emitSimpleToken = () => { - emitToken({ - containsRepetition: false, - hasAmbiguousAlternation: false, - minLength: 1, - maxLength: 1, - }); - }; - - for (let i = 0; i < source.length; i += 1) { - const ch = source[i]; - - if (ch === "\\") { - i += 1; - emitSimpleToken(); - continue; - } - - if (inCharClass) { - if (ch === "]") { - inCharClass = false; - } - continue; - } - - if (ch === "[") { - inCharClass = true; - emitSimpleToken(); - continue; - } - - if (ch === "(") { - frames.push(createParseFrame()); - continue; - } - - if (ch === ")") { - if (frames.length > 1) { - const frame = frames.pop() as ParseFrame; - if (frame.hasAlternation) { - recordAlternative(frame); - } - const groupMinLength = frame.hasAlternation - ? (frame.altMinLength ?? 0) - : frame.branchMinLength; - const groupMaxLength = frame.hasAlternation - ? (frame.altMaxLength ?? 0) - : frame.branchMaxLength; - emitToken({ - containsRepetition: frame.containsRepetition, - hasAmbiguousAlternation: - frame.hasAlternation && - frame.altMinLength !== null && - frame.altMaxLength !== null && - frame.altMinLength !== frame.altMaxLength, - minLength: groupMinLength, - maxLength: groupMaxLength, - }); - } - continue; - } - - if (ch === "|") { - const frame = frames[frames.length - 1]; - frame.hasAlternation = true; - recordAlternative(frame); - frame.branchMinLength = 0; - frame.branchMaxLength = 0; - frame.lastToken = null; - continue; - } - - const quantifier = readQuantifier(source, i); - if (quantifier) { - const frame = frames[frames.length - 1]; - const token = frame.lastToken; - if (!token) { - continue; - } - if (token.containsRepetition) { - return true; - } - if (token.hasAmbiguousAlternation && quantifier.maxRepeat === null) { - return true; - } - - const previousMinLength = token.minLength; - const previousMaxLength = token.maxLength; - token.minLength = multiplyLength(token.minLength, quantifier.minRepeat); - token.maxLength = - quantifier.maxRepeat === null - ? Number.POSITIVE_INFINITY - : multiplyLength(token.maxLength, quantifier.maxRepeat); - token.containsRepetition = true; - frame.containsRepetition = true; - frame.branchMinLength = frame.branchMinLength - previousMinLength + token.minLength; - - const branchMaxBase = - Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength) - ? frame.branchMaxLength - previousMaxLength - : Number.POSITIVE_INFINITY; - frame.branchMaxLength = addLength(branchMaxBase, token.maxLength); - - i += quantifier.consumed - 1; - continue; - } - - emitSimpleToken(); - } - - return false; -} - function readQuantifier(source: string, index: number): QuantifierRead | null { const ch = source[index]; const consumed = source[index + 1] === "?" ? 2 : 1; @@ -237,6 +117,191 @@ function readQuantifier(source: string, index: number): QuantifierRead | null { return { consumed: i - index, minRepeat, maxRepeat }; } +function tokenizePattern(source: string): PatternToken[] { + const tokens: PatternToken[] = []; + let inCharClass = false; + + for (let i = 0; i < source.length; i += 1) { + const ch = source[i]; + + if (ch === "\\") { + i += 1; + tokens.push({ kind: "simple-token" }); + continue; + } + + if (inCharClass) { + if (ch === "]") { + inCharClass = false; + } + continue; + } + + if (ch === "[") { + inCharClass = true; + tokens.push({ kind: "simple-token" }); + continue; + } + + if (ch === "(") { + tokens.push({ kind: "group-open" }); + continue; + } + + if (ch === ")") { + tokens.push({ kind: "group-close" }); + continue; + } + + if (ch === "|") { + tokens.push({ kind: "alternation" }); + continue; + } + + const quantifier = readQuantifier(source, i); + if (quantifier) { + tokens.push({ kind: "quantifier", quantifier }); + i += quantifier.consumed - 1; + continue; + } + + tokens.push({ kind: "simple-token" }); + } + + return tokens; +} + +function analyzeTokensForNestedRepetition(tokens: PatternToken[]): boolean { + const frames: ParseFrame[] = [createParseFrame()]; + + const emitToken = (token: TokenState) => { + const frame = frames[frames.length - 1]; + frame.lastToken = token; + if (token.containsRepetition) { + frame.containsRepetition = true; + } + frame.branchMinLength = addLength(frame.branchMinLength, token.minLength); + frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength); + }; + + const emitSimpleToken = () => { + emitToken({ + containsRepetition: false, + hasAmbiguousAlternation: false, + minLength: 1, + maxLength: 1, + }); + }; + + for (const token of tokens) { + if (token.kind === "simple-token") { + emitSimpleToken(); + continue; + } + + if (token.kind === "group-open") { + frames.push(createParseFrame()); + continue; + } + + if (token.kind === "group-close") { + if (frames.length > 1) { + const frame = frames.pop() as ParseFrame; + if (frame.hasAlternation) { + recordAlternative(frame); + } + const groupMinLength = frame.hasAlternation + ? (frame.altMinLength ?? 0) + : frame.branchMinLength; + const groupMaxLength = frame.hasAlternation + ? (frame.altMaxLength ?? 0) + : frame.branchMaxLength; + emitToken({ + containsRepetition: frame.containsRepetition, + hasAmbiguousAlternation: + frame.hasAlternation && + frame.altMinLength !== null && + frame.altMaxLength !== null && + frame.altMinLength !== frame.altMaxLength, + minLength: groupMinLength, + maxLength: groupMaxLength, + }); + } + continue; + } + + if (token.kind === "alternation") { + const frame = frames[frames.length - 1]; + frame.hasAlternation = true; + recordAlternative(frame); + frame.branchMinLength = 0; + frame.branchMaxLength = 0; + frame.lastToken = null; + continue; + } + + const frame = frames[frames.length - 1]; + const previousToken = frame.lastToken; + if (!previousToken) { + continue; + } + if (previousToken.containsRepetition) { + return true; + } + if (previousToken.hasAmbiguousAlternation && token.quantifier.maxRepeat === null) { + return true; + } + + const previousMinLength = previousToken.minLength; + const previousMaxLength = previousToken.maxLength; + previousToken.minLength = multiplyLength(previousToken.minLength, token.quantifier.minRepeat); + previousToken.maxLength = + token.quantifier.maxRepeat === null + ? Number.POSITIVE_INFINITY + : multiplyLength(previousToken.maxLength, token.quantifier.maxRepeat); + previousToken.containsRepetition = true; + frame.containsRepetition = true; + frame.branchMinLength = frame.branchMinLength - previousMinLength + previousToken.minLength; + + const branchMaxBase = + Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength) + ? frame.branchMaxLength - previousMaxLength + : Number.POSITIVE_INFINITY; + frame.branchMaxLength = addLength(branchMaxBase, previousToken.maxLength); + } + + return false; +} + +function testRegexFromStart(regex: RegExp, value: string): boolean { + regex.lastIndex = 0; + return regex.test(value); +} + +export function testRegexWithBoundedInput( + regex: RegExp, + input: string, + maxWindow = SAFE_REGEX_TEST_WINDOW, +): boolean { + if (maxWindow <= 0) { + return false; + } + if (input.length <= maxWindow) { + return testRegexFromStart(regex, input); + } + const head = input.slice(0, maxWindow); + if (testRegexFromStart(regex, head)) { + return true; + } + return testRegexFromStart(regex, input.slice(-maxWindow)); +} + +export function hasNestedRepetition(source: string): boolean { + // Conservative parser: tokenize first, then check if repeated tokens/groups are repeated again. + // Non-goal: complete regex AST support; keep strict enough for config safety checks. + return analyzeTokensForNestedRepetition(tokenizePattern(source)); +} + export function compileSafeRegex(source: string, flags = ""): RegExp | null { const trimmed = source.trim(); if (!trimmed) {