refactor(security): split safe-regex parse and bounded matching

2026-05-06 04:10:46 +00:00 · 2026-03-02 16:46:31 +00:00
parent 8611fd67b5
commit 031bf0c6c0
4 changed files with 210 additions and 169 deletions
--- a/src/discord/monitor/exec-approvals.ts
+++ b/src/discord/monitor/exec-approvals.ts
@@ -24,7 +24,7 @@ import type {
 import { logDebug, logError } from "../../logger.js";
 import { normalizeAccountId, resolveAgentIdFromSessionKey } from "../../routing/session-key.js";
 import type { RuntimeEnv } from "../../runtime.js";
-import { compileSafeRegex } from "../../security/safe-regex.js";
+import { compileSafeRegex, testRegexWithBoundedInput } from "../../security/safe-regex.js";
 import {
  GATEWAY_CLIENT_MODES,
  GATEWAY_CLIENT_NAMES,
@@ -34,8 +34,6 @@ import { createDiscordClient, stripUndefinedFields } from "../send.shared.js";
 import { DiscordUiContainer } from "../ui.js";

 const EXEC_APPROVAL_KEY = "execapproval";
-const SESSION_FILTER_REGEX_MAX_INPUT = 2048;
-
 export type { ExecApprovalRequest, ExecApprovalResolved };

 /** Extract Discord channel ID from a session key like "agent:main:discord:channel:123456789" */
@@ -368,28 +366,12 @@ export class DiscordExecApprovalHandler {
      if (!session) {
        return false;
      }
-      const head = session.slice(0, SESSION_FILTER_REGEX_MAX_INPUT);
-      const tail =
-        session.length > SESSION_FILTER_REGEX_MAX_INPUT
-          ? session.slice(-SESSION_FILTER_REGEX_MAX_INPUT)
-          : "";
      const matches = config.sessionFilter.some((p) => {
        if (session.includes(p)) {
          return true;
        }
        const regex = compileSafeRegex(p);
-        if (!regex) {
-          return false;
-        }
-        regex.lastIndex = 0;
-        if (regex.test(head)) {
-          return true;
-        }
-        if (tail) {
-          regex.lastIndex = 0;
-          return regex.test(tail);
-        }
-        return false;
+        return regex ? testRegexWithBoundedInput(regex, session) : false;
      });
      if (!matches) {
        return false;
--- a/src/infra/exec-approval-forwarder.ts
+++ b/src/infra/exec-approval-forwarder.ts
@@ -7,7 +7,7 @@ import type {
 } from "../config/types.approvals.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import { normalizeAccountId, parseAgentSessionKey } from "../routing/session-key.js";
-import { compileSafeRegex } from "../security/safe-regex.js";
+import { compileSafeRegex, testRegexWithBoundedInput } from "../security/safe-regex.js";
 import {
  isDeliverableMessageChannel,
  normalizeMessageChannel,
@@ -22,8 +22,6 @@ import { deliverOutboundPayloads } from "./outbound/deliver.js";
 import { resolveSessionDeliveryTarget } from "./outbound/targets.js";

 const log = createSubsystemLogger("gateway/exec-approvals");
-const SESSION_FILTER_REGEX_MAX_INPUT = 2048;
-
 export type { ExecApprovalRequest, ExecApprovalResolved };

 type ForwardTarget = ExecApprovalForwardTarget & { source: "session" | "target" };
@@ -57,28 +55,12 @@ function normalizeMode(mode?: ExecApprovalForwardingConfig["mode"]) {
 }

+/**
+ * Reports whether `sessionKey` satisfies at least one entry of `patterns`.
+ *
+ * Each pattern is first tried as a literal substring of `sessionKey`. If that fails, the
+ * pattern is passed to `compileSafeRegex`; when a regex comes back it is evaluated with
+ * `testRegexWithBoundedInput`, and when nothing comes back the pattern does not match.
+ */
 function matchSessionFilter(sessionKey: string, patterns: string[]): boolean {
-  const head = sessionKey.slice(0, SESSION_FILTER_REGEX_MAX_INPUT);
-  const tail =
-    sessionKey.length > SESSION_FILTER_REGEX_MAX_INPUT
-      ? sessionKey.slice(-SESSION_FILTER_REGEX_MAX_INPUT)
-      : "";
   return patterns.some((pattern) => {
     if (sessionKey.includes(pattern)) {
       return true;
     }
     const regex = compileSafeRegex(pattern);
-    if (!regex) {
-      return false;
-    }
-    regex.lastIndex = 0;
-    if (regex.test(head)) {
-      return true;
-    }
-    if (tail) {
-      regex.lastIndex = 0;
-      return regex.test(tail);
-    }
-    return false;
+    return regex ? testRegexWithBoundedInput(regex, sessionKey) : false;
   });
 }

--- a/src/security/safe-regex.test.ts
+++ b/src/security/safe-regex.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from "vitest";
-import { compileSafeRegex, hasNestedRepetition } from "./safe-regex.js";
+import { compileSafeRegex, hasNestedRepetition, testRegexWithBoundedInput } from "./safe-regex.js";

 describe("safe regex", () => {
  it("flags nested repetition patterns", () => {
@@ -27,4 +27,16 @@ describe("safe regex", () => {
    expect(re).toBeInstanceOf(RegExp);
    expect("TOKEN=abcd1234".replace(re as RegExp, "***")).toBe("***");
  });
+
+  it("checks bounded regex windows for long inputs", () => {
+    expect(
+      testRegexWithBoundedInput(/^agent:main:discord:/, `agent:main:discord:${"x".repeat(5000)}`),
+    ).toBe(true);
+    expect(testRegexWithBoundedInput(/discord:tail$/, `${"x".repeat(5000)}discord:tail`)).toBe(
+      true,
+    );
+    expect(testRegexWithBoundedInput(/discord:tail$/, `${"x".repeat(5000)}telegram:tail`)).toBe(
+      false,
+    );
+  });
 });
--- a/src/security/safe-regex.ts
+++ b/src/security/safe-regex.ts
@@ -21,7 +21,15 @@ type ParseFrame = {
  altMaxLength: number | null;
 };

+/**
+ * Coarse lexical unit produced by `tokenizePattern` and consumed by
+ * `analyzeTokensForNestedRepetition`.
+ */
+type PatternToken =
+  // An escape sequence, a character class, or any other single pattern character.
+  | { kind: "simple-token" }
+  // A "(" outside a character class.
+  | { kind: "group-open" }
+  // A ")" outside a character class.
+  | { kind: "group-close" }
+  // A "|" outside a character class.
+  | { kind: "alternation" }
+  // A quantifier recognized by `readQuantifier`, carrying what it returned.
+  | { kind: "quantifier"; quantifier: QuantifierRead };
+
 const SAFE_REGEX_CACHE_MAX = 256;
+/** Default `maxWindow` for `testRegexWithBoundedInput`: the size of each head/tail slice tested. */
+const SAFE_REGEX_TEST_WINDOW = 2048;
 const safeRegexCache = new Map<string, RegExp | null>();

 function createParseFrame(): ParseFrame {
@@ -60,134 +68,6 @@ function recordAlternative(frame: ParseFrame): void {
  frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength);
 }

-export function hasNestedRepetition(source: string): boolean {
-  // Conservative parser: reject patterns where a repeated token/group is repeated again.
-  const frames: ParseFrame[] = [createParseFrame()];
-  let inCharClass = false;
-
-  const emitToken = (token: TokenState) => {
-    const frame = frames[frames.length - 1];
-    frame.lastToken = token;
-    if (token.containsRepetition) {
-      frame.containsRepetition = true;
-    }
-    frame.branchMinLength = addLength(frame.branchMinLength, token.minLength);
-    frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength);
-  };
-
-  const emitSimpleToken = () => {
-    emitToken({
-      containsRepetition: false,
-      hasAmbiguousAlternation: false,
-      minLength: 1,
-      maxLength: 1,
-    });
-  };
-
-  for (let i = 0; i < source.length; i += 1) {
-    const ch = source[i];
-
-    if (ch === "\\") {
-      i += 1;
-      emitSimpleToken();
-      continue;
-    }
-
-    if (inCharClass) {
-      if (ch === "]") {
-        inCharClass = false;
-      }
-      continue;
-    }
-
-    if (ch === "[") {
-      inCharClass = true;
-      emitSimpleToken();
-      continue;
-    }
-
-    if (ch === "(") {
-      frames.push(createParseFrame());
-      continue;
-    }
-
-    if (ch === ")") {
-      if (frames.length > 1) {
-        const frame = frames.pop() as ParseFrame;
-        if (frame.hasAlternation) {
-          recordAlternative(frame);
-        }
-        const groupMinLength = frame.hasAlternation
-          ? (frame.altMinLength ?? 0)
-          : frame.branchMinLength;
-        const groupMaxLength = frame.hasAlternation
-          ? (frame.altMaxLength ?? 0)
-          : frame.branchMaxLength;
-        emitToken({
-          containsRepetition: frame.containsRepetition,
-          hasAmbiguousAlternation:
-            frame.hasAlternation &&
-            frame.altMinLength !== null &&
-            frame.altMaxLength !== null &&
-            frame.altMinLength !== frame.altMaxLength,
-          minLength: groupMinLength,
-          maxLength: groupMaxLength,
-        });
-      }
-      continue;
-    }
-
-    if (ch === "|") {
-      const frame = frames[frames.length - 1];
-      frame.hasAlternation = true;
-      recordAlternative(frame);
-      frame.branchMinLength = 0;
-      frame.branchMaxLength = 0;
-      frame.lastToken = null;
-      continue;
-    }
-
-    const quantifier = readQuantifier(source, i);
-    if (quantifier) {
-      const frame = frames[frames.length - 1];
-      const token = frame.lastToken;
-      if (!token) {
-        continue;
-      }
-      if (token.containsRepetition) {
-        return true;
-      }
-      if (token.hasAmbiguousAlternation && quantifier.maxRepeat === null) {
-        return true;
-      }
-
-      const previousMinLength = token.minLength;
-      const previousMaxLength = token.maxLength;
-      token.minLength = multiplyLength(token.minLength, quantifier.minRepeat);
-      token.maxLength =
-        quantifier.maxRepeat === null
-          ? Number.POSITIVE_INFINITY
-          : multiplyLength(token.maxLength, quantifier.maxRepeat);
-      token.containsRepetition = true;
-      frame.containsRepetition = true;
-      frame.branchMinLength = frame.branchMinLength - previousMinLength + token.minLength;
-
-      const branchMaxBase =
-        Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength)
-          ? frame.branchMaxLength - previousMaxLength
-          : Number.POSITIVE_INFINITY;
-      frame.branchMaxLength = addLength(branchMaxBase, token.maxLength);
-
-      i += quantifier.consumed - 1;
-      continue;
-    }
-
-    emitSimpleToken();
-  }
-
-  return false;
-}
-
 function readQuantifier(source: string, index: number): QuantifierRead | null {
  const ch = source[index];
  const consumed = source[index + 1] === "?" ? 2 : 1;
@@ -237,6 +117,191 @@ function readQuantifier(source: string, index: number): QuantifierRead | null {
  return { consumed: i - index, minRepeat, maxRepeat };
 }

+/**
+ * Returns how many characters starting at `index` (the position right after a "(") are
+ * group-prefix syntax rather than pattern content: `?:`, `?=`, `?!`, `?<=`, `?<!` or `?<name>`.
+ * Returns 0 when no recognized prefix starts at `index`.
+ */
+function readGroupPrefixLength(source: string, index: number): number {
+  if (source[index] !== "?") {
+    return 0;
+  }
+  const marker = source[index + 1];
+  if (marker === ":" || marker === "=" || marker === "!") {
+    return 2;
+  }
+  if (marker !== "<") {
+    return 0;
+  }
+  const afterAngle = source[index + 2];
+  if (afterAngle === "=" || afterAngle === "!") {
+    return 3;
+  }
+  // Named capture group: skip through the ">" that closes the name, if there is one.
+  const nameEnd = source.indexOf(">", index + 2);
+  return nameEnd === -1 ? 0 : nameEnd - index + 1;
+}
+
+/**
+ * Flattens a regex source string into the coarse token stream that
+ * `analyzeTokensForNestedRepetition` consumes.
+ *
+ * This is not a full regex parser. A whole character class becomes one `simple-token`, an
+ * escape sequence outside a class becomes one `simple-token`, and "(", ")" and "|" become
+ * structural tokens. Anything `readQuantifier` recognizes becomes a `quantifier` token; every
+ * other character becomes a `simple-token`.
+ *
+ * @param source - Regex source text (without delimiters or flags).
+ * @returns Tokens in source order.
+ */
+function tokenizePattern(source: string): PatternToken[] {
+  const tokens: PatternToken[] = [];
+  let inCharClass = false;
+
+  for (let i = 0; i < source.length; i += 1) {
+    const ch = source[i];
+
+    if (ch === "\\") {
+      // Always consume the escaped character so an escaped "]" , "(" or "|" is never
+      // misread as structure.
+      i += 1;
+      // Inside a class the escape is part of the class, which was already emitted as a single
+      // token at its "[", so emitting again would count `[\d]` as two tokens.
+      if (!inCharClass) {
+        tokens.push({ kind: "simple-token" });
+      }
+      continue;
+    }
+
+    if (inCharClass) {
+      if (ch === "]") {
+        inCharClass = false;
+      }
+      continue;
+    }
+
+    if (ch === "[") {
+      inCharClass = true;
+      tokens.push({ kind: "simple-token" });
+      continue;
+    }
+
+    if (ch === "(") {
+      tokens.push({ kind: "group-open" });
+      // Group-prefix characters are syntax, not matchable content. Without this skip the
+      // marker (":", "=", "!", "<", a group name) would be tokenized as pattern content and
+      // `(?:a|b)` would be measured differently from `(a|b)`.
+      i += readGroupPrefixLength(source, i + 1);
+      continue;
+    }
+
+    if (ch === ")") {
+      tokens.push({ kind: "group-close" });
+      continue;
+    }
+
+    if (ch === "|") {
+      tokens.push({ kind: "alternation" });
+      continue;
+    }
+
+    const quantifier = readQuantifier(source, i);
+    if (quantifier) {
+      tokens.push({ kind: "quantifier", quantifier });
+      // `consumed` covers the whole quantifier; the loop increment accounts for one character.
+      i += quantifier.consumed - 1;
+      continue;
+    }
+
+    tokens.push({ kind: "simple-token" });
+  }
+
+  return tokens;
+}
+
+/**
+ * Scans a token stream for repetition applied to something that already repeats.
+ *
+ * A stack of parse frames tracks one frame per open group plus one for the top level. Returns
+ * `true` as soon as a quantifier targets a token that already contains repetition, or an
+ * unbounded quantifier (`maxRepeat === null`) targets a group whose recorded alternative
+ * lengths differ. Returns `false` if the stream ends without either case.
+ *
+ * @param tokens - Tokens in source order, as produced by `tokenizePattern`.
+ */
+function analyzeTokensForNestedRepetition(tokens: PatternToken[]): boolean {
+  const stack: ParseFrame[] = [createParseFrame()];
+
+  // Appends a finished token to the innermost frame and extends the current branch's bounds.
+  const pushToken = (state: TokenState): void => {
+    const top = stack[stack.length - 1];
+    top.lastToken = state;
+    if (state.containsRepetition) {
+      top.containsRepetition = true;
+    }
+    top.branchMinLength = addLength(top.branchMinLength, state.minLength);
+    top.branchMaxLength = addLength(top.branchMaxLength, state.maxLength);
+  };
+
+  for (const token of tokens) {
+    switch (token.kind) {
+      case "simple-token": {
+        pushToken({
+          containsRepetition: false,
+          hasAmbiguousAlternation: false,
+          minLength: 1,
+          maxLength: 1,
+        });
+        break;
+      }
+
+      case "group-open": {
+        stack.push(createParseFrame());
+        break;
+      }
+
+      case "group-close": {
+        // A ")" with no open group is ignored; the top-level frame is never popped.
+        if (stack.length <= 1) {
+          break;
+        }
+        const closed = stack.pop() as ParseFrame;
+        if (closed.hasAlternation) {
+          // Fold the final branch into the alternative bounds before reading them.
+          recordAlternative(closed);
+        }
+        const minLength = closed.hasAlternation
+          ? (closed.altMinLength ?? 0)
+          : closed.branchMinLength;
+        const maxLength = closed.hasAlternation
+          ? (closed.altMaxLength ?? 0)
+          : closed.branchMaxLength;
+        const hasAmbiguousAlternation =
+          closed.hasAlternation &&
+          closed.altMinLength !== null &&
+          closed.altMaxLength !== null &&
+          closed.altMinLength !== closed.altMaxLength;
+        // The whole group becomes a single token of the enclosing frame.
+        pushToken({
+          containsRepetition: closed.containsRepetition,
+          hasAmbiguousAlternation,
+          minLength,
+          maxLength,
+        });
+        break;
+      }
+
+      case "alternation": {
+        const top = stack[stack.length - 1];
+        top.hasAlternation = true;
+        recordAlternative(top);
+        // Start a fresh branch; a quantifier right after "|" has nothing to apply to.
+        top.branchMinLength = 0;
+        top.branchMaxLength = 0;
+        top.lastToken = null;
+        break;
+      }
+
+      case "quantifier": {
+        const top = stack[stack.length - 1];
+        const target = top.lastToken;
+        if (!target) {
+          break;
+        }
+        if (target.containsRepetition) {
+          return true;
+        }
+        const { minRepeat, maxRepeat } = token.quantifier;
+        if (target.hasAmbiguousAlternation && maxRepeat === null) {
+          return true;
+        }
+
+        // Capture the pre-quantifier bounds: the branch totals already include them and must
+        // have them swapped out for the multiplied bounds.
+        const oldMin = target.minLength;
+        const oldMax = target.maxLength;
+        target.minLength = multiplyLength(oldMin, minRepeat);
+        target.maxLength =
+          maxRepeat === null ? Number.POSITIVE_INFINITY : multiplyLength(oldMax, maxRepeat);
+        target.containsRepetition = true;
+        top.containsRepetition = true;
+        top.branchMinLength = top.branchMinLength - oldMin + target.minLength;
+
+        // Subtracting is only meaningful when both operands are finite.
+        const maxWithoutTarget =
+          Number.isFinite(top.branchMaxLength) && Number.isFinite(oldMax)
+            ? top.branchMaxLength - oldMax
+            : Number.POSITIVE_INFINITY;
+        top.branchMaxLength = addLength(maxWithoutTarget, target.maxLength);
+        break;
+      }
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Runs `regex.test(value)` after rewinding `lastIndex` to 0, so a regex that keeps state
+ * between calls (global or sticky) always scans from the start of `value`.
+ */
+function testRegexFromStart(regex: RegExp, value: string): boolean {
+  regex.lastIndex = 0;
+  const matched = regex.test(value);
+  return matched;
+}
+
+/**
+ * Tests `regex` against at most `maxWindow` leading and `maxWindow` trailing UTF-16 code units
+ * of `input`, so the amount of text handed to the regex is bounded for any input length.
+ *
+ * Inputs no longer than the window are tested whole. Longer inputs are tested against the head
+ * window first and then the tail window; text between the two windows is never examined.
+ *
+ * NOTE(review): each window is matched as a standalone string, so for long inputs `$` can match
+ * at the cut end of the head window and `^` at the cut start of the tail window. Confirm that
+ * callers accept this for anchored patterns.
+ *
+ * @param regex - Regex to evaluate; its `lastIndex` is reset by the helper before each test.
+ * @param input - String to test.
+ * @param maxWindow - Window size in UTF-16 code units; fractional values are floored.
+ * @returns `true` if a tested window matches. `false` if none matches, or if `maxWindow` is
+ *   `NaN` or floors to less than 1.
+ */
+export function testRegexWithBoundedInput(
+  regex: RegExp,
+  input: string,
+  maxWindow = SAFE_REGEX_TEST_WINDOW,
+): boolean {
+  // Normalize before slicing: `slice(-NaN)` and `slice(-0.5)` both resolve to `slice(0)`, which
+  // would hand the entire input to the regex and silently defeat the bound.
+  const windowSize = Math.floor(maxWindow);
+  if (!(windowSize >= 1)) {
+    return false;
+  }
+  if (input.length <= windowSize) {
+    return testRegexFromStart(regex, input);
+  }
+  if (testRegexFromStart(regex, input.slice(0, windowSize))) {
+    return true;
+  }
+  return testRegexFromStart(regex, input.slice(-windowSize));
+}
+
+/**
+ * Reports whether `source` repeats a token or group that is itself already repeated.
+ *
+ * Conservative by design: the pattern is tokenized first and the token stream is then checked.
+ * Complete regex AST support is a non-goal; the check only needs to be strict enough for
+ * config safety checks.
+ */
+export function hasNestedRepetition(source: string): boolean {
+  const tokens = tokenizePattern(source);
+  return analyzeTokensForNestedRepetition(tokens);
+}
+
 export function compileSafeRegex(source: string, flags = ""): RegExp | null {
  const trimmed = source.trim();
  if (!trimmed) {