fix: harden mention pattern regex compilation

This commit is contained in:
Peter Steinberger
2026-03-15 08:44:02 -07:00
parent e4c61723cd
commit ff61343d76
17 changed files with 265 additions and 65 deletions

View File

@@ -0,0 +1,78 @@
import {
compileSafeRegexDetailed,
type SafeRegexCompileResult,
type SafeRegexRejectReason,
} from "./safe-regex.js";
/** Reject reasons reported for config patterns: every `SafeRegexRejectReason` except "empty". */
export type ConfigRegexRejectReason = Exclude<SafeRegexRejectReason, "empty">;
/**
 * Outcome of compiling one config-supplied pattern.
 *
 * The two members are told apart by `regex` / `reason`: either `regex` is a
 * compiled `RegExp` and `reason` is `null`, or `regex` is `null` and `reason`
 * names why the pattern was rejected. `pattern` and `flags` are present in
 * both members.
 */
export type CompiledConfigRegex =
// Accepted pattern.
| {
regex: RegExp;
pattern: string;
flags: string;
reason: null;
}
// Rejected pattern.
| {
regex: null;
pattern: string;
flags: string;
reason: ConfigRegexRejectReason;
};
/**
 * Map a safe-regex result's reason onto the config-level reason set.
 *
 * @param result - Result produced by the safe-regex compiler.
 * @returns `null` when the reason is `null` or "empty"; otherwise the reason unchanged.
 */
function normalizeRejectReason(result: SafeRegexCompileResult): ConfigRegexRejectReason | null {
  const { reason } = result;
  // "empty" has no config-level counterpart; a null reason passes through as null.
  return reason === "empty" ? null : reason;
}
/**
 * Compile a single config-supplied pattern through the safe-regex checks.
 *
 * @param pattern - Raw pattern text; passed unchanged to `compileSafeRegexDetailed`.
 * @param flags - RegExp flags forwarded to `compileSafeRegexDetailed`; defaults to "".
 * @returns `null` when the pattern is reported as "empty"; otherwise a
 *   `CompiledConfigRegex` carrying either the compiled regex (`reason: null`)
 *   or the reject reason (`regex: null`). `pattern` and `flags` are copied
 *   from the safe-regex result's `source` and `flags`.
 */
export function compileConfigRegex(pattern: string, flags = ""): CompiledConfigRegex | null {
  const result = compileSafeRegexDetailed(pattern, flags);

  // Narrowing on `regex` lets each return literal be checked against one
  // union member, so no type assertion is needed.
  if (result.regex !== null) {
    return { regex: result.regex, pattern: result.source, flags: result.flags, reason: null };
  }

  // On the reject branch the helper yields null only for the "empty" reason.
  const reason = normalizeRejectReason(result);
  if (reason === null) {
    return null;
  }
  return { regex: null, pattern: result.source, flags: result.flags, reason };
}
/**
 * Compile a list of config patterns, separating accepted regexes from rejections.
 *
 * @param patterns - Pattern strings, each compiled via `compileConfigRegex`.
 * @param flags - RegExp flags applied to every pattern; defaults to "".
 * @returns `regexes` holds the compiled expressions in input order; `rejected`
 *   holds one entry per pattern that produced no regex. Patterns for which
 *   `compileConfigRegex` returns `null` appear in neither list.
 */
export function compileConfigRegexes(
  patterns: string[],
  flags = "",
): {
  regexes: RegExp[];
  rejected: Array<{
    pattern: string;
    flags: string;
    reason: ConfigRegexRejectReason;
  }>;
} {
  type RejectedEntry = { pattern: string; flags: string; reason: ConfigRegexRejectReason };

  const accepted: RegExp[] = [];
  const refused: RejectedEntry[] = [];

  for (const source of patterns) {
    const outcome = compileConfigRegex(source, flags);
    if (outcome !== null) {
      if (outcome.regex !== null) {
        accepted.push(outcome.regex);
      } else {
        refused.push({
          pattern: outcome.pattern,
          flags: outcome.flags,
          reason: outcome.reason,
        });
      }
    }
  }

  return { regexes: accepted, rejected: refused };
}

View File

@@ -1,5 +1,10 @@
import { describe, expect, it } from "vitest";
import { compileSafeRegex, hasNestedRepetition, testRegexWithBoundedInput } from "./safe-regex.js";
import {
compileSafeRegex,
compileSafeRegexDetailed,
hasNestedRepetition,
testRegexWithBoundedInput,
} from "./safe-regex.js";
describe("safe regex", () => {
it("flags nested repetition patterns", () => {
@@ -28,6 +33,13 @@ describe("safe regex", () => {
expect("TOKEN=abcd1234".replace(re as RegExp, "***")).toBe("***");
});
it("returns structured reject reasons", () => {
expect(compileSafeRegexDetailed(" ").reason).toBe("empty");
expect(compileSafeRegexDetailed("(a+)+$").reason).toBe("unsafe-nested-repetition");
expect(compileSafeRegexDetailed("(invalid").reason).toBe("invalid-regex");
expect(compileSafeRegexDetailed("^agent:main$").reason).toBeNull();
});
it("checks bounded regex windows for long inputs", () => {
expect(
testRegexWithBoundedInput(/^agent:main:discord:/, `agent:main:discord:${"x".repeat(5000)}`),

View File

@@ -30,7 +30,23 @@ type PatternToken =
const SAFE_REGEX_CACHE_MAX = 256;
const SAFE_REGEX_TEST_WINDOW = 2048;
const safeRegexCache = new Map<string, RegExp | null>();
/**
 * Reasons `compileSafeRegexDetailed` reports when it returns no regex:
 * - "empty": the source is blank after trimming.
 * - "unsafe-nested-repetition": `hasNestedRepetition` flagged the trimmed source.
 * - "invalid-regex": the `RegExp` constructor threw for the trimmed source and flags.
 */
export type SafeRegexRejectReason = "empty" | "unsafe-nested-repetition" | "invalid-regex";
/**
 * Structured result of a safe-regex compilation.
 *
 * Either `regex` is a compiled `RegExp` and `reason` is `null`, or `regex` is
 * `null` and `reason` says why compilation was refused. `source` and `flags`
 * are present in both members.
 */
export type SafeRegexCompileResult =
// Compiled successfully.
| {
regex: RegExp;
source: string;
flags: string;
reason: null;
}
// Rejected.
| {
regex: null;
source: string;
flags: string;
reason: SafeRegexRejectReason;
};
const safeRegexCache = new Map<string, SafeRegexCompileResult>();
function createParseFrame(): ParseFrame {
return {
@@ -302,31 +318,44 @@ export function hasNestedRepetition(source: string): boolean {
return analyzeTokensForNestedRepetition(tokenizePattern(source));
}
export function compileSafeRegex(source: string, flags = ""): RegExp | null {
/**
 * Compile `source` into a RegExp and report a structured reason when no regex
 * is produced.
 *
 * The source is trimmed first; a blank source yields reason "empty" and is
 * returned before the cache is consulted. Other results, including
 * rejections, are stored in `safeRegexCache` under the key
 * `${flags}::${trimmed}`. When the cache grows past `SAFE_REGEX_CACHE_MAX`,
 * the first key in the map's iteration order is deleted.
 *
 * @param source - Pattern text; the trimmed form is what gets checked, compiled and returned as `source`.
 * @param flags - Flags passed to the `RegExp` constructor; defaults to "".
 * @returns A result whose `reason` is `null` on success, "unsafe-nested-repetition"
 *   when `hasNestedRepetition` flags the pattern, or "invalid-regex" when the
 *   `RegExp` constructor throws.
 */
export function compileSafeRegexDetailed(source: string, flags = ""): SafeRegexCompileResult {
const trimmed = source.trim();
if (!trimmed) {
return null;
return { regex: null, source: trimmed, flags, reason: "empty" };
}
const cacheKey = `${flags}::${trimmed}`;
if (safeRegexCache.has(cacheKey)) {
return safeRegexCache.get(cacheKey) ?? null;
return (
safeRegexCache.get(cacheKey) ?? {
regex: null,
source: trimmed,
flags,
reason: "invalid-regex",
}
);
}
let compiled: RegExp | null = null;
if (!hasNestedRepetition(trimmed)) {
let result: SafeRegexCompileResult;
if (hasNestedRepetition(trimmed)) {
result = { regex: null, source: trimmed, flags, reason: "unsafe-nested-repetition" };
} else {
try {
compiled = new RegExp(trimmed, flags);
result = { regex: new RegExp(trimmed, flags), source: trimmed, flags, reason: null };
} catch {
compiled = null;
result = { regex: null, source: trimmed, flags, reason: "invalid-regex" };
}
}
safeRegexCache.set(cacheKey, compiled);
safeRegexCache.set(cacheKey, result);
if (safeRegexCache.size > SAFE_REGEX_CACHE_MAX) {
const oldestKey = safeRegexCache.keys().next().value;
if (oldestKey) {
safeRegexCache.delete(oldestKey);
}
}
return compiled;
return result;
}
/**
 * Compile `source` via `compileSafeRegexDetailed` and return only its `regex` field.
 *
 * @param source - Pattern text.
 * @param flags - RegExp flags; defaults to "".
 * @returns The compiled regex, or `null` when the detailed result carries no regex.
 */
export function compileSafeRegex(source: string, flags = ""): RegExp | null {
  const { regex } = compileSafeRegexDetailed(source, flags);
  return regex;
}