mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-13 19:10:39 +00:00
fix(security): block quantified ambiguous alternation regex
This commit is contained in:
@@ -4,11 +4,15 @@ import { compileSafeRegex, hasNestedRepetition } from "./safe-regex.js";
|
||||
describe("safe regex", () => {
|
||||
it("flags nested repetition patterns", () => {
|
||||
expect(hasNestedRepetition("(a+)+$")).toBe(true);
|
||||
expect(hasNestedRepetition("(a|aa)+$")).toBe(true);
|
||||
expect(hasNestedRepetition("^(?:foo|bar)$")).toBe(false);
|
||||
expect(hasNestedRepetition("^(ab|cd)+$")).toBe(false);
|
||||
});
|
||||
|
||||
it("rejects unsafe nested repetition during compile", () => {
|
||||
expect(compileSafeRegex("(a+)+$")).toBeNull();
|
||||
expect(compileSafeRegex("(a|aa)+$")).toBeNull();
|
||||
expect(compileSafeRegex("(a|aa){2}$")).toBeInstanceOf(RegExp);
|
||||
});
|
||||
|
||||
it("compiles common safe filter regex", () => {
|
||||
|
||||
@@ -1,22 +1,68 @@
|
||||
type QuantifierRead = {
|
||||
consumed: number;
|
||||
minRepeat: number;
|
||||
maxRepeat: number | null;
|
||||
};
|
||||
|
||||
type TokenState = {
|
||||
containsRepetition: boolean;
|
||||
hasAmbiguousAlternation: boolean;
|
||||
minLength: number;
|
||||
maxLength: number;
|
||||
};
|
||||
|
||||
type ParseFrame = {
|
||||
lastToken: TokenState | null;
|
||||
containsRepetition: boolean;
|
||||
hasAlternation: boolean;
|
||||
branchMinLength: number;
|
||||
branchMaxLength: number;
|
||||
altMinLength: number | null;
|
||||
altMaxLength: number | null;
|
||||
};
|
||||
|
||||
const SAFE_REGEX_CACHE_MAX = 256;
|
||||
const safeRegexCache = new Map<string, RegExp | null>();
|
||||
|
||||
function createParseFrame(): ParseFrame {
|
||||
return {
|
||||
lastToken: null,
|
||||
containsRepetition: false,
|
||||
hasAlternation: false,
|
||||
branchMinLength: 0,
|
||||
branchMaxLength: 0,
|
||||
altMinLength: null,
|
||||
altMaxLength: null,
|
||||
};
|
||||
}
|
||||
|
||||
function addLength(left: number, right: number): number {
|
||||
if (!Number.isFinite(left) || !Number.isFinite(right)) {
|
||||
return Number.POSITIVE_INFINITY;
|
||||
}
|
||||
return left + right;
|
||||
}
|
||||
|
||||
function multiplyLength(length: number, factor: number): number {
|
||||
if (!Number.isFinite(length)) {
|
||||
return factor === 0 ? 0 : Number.POSITIVE_INFINITY;
|
||||
}
|
||||
return length * factor;
|
||||
}
|
||||
|
||||
function recordAlternative(frame: ParseFrame): void {
|
||||
if (frame.altMinLength === null || frame.altMaxLength === null) {
|
||||
frame.altMinLength = frame.branchMinLength;
|
||||
frame.altMaxLength = frame.branchMaxLength;
|
||||
return;
|
||||
}
|
||||
frame.altMinLength = Math.min(frame.altMinLength, frame.branchMinLength);
|
||||
frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength);
|
||||
}
|
||||
|
||||
export function hasNestedRepetition(source: string): boolean {
|
||||
// Conservative parser: reject patterns where a repeated token/group is repeated again.
|
||||
const frames: ParseFrame[] = [{ lastToken: null, containsRepetition: false }];
|
||||
const frames: ParseFrame[] = [createParseFrame()];
|
||||
let inCharClass = false;
|
||||
|
||||
const emitToken = (token: TokenState) => {
|
||||
@@ -25,6 +71,17 @@ export function hasNestedRepetition(source: string): boolean {
|
||||
if (token.containsRepetition) {
|
||||
frame.containsRepetition = true;
|
||||
}
|
||||
frame.branchMinLength = addLength(frame.branchMinLength, token.minLength);
|
||||
frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength);
|
||||
};
|
||||
|
||||
const emitSimpleToken = () => {
|
||||
emitToken({
|
||||
containsRepetition: false,
|
||||
hasAmbiguousAlternation: false,
|
||||
minLength: 1,
|
||||
maxLength: 1,
|
||||
});
|
||||
};
|
||||
|
||||
for (let i = 0; i < source.length; i += 1) {
|
||||
@@ -32,7 +89,7 @@ export function hasNestedRepetition(source: string): boolean {
|
||||
|
||||
if (ch === "\\") {
|
||||
i += 1;
|
||||
emitToken({ containsRepetition: false });
|
||||
emitSimpleToken();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -45,25 +102,47 @@ export function hasNestedRepetition(source: string): boolean {
|
||||
|
||||
if (ch === "[") {
|
||||
inCharClass = true;
|
||||
emitToken({ containsRepetition: false });
|
||||
emitSimpleToken();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === "(") {
|
||||
frames.push({ lastToken: null, containsRepetition: false });
|
||||
frames.push(createParseFrame());
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === ")") {
|
||||
if (frames.length > 1) {
|
||||
const frame = frames.pop() as ParseFrame;
|
||||
emitToken({ containsRepetition: frame.containsRepetition });
|
||||
if (frame.hasAlternation) {
|
||||
recordAlternative(frame);
|
||||
}
|
||||
const groupMinLength = frame.hasAlternation
|
||||
? (frame.altMinLength ?? 0)
|
||||
: frame.branchMinLength;
|
||||
const groupMaxLength = frame.hasAlternation
|
||||
? (frame.altMaxLength ?? 0)
|
||||
: frame.branchMaxLength;
|
||||
emitToken({
|
||||
containsRepetition: frame.containsRepetition,
|
||||
hasAmbiguousAlternation:
|
||||
frame.hasAlternation &&
|
||||
frame.altMinLength !== null &&
|
||||
frame.altMaxLength !== null &&
|
||||
frame.altMinLength !== frame.altMaxLength,
|
||||
minLength: groupMinLength,
|
||||
maxLength: groupMaxLength,
|
||||
});
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === "|") {
|
||||
const frame = frames[frames.length - 1];
|
||||
frame.hasAlternation = true;
|
||||
recordAlternative(frame);
|
||||
frame.branchMinLength = 0;
|
||||
frame.branchMaxLength = 0;
|
||||
frame.lastToken = null;
|
||||
continue;
|
||||
}
|
||||
@@ -78,13 +157,32 @@ export function hasNestedRepetition(source: string): boolean {
|
||||
if (token.containsRepetition) {
|
||||
return true;
|
||||
}
|
||||
if (token.hasAmbiguousAlternation && quantifier.maxRepeat === null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const previousMinLength = token.minLength;
|
||||
const previousMaxLength = token.maxLength;
|
||||
token.minLength = multiplyLength(token.minLength, quantifier.minRepeat);
|
||||
token.maxLength =
|
||||
quantifier.maxRepeat === null
|
||||
? Number.POSITIVE_INFINITY
|
||||
: multiplyLength(token.maxLength, quantifier.maxRepeat);
|
||||
token.containsRepetition = true;
|
||||
frame.containsRepetition = true;
|
||||
frame.branchMinLength = frame.branchMinLength - previousMinLength + token.minLength;
|
||||
|
||||
const branchMaxBase =
|
||||
Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength)
|
||||
? frame.branchMaxLength - previousMaxLength
|
||||
: Number.POSITIVE_INFINITY;
|
||||
frame.branchMaxLength = addLength(branchMaxBase, token.maxLength);
|
||||
|
||||
i += quantifier.consumed - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
emitToken({ containsRepetition: false });
|
||||
emitSimpleToken();
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -92,12 +190,20 @@ export function hasNestedRepetition(source: string): boolean {
|
||||
|
||||
function readQuantifier(source: string, index: number): QuantifierRead | null {
|
||||
const ch = source[index];
|
||||
if (ch === "*" || ch === "+" || ch === "?") {
|
||||
return { consumed: source[index + 1] === "?" ? 2 : 1 };
|
||||
const consumed = source[index + 1] === "?" ? 2 : 1;
|
||||
if (ch === "*") {
|
||||
return { consumed, minRepeat: 0, maxRepeat: null };
|
||||
}
|
||||
if (ch === "+") {
|
||||
return { consumed, minRepeat: 1, maxRepeat: null };
|
||||
}
|
||||
if (ch === "?") {
|
||||
return { consumed, minRepeat: 0, maxRepeat: 1 };
|
||||
}
|
||||
if (ch !== "{") {
|
||||
return null;
|
||||
}
|
||||
|
||||
let i = index + 1;
|
||||
while (i < source.length && /\d/.test(source[i])) {
|
||||
i += 1;
|
||||
@@ -105,12 +211,18 @@ function readQuantifier(source: string, index: number): QuantifierRead | null {
|
||||
if (i === index + 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const minRepeat = Number.parseInt(source.slice(index + 1, i), 10);
|
||||
let maxRepeat: number | null = minRepeat;
|
||||
if (source[i] === ",") {
|
||||
i += 1;
|
||||
const maxStart = i;
|
||||
while (i < source.length && /\d/.test(source[i])) {
|
||||
i += 1;
|
||||
}
|
||||
maxRepeat = i === maxStart ? null : Number.parseInt(source.slice(maxStart, i), 10);
|
||||
}
|
||||
|
||||
if (source[i] !== "}") {
|
||||
return null;
|
||||
}
|
||||
@@ -118,7 +230,11 @@ function readQuantifier(source: string, index: number): QuantifierRead | null {
|
||||
if (source[i] === "?") {
|
||||
i += 1;
|
||||
}
|
||||
return { consumed: i - index };
|
||||
if (maxRepeat !== null && maxRepeat < minRepeat) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return { consumed: i - index, minRepeat, maxRepeat };
|
||||
}
|
||||
|
||||
export function compileSafeRegex(source: string, flags = ""): RegExp | null {
|
||||
|
||||
Reference in New Issue
Block a user