mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 15:30:39 +00:00
refactor(security): split safe-regex parse and bounded matching
This commit is contained in:
@@ -24,7 +24,7 @@ import type {
|
||||
import { logDebug, logError } from "../../logger.js";
|
||||
import { normalizeAccountId, resolveAgentIdFromSessionKey } from "../../routing/session-key.js";
|
||||
import type { RuntimeEnv } from "../../runtime.js";
|
||||
import { compileSafeRegex } from "../../security/safe-regex.js";
|
||||
import { compileSafeRegex, testRegexWithBoundedInput } from "../../security/safe-regex.js";
|
||||
import {
|
||||
GATEWAY_CLIENT_MODES,
|
||||
GATEWAY_CLIENT_NAMES,
|
||||
@@ -34,8 +34,6 @@ import { createDiscordClient, stripUndefinedFields } from "../send.shared.js";
|
||||
import { DiscordUiContainer } from "../ui.js";
|
||||
|
||||
const EXEC_APPROVAL_KEY = "execapproval";
|
||||
const SESSION_FILTER_REGEX_MAX_INPUT = 2048;
|
||||
|
||||
export type { ExecApprovalRequest, ExecApprovalResolved };
|
||||
|
||||
/** Extract Discord channel ID from a session key like "agent:main:discord:channel:123456789" */
|
||||
@@ -368,28 +366,12 @@ export class DiscordExecApprovalHandler {
|
||||
if (!session) {
|
||||
return false;
|
||||
}
|
||||
const head = session.slice(0, SESSION_FILTER_REGEX_MAX_INPUT);
|
||||
const tail =
|
||||
session.length > SESSION_FILTER_REGEX_MAX_INPUT
|
||||
? session.slice(-SESSION_FILTER_REGEX_MAX_INPUT)
|
||||
: "";
|
||||
const matches = config.sessionFilter.some((p) => {
|
||||
if (session.includes(p)) {
|
||||
return true;
|
||||
}
|
||||
const regex = compileSafeRegex(p);
|
||||
if (!regex) {
|
||||
return false;
|
||||
}
|
||||
regex.lastIndex = 0;
|
||||
if (regex.test(head)) {
|
||||
return true;
|
||||
}
|
||||
if (tail) {
|
||||
regex.lastIndex = 0;
|
||||
return regex.test(tail);
|
||||
}
|
||||
return false;
|
||||
return regex ? testRegexWithBoundedInput(regex, session) : false;
|
||||
});
|
||||
if (!matches) {
|
||||
return false;
|
||||
|
||||
@@ -7,7 +7,7 @@ import type {
|
||||
} from "../config/types.approvals.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { normalizeAccountId, parseAgentSessionKey } from "../routing/session-key.js";
|
||||
import { compileSafeRegex } from "../security/safe-regex.js";
|
||||
import { compileSafeRegex, testRegexWithBoundedInput } from "../security/safe-regex.js";
|
||||
import {
|
||||
isDeliverableMessageChannel,
|
||||
normalizeMessageChannel,
|
||||
@@ -22,8 +22,6 @@ import { deliverOutboundPayloads } from "./outbound/deliver.js";
|
||||
import { resolveSessionDeliveryTarget } from "./outbound/targets.js";
|
||||
|
||||
const log = createSubsystemLogger("gateway/exec-approvals");
|
||||
const SESSION_FILTER_REGEX_MAX_INPUT = 2048;
|
||||
|
||||
export type { ExecApprovalRequest, ExecApprovalResolved };
|
||||
|
||||
type ForwardTarget = ExecApprovalForwardTarget & { source: "session" | "target" };
|
||||
@@ -57,28 +55,12 @@ function normalizeMode(mode?: ExecApprovalForwardingConfig["mode"]) {
|
||||
}
|
||||
|
||||
/**
 * Reports whether a session key is selected by at least one filter pattern.
 *
 * Each pattern is tried as a literal substring first. If that fails, the pattern
 * is compiled with `compileSafeRegex`; a pattern that does not compile (null
 * result) never matches. Regex evaluation goes through `testRegexWithBoundedInput`
 * so that only bounded windows of a long session key are scanned, instead of
 * re-implementing the head/tail windowing locally.
 *
 * @param sessionKey - Session key to test.
 * @param patterns - Literal substrings or regex sources.
 * @returns `true` as soon as any pattern matches, otherwise `false`.
 */
function matchSessionFilter(sessionKey: string, patterns: string[]): boolean {
  return patterns.some((pattern) => {
    if (sessionKey.includes(pattern)) {
      return true;
    }
    const regex = compileSafeRegex(pattern);
    return regex ? testRegexWithBoundedInput(regex, sessionKey) : false;
  });
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { compileSafeRegex, hasNestedRepetition } from "./safe-regex.js";
|
||||
import { compileSafeRegex, hasNestedRepetition, testRegexWithBoundedInput } from "./safe-regex.js";
|
||||
|
||||
describe("safe regex", () => {
|
||||
it("flags nested repetition patterns", () => {
|
||||
@@ -27,4 +27,16 @@ describe("safe regex", () => {
|
||||
expect(re).toBeInstanceOf(RegExp);
|
||||
expect("TOKEN=abcd1234".replace(re as RegExp, "***")).toBe("***");
|
||||
});
|
||||
|
||||
it("checks bounded regex windows for long inputs", () => {
|
||||
expect(
|
||||
testRegexWithBoundedInput(/^agent:main:discord:/, `agent:main:discord:${"x".repeat(5000)}`),
|
||||
).toBe(true);
|
||||
expect(testRegexWithBoundedInput(/discord:tail$/, `${"x".repeat(5000)}discord:tail`)).toBe(
|
||||
true,
|
||||
);
|
||||
expect(testRegexWithBoundedInput(/discord:tail$/, `${"x".repeat(5000)}telegram:tail`)).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -21,7 +21,15 @@ type ParseFrame = {
|
||||
altMaxLength: number | null;
|
||||
};
|
||||
|
||||
/**
 * Coarse lexical unit produced by `tokenizePattern` and consumed by
 * `analyzeTokensForNestedRepetition`.
 *
 * Only the structure needed for the nested-repetition heuristic is kept; the
 * concrete characters of the pattern are discarded. Members stay separate
 * object types so that checking `kind` narrows the union by elimination.
 */
type PatternToken =
  // One atom: a literal character, an escape sequence, or a whole character class.
  | { kind: "simple-token" }
  // "(" — starts a new group.
  | { kind: "group-open" }
  // ")" — ends the innermost open group.
  | { kind: "group-close" }
  // "|" — separates alternatives inside the current group (or at top level).
  | { kind: "alternation" }
  // A quantifier, carrying the repeat bounds parsed by `readQuantifier`.
  | { kind: "quantifier"; quantifier: QuantifierRead };
|
||||
|
||||
const SAFE_REGEX_CACHE_MAX = 256;
|
||||
const SAFE_REGEX_TEST_WINDOW = 2048;
|
||||
const safeRegexCache = new Map<string, RegExp | null>();
|
||||
|
||||
function createParseFrame(): ParseFrame {
|
||||
@@ -60,134 +68,6 @@ function recordAlternative(frame: ParseFrame): void {
|
||||
frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength);
|
||||
}
|
||||
|
||||
/**
 * Heuristically detects regex sources where a repeated token or group is itself
 * repeated again (for example `(a+)+`), or where a group with alternatives of
 * different lengths is repeated without an upper bound (for example `(a|aa)+`).
 *
 * The scan is delegated to the shared two-phase helpers instead of carrying a
 * second, inline copy of the same parser: `tokenizePattern` reduces the source to
 * structural tokens and `analyzeTokensForNestedRepetition` walks them.
 *
 * @param source - Regex source text (without delimiters or flags).
 * @returns `true` when the pattern should be rejected as potentially unsafe.
 */
export function hasNestedRepetition(source: string): boolean {
  // Conservative parser: reject patterns where a repeated token/group is repeated again.
  return analyzeTokensForNestedRepetition(tokenizePattern(source));
}
|
||||
|
||||
function readQuantifier(source: string, index: number): QuantifierRead | null {
|
||||
const ch = source[index];
|
||||
const consumed = source[index + 1] === "?" ? 2 : 1;
|
||||
@@ -237,6 +117,191 @@ function readQuantifier(source: string, index: number): QuantifierRead | null {
|
||||
return { consumed: i - index, minRepeat, maxRepeat };
|
||||
}
|
||||
|
||||
/**
 * Returns how many characters after a "(" at `openIndex` belong to a group
 * prefix rather than to the group's content.
 *
 * Recognized prefixes: `?:`, `?=`, `?!`, `?<=`, `?<!` and `?<name>`. Returns 0
 * for a plain capturing group or for an unterminated `?<name` prefix, in which
 * case the caller tokenizes the following characters as usual.
 */
function readGroupPrefixLength(source: string, openIndex: number): number {
  if (source[openIndex + 1] !== "?") {
    return 0;
  }
  const marker = source[openIndex + 2];
  if (marker === ":" || marker === "=" || marker === "!") {
    return 2;
  }
  if (marker !== "<") {
    return 0;
  }
  const afterAngle = source[openIndex + 3];
  if (afterAngle === "=" || afterAngle === "!") {
    return 3;
  }
  const nameEnd = source.indexOf(">", openIndex + 3);
  return nameEnd === -1 ? 0 : nameEnd - openIndex;
}

/**
 * Reduces a regex source string to the structural tokens used by the
 * nested-repetition check.
 *
 * - An escape sequence outside a character class is one atom.
 * - A character class `[...]` is exactly one atom; escapes inside it only skip
 *   the escaped character (so `\]` cannot close the class) and do not add atoms.
 *   Counting them separately would inflate the measured length of the class and
 *   skew the alternative-length comparison done by the analyzer.
 * - Group prefixes such as `?:` or `?<name>` are consumed together with "(" so
 *   they are not mistaken for a quantifier plus literal atoms.
 * - Anything `readQuantifier` recognizes becomes a quantifier token; every other
 *   character is a simple atom.
 *
 * @param source - Regex source text (without delimiters or flags).
 * @returns The token stream in source order.
 */
function tokenizePattern(source: string): PatternToken[] {
  const tokens: PatternToken[] = [];
  let inCharClass = false;

  for (let i = 0; i < source.length; i += 1) {
    const ch = source[i];

    if (ch === "\\") {
      // Always skip the escaped character; only emit an atom outside a class,
      // because the class itself was already emitted when "[" was seen.
      i += 1;
      if (!inCharClass) {
        tokens.push({ kind: "simple-token" });
      }
      continue;
    }

    if (inCharClass) {
      if (ch === "]") {
        inCharClass = false;
      }
      continue;
    }

    if (ch === "[") {
      inCharClass = true;
      tokens.push({ kind: "simple-token" });
      continue;
    }

    if (ch === "(") {
      tokens.push({ kind: "group-open" });
      // Step over "?:", "?=", "?!", "?<=", "?<!" or "?<name>" if present.
      i += readGroupPrefixLength(source, i);
      continue;
    }

    if (ch === ")") {
      tokens.push({ kind: "group-close" });
      continue;
    }

    if (ch === "|") {
      tokens.push({ kind: "alternation" });
      continue;
    }

    const quantifier = readQuantifier(source, i);
    if (quantifier) {
      tokens.push({ kind: "quantifier", quantifier });
      // The loop increment accounts for the first consumed character.
      i += quantifier.consumed - 1;
      continue;
    }

    tokens.push({ kind: "simple-token" });
  }

  return tokens;
}
|
||||
|
||||
/**
 * Walks a token stream and reports whether it contains a construct that the
 * safe-regex heuristic rejects:
 *
 * 1. a quantifier applied to a token/group that already contains a quantifier, or
 * 2. an unbounded quantifier applied to a group whose possible match lengths
 *    differ because of alternation (directly, or inside a nested group).
 *
 * A stack of parse frames mirrors group nesting. Each frame tracks the min/max
 * length of the branch being read and, once a "|" is seen, the min/max across
 * finished alternatives (maintained via `recordAlternative`).
 *
 * @param tokens - Output of `tokenizePattern`.
 * @returns `true` when the pattern should be rejected.
 */
function analyzeTokensForNestedRepetition(tokens: PatternToken[]): boolean {
  const frames: ParseFrame[] = [createParseFrame()];

  // Appends a finished token to the innermost frame and folds its length and
  // repetition flag into that frame's running branch totals.
  const emitToken = (token: TokenState) => {
    const frame = frames[frames.length - 1];
    frame.lastToken = token;
    if (token.containsRepetition) {
      frame.containsRepetition = true;
    }
    frame.branchMinLength = addLength(frame.branchMinLength, token.minLength);
    frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength);
  };

  // A plain atom always has length exactly 1 and no repetition.
  const emitSimpleToken = () => {
    emitToken({
      containsRepetition: false,
      hasAmbiguousAlternation: false,
      minLength: 1,
      maxLength: 1,
    });
  };

  for (const token of tokens) {
    if (token.kind === "simple-token") {
      emitSimpleToken();
      continue;
    }

    if (token.kind === "group-open") {
      frames.push(createParseFrame());
      continue;
    }

    if (token.kind === "group-close") {
      // An unmatched ")" at the root frame is ignored.
      if (frames.length > 1) {
        const frame = frames.pop() as ParseFrame;
        if (frame.hasAlternation) {
          // Fold the last alternative into the alt min/max before reading them.
          recordAlternative(frame);
        }
        const groupMinLength = frame.hasAlternation
          ? (frame.altMinLength ?? 0)
          : frame.branchMinLength;
        const groupMaxLength = frame.hasAlternation
          ? (frame.altMaxLength ?? 0)
          : frame.branchMaxLength;

        const ownAlternationIsAmbiguous =
          frame.hasAlternation &&
          frame.altMinLength !== null &&
          frame.altMaxLength !== null &&
          frame.altMinLength !== frame.altMaxLength;
        // Without any quantifier inside, a group can only have differing min/max
        // lengths because some nested group has alternatives of different
        // lengths. Carry that ambiguity outward so wrapping the alternation in
        // an extra group, e.g. "((a|aa))+", is judged like "(a|aa)+".
        const inheritsAmbiguity =
          !frame.containsRepetition && groupMinLength !== groupMaxLength;

        emitToken({
          containsRepetition: frame.containsRepetition,
          hasAmbiguousAlternation: ownAlternationIsAmbiguous || inheritsAmbiguity,
          minLength: groupMinLength,
          maxLength: groupMaxLength,
        });
      }
      continue;
    }

    if (token.kind === "alternation") {
      const frame = frames[frames.length - 1];
      frame.hasAlternation = true;
      recordAlternative(frame);
      // Start measuring the next alternative from scratch.
      frame.branchMinLength = 0;
      frame.branchMaxLength = 0;
      frame.lastToken = null;
      continue;
    }

    // Remaining kind: quantifier. It applies to the token emitted just before it.
    const frame = frames[frames.length - 1];
    const previousToken = frame.lastToken;
    if (!previousToken) {
      // Nothing to repeat (start of pattern, group, or alternative).
      continue;
    }
    if (previousToken.containsRepetition) {
      return true;
    }
    if (previousToken.hasAmbiguousAlternation && token.quantifier.maxRepeat === null) {
      return true;
    }

    // Scale the repeated token's length bounds and replace its old contribution
    // to the current branch totals with the scaled one.
    const previousMinLength = previousToken.minLength;
    const previousMaxLength = previousToken.maxLength;
    previousToken.minLength = multiplyLength(previousToken.minLength, token.quantifier.minRepeat);
    previousToken.maxLength =
      token.quantifier.maxRepeat === null
        ? Number.POSITIVE_INFINITY
        : multiplyLength(previousToken.maxLength, token.quantifier.maxRepeat);
    previousToken.containsRepetition = true;
    frame.containsRepetition = true;
    frame.branchMinLength = frame.branchMinLength - previousMinLength + previousToken.minLength;

    // Subtracting from an infinite total is meaningless; keep it infinite.
    const branchMaxBase =
      Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength)
        ? frame.branchMaxLength - previousMaxLength
        : Number.POSITIVE_INFINITY;
    frame.branchMaxLength = addLength(branchMaxBase, previousToken.maxLength);
  }

  return false;
}
|
||||
|
||||
/**
 * Tests `value` against `regex`, always matching from the beginning of the
 * string and leaving the regex in a clean state.
 *
 * Regexes with the `g` or `y` flag remember `lastIndex` between calls. It is
 * reset before the test so a previous match cannot shift the start position,
 * and reset again afterwards so that a shared (for example cached) RegExp
 * instance does not hand a stale position to its next user.
 *
 * @param regex - Regex to evaluate; its `lastIndex` is 0 on return.
 * @param value - String to test.
 * @returns Whether the regex matches anywhere in `value`.
 */
function testRegexFromStart(regex: RegExp, value: string): boolean {
  regex.lastIndex = 0;
  const matched = regex.test(value);
  regex.lastIndex = 0;
  return matched;
}
|
||||
|
||||
/**
 * Tests a regex against at most `maxWindow` characters from each end of
 * `input`, so the amount of text scanned stays bounded for very long inputs.
 *
 * Inputs no longer than the window are tested whole. Longer inputs are tested
 * against their first `maxWindow` characters and, if that fails, against their
 * last `maxWindow` characters. A match that only exists in the middle of a long
 * input, or that spans both windows, is therefore not found.
 *
 * @param regex - Regex to evaluate.
 * @param input - Text to test.
 * @param maxWindow - Maximum number of characters per window; defaults to
 *   `SAFE_REGEX_TEST_WINDOW`. A value that is `NaN`, zero or negative disables
 *   matching.
 * @returns Whether the regex matched one of the windows.
 */
export function testRegexWithBoundedInput(
  regex: RegExp,
  input: string,
  maxWindow = SAFE_REGEX_TEST_WINDOW,
): boolean {
  // NaN fails every comparison, so it must be rejected explicitly: otherwise it
  // would slip past both guards and `input.slice(-NaN)` would yield the entire
  // input, defeating the bound.
  if (Number.isNaN(maxWindow) || maxWindow <= 0) {
    return false;
  }
  if (input.length <= maxWindow) {
    return testRegexFromStart(regex, input);
  }
  if (testRegexFromStart(regex, input.slice(0, maxWindow))) {
    return true;
  }
  return testRegexFromStart(regex, input.slice(-maxWindow));
}
|
||||
|
||||
/**
 * Heuristic check for regex sources in which an already-repeated token or group
 * is repeated again.
 *
 * Deliberately conservative and incomplete: this is not a full regex AST, only
 * a scan strict enough for validating patterns that come from configuration.
 *
 * @param source - Regex source text (without delimiters or flags).
 * @returns `true` when the pattern should be rejected.
 */
export function hasNestedRepetition(source: string): boolean {
  // Phase 1: reduce the source to structural tokens.
  const tokens = tokenizePattern(source);
  // Phase 2: look for quantifiers stacked on already-quantified tokens/groups.
  return analyzeTokensForNestedRepetition(tokens);
}
|
||||
|
||||
export function compileSafeRegex(source: string, flags = ""): RegExp | null {
|
||||
const trimmed = source.trim();
|
||||
if (!trimmed) {
|
||||
|
||||
Reference in New Issue
Block a user