diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ba832e4692..2db4805cee0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ Docs: https://docs.openclaw.ai - Telegram/final preview delivery: split active preview lifecycle from cleanup retention so missing archived preview edits avoid duplicate fallback sends without clearing the live preview or blocking later in-place finalization. (#41662) thanks @hougangdev. - Cron/state errors: record `lastErrorReason` in cron job state and keep the gateway schema aligned with the full failover-reason set, including regression coverage for protocol conformance. (#14382) thanks @futuremind2026. - Tools/web search: recover OpenRouter Perplexity citation extraction from `message.annotations` when chat-completions responses omit top-level citations. (#40881) Thanks @laurieluo. +- Security/external content: treat whitespace-delimited `EXTERNAL UNTRUSTED CONTENT` boundary markers like underscore-delimited variants so prompt wrappers cannot bypass marker sanitization. (#35983) Thanks @urianpaul94. ## 2026.3.8 diff --git a/src/security/external-content.test.ts b/src/security/external-content.test.ts index 17076b642b1..b943bdacf72 100644 --- a/src/security/external-content.test.ts +++ b/src/security/external-content.test.ts @@ -138,6 +138,21 @@ describe("external-content security", () => { content: "Before <<>> middle <<>> after", }, + { + name: "sanitizes space-separated boundary markers", + content: + "Before <<>> middle <<>> after", + }, + { + name: "sanitizes mixed space/underscore boundary markers", + content: + "Before <<>> middle <<>> after", + }, + { + name: "sanitizes tab-delimited boundary markers", + content: + "Before <<>> middle <<>> after", + }, ])("$name", ({ content }) => { const result = wrapExternalContent(content, { source: "email" }); expectSanitizedBoundaryMarkers(result); @@ -204,6 +219,7 @@ describe("external-content security", () => { ["\u27EE", "\u27EF"], // flattened parentheses ["\u276C", "\u276D"], // medium angle bracket ornaments ["\u276E", "\u276F"], // heavy angle quotation ornaments + ["\u02C2", "\u02C3"], // modifier letter left/right arrowhead ]; for (const [left, right] of bracketPairs) { diff --git a/src/security/external-content.ts b/src/security/external-content.ts index 60f92584108..ff571871b5e 100644 --- a/src/security/external-content.ts +++ b/src/security/external-content.ts @@ -132,6 +132,8 @@ const ANGLE_BRACKET_MAP: Record = { 0x276d: ">", // medium right-pointing angle bracket ornament 0x276e: "<", // heavy left-pointing angle quotation mark ornament 0x276f: ">", // heavy right-pointing angle quotation mark ornament + 0x02c2: "<", // modifier letter left arrowhead + 0x02c3: ">", // modifier letter right arrowhead }; function foldMarkerChar(char: string): string { @@ -151,25 +153,27 @@ function foldMarkerChar(char: string): string { function foldMarkerText(input: string): string { return input.replace( - /[\uFF21-\uFF3A\uFF41-\uFF5A\uFF1C\uFF1E\u2329\u232A\u3008\u3009\u2039\u203A\u27E8\u27E9\uFE64\uFE65\u00AB\u00BB\u300A\u300B\u27EA\u27EB\u27EC\u27ED\u27EE\u27EF\u276C\u276D\u276E\u276F]/g, + /[\uFF21-\uFF3A\uFF41-\uFF5A\uFF1C\uFF1E\u2329\u232A\u3008\u3009\u2039\u203A\u27E8\u27E9\uFE64\uFE65\u00AB\u00BB\u300A\u300B\u27EA\u27EB\u27EC\u27ED\u27EE\u27EF\u276C\u276D\u276E\u276F\u02C2\u02C3]/g, (char) => foldMarkerChar(char), ); } function replaceMarkers(content: string): string { const folded = foldMarkerText(content); - if (!/external_untrusted_content/i.test(folded)) { + // Intentionally catch whitespace-delimited spoof variants (space, tab, newline) in addition + // to the legacy underscore form because LLMs may still parse them as trusted boundary markers. + if (!/external[\s_]+untrusted[\s_]+content/i.test(folded)) { return content; } const replacements: Array<{ start: number; end: number; value: string }> = []; // Match markers with or without id attribute (handles both legacy and spoofed markers) const patterns: Array<{ regex: RegExp; value: string }> = [ { - regex: /<<>>/gi, + regex: /<<<\s*EXTERNAL[\s_]+UNTRUSTED[\s_]+CONTENT(?:\s+id="[^"]{1,128}")?\s*>>>/gi, value: "[[MARKER_SANITIZED]]", }, { - regex: /<<>>/gi, + regex: /<<<\s*END[\s_]+EXTERNAL[\s_]+UNTRUSTED[\s_]+CONTENT(?:\s+id="[^"]{1,128}")?\s*>>>/gi, value: "[[END_MARKER_SANITIZED]]", }, ];