mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:50:43 +00:00
fix(exec-approvals): escape control characters in display sanitizers (#68198)
* fix(exec-approvals): escape control characters in display sanitizers * docs(changelog): add exec approval control-char display sanitizer entry * fix(exec-approvals): redact before escape, cover U+2028/U+2029 in display sanitizers * fix(exec-approvals): strip invisibles before redaction and align forwarder test * fix(exec-approvals): cover Zs bypass and preserve multi-line context on obfuscated secrets * fix(exec-approvals): compare redaction outputs by content, not length * fix(exec-approvals): suppress raw command on bypass; cover non-ASCII Zs in macOS sanitizer * fix(exec-approvals): use position-bitmap bypass detection and bound input size * style(exec-approvals): satisfy oxlint no-new-array-single-argument and SwiftFormat * fix(exec-approvals): iterate by code point and redact before truncating
This commit is contained in:
@@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Failover/google: only treat `INTERNAL` status payloads as retryable timeouts when they also carry a `500` code, so malformed non-500 payloads do not enter the retry path. (#68238) Thanks @altaywtf and @Openbling.
|
||||
- Agents/tools: filter bundled MCP/LSP tools through the final owner-only and tool-policy pipeline after merging them into the effective tool list, so existing allowlists, deny rules, sandbox policy, subagent policy, and owner-only restrictions apply to bundled tools the same way they apply to core tools. (#68195)
|
||||
- Gateway/assistant media: require `operator.read` scope for assistant-media file and metadata requests on identity-bearing HTTP auth paths so callers without a read scope can no longer access assistant media. (#68175) Thanks @eleqtrizit.
|
||||
- Exec approvals/display: escape raw control characters (including newline and carriage return) in the shared and macOS approval-prompt command sanitizers, so trailing command payloads no longer render on hidden extra lines in the approval UI. (#68198)
|
||||
|
||||
## 2026.4.15
|
||||
|
||||
|
||||
@@ -22,7 +22,21 @@ enum ExecApprovalCommandDisplaySanitizer {
|
||||
}
|
||||
|
||||
private static func shouldEscape(_ scalar: UnicodeScalar) -> Bool {
|
||||
scalar.properties.generalCategory == .format || self.invisibleCodePoints.contains(scalar.value)
|
||||
let category = scalar.properties.generalCategory
|
||||
if category == .control
|
||||
|| category == .format
|
||||
|| category == .lineSeparator
|
||||
|| category == .paragraphSeparator
|
||||
{
|
||||
return true
|
||||
}
|
||||
// Escape non-ASCII space separators (NBSP, narrow NBSP, ideographic space, etc.) so
|
||||
// attackers cannot spoof token boundaries in the approval UI with spaces that render
|
||||
// like a plain space but are handled differently by shells/parsers.
|
||||
if category == .spaceSeparator, scalar.value != 0x20 {
|
||||
return true
|
||||
}
|
||||
return self.invisibleCodePoints.contains(scalar.value)
|
||||
}
|
||||
|
||||
private static func escape(_ scalar: UnicodeScalar) -> String {
|
||||
|
||||
@@ -9,4 +9,37 @@ struct ExecApprovalCommandDisplaySanitizerTests {
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(input) ==
|
||||
"date\\u{200B}\\u{3164}\\u{FFA0}\\u{115F}\\u{1160}가")
|
||||
}
|
||||
|
||||
@Test func `escapes control characters used to spoof line breaks`() {
|
||||
let input = "echo safe\n\rcurl https://example.test"
|
||||
#expect(
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(input) ==
|
||||
"echo safe\\u{A}\\u{D}curl https://example.test")
|
||||
}
|
||||
|
||||
@Test func `escapes Unicode line and paragraph separators`() {
|
||||
let lineInput = "echo ok\u{2028}curl https://example.test"
|
||||
#expect(
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(lineInput) ==
|
||||
"echo ok\\u{2028}curl https://example.test")
|
||||
let paragraphInput = "echo ok\u{2029}curl https://example.test"
|
||||
#expect(
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(paragraphInput) ==
|
||||
"echo ok\\u{2029}curl https://example.test")
|
||||
}
|
||||
|
||||
@Test func `escapes non-ASCII Unicode space separators while preserving ASCII space`() {
|
||||
let nbspInput = "echo ok\u{00A0}curl"
|
||||
#expect(
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(nbspInput) == "echo ok\\u{A0}curl")
|
||||
let narrowNbspInput = "echo ok\u{202F}curl"
|
||||
#expect(
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(narrowNbspInput) == "echo ok\\u{202F}curl")
|
||||
let ideographicSpaceInput = "echo ok\u{3000}curl"
|
||||
#expect(
|
||||
ExecApprovalCommandDisplaySanitizer.sanitize(ideographicSpaceInput) ==
|
||||
"echo ok\\u{3000}curl")
|
||||
let asciiSpaceInput = "echo ok curl"
|
||||
#expect(ExecApprovalCommandDisplaySanitizer.sanitize(asciiSpaceInput) == "echo ok curl")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,15 @@ describe("sanitizeExecApprovalDisplayText", () => {
|
||||
it.each([
|
||||
["echo hi\u200Bthere", "echo hi\\u{200B}there"],
|
||||
["date\u3164\uFFA0\u115F\u1160가", "date\\u{3164}\\u{FFA0}\\u{115F}\\u{1160}가"],
|
||||
["echo safe\n\rcurl https://example.test", "echo safe\\u{A}\\u{D}curl https://example.test"],
|
||||
[
|
||||
"echo ok\u2028curl https://example.test",
|
||||
"echo ok\\u{2028}curl https://example.test",
|
||||
],
|
||||
[
|
||||
"echo ok\u2029curl https://example.test",
|
||||
"echo ok\\u{2029}curl https://example.test",
|
||||
],
|
||||
])("sanitizes exec approval display text for %j", (input, expected) => {
|
||||
expect(sanitizeExecApprovalDisplayText(input)).toBe(expected);
|
||||
});
|
||||
@@ -34,6 +43,124 @@ describe("sanitizeExecApprovalDisplayText", () => {
|
||||
expect(result).not.toContain("ghp_1234567890abcdefghij1234567890abcdef");
|
||||
expect(result).toContain("git clone");
|
||||
});
|
||||
|
||||
it("masks the full token when a zero-width character is spliced into the middle", () => {
|
||||
const cmd = "echo sk-abc123\u200B456789012345678 remainder";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("sk-abc123");
|
||||
expect(result).not.toContain("456789012345678");
|
||||
expect(result).toContain("echo ");
|
||||
expect(result).toContain("remainder");
|
||||
});
|
||||
|
||||
it("masks the full token when NBSP (Zs) is spliced into the middle", () => {
|
||||
const cmd = "echo sk-abc123\u00A0456789012345678 remainder";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("sk-abc123");
|
||||
expect(result).not.toContain("456789012345678");
|
||||
expect(result).toContain("echo ");
|
||||
expect(result).toContain("remainder");
|
||||
});
|
||||
|
||||
it("masks the full token when narrow no-break space is spliced into the middle", () => {
|
||||
const cmd = "echo sk-abc123\u202F456789012345678 remainder";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("sk-abc123");
|
||||
expect(result).not.toContain("456789012345678");
|
||||
expect(result).toContain("remainder");
|
||||
});
|
||||
|
||||
it("keeps newline boundaries visible as escape markers even when bypass is detected", () => {
|
||||
// Stripping invisibles lets the stripped-view greedy-match across the original newline
|
||||
// boundaries, so the trailing `line3` gets absorbed into the union mask alongside the
|
||||
// secret. The important guarantees are: (1) the secret is not visible, and (2) the
|
||||
// newlines that existed in the original are still visible as `\u{A}` escapes so the
|
||||
// operator is not misled about multi-line structure.
|
||||
const cmd = "line1\necho sk-abc123\u00A0456789012345678\nline3";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("sk-abc123");
|
||||
expect(result).not.toContain("456789012345678");
|
||||
expect(result).toContain("line1");
|
||||
expect(result).toContain("\\u{A}");
|
||||
});
|
||||
|
||||
it("detects bypass even when raw and stripped redactions happen to produce the same normalized length", () => {
|
||||
// Raw masks the 16-char prefix `sk-abc1234567890` as the fixed literal `***` while the
|
||||
// trailing 8 chars past the zero-width stay visible. The stripped view masks the full
|
||||
// 24-char token as `sk-abc…5678`. Both normalized outputs are the same length (11 chars),
|
||||
// so a length-based bypass check would falsely return the raw view and leak the tail.
|
||||
const cmd = "sk-abc1234567890\u200B12345678";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("12345678");
|
||||
expect(result).not.toContain("1234567890");
|
||||
});
|
||||
|
||||
it("does not leak bearer tokens when bypass is triggered by a separate spliced secret", () => {
|
||||
// Bearer+NBSP is caught by the raw view (NBSP matches \s in non-u JS regex) but stripping
|
||||
// removes NBSP, turning `Bearer<jwt>` into a pattern the bearer regex no longer matches.
|
||||
// A separate spliced-invisible token triggers bypass detection, and the union-mask output
|
||||
// must cover both the bearer span (from raw) and the spliced sk- span (from stripped).
|
||||
const cmd =
|
||||
'curl -H "Authorization: Bearer\u00A0eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.sig" https://api.example.com; echo sk-abc123\u200B456789012345678';
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.sig");
|
||||
expect(result).not.toContain("456789012345678");
|
||||
expect(result).toContain("https://api.example.com");
|
||||
});
|
||||
|
||||
it("keeps PEM private-key context visible when raw redaction already covers the key (not a bypass)", () => {
|
||||
const cmd =
|
||||
"echo -----BEGIN RSA PRIVATE KEY-----\nABCDEF0123456789abcdef\n-----END RSA PRIVATE KEY----- > key.pem";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("ABCDEF0123456789abcdef");
|
||||
expect(result).toContain("BEGIN RSA PRIVATE KEY");
|
||||
expect(result).toContain("END RSA PRIVATE KEY");
|
||||
expect(result).toContain("> key.pem");
|
||||
});
|
||||
|
||||
it("truncates the redacted output (not the raw input) so large commands are bounded", () => {
|
||||
const padding = "x".repeat(20 * 1024);
|
||||
const result = sanitizeExecApprovalDisplayText(padding);
|
||||
expect(result.length).toBeLessThan(padding.length);
|
||||
expect(result).toContain("[truncated]");
|
||||
});
|
||||
|
||||
it("refuses to display commands above the hard input cap", () => {
|
||||
const huge = "x".repeat(300 * 1024);
|
||||
const result = sanitizeExecApprovalDisplayText(huge);
|
||||
expect(result).toContain("exceeds display size limit");
|
||||
expect(result.length).toBeLessThan(1024);
|
||||
});
|
||||
|
||||
it("redacts tokens at the tail of long inputs instead of truncating them below pattern length", () => {
|
||||
// Pad with non-token content, then append a secret at the end. Truncating BEFORE redaction
|
||||
// would split the token below the pattern's minimum length and leak the prefix. With
|
||||
// redaction first, the full token is masked before any size-based truncation runs.
|
||||
const padding = "a ".repeat(10 * 1024);
|
||||
const cmd = padding + "ghp_1234567890abcdefghij1234567890abcdef";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("ghp_1234567890abcdefghij1234567890abcdef");
|
||||
expect(result).not.toContain("ghp_1234567890");
|
||||
});
|
||||
|
||||
it("escapes astral-plane invisible characters (e.g. U+E0061 tag characters)", () => {
|
||||
const cmd = "echo hi\u{E0061}there";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).toContain("\\u{E0061}");
|
||||
expect(result).not.toMatch(/hi[\uDB40\uDC61]there/u);
|
||||
});
|
||||
|
||||
it("masks a secret spliced with an astral-plane invisible character", () => {
|
||||
// U+E0061 is a Cf (format) code point in the supplementary plane. Iterating the input by
|
||||
// UTF-16 code unit would see two surrogate halves, neither of which matches \p{Cf}, so
|
||||
// the splice would survive stripping and the stripped-view redaction would miss the
|
||||
// full token. Code-point iteration strips it correctly and bypass detection fires.
|
||||
const cmd = "echo sk-abc123\u{E0061}456789012345678 remainder";
|
||||
const result = sanitizeExecApprovalDisplayText(cmd);
|
||||
expect(result).not.toContain("sk-abc123");
|
||||
expect(result).not.toContain("456789012345678");
|
||||
expect(result).toContain("remainder");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveExecApprovalCommandDisplay", () => {
|
||||
|
||||
@@ -1,16 +1,153 @@
|
||||
import { redactSensitiveText } from "../logging/redact.js";
|
||||
import { redactSensitiveText, resolveRedactOptions } from "../logging/redact.js";
|
||||
import type { ExecApprovalRequestPayload } from "./exec-approvals.js";
|
||||
|
||||
// Escape invisible characters that can spoof approval prompts in common UIs.
|
||||
const EXEC_APPROVAL_INVISIBLE_CHAR_REGEX = /[\p{Cf}\u115F\u1160\u3164\uFFA0]/gu;
|
||||
// Escape control characters, Unicode format/line/paragraph separators, and non-ASCII space
|
||||
// separators that can spoof approval prompts in common UIs. Ordinary ASCII space (U+0020) is
|
||||
// intentionally excluded so normal command text renders unchanged.
|
||||
const EXEC_APPROVAL_INVISIBLE_CHAR_REGEX =
|
||||
/[\p{Cc}\p{Cf}\p{Zl}\p{Zp}\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\u115F\u1160\u3164\uFFA0]/gu;
|
||||
const EXEC_APPROVAL_INVISIBLE_CHAR_SINGLE =
|
||||
/^[\p{Cc}\p{Cf}\p{Zl}\p{Zp}\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\u115F\u1160\u3164\uFFA0]$/u;
|
||||
|
||||
// Hard cap on input the sanitizer will process at all. Above this size we return a constant
|
||||
// marker without running any regex work, so an attacker cannot force unbounded CPU/memory.
|
||||
const EXEC_APPROVAL_MAX_INPUT = 256 * 1024;
|
||||
// Soft cap on displayed output. Truncation happens AFTER redaction so a secret near the
|
||||
// cutoff is not partially exposed when the cut lands mid-token below a pattern's minimum
|
||||
// length (e.g. `ghp_` needs 20+ trailing chars before the `\b` match).
|
||||
const EXEC_APPROVAL_MAX_OUTPUT = 16 * 1024;
|
||||
const EXEC_APPROVAL_TRUNCATION_MARKER = "…[truncated]";
|
||||
const EXEC_APPROVAL_OVERSIZED_MARKER =
|
||||
"[exec approval command exceeds display size limit; full text suppressed]";
|
||||
|
||||
const BYPASS_MASK = "***";
|
||||
|
||||
function formatCodePointEscape(char: string): string {
|
||||
return `\\u{${char.codePointAt(0)?.toString(16).toUpperCase() ?? "FFFD"}}`;
|
||||
}
|
||||
|
||||
function escapeInvisibles(text: string): string {
|
||||
return text.replace(EXEC_APPROVAL_INVISIBLE_CHAR_REGEX, formatCodePointEscape);
|
||||
}
|
||||
|
||||
function truncateForDisplay(text: string): string {
|
||||
if (text.length <= EXEC_APPROVAL_MAX_OUTPUT) {
|
||||
return text;
|
||||
}
|
||||
return text.slice(0, EXEC_APPROVAL_MAX_OUTPUT) + EXEC_APPROVAL_TRUNCATION_MARKER;
|
||||
}
|
||||
|
||||
// Build a boolean bitmap of positions in `text` that ANY redaction pattern would match.
|
||||
// Patterns are applied independently to the raw text (not sequentially against a
|
||||
// progressively-redacted view) so later patterns can still find matches that the in-place
|
||||
// redaction would have replaced first. That is conservative — it may over-count overlapping
|
||||
// matches — but that is acceptable for a coverage check. Indices are UTF-16 code-unit
|
||||
// offsets, matching what `matchAll` returns and aligning with `String#length`.
|
||||
function computeRedactionBitmap(text: string, patterns: RegExp[]): boolean[] {
|
||||
const bitmap: boolean[] = Array.from({ length: text.length }, () => false);
|
||||
for (const pattern of patterns) {
|
||||
const iter = pattern.flags.includes("g")
|
||||
? new RegExp(pattern.source, pattern.flags)
|
||||
: new RegExp(pattern.source, `${pattern.flags}g`);
|
||||
for (const match of text.matchAll(iter)) {
|
||||
if (match.index === undefined) {
|
||||
continue;
|
||||
}
|
||||
const end = match.index + match[0].length;
|
||||
for (let i = match.index; i < end; i++) {
|
||||
bitmap[i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
// Iterate by full Unicode code point so astral-plane invisibles (e.g. U+E0061 TAG LATIN
|
||||
// SMALL LETTER A, category Cf) are matched as single characters instead of being seen as a
|
||||
// surrogate pair whose halves are category Cs and would escape the invisible-char regex.
|
||||
function buildStrippedView(original: string): { stripped: string; strippedToOrig: number[] } {
|
||||
const strippedChars: string[] = [];
|
||||
const strippedToOrig: number[] = [];
|
||||
let offset = 0;
|
||||
for (const cp of original) {
|
||||
if (!EXEC_APPROVAL_INVISIBLE_CHAR_SINGLE.test(cp)) {
|
||||
strippedChars.push(cp);
|
||||
for (let k = 0; k < cp.length; k++) {
|
||||
strippedToOrig.push(offset + k);
|
||||
}
|
||||
}
|
||||
offset += cp.length;
|
||||
}
|
||||
return { stripped: strippedChars.join(""), strippedToOrig };
|
||||
}
|
||||
|
||||
export function sanitizeExecApprovalDisplayText(commandText: string): string {
|
||||
const escaped = commandText.replace(EXEC_APPROVAL_INVISIBLE_CHAR_REGEX, formatCodePointEscape);
|
||||
return redactSensitiveText(escaped, { mode: "tools" });
|
||||
if (commandText.length > EXEC_APPROVAL_MAX_INPUT) {
|
||||
// Refuse to display inputs above the hard cap; anything larger must be approved through
|
||||
// another channel. Running redaction on a multi-megabyte payload would be a DoS vector.
|
||||
return EXEC_APPROVAL_OVERSIZED_MARKER;
|
||||
}
|
||||
const rawRedacted = redactSensitiveText(commandText, { mode: "tools" });
|
||||
const { stripped, strippedToOrig } = buildStrippedView(commandText);
|
||||
const strippedRedacted = redactSensitiveText(stripped, { mode: "tools" });
|
||||
// Fast path: stripping invisibles did not expose any additional secret-like content, so the
|
||||
// raw-view redaction is sufficient. Preserve structure and show invisible-character spoof
|
||||
// attempts as `\u{...}` escapes.
|
||||
if (strippedRedacted === stripped) {
|
||||
return truncateForDisplay(escapeInvisibles(rawRedacted));
|
||||
}
|
||||
// Detect bypass by position-bitmap coverage. Run each redaction pattern independently on
|
||||
// both views and map stripped-view match positions back to original coordinates. If every
|
||||
// position the stripped view would mask is also masked by the raw view, the raw view
|
||||
// already covered everything — for example, an ordinary multi-line PEM private key where
|
||||
// raw produces `BEGIN/…redacted…/END` while stripped collapses to `***`. A real bypass
|
||||
// exists only when the stripped view masks at least one original position raw missed (e.g.
|
||||
// the tail of an `sk-` token whose prefix-boundary was broken by a spliced zero-width or
|
||||
// NBSP character).
|
||||
const { patterns } = resolveRedactOptions({ mode: "tools" });
|
||||
const rawMask = computeRedactionBitmap(commandText, patterns);
|
||||
const strippedMask = computeRedactionBitmap(stripped, patterns);
|
||||
let bypassDetected = false;
|
||||
for (let i = 0; i < strippedMask.length; i++) {
|
||||
if (strippedMask[i] && !rawMask[strippedToOrig[i]]) {
|
||||
bypassDetected = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!bypassDetected) {
|
||||
return truncateForDisplay(escapeInvisibles(rawRedacted));
|
||||
}
|
||||
// Bypass path. Project the stripped-view mask back onto original positions, union with the
|
||||
// raw-view mask, and emit a rendering where each contiguous masked run becomes a single
|
||||
// `***` marker. Invisible characters that fall outside masked runs still render as visible
|
||||
// `\u{...}` escapes so multi-line structure and spliced invisibles stay readable. The
|
||||
// render loop advances by full code point so astral-plane invisibles are escaped as one
|
||||
// `\u{...}` token rather than two separate surrogate escapes (or, worse, passed through
|
||||
// unescaped because neither surrogate half matches the Cf regex).
|
||||
const unionMask = rawMask.slice();
|
||||
for (let i = 0; i < strippedMask.length; i++) {
|
||||
if (strippedMask[i]) {
|
||||
unionMask[strippedToOrig[i]] = true;
|
||||
}
|
||||
}
|
||||
let out = "";
|
||||
let i = 0;
|
||||
while (i < commandText.length) {
|
||||
if (unionMask[i]) {
|
||||
let j = i;
|
||||
while (j < commandText.length && unionMask[j]) {
|
||||
j++;
|
||||
}
|
||||
out += BYPASS_MASK;
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
const codePoint = commandText.codePointAt(i) ?? 0xfffd;
|
||||
const cp = String.fromCodePoint(codePoint);
|
||||
out += EXEC_APPROVAL_INVISIBLE_CHAR_SINGLE.test(cp) ? formatCodePointEscape(cp) : cp;
|
||||
i += cp.length;
|
||||
}
|
||||
return truncateForDisplay(out);
|
||||
}
|
||||
|
||||
function normalizePreview(commandText: string, commandPreview?: string | null): string | null {
|
||||
|
||||
@@ -565,7 +565,7 @@ describe("exec approval forwarder", () => {
|
||||
},
|
||||
{
|
||||
command: "echo `uname`\necho done",
|
||||
expectedText: "```\necho `uname`\necho done\n```",
|
||||
expectedText: "```\necho `uname`\\u{A}echo done\n```",
|
||||
},
|
||||
{
|
||||
command: "echo ```danger```",
|
||||
|
||||
Reference in New Issue
Block a user