From 8b5d73e47c10fee8d4b3ec849dcff4a7ae1de8af Mon Sep 17 00:00:00 2001 From: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Date: Sun, 3 May 2026 03:49:33 +0000 Subject: [PATCH] Repair shell command explainer automerge blockers --- CHANGELOG.md | 2 +- knip.config.ts | 1 + src/infra/command-explainer/extract.test.ts | 85 +++++++- src/infra/command-explainer/extract.ts | 200 ++++++++++++++---- .../command-explainer/tree-sitter-runtime.ts | 40 +++- 5 files changed, 266 insertions(+), 62 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 262647fc275..430439bd83e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ Docs: https://docs.openclaw.ai - Discord/status: add degraded Discord transport and gateway event-loop starvation signals to `openclaw channels status`, `openclaw status --deep`, and fetch-timeout logs so intermittent socket resets do not look like a healthy running channel. (#76327) Thanks @joshavant. - Plugins/update: on the beta OpenClaw update channel, default-line npm and ClawHub plugin updates try `@beta` first and fall back to default/latest when no plugin beta release exists. - Channels/WhatsApp: support explicit WhatsApp Channel/Newsletter `@newsletter` outbound message targets with channel session metadata instead of DM routing. Fixes #13417; carries forward the narrow outbound target idea from #13424. Thanks @vincentkoc and @agentz-manfred. -- Internals: add a tree-sitter-backed shell command explainer for future approval and command-review surfaces. (#75004) Thanks @jesse-merhi. +- Exec approvals: add a tree-sitter-backed shell command explainer for future approval and command-review surfaces. (#75004) Thanks @jesse-merhi. ### Fixes diff --git a/knip.config.ts b/knip.config.ts index 5679fcb88f3..6cb6bb2eea0 100644 --- a/knip.config.ts +++ b/knip.config.ts @@ -10,6 +10,7 @@ const rootEntries = [ "src/entry.ts!", "src/cli/daemon-cli.ts!", "src/infra/warning-filter.ts!", + "src/infra/command-explainer/index.ts!", bundledPluginFile("telegram", "src/audit.ts", "!"), bundledPluginFile("telegram", "src/token.ts", "!"), "src/hooks/bundled/*/handler.ts!", diff --git a/src/infra/command-explainer/extract.test.ts b/src/infra/command-explainer/extract.test.ts index 34606dba02f..9161d2ed0a3 100644 --- a/src/infra/command-explainer/extract.test.ts +++ b/src/infra/command-explainer/extract.test.ts @@ -1,6 +1,26 @@ -import { describe, expect, it } from "vitest"; +import { afterEach, describe, expect, it } from "vitest"; +import type { Parser } from "web-tree-sitter"; import { explainShellCommand } from "./extract.js"; -import { parseBashForCommandExplanation } from "./tree-sitter-runtime.js"; +import { + getBashParserForCommandExplanation, + parseBashForCommandExplanation, + resolvePackageFileForCommandExplanation, + setBashParserLoaderForCommandExplanationForTest, +} from "./tree-sitter-runtime.js"; + +let parserLoaderOverridden = false; + +function setParserLoaderForTest(loader: () => Promise): void { + parserLoaderOverridden = true; + setBashParserLoaderForCommandExplanationForTest(loader); +} + +afterEach(() => { + if (parserLoaderOverridden) { + setBashParserLoaderForCommandExplanationForTest(); + parserLoaderOverridden = false; + } +}); describe("command explainer tree-sitter runtime", () => { it("loads tree-sitter bash and parses a simple command", async () => { @@ -20,6 +40,37 @@ describe("command explainer tree-sitter runtime", () => { ); }); + it("retries parser initialization after a loader rejection", async () => { + const parser = {} as Parser; + let calls = 0; + setParserLoaderForTest(async () => { + calls += 1; + if (calls === 1) { + throw new Error("transient parser load failure"); + } + return parser; + }); + + await expect(getBashParserForCommandExplanation()).rejects.toThrow( + "transient parser load failure", + ); + await expect(getBashParserForCommandExplanation()).resolves.toBe(parser); + expect(calls).toBe(2); + }); + + it("reports missing parser packages and wasm files with explainer context", () => { + expect(() => + resolvePackageFileForCommandExplanation( + "definitely-missing-openclaw-parser-package", + "parser.wasm", + ), + ).toThrow("Unable to resolve definitely-missing-openclaw-parser-package"); + + expect(() => + resolvePackageFileForCommandExplanation("web-tree-sitter", "missing-openclaw-parser.wasm"), + ).toThrow("Unable to locate missing-openclaw-parser.wasm in web-tree-sitter"); + }); + it("explains a pipeline with python inline eval", async () => { const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\''); @@ -255,6 +306,26 @@ describe("command explainer tree-sitter runtime", () => { } }); + it("maps decoded shell-wrapper payload spans back to original source escapes", async () => { + const explanation = await explainShellCommand('bash -lc "printf \\"hi\\" | wc -c"'); + + const wrappedPrintf = explanation.nestedCommands.find((step) => step.executable === "printf"); + const wrappedWc = explanation.nestedCommands.find((step) => step.executable === "wc"); + + expect(wrappedPrintf).toEqual( + expect.objectContaining({ + context: "wrapper-payload", + text: 'printf "hi"', + }), + ); + expect( + explanation.source.slice(wrappedPrintf?.span.startIndex, wrappedPrintf?.span.endIndex), + ).toBe('printf \\"hi\\"'); + expect(explanation.source.slice(wrappedWc?.span.startIndex, wrappedWc?.span.endIndex)).toBe( + "wc -c", + ); + }); + it("normalizes static shell words before classifying commands", async () => { const quotedCommand = await explainShellCommand("e'c'ho a\\ b \"c d\""); expect(quotedCommand.topLevelCommands).toEqual([ @@ -464,7 +535,7 @@ describe("command explainer tree-sitter runtime", () => { ); }); - it("parses and extracts a small approval-sized corpus quickly", async () => { + it("parses and extracts a repeated approval-sized corpus without parser state leakage", async () => { const corpus = [ 'ls | grep "stuff" | python -c \'print("hi")\'', "echo $(whoami)", @@ -472,14 +543,12 @@ describe("command explainer tree-sitter runtime", () => { 'find . -name "*.ts" -exec grep -n TODO {} +', 'bash -lc "echo hi | wc -c"', ]; - const iterations = 100; - const start = performance.now(); + const iterations = 10; for (let index = 0; index < iterations; index += 1) { for (const command of corpus) { - await explainShellCommand(command); + const explanation = await explainShellCommand(command); + expect(explanation.risks.length + explanation.topLevelCommands.length).toBeGreaterThan(0); } } - const elapsedMs = performance.now() - start; - expect(elapsedMs / (iterations * corpus.length)).toBeLessThan(20); }); }); diff --git a/src/infra/command-explainer/extract.ts b/src/infra/command-explainer/extract.ts index b838616c6af..239a5afc57e 100644 --- a/src/infra/command-explainer/extract.ts +++ b/src/infra/command-explainer/extract.ts @@ -37,8 +37,7 @@ type CommandArgument = { text: string; value: string; span: SourceSpan; - valueStartIndex: number; - valueStartPosition: SourceSpan["startPosition"]; + decodedSourceOffsets: number[]; }; type CommandArgv = { @@ -61,6 +60,7 @@ const SOURCE_EXECUTABLES = new Set([".", "source"]); type SpanBase = { startIndex: number; startPosition: SourceSpan["startPosition"]; + mapOffset?: (offset: number) => { index: number; position: SourceSpan["startPosition"] }; }; const ROOT_SPAN_BASE: SpanBase = { @@ -95,6 +95,16 @@ function translatePosition( } function translateSpan(span: SourceSpan, base: SpanBase): SourceSpan { + if (base.mapOffset) { + const start = base.mapOffset(span.startIndex); + const end = base.mapOffset(span.endIndex); + return { + startIndex: start.index, + endIndex: end.index, + startPosition: start.position, + endPosition: end.position, + }; + } return { startIndex: base.startIndex + span.startIndex, endIndex: base.startIndex + span.endIndex, @@ -149,6 +159,52 @@ function valuePrefixLength(node: TreeSitterNode): number { return 0; } +type DecodedShellText = { + value: string; + sourceOffsets: number[]; +}; + +function appendDecodedText( + decoded: DecodedShellText, + value: string, + sourceEndOffset: number, +): void { + decoded.value += value; + for (let index = 0; index < value.length; index += 1) { + decoded.sourceOffsets.push(sourceEndOffset); + } +} + +function identityDecodedShellText(text: string, sourceOffset = 0): DecodedShellText { + return { + value: text, + sourceOffsets: Array.from({ length: text.length + 1 }, (_, index) => sourceOffset + index), + }; +} + +function decodedSourceOffsetsForNode(node: TreeSitterNode, value: string): number[] { + let decoded: DecodedShellText; + switch (node.type) { + case "raw_string": + decoded = identityDecodedShellText(node.text.slice(1, -1), 1); + break; + case "string": + decoded = decodeDoubleQuotedTextWithOffsets(node.text); + break; + case "ansi_c_string": + decoded = decodeAnsiCStringWithOffsets(node.text); + break; + default: + decoded = decodeUnquotedShellTextWithOffsets(node.text); + break; + } + if (decoded.value === value && decoded.sourceOffsets.length === value.length + 1) { + return decoded.sourceOffsets; + } + const prefixLength = valuePrefixLength(node); + return Array.from({ length: value.length + 1 }, (_, index) => prefixLength + index); +} + function argumentFromNode( index: number, node: TreeSitterNode, @@ -156,14 +212,13 @@ function argumentFromNode( base: SpanBase, ): CommandArgument { const span = spanFromNode(node, base); - const prefixLength = valuePrefixLength(node); + const decodedSourceOffsets = decodedSourceOffsetsForNode(node, value.value); return { index, text: node.text, value: value.value, span, - valueStartIndex: span.startIndex + prefixLength, - valueStartPosition: advancePosition(span.startPosition, node.text.slice(0, prefixLength)), + decodedSourceOffsets, }; } @@ -219,87 +274,107 @@ function hasUnescapedDynamicPattern(text: string): boolean { return false; } -function decodeUnquotedShellText(text: string): string { - let output = ""; +function decodeUnquotedShellTextWithOffsets(text: string): DecodedShellText { + const decoded: DecodedShellText = { value: "", sourceOffsets: [0] }; for (let index = 0; index < text.length; index += 1) { const ch = text[index]; const next = text[index + 1]; if (ch === "\\" && next !== undefined) { if (next === "\r" && text[index + 2] === "\n") { + decoded.sourceOffsets[decoded.value.length] = index + 3; index += 2; continue; } if (next === "\n" || next === "\r") { + decoded.sourceOffsets[decoded.value.length] = index + 2; index += 1; continue; } - output += next; + appendDecodedText(decoded, next, index + 2); index += 1; continue; } - output += ch; + appendDecodedText(decoded, ch, index + 1); } - return output; + return decoded; } -function decodeDoubleQuotedText(text: string): string { - const body = text.startsWith('"') && text.endsWith('"') ? text.slice(1, -1) : text; - let output = ""; +function decodeUnquotedShellText(text: string): string { + return decodeUnquotedShellTextWithOffsets(text).value; +} + +function decodeDoubleQuotedTextWithOffsets(text: string): DecodedShellText { + const hasQuotes = text.startsWith('"') && text.endsWith('"'); + const bodyStart = hasQuotes ? 1 : 0; + const body = hasQuotes ? text.slice(1, -1) : text; + const decoded: DecodedShellText = { value: "", sourceOffsets: [bodyStart] }; for (let index = 0; index < body.length; index += 1) { const ch = body[index]; const next = body[index + 1]; + const sourceOffset = bodyStart + index; if (ch === "\\" && next !== undefined) { if (next === "\r" && body[index + 2] === "\n") { + decoded.sourceOffsets[decoded.value.length] = sourceOffset + 3; index += 2; continue; } if (["\\", '"', "$", "`", "\n", "\r"].includes(next)) { if (next !== "\n" && next !== "\r") { - output += next; + appendDecodedText(decoded, next, sourceOffset + 2); + } else { + decoded.sourceOffsets[decoded.value.length] = sourceOffset + 2; } index += 1; continue; } } - output += ch; + appendDecodedText(decoded, ch, sourceOffset + 1); } - return output; + return decoded; } -function decodeAnsiCString(text: string): string { - const body = text.startsWith("$'") && text.endsWith("'") ? text.slice(2, -1) : text; - let output = ""; +function decodeDoubleQuotedText(text: string): string { + return decodeDoubleQuotedTextWithOffsets(text).value; +} + +const ANSI_C_SIMPLE_ESCAPES: Record = { + "'": "'", + '"': '"', + "?": "?", + "\\": "\\", + a: "\u0007", + b: "\b", + e: "\u001B", + E: "\u001B", + f: "\f", + n: "\n", + r: "\r", + t: "\t", + v: "\v", +}; + +function decodeAnsiCStringWithOffsets(text: string): DecodedShellText { + const hasQuotes = text.startsWith("$'") && text.endsWith("'"); + const bodyStart = hasQuotes ? 2 : 0; + const body = hasQuotes ? text.slice(2, -1) : text; + const decoded: DecodedShellText = { value: "", sourceOffsets: [bodyStart] }; for (let index = 0; index < body.length; index += 1) { const ch = body[index]; + const sourceOffset = bodyStart + index; if (ch !== "\\") { - output += ch; + appendDecodedText(decoded, ch, sourceOffset + 1); continue; } const next = body[index + 1]; if (next === undefined) { - output += "\\"; + appendDecodedText(decoded, "\\", sourceOffset + 1); continue; } - const simpleEscapes: Record = { - "'": "'", - '"': '"', - "?": "?", - "\\": "\\", - a: "\u0007", - b: "\b", - e: "\u001B", - E: "\u001B", - f: "\f", - n: "\n", - r: "\r", - t: "\t", - v: "\v", - }; - const simple = simpleEscapes[next]; + const simple = ANSI_C_SIMPLE_ESCAPES[next]; if (simple !== undefined) { - output += simple; + appendDecodedText(decoded, simple, sourceOffset + 2); index += 1; continue; } @@ -307,7 +382,11 @@ function decodeAnsiCString(text: string): string { if (next === "x") { const hex = body.slice(index + 2).match(/^[0-9A-Fa-f]{1,2}/)?.[0] ?? ""; if (hex) { - output += String.fromCodePoint(Number.parseInt(hex, 16)); + appendDecodedText( + decoded, + String.fromCodePoint(Number.parseInt(hex, 16)), + sourceOffset + 2 + hex.length, + ); index += 1 + hex.length; continue; } @@ -320,9 +399,13 @@ function decodeAnsiCString(text: string): string { if (hex) { const codePoint = Number.parseInt(hex, 16); try { - output += String.fromCodePoint(codePoint); + appendDecodedText( + decoded, + String.fromCodePoint(codePoint), + sourceOffset + 2 + hex.length, + ); } catch { - output += `\\${next}${hex}`; + appendDecodedText(decoded, `\\${next}${hex}`, sourceOffset + 2 + hex.length); } index += 1 + hex.length; continue; @@ -332,16 +415,24 @@ function decodeAnsiCString(text: string): string { if (/^[0-7]$/.test(next)) { const octal = body.slice(index + 1).match(/^[0-7]{1,3}/)?.[0] ?? ""; if (octal) { - output += String.fromCodePoint(Number.parseInt(octal, 8)); + appendDecodedText( + decoded, + String.fromCodePoint(Number.parseInt(octal, 8)), + sourceOffset + 1 + octal.length, + ); index += octal.length; continue; } } - output += next; + appendDecodedText(decoded, next, sourceOffset + 2); index += 1; } - return output; + return decoded; +} + +function decodeAnsiCString(text: string): string { + return decodeAnsiCStringWithOffsets(text).value; } function hasDynamicWordPart(node: TreeSitterNode): boolean { @@ -650,10 +741,25 @@ function payloadBaseFromArgument(argument: CommandArgument, payload: string): Sp if (payloadOffset < 0) { return null; } - const prefix = argument.value.slice(0, payloadOffset); + const rawPayloadOffset = argument.decodedSourceOffsets[payloadOffset]; + if (rawPayloadOffset === undefined) { + return null; + } + const prefix = argument.text.slice(0, rawPayloadOffset); return { - startIndex: argument.valueStartIndex + payloadOffset, - startPosition: advancePosition(argument.valueStartPosition, prefix), + startIndex: argument.span.startIndex + rawPayloadOffset, + startPosition: advancePosition(argument.span.startPosition, prefix), + mapOffset(offset) { + const rawOffset = argument.decodedSourceOffsets[payloadOffset + offset]; + const mappedRawOffset = rawOffset ?? rawPayloadOffset + offset; + return { + index: argument.span.startIndex + mappedRawOffset, + position: advancePosition( + argument.span.startPosition, + argument.text.slice(0, mappedRawOffset), + ), + }; + }, }; } diff --git a/src/infra/command-explainer/tree-sitter-runtime.ts b/src/infra/command-explainer/tree-sitter-runtime.ts index 84ec2214dc2..140ab9ec158 100644 --- a/src/infra/command-explainer/tree-sitter-runtime.ts +++ b/src/infra/command-explainer/tree-sitter-runtime.ts @@ -6,13 +6,29 @@ import * as TreeSitter from "web-tree-sitter"; const require = createRequire(import.meta.url); let parserPromise: Promise | null = null; +let parserLoader: () => Promise = loadParser; const MAX_COMMAND_EXPLANATION_SOURCE_CHARS = 128 * 1024; const MAX_COMMAND_EXPLANATION_PARSE_MS = 500; -function resolvePackageFile(packageName: string, fileName: string): string { - let directory = path.dirname(require.resolve(packageName)); +export function resolvePackageFileForCommandExplanation( + packageName: string, + fileName: string, +): string { + let packageEntry: string; + try { + packageEntry = require.resolve(packageName); + } catch (error) { + throw new Error( + `Unable to resolve ${packageName} while loading the shell command explainer parser`, + { cause: error }, + ); + } + + let directory = path.dirname(packageEntry); + const searched: string[] = []; for (let depth = 0; depth < 5; depth += 1) { const candidate = path.join(directory, fileName); + searched.push(candidate); if (fs.existsSync(candidate)) { return candidate; } @@ -22,15 +38,17 @@ function resolvePackageFile(packageName: string, fileName: string): string { } directory = parent; } - return path.join(path.dirname(require.resolve(packageName)), fileName); + throw new Error( + `Unable to locate ${fileName} in ${packageName} while loading the shell command explainer parser; searched ${searched.join(", ")}`, + ); } function resolveWebTreeSitterFile(fileName: string): string { - return resolvePackageFile("web-tree-sitter", fileName); + return resolvePackageFileForCommandExplanation("web-tree-sitter", fileName); } function resolveBashWasmPath(): string { - return resolvePackageFile("tree-sitter-bash", "tree-sitter-bash.wasm"); + return resolvePackageFileForCommandExplanation("tree-sitter-bash", "tree-sitter-bash.wasm"); } async function loadParser(): Promise { @@ -44,10 +62,20 @@ async function loadParser(): Promise { } export function getBashParserForCommandExplanation(): Promise { - parserPromise ??= loadParser(); + parserPromise ??= parserLoader().catch((error: unknown) => { + parserPromise = null; + throw error; + }); return parserPromise; } +export function setBashParserLoaderForCommandExplanationForTest( + loader?: () => Promise, +): void { + parserPromise = null; + parserLoader = loader ?? loadParser; +} + /** * Low-level parser access for tests and parser diagnostics. * Callers own the returned Tree and must call tree.delete().