Repair shell command explainer automerge blockers

This commit is contained in:
clawsweeper
2026-05-03 03:49:33 +00:00
parent 30533f84d6
commit 8b5d73e47c
5 changed files with 266 additions and 62 deletions

View File

@@ -15,7 +15,7 @@ Docs: https://docs.openclaw.ai
- Discord/status: add degraded Discord transport and gateway event-loop starvation signals to `openclaw channels status`, `openclaw status --deep`, and fetch-timeout logs so intermittent socket resets do not look like a healthy running channel. (#76327) Thanks @joshavant.
- Plugins/update: on the beta OpenClaw update channel, default-line npm and ClawHub plugin updates try `@beta` first and fall back to default/latest when no plugin beta release exists.
- Channels/WhatsApp: support explicit WhatsApp Channel/Newsletter `@newsletter` outbound message targets with channel session metadata instead of DM routing. Fixes #13417; carries forward the narrow outbound target idea from #13424. Thanks @vincentkoc and @agentz-manfred.
- Internals: add a tree-sitter-backed shell command explainer for future approval and command-review surfaces. (#75004) Thanks @jesse-merhi.
- Exec approvals: add a tree-sitter-backed shell command explainer for future approval and command-review surfaces. (#75004) Thanks @jesse-merhi.
### Fixes

View File

@@ -10,6 +10,7 @@ const rootEntries = [
"src/entry.ts!",
"src/cli/daemon-cli.ts!",
"src/infra/warning-filter.ts!",
"src/infra/command-explainer/index.ts!",
bundledPluginFile("telegram", "src/audit.ts", "!"),
bundledPluginFile("telegram", "src/token.ts", "!"),
"src/hooks/bundled/*/handler.ts!",

View File

@@ -1,6 +1,26 @@
import { describe, expect, it } from "vitest";
import { afterEach, describe, expect, it } from "vitest";
import type { Parser } from "web-tree-sitter";
import { explainShellCommand } from "./extract.js";
import { parseBashForCommandExplanation } from "./tree-sitter-runtime.js";
import {
getBashParserForCommandExplanation,
parseBashForCommandExplanation,
resolvePackageFileForCommandExplanation,
setBashParserLoaderForCommandExplanationForTest,
} from "./tree-sitter-runtime.js";
let parserLoaderOverridden = false;
function setParserLoaderForTest(loader: () => Promise<Parser>): void {
parserLoaderOverridden = true;
setBashParserLoaderForCommandExplanationForTest(loader);
}
afterEach(() => {
if (parserLoaderOverridden) {
setBashParserLoaderForCommandExplanationForTest();
parserLoaderOverridden = false;
}
});
describe("command explainer tree-sitter runtime", () => {
it("loads tree-sitter bash and parses a simple command", async () => {
@@ -20,6 +40,37 @@ describe("command explainer tree-sitter runtime", () => {
);
});
it("retries parser initialization after a loader rejection", async () => {
const parser = {} as Parser;
let calls = 0;
setParserLoaderForTest(async () => {
calls += 1;
if (calls === 1) {
throw new Error("transient parser load failure");
}
return parser;
});
await expect(getBashParserForCommandExplanation()).rejects.toThrow(
"transient parser load failure",
);
await expect(getBashParserForCommandExplanation()).resolves.toBe(parser);
expect(calls).toBe(2);
});
it("reports missing parser packages and wasm files with explainer context", () => {
expect(() =>
resolvePackageFileForCommandExplanation(
"definitely-missing-openclaw-parser-package",
"parser.wasm",
),
).toThrow("Unable to resolve definitely-missing-openclaw-parser-package");
expect(() =>
resolvePackageFileForCommandExplanation("web-tree-sitter", "missing-openclaw-parser.wasm"),
).toThrow("Unable to locate missing-openclaw-parser.wasm in web-tree-sitter");
});
it("explains a pipeline with python inline eval", async () => {
const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\'');
@@ -255,6 +306,26 @@ describe("command explainer tree-sitter runtime", () => {
}
});
it("maps decoded shell-wrapper payload spans back to original source escapes", async () => {
const explanation = await explainShellCommand('bash -lc "printf \\"hi\\" | wc -c"');
const wrappedPrintf = explanation.nestedCommands.find((step) => step.executable === "printf");
const wrappedWc = explanation.nestedCommands.find((step) => step.executable === "wc");
expect(wrappedPrintf).toEqual(
expect.objectContaining({
context: "wrapper-payload",
text: 'printf "hi"',
}),
);
expect(
explanation.source.slice(wrappedPrintf?.span.startIndex, wrappedPrintf?.span.endIndex),
).toBe('printf \\"hi\\"');
expect(explanation.source.slice(wrappedWc?.span.startIndex, wrappedWc?.span.endIndex)).toBe(
"wc -c",
);
});
it("normalizes static shell words before classifying commands", async () => {
const quotedCommand = await explainShellCommand("e'c'ho a\\ b \"c d\"");
expect(quotedCommand.topLevelCommands).toEqual([
@@ -464,7 +535,7 @@ describe("command explainer tree-sitter runtime", () => {
);
});
it("parses and extracts a small approval-sized corpus quickly", async () => {
it("parses and extracts a repeated approval-sized corpus without parser state leakage", async () => {
const corpus = [
'ls | grep "stuff" | python -c \'print("hi")\'',
"echo $(whoami)",
@@ -472,14 +543,12 @@ describe("command explainer tree-sitter runtime", () => {
'find . -name "*.ts" -exec grep -n TODO {} +',
'bash -lc "echo hi | wc -c"',
];
const iterations = 100;
const start = performance.now();
const iterations = 10;
for (let index = 0; index < iterations; index += 1) {
for (const command of corpus) {
await explainShellCommand(command);
const explanation = await explainShellCommand(command);
expect(explanation.risks.length + explanation.topLevelCommands.length).toBeGreaterThan(0);
}
}
const elapsedMs = performance.now() - start;
expect(elapsedMs / (iterations * corpus.length)).toBeLessThan(20);
});
});

View File

@@ -37,8 +37,7 @@ type CommandArgument = {
text: string;
value: string;
span: SourceSpan;
valueStartIndex: number;
valueStartPosition: SourceSpan["startPosition"];
decodedSourceOffsets: number[];
};
type CommandArgv = {
@@ -61,6 +60,7 @@ const SOURCE_EXECUTABLES = new Set([".", "source"]);
type SpanBase = {
startIndex: number;
startPosition: SourceSpan["startPosition"];
mapOffset?: (offset: number) => { index: number; position: SourceSpan["startPosition"] };
};
const ROOT_SPAN_BASE: SpanBase = {
@@ -95,6 +95,16 @@ function translatePosition(
}
function translateSpan(span: SourceSpan, base: SpanBase): SourceSpan {
if (base.mapOffset) {
const start = base.mapOffset(span.startIndex);
const end = base.mapOffset(span.endIndex);
return {
startIndex: start.index,
endIndex: end.index,
startPosition: start.position,
endPosition: end.position,
};
}
return {
startIndex: base.startIndex + span.startIndex,
endIndex: base.startIndex + span.endIndex,
@@ -149,6 +159,52 @@ function valuePrefixLength(node: TreeSitterNode): number {
return 0;
}
type DecodedShellText = {
value: string;
sourceOffsets: number[];
};
function appendDecodedText(
decoded: DecodedShellText,
value: string,
sourceEndOffset: number,
): void {
decoded.value += value;
for (let index = 0; index < value.length; index += 1) {
decoded.sourceOffsets.push(sourceEndOffset);
}
}
function identityDecodedShellText(text: string, sourceOffset = 0): DecodedShellText {
return {
value: text,
sourceOffsets: Array.from({ length: text.length + 1 }, (_, index) => sourceOffset + index),
};
}
function decodedSourceOffsetsForNode(node: TreeSitterNode, value: string): number[] {
let decoded: DecodedShellText;
switch (node.type) {
case "raw_string":
decoded = identityDecodedShellText(node.text.slice(1, -1), 1);
break;
case "string":
decoded = decodeDoubleQuotedTextWithOffsets(node.text);
break;
case "ansi_c_string":
decoded = decodeAnsiCStringWithOffsets(node.text);
break;
default:
decoded = decodeUnquotedShellTextWithOffsets(node.text);
break;
}
if (decoded.value === value && decoded.sourceOffsets.length === value.length + 1) {
return decoded.sourceOffsets;
}
const prefixLength = valuePrefixLength(node);
return Array.from({ length: value.length + 1 }, (_, index) => prefixLength + index);
}
function argumentFromNode(
index: number,
node: TreeSitterNode,
@@ -156,14 +212,13 @@ function argumentFromNode(
base: SpanBase,
): CommandArgument {
const span = spanFromNode(node, base);
const prefixLength = valuePrefixLength(node);
const decodedSourceOffsets = decodedSourceOffsetsForNode(node, value.value);
return {
index,
text: node.text,
value: value.value,
span,
valueStartIndex: span.startIndex + prefixLength,
valueStartPosition: advancePosition(span.startPosition, node.text.slice(0, prefixLength)),
decodedSourceOffsets,
};
}
@@ -219,87 +274,107 @@ function hasUnescapedDynamicPattern(text: string): boolean {
return false;
}
function decodeUnquotedShellText(text: string): string {
let output = "";
function decodeUnquotedShellTextWithOffsets(text: string): DecodedShellText {
const decoded: DecodedShellText = { value: "", sourceOffsets: [0] };
for (let index = 0; index < text.length; index += 1) {
const ch = text[index];
const next = text[index + 1];
if (ch === "\\" && next !== undefined) {
if (next === "\r" && text[index + 2] === "\n") {
decoded.sourceOffsets[decoded.value.length] = index + 3;
index += 2;
continue;
}
if (next === "\n" || next === "\r") {
decoded.sourceOffsets[decoded.value.length] = index + 2;
index += 1;
continue;
}
output += next;
appendDecodedText(decoded, next, index + 2);
index += 1;
continue;
}
output += ch;
appendDecodedText(decoded, ch, index + 1);
}
return output;
return decoded;
}
function decodeDoubleQuotedText(text: string): string {
const body = text.startsWith('"') && text.endsWith('"') ? text.slice(1, -1) : text;
let output = "";
function decodeUnquotedShellText(text: string): string {
return decodeUnquotedShellTextWithOffsets(text).value;
}
function decodeDoubleQuotedTextWithOffsets(text: string): DecodedShellText {
const hasQuotes = text.startsWith('"') && text.endsWith('"');
const bodyStart = hasQuotes ? 1 : 0;
const body = hasQuotes ? text.slice(1, -1) : text;
const decoded: DecodedShellText = { value: "", sourceOffsets: [bodyStart] };
for (let index = 0; index < body.length; index += 1) {
const ch = body[index];
const next = body[index + 1];
const sourceOffset = bodyStart + index;
if (ch === "\\" && next !== undefined) {
if (next === "\r" && body[index + 2] === "\n") {
decoded.sourceOffsets[decoded.value.length] = sourceOffset + 3;
index += 2;
continue;
}
if (["\\", '"', "$", "`", "\n", "\r"].includes(next)) {
if (next !== "\n" && next !== "\r") {
output += next;
appendDecodedText(decoded, next, sourceOffset + 2);
} else {
decoded.sourceOffsets[decoded.value.length] = sourceOffset + 2;
}
index += 1;
continue;
}
}
output += ch;
appendDecodedText(decoded, ch, sourceOffset + 1);
}
return output;
return decoded;
}
function decodeAnsiCString(text: string): string {
const body = text.startsWith("$'") && text.endsWith("'") ? text.slice(2, -1) : text;
let output = "";
function decodeDoubleQuotedText(text: string): string {
return decodeDoubleQuotedTextWithOffsets(text).value;
}
const ANSI_C_SIMPLE_ESCAPES: Record<string, string> = {
"'": "'",
'"': '"',
"?": "?",
"\\": "\\",
a: "\u0007",
b: "\b",
e: "\u001B",
E: "\u001B",
f: "\f",
n: "\n",
r: "\r",
t: "\t",
v: "\v",
};
function decodeAnsiCStringWithOffsets(text: string): DecodedShellText {
const hasQuotes = text.startsWith("$'") && text.endsWith("'");
const bodyStart = hasQuotes ? 2 : 0;
const body = hasQuotes ? text.slice(2, -1) : text;
const decoded: DecodedShellText = { value: "", sourceOffsets: [bodyStart] };
for (let index = 0; index < body.length; index += 1) {
const ch = body[index];
const sourceOffset = bodyStart + index;
if (ch !== "\\") {
output += ch;
appendDecodedText(decoded, ch, sourceOffset + 1);
continue;
}
const next = body[index + 1];
if (next === undefined) {
output += "\\";
appendDecodedText(decoded, "\\", sourceOffset + 1);
continue;
}
const simpleEscapes: Record<string, string> = {
"'": "'",
'"': '"',
"?": "?",
"\\": "\\",
a: "\u0007",
b: "\b",
e: "\u001B",
E: "\u001B",
f: "\f",
n: "\n",
r: "\r",
t: "\t",
v: "\v",
};
const simple = simpleEscapes[next];
const simple = ANSI_C_SIMPLE_ESCAPES[next];
if (simple !== undefined) {
output += simple;
appendDecodedText(decoded, simple, sourceOffset + 2);
index += 1;
continue;
}
@@ -307,7 +382,11 @@ function decodeAnsiCString(text: string): string {
if (next === "x") {
const hex = body.slice(index + 2).match(/^[0-9A-Fa-f]{1,2}/)?.[0] ?? "";
if (hex) {
output += String.fromCodePoint(Number.parseInt(hex, 16));
appendDecodedText(
decoded,
String.fromCodePoint(Number.parseInt(hex, 16)),
sourceOffset + 2 + hex.length,
);
index += 1 + hex.length;
continue;
}
@@ -320,9 +399,13 @@ function decodeAnsiCString(text: string): string {
if (hex) {
const codePoint = Number.parseInt(hex, 16);
try {
output += String.fromCodePoint(codePoint);
appendDecodedText(
decoded,
String.fromCodePoint(codePoint),
sourceOffset + 2 + hex.length,
);
} catch {
output += `\\${next}${hex}`;
appendDecodedText(decoded, `\\${next}${hex}`, sourceOffset + 2 + hex.length);
}
index += 1 + hex.length;
continue;
@@ -332,16 +415,24 @@ function decodeAnsiCString(text: string): string {
if (/^[0-7]$/.test(next)) {
const octal = body.slice(index + 1).match(/^[0-7]{1,3}/)?.[0] ?? "";
if (octal) {
output += String.fromCodePoint(Number.parseInt(octal, 8));
appendDecodedText(
decoded,
String.fromCodePoint(Number.parseInt(octal, 8)),
sourceOffset + 1 + octal.length,
);
index += octal.length;
continue;
}
}
output += next;
appendDecodedText(decoded, next, sourceOffset + 2);
index += 1;
}
return output;
return decoded;
}
function decodeAnsiCString(text: string): string {
return decodeAnsiCStringWithOffsets(text).value;
}
function hasDynamicWordPart(node: TreeSitterNode): boolean {
@@ -650,10 +741,25 @@ function payloadBaseFromArgument(argument: CommandArgument, payload: string): Sp
if (payloadOffset < 0) {
return null;
}
const prefix = argument.value.slice(0, payloadOffset);
const rawPayloadOffset = argument.decodedSourceOffsets[payloadOffset];
if (rawPayloadOffset === undefined) {
return null;
}
const prefix = argument.text.slice(0, rawPayloadOffset);
return {
startIndex: argument.valueStartIndex + payloadOffset,
startPosition: advancePosition(argument.valueStartPosition, prefix),
startIndex: argument.span.startIndex + rawPayloadOffset,
startPosition: advancePosition(argument.span.startPosition, prefix),
mapOffset(offset) {
const rawOffset = argument.decodedSourceOffsets[payloadOffset + offset];
const mappedRawOffset = rawOffset ?? rawPayloadOffset + offset;
return {
index: argument.span.startIndex + mappedRawOffset,
position: advancePosition(
argument.span.startPosition,
argument.text.slice(0, mappedRawOffset),
),
};
},
};
}

View File

@@ -6,13 +6,29 @@ import * as TreeSitter from "web-tree-sitter";
const require = createRequire(import.meta.url);
let parserPromise: Promise<TreeSitter.Parser> | null = null;
let parserLoader: () => Promise<TreeSitter.Parser> = loadParser;
const MAX_COMMAND_EXPLANATION_SOURCE_CHARS = 128 * 1024;
const MAX_COMMAND_EXPLANATION_PARSE_MS = 500;
function resolvePackageFile(packageName: string, fileName: string): string {
let directory = path.dirname(require.resolve(packageName));
export function resolvePackageFileForCommandExplanation(
packageName: string,
fileName: string,
): string {
let packageEntry: string;
try {
packageEntry = require.resolve(packageName);
} catch (error) {
throw new Error(
`Unable to resolve ${packageName} while loading the shell command explainer parser`,
{ cause: error },
);
}
let directory = path.dirname(packageEntry);
const searched: string[] = [];
for (let depth = 0; depth < 5; depth += 1) {
const candidate = path.join(directory, fileName);
searched.push(candidate);
if (fs.existsSync(candidate)) {
return candidate;
}
@@ -22,15 +38,17 @@ function resolvePackageFile(packageName: string, fileName: string): string {
}
directory = parent;
}
return path.join(path.dirname(require.resolve(packageName)), fileName);
throw new Error(
`Unable to locate ${fileName} in ${packageName} while loading the shell command explainer parser; searched ${searched.join(", ")}`,
);
}
function resolveWebTreeSitterFile(fileName: string): string {
return resolvePackageFile("web-tree-sitter", fileName);
return resolvePackageFileForCommandExplanation("web-tree-sitter", fileName);
}
function resolveBashWasmPath(): string {
return resolvePackageFile("tree-sitter-bash", "tree-sitter-bash.wasm");
return resolvePackageFileForCommandExplanation("tree-sitter-bash", "tree-sitter-bash.wasm");
}
async function loadParser(): Promise<TreeSitter.Parser> {
@@ -44,10 +62,20 @@ async function loadParser(): Promise<TreeSitter.Parser> {
}
export function getBashParserForCommandExplanation(): Promise<TreeSitter.Parser> {
parserPromise ??= loadParser();
parserPromise ??= parserLoader().catch((error: unknown) => {
parserPromise = null;
throw error;
});
return parserPromise;
}
export function setBashParserLoaderForCommandExplanationForTest(
loader?: () => Promise<TreeSitter.Parser>,
): void {
parserPromise = null;
parserLoader = loader ?? loadParser;
}
/**
* Low-level parser access for tests and parser diagnostics.
* Callers own the returned Tree and must call tree.delete().