feat: add shell command explainer

This commit is contained in:
Jesse Merhi
2026-05-03 13:41:29 +10:00
committed by clawsweeper
parent e27b7c2d74
commit 30533f84d6
9 changed files with 1674 additions and 3 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
- Discord/status: add degraded Discord transport and gateway event-loop starvation signals to `openclaw channels status`, `openclaw status --deep`, and fetch-timeout logs so intermittent socket resets do not look like a healthy running channel. (#76327) Thanks @joshavant.
- Plugins/update: on the beta OpenClaw update channel, default-line npm and ClawHub plugin updates try `@beta` first and fall back to default/latest when no plugin beta release exists.
- Channels/WhatsApp: support explicit WhatsApp Channel/Newsletter `@newsletter` outbound message targets with channel session metadata instead of DM routing. Fixes #13417; carries forward the narrow outbound target idea from #13424. Thanks @vincentkoc and @agentz-manfred.
- Internals: add a tree-sitter-backed shell command explainer for future approval and command-review surfaces. (#75004) Thanks @jesse-merhi.
### Fixes

View File

@@ -1700,10 +1700,12 @@
"sqlite-vec": "0.1.9",
"tar": "7.5.13",
"tokenjuice": "0.7.0",
"tree-sitter-bash": "^0.25.1",
"tslog": "^4.10.2",
"typebox": "1.1.37",
"undici": "8.1.0",
"web-push": "^3.6.7",
"web-tree-sitter": "^0.26.8",
"ws": "^8.20.0",
"yaml": "^2.8.3",
"zod": "^4.4.1"
@@ -1784,7 +1786,8 @@
"sharp"
],
"ignoredBuiltDependencies": [
"koffi"
"koffi",
"tree-sitter-bash"
],
"packageExtensions": {
"@mariozechner/pi-coding-agent": {

33
pnpm-lock.yaml generated
View File

@@ -193,6 +193,9 @@ importers:
tokenjuice:
specifier: 0.7.0
version: 0.7.0
tree-sitter-bash:
specifier: ^0.25.1
version: 0.25.1
tslog:
specifier: ^4.10.2
version: 4.10.2
@@ -205,6 +208,9 @@ importers:
web-push:
specifier: ^3.6.7
version: 3.6.7
web-tree-sitter:
specifier: ^0.26.8
version: 0.26.8
ws:
specifier: ^8.20.0
version: 8.20.0
@@ -6379,6 +6385,10 @@ packages:
resolution: {integrity: sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==}
engines: {node: ^18 || ^20 || >= 21}
node-gyp-build@4.8.4:
resolution: {integrity: sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==}
hasBin: true
node-downloader-helper@2.1.11:
resolution: {integrity: sha512-882fH2C9AWdiPCwz/2beq5t8FGMZK9Dx8TJUOIxzMCbvG7XUKM5BuJwN5f0NKo4SCQK6jR4p2TPm54mYGdGchQ==}
engines: {node: '>=14.18'}
@@ -7388,6 +7398,14 @@ packages:
resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==}
hasBin: true
tree-sitter-bash@0.25.1:
resolution: {integrity: sha512-7hMytuYIMoXOq24yRulgIxthE9YmggZIOHCyPTTuJcu6EU54tYD+4G39cUb28kxC6jMf/AbPfWGLQtgPTdh3xw==}
peerDependencies:
tree-sitter: ^0.25.0
peerDependenciesMeta:
tree-sitter:
optional: true
trim-lines@3.0.1:
resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==}
@@ -7665,6 +7683,9 @@ packages:
resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==}
engines: {node: '>= 8'}
web-tree-sitter@0.26.8:
resolution: {integrity: sha512-4sUwi7ZyOrIk5KLgYLkc2A/F0LFMQnBhfb+2Cdl7ik4ePJ6JD+fk4ofI2sA5eGawBKBaK4Vntt7Ww5KcEsay4A==}
webidl-conversions@3.0.1:
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
@@ -13555,8 +13576,9 @@ snapshots:
netmask@2.1.1: {}
node-addon-api@8.7.0:
optional: true
node-addon-api@8.7.0: {}
node-gyp-build@4.8.4: {}
node-downloader-helper@2.1.11: {}
@@ -14755,6 +14777,11 @@ snapshots:
tree-kill@1.2.2: {}
tree-sitter-bash@0.25.1:
dependencies:
node-addon-api: 8.7.0
node-gyp-build: 4.8.4
trim-lines@3.0.1: {}
trough@2.2.0: {}
@@ -14976,6 +15003,8 @@ snapshots:
web-streams-polyfill@3.3.3: {}
web-tree-sitter@0.26.8: {}
webidl-conversions@3.0.1: {}
webidl-conversions@8.0.1: {}

View File

@@ -48,3 +48,4 @@ onlyBuiltDependencies:
ignoredBuiltDependencies:
- koffi
- tree-sitter-bash

View File

@@ -0,0 +1,485 @@
import { describe, expect, it } from "vitest";
import { explainShellCommand } from "./extract.js";
import { parseBashForCommandExplanation } from "./tree-sitter-runtime.js";
describe("command explainer tree-sitter runtime", () => {
// Smoke test: the bash grammar loads and a pipeline parses into a `program` root.
it("loads tree-sitter bash and parses a simple command", async () => {
const tree = await parseBashForCommandExplanation("ls | grep stuff");
try {
expect(tree.rootNode.type).toBe("program");
expect(tree.rootNode.toString()).toContain("pipeline");
} finally {
// The tree is explicitly released even when an assertion above fails.
tree.delete();
}
});
// Input of 128 * 1024 + 1 characters must be rejected with the "too large" error.
// NOTE(review): presumably one character past the runtime's size limit — confirm against tree-sitter-runtime.
it("rejects oversized parser input before parsing", async () => {
await expect(parseBashForCommandExplanation("x".repeat(128 * 1024 + 1))).rejects.toThrow(
"Shell command is too large to explain",
);
});
// A three-stage pipeline: every stage is a top-level command, and `python -c`
// is reported as an inline-eval risk with the decoded argv.
it("explains a pipeline with python inline eval", async () => {
const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\'');
expect(explanation.ok).toBe(true);
expect(explanation.shapes).toContain("pipeline");
expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual([
"ls",
"grep",
"python",
]);
// argv holds the unquoted payload, while the risk text below keeps the original quoting.
expect(explanation.topLevelCommands[2]?.argv).toEqual(["python", "-c", 'print("hi")']);
expect(explanation.nestedCommands).toEqual([]);
expect(explanation.topLevelCommands[2]?.span).toEqual(
expect.objectContaining({ startIndex: expect.any(Number), endIndex: expect.any(Number) }),
);
expect(explanation.risks).toContainEqual(
expect.objectContaining({
kind: "inline-eval",
command: "python",
flag: "-c",
text: "python -c 'print(\"hi\")'",
}),
);
});
// `$(...)` used as an argument: the outer command stays top-level, the inner
// command is reported as nested, and a command-substitution risk is raised.
it("separates command substitution in an argument", async () => {
const explanation = await explainShellCommand("echo $(whoami)");
expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual(["echo"]);
expect(explanation.nestedCommands).toEqual([
expect.objectContaining({ context: "command-substitution", executable: "whoami" }),
]);
expect(explanation.risks).toContainEqual(
expect.objectContaining({ kind: "command-substitution", text: "$(whoami)" }),
);
});
// `$(...)` in executable position: no top-level command is reported because the
// executable is unknown statically; a dynamic-executable risk is raised instead.
it("marks command substitution in executable position as dynamic", async () => {
const explanation = await explainShellCommand("$(whoami) --help");
expect(explanation.topLevelCommands).toEqual([]);
expect(explanation.nestedCommands).toEqual([
expect.objectContaining({ context: "command-substitution", executable: "whoami" }),
]);
expect(explanation.risks).toContainEqual(
expect.objectContaining({ kind: "dynamic-executable", text: "$(whoami)" }),
);
});
// `<(...)` arguments: each inner command is nested with the
// process-substitution context, in source order.
it("separates process substitution commands", async () => {
const explanation = await explainShellCommand("diff <(ls a) <(ls b)");
expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual(["diff"]);
expect(explanation.nestedCommands.map((step) => `${step.context}:${step.executable}`)).toEqual([
"process-substitution:ls",
"process-substitution:ls",
]);
expect(explanation.risks.map((risk) => risk.kind)).toContain("process-substitution");
});
// `&&`, `||` and `;` in one line: all three shapes are reported and every
// command appears at top level in source order.
it("detects AND OR and sequence shapes", async () => {
const explanation = await explainShellCommand("pnpm test && pnpm build || echo failed; pwd");
expect(explanation.shapes).toEqual(expect.arrayContaining(["and", "or", "sequence"]));
expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual([
"pnpm",
"pnpm",
"echo",
"pwd",
]);
});
// A newline separates commands like `;` does, and `&` adds the background shape.
it("detects newline sequences and background commands", async () => {
const newlineSequence = await explainShellCommand("echo a\necho b");
expect(newlineSequence.shapes).toContain("sequence");
expect(newlineSequence.topLevelCommands.map((step) => step.executable)).toEqual([
"echo",
"echo",
]);
// `a & b` is expected to report both "background" and "sequence".
const background = await explainShellCommand("echo a & echo b");
expect(background.shapes).toEqual(expect.arrayContaining(["background", "sequence"]));
expect(background.topLevelCommands.map((step) => step.executable)).toEqual(["echo", "echo"]);
});
// if/then/else: the condition and both branches are all listed as top-level commands.
it("detects conditionals", async () => {
const explanation = await explainShellCommand(
"if test -f package.json; then pnpm test; else echo missing; fi",
);
expect(explanation.shapes).toContain("if");
expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual([
"test",
"pnpm",
"echo",
]);
});
// Declaration commands (`export`) and both test forms (`[ ]`, `[[ ]]`) are
// surfaced as ordinary commands with a synthesized executable.
it("detects declaration and test command forms", async () => {
const declaration = await explainShellCommand("export A=$(whoami)");
// The assignment is kept verbatim as one argv entry; the substitution is still extracted.
expect(declaration.topLevelCommands).toEqual([
expect.objectContaining({ executable: "export", argv: ["export", "A=$(whoami)"] }),
]);
expect(declaration.nestedCommands).toEqual([
expect.objectContaining({ context: "command-substitution", executable: "whoami" }),
]);
// The closing bracket is not part of argv.
const testCommand = await explainShellCommand("[ -f package.json ]");
expect(testCommand.topLevelCommands).toEqual([
expect.objectContaining({ executable: "[", argv: ["[", "-f", "package.json"] }),
]);
const doubleBracket = await explainShellCommand("[[ -f package.json ]]");
expect(doubleBracket.topLevelCommands).toEqual([
expect.objectContaining({ executable: "[[", argv: ["[[", "-f", "package.json"] }),
]);
});
// Shell wrappers (`bash -c`, `pwsh -Command`, ...): the payload is reported as a
// shell-wrapper risk and, for the bash cases below, parsed into nested commands.
it("detects shell wrappers", async () => {
const explanation = await explainShellCommand('bash -lc "echo hi | wc -c"');
expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual(["bash"]);
expect(explanation.nestedCommands).toEqual([
expect.objectContaining({ context: "wrapper-payload", executable: "echo" }),
expect.objectContaining({ context: "wrapper-payload", executable: "wc" }),
]);
// Spans of payload commands must index into the ORIGINAL source string, not the payload.
const [wrappedEcho, wrappedWc] = explanation.nestedCommands;
expect(explanation.source.slice(wrappedEcho?.span.startIndex, wrappedEcho?.span.endIndex)).toBe(
"echo hi",
);
expect(explanation.source.slice(wrappedWc?.span.startIndex, wrappedWc?.span.endIndex)).toBe(
"wc -c",
);
// The pipeline inside the payload also contributes to the reported shapes.
expect(explanation.shapes).toContain("pipeline");
expect(explanation.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "bash",
flag: "-lc",
payload: "echo hi | wc -c",
text: 'bash -lc "echo hi | wc -c"',
}),
);
// Combined short flags ending in `c`.
const combinedFlags = await explainShellCommand('bash -euxc "echo hi"');
expect(combinedFlags.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "bash",
flag: "-euxc",
payload: "echo hi",
}),
);
// Payload glued directly onto the flag with no separating space.
const combinedInline = await explainShellCommand('bash -c"echo hi"');
expect(combinedInline.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "bash",
payload: "echo hi",
}),
);
// PowerShell wrappers are recognised too.
const powershell = await explainShellCommand('pwsh -Command "Get-ChildItem"');
expect(powershell.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "pwsh",
flag: "-Command",
payload: "Get-ChildItem",
}),
);
// Options that precede `-Command` must not hide the wrapper.
const powershellWithOptions = await explainShellCommand(
"pwsh -ExecutionPolicy Bypass -Command Get-ChildItem",
);
expect(powershellWithOptions.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "pwsh",
flag: "-Command",
payload: "Get-ChildItem",
}),
);
// A dynamic payload is reported but yields no nested commands.
const dynamicPayload = await explainShellCommand('bash -lc "$CMD"');
expect(dynamicPayload.nestedCommands).toEqual([]);
expect(dynamicPayload.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "bash",
flag: "-lc",
payload: "$CMD",
}),
);
// A syntax error inside the payload makes the whole explanation not ok.
const invalidPayload = await explainShellCommand("bash -lc 'echo &&'");
expect(invalidPayload.ok).toBe(false);
expect(invalidPayload.risks).toContainEqual(expect.objectContaining({ kind: "syntax-error" }));
// PowerShell payloads yield no nested commands even when they contain a pipe.
const powershellPipeline = await explainShellCommand(
'pwsh -Command "Get-ChildItem | Select Name"',
);
expect(powershellPipeline.nestedCommands).toEqual([]);
expect(powershellPipeline.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "pwsh",
flag: "-Command",
payload: "Get-ChildItem | Select Name",
}),
);
// Wrappers reached through a carrier command (time, nice, timeout, caffeinate).
for (const [command, carrier] of [
["time bash -lc 'id'", "time"],
["nice bash -lc 'id'", "nice"],
["timeout 1 bash -lc 'id'", "timeout"],
["caffeinate -d -w 42 bash -lc 'id'", "caffeinate"],
] as const) {
const wrapped = await explainShellCommand(command);
expect(wrapped.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper-through-carrier",
command: carrier,
}),
);
expect(wrapped.nestedCommands).toContainEqual(
expect.objectContaining({ context: "wrapper-payload", executable: "id" }),
);
// The nested span must still map back onto the full original command line.
const wrappedId = wrapped.nestedCommands.find((step) => step.executable === "id");
expect(wrapped.source.slice(wrappedId?.span.startIndex, wrappedId?.span.endIndex)).toBe("id");
}
});
// Quoting and escaping that is fully static is decoded before classification,
// so obfuscated spellings resolve to the real executable name.
it("normalizes static shell words before classifying commands", async () => {
// Mixed single quotes, backslash-escaped space and double quotes.
const quotedCommand = await explainShellCommand("e'c'ho a\\ b \"c d\"");
expect(quotedCommand.topLevelCommands).toEqual([
expect.objectContaining({ executable: "echo", argv: ["echo", "a b", "c d"] }),
]);
// ANSI-C quoting with a hex escape (\x68 is "h").
const ansiCString = await explainShellCommand("$'ec\\x68o' hi");
expect(ansiCString.topLevelCommands).toEqual([
expect.objectContaining({ executable: "echo", argv: ["echo", "hi"] }),
]);
// The normalized name is what wrapper detection sees.
const wrappedShell = await explainShellCommand("b'a'sh -lc 'echo hi'");
expect(wrappedShell.risks).toContainEqual(
expect.objectContaining({
kind: "shell-wrapper",
executable: "bash",
flag: "-lc",
payload: "echo hi",
}),
);
});
// Anything the shell would expand at run time (variables, globs, brace
// expansion, line continuations in the name) must never be reported as a
// known executable.
it("does not normalize dynamic executable names into trusted commands", async () => {
// Variable expansion embedded in the executable word.
const dynamicPrefix = await explainShellCommand("e${CMD}ho hi");
expect(dynamicPrefix.topLevelCommands).toEqual([]);
expect(dynamicPrefix.risks).toContainEqual(
expect.objectContaining({ kind: "dynamic-executable", text: "e${CMD}ho" }),
);
// Quoted variable expansion as the whole executable.
const dynamicQuoted = await explainShellCommand('"${CMD}" hi');
expect(dynamicQuoted.topLevelCommands).toEqual([]);
expect(dynamicQuoted.risks).toContainEqual(
expect.objectContaining({ kind: "dynamic-executable", text: '"${CMD}"' }),
);
// Glob in the executable.
const dynamicGlob = await explainShellCommand("./ec* hi");
expect(dynamicGlob.topLevelCommands).toEqual([]);
expect(dynamicGlob.risks).toContainEqual(
expect.objectContaining({ kind: "dynamic-executable", text: "./ec*" }),
);
// Brace expansion in the executable.
const dynamicBraceExpansion = await explainShellCommand("./{echo,printf} hi");
expect(dynamicBraceExpansion.topLevelCommands).toEqual([]);
expect(dynamicBraceExpansion.risks).toContainEqual(
expect.objectContaining({ kind: "dynamic-executable", text: "./{echo,printf}" }),
);
// The same glob as an ARGUMENT keeps the command but flags the argument by index.
const dynamicArgument = await explainShellCommand("echo ./ec*");
expect(dynamicArgument.topLevelCommands).toEqual([
expect.objectContaining({ executable: "echo", argv: ["echo", "./ec*"] }),
]);
expect(dynamicArgument.risks).toContainEqual(
expect.objectContaining({
kind: "dynamic-argument",
command: "echo",
argumentIndex: 1,
text: "./ec*",
}),
);
// A variable where a shell's flags would go is flagged as a dynamic argument.
const dynamicShellFlag = await explainShellCommand("bash $FLAGS id");
expect(dynamicShellFlag.risks).toContainEqual(
expect.objectContaining({
kind: "dynamic-argument",
command: "bash",
argumentIndex: 1,
text: "$FLAGS",
}),
);
// Backslash-newline inside the executable name: not trusted, two risks reported.
const lineContinuation = await explainShellCommand("ec\\\nho hi");
expect(lineContinuation.topLevelCommands).toEqual([]);
expect(lineContinuation.risks).toContainEqual(
expect.objectContaining({ kind: "line-continuation" }),
);
expect(lineContinuation.risks).toContainEqual(
expect.objectContaining({ kind: "dynamic-executable" }),
);
// Backslash-newline between arguments: command is kept, continuation is still flagged.
const continuedArgument = await explainShellCommand("pnpm test \\\n --filter foo");
expect(continuedArgument.topLevelCommands).toEqual([
expect.objectContaining({
executable: "pnpm",
argv: ["pnpm", "test", "--filter", "foo"],
}),
]);
expect(continuedArgument.risks).toContainEqual(
expect.objectContaining({ kind: "line-continuation" }),
);
// Unbalanced quotes are a syntax error rather than a normalized command.
const invalidObfuscation = await explainShellCommand("e'c'h'o hi");
expect(invalidObfuscation.ok).toBe(false);
expect(invalidObfuscation.risks).toContainEqual(
expect.objectContaining({ kind: "syntax-error" }),
);
});
// Commands that run other commands (find -exec, xargs, env -S) are flagged as
// carriers, and inline-eval detection still works behind env/sudo/command.
it("detects command carriers", async () => {
const find = await explainShellCommand('find . -name "*.ts" -exec grep -n TODO {} +');
expect(find.risks).toContainEqual(
expect.objectContaining({ kind: "command-carrier", command: "find", flag: "-exec" }),
);
const xargs = await explainShellCommand('printf "%s\\n" a b | xargs -I{} sh -c "echo {}"');
expect(xargs.risks).toContainEqual(
expect.objectContaining({ kind: "command-carrier", command: "xargs" }),
);
const envSplitString = await explainShellCommand("env -S 'sh -c \"id\"'");
expect(envSplitString.risks).toContainEqual(
expect.objectContaining({ kind: "command-carrier", command: "env", flag: "-S" }),
);
// The inline-eval risk names the wrapped interpreter, not the prefix command.
for (const command of [
'env python -c "print(1)"',
'sudo python -c "print(1)"',
'command python -c "print(1)"',
]) {
const explanation = await explainShellCommand(command);
expect(explanation.risks).toContainEqual(
expect.objectContaining({
kind: "inline-eval",
command: "python",
flag: "-c",
}),
);
}
});
// eval / source / alias each get their own risk kind, and shell wrappers
// launched through sudo or `command` are reported with the carrier's name.
it("detects eval, source, aliases, and carrier shell wrappers", async () => {
const evalCommand = await explainShellCommand('eval "$OPENCLAW_CMD"');
expect(evalCommand.risks).toContainEqual(expect.objectContaining({ kind: "eval" }));
// `builtin eval` is still recognised as eval.
const builtinEval = await explainShellCommand("builtin eval 'echo hi'");
expect(builtinEval.risks).toContainEqual(expect.objectContaining({ kind: "eval" }));
// The POSIX dot command is treated as `source`.
const sourceCommand = await explainShellCommand(". ./some-script.sh");
expect(sourceCommand.risks).toContainEqual(
expect.objectContaining({ kind: "source", command: "." }),
);
const aliasCommand = await explainShellCommand("alias ll='ls -l'");
expect(aliasCommand.risks).toContainEqual(expect.objectContaining({ kind: "alias" }));
const sudoShell = await explainShellCommand('sudo sh -c "id && whoami"');
expect(sudoShell.risks).toContainEqual(
expect.objectContaining({ kind: "shell-wrapper-through-carrier", command: "sudo" }),
);
const commandShell = await explainShellCommand("command bash -lc 'id && whoami'");
expect(commandShell.risks).toContainEqual(
expect.objectContaining({ kind: "shell-wrapper-through-carrier", command: "command" }),
);
// Combined short flags behind a carrier.
const sudoCombinedFlags = await explainShellCommand('sudo bash -euxc "id && whoami"');
expect(sudoCombinedFlags.risks).toContainEqual(
expect.objectContaining({ kind: "shell-wrapper-through-carrier", command: "sudo" }),
);
});
// A function that shadows a common executable name: the body is nested
// context, the later call is top-level, and the definition itself is a risk.
it("treats function bodies as nested command context", async () => {
const explanation = await explainShellCommand("ls() { echo hi; }; ls /tmp");
expect(explanation.topLevelCommands).toEqual([
expect.objectContaining({ context: "top-level", executable: "ls", argv: ["ls", "/tmp"] }),
]);
expect(explanation.nestedCommands).toEqual([
expect.objectContaining({ context: "function-definition", executable: "echo" }),
]);
expect(explanation.risks).toContainEqual(
expect.objectContaining({ kind: "function-definition", name: "ls" }),
);
});
// A `;` inside a quoted string or heredoc body is data, not a command separator.
it("does not treat literal operator text as command shapes", async () => {
const quotedSemicolon = await explainShellCommand('echo ";"');
expect(quotedSemicolon.shapes).not.toContain("sequence");
const heredoc = await explainShellCommand("cat <<EOF\n;\nEOF");
expect(heredoc.shapes).not.toContain("sequence");
});
// File redirects, heredocs and here-strings each produce their own risk kind.
it("marks redirects heredocs and here-strings as risks", async () => {
const redirect = await explainShellCommand("echo hi > out.txt");
// Exactly one redirect risk is expected, carrying the operator and target text.
const redirectRisks = redirect.risks.filter((risk) => risk.kind === "redirect");
expect(redirectRisks).toEqual([expect.objectContaining({ text: "> out.txt" })]);
const heredoc = await explainShellCommand("cat <<EOF\nhello\nEOF");
expect(heredoc.risks).toContainEqual(expect.objectContaining({ kind: "heredoc" }));
const hereString = await explainShellCommand('cat <<< "hello"');
expect(hereString.risks).toContainEqual(expect.objectContaining({ kind: "here-string" }));
});
// An unterminated quote yields ok=false plus a syntax-error risk that carries a span.
it("reports syntax errors with source spans", async () => {
const explanation = await explainShellCommand("echo 'unterminated");
expect(explanation.ok).toBe(false);
expect(explanation.risks).toContainEqual(
expect.objectContaining({
kind: "syntax-error",
span: expect.objectContaining({
startIndex: expect.any(Number),
endIndex: expect.any(Number),
}),
}),
);
});
// Coarse performance guard: the mean time per explanation over 100 passes of a
// five-command corpus must stay below 20 ms.
// NOTE(review): this asserts on wall-clock time, so it may be sensitive to a
// heavily loaded CI machine — confirm the budget has enough headroom.
it("parses and extracts a small approval-sized corpus quickly", async () => {
const corpus = [
'ls | grep "stuff" | python -c \'print("hi")\'',
"echo $(whoami)",
"diff <(ls a) <(ls b)",
'find . -name "*.ts" -exec grep -n TODO {} +',
'bash -lc "echo hi | wc -c"',
];
const iterations = 100;
const start = performance.now();
// Commands are explained sequentially so the measurement reflects per-call latency.
for (let index = 0; index < iterations; index += 1) {
for (const command of corpus) {
await explainShellCommand(command);
}
}
const elapsedMs = performance.now() - start;
expect(elapsedMs / (iterations * corpus.length)).toBeLessThan(20);
});
});

View File

@@ -0,0 +1,998 @@
import type { Node as TreeSitterNode } from "web-tree-sitter";
import { unwrapKnownDispatchWrapperInvocation } from "../dispatch-wrapper-resolution.js";
import { detectInterpreterInlineEvalArgv } from "../exec-inline-eval.js";
import { normalizeExecutableToken } from "../exec-wrapper-resolution.js";
import {
extractShellWrapperCommand,
isShellWrapperExecutable,
POSIX_SHELL_WRAPPERS,
resolveShellWrapperTransportArgv,
} from "../shell-wrapper-resolution.js";
import { parseBashForCommandExplanation } from "./tree-sitter-runtime.js";
import type {
CommandContext,
CommandExplanation,
CommandRisk,
CommandShape,
CommandStep,
SourceSpan,
} from "./types.js";
/**
 * Mutable accumulator for an explanation: the set of shapes seen, the command
 * steps, the risks, and whether a parse error was encountered.
 * (The code that fills it in is outside this excerpt.)
 */
type MutableExplanation = {
shapes: Set<CommandShape>;
commands: CommandStep[];
risks: CommandRisk[];
hasParseError: boolean;
};
/** An argument that was classified as dynamic (not statically known). */
type DynamicArgument = {
// Position of the argument in argv (the executable is at 0).
index: number;
// Raw source text of the argument node.
text: string;
// Decoded value of the argument.
value: string;
span: SourceSpan;
};
/** One argument of a command, with its raw text, decoded value and location. */
type CommandArgument = {
// Position of the argument in argv (the executable is at 0).
index: number;
// Raw source text of the argument node, including any quotes.
text: string;
// Decoded value of the argument.
value: string;
span: SourceSpan;
// Start of the value itself: the span start advanced past the opening quote prefix, if any.
valueStartIndex: number;
valueStartPosition: SourceSpan["startPosition"];
};
/** Result of building argv for a command-like node. */
type CommandArgv = {
// Executable followed by the decoded argument values.
argv: string[];
// Every argument after the executable.
arguments: CommandArgument[];
// The subset of `arguments` whose value was classified as dynamic.
dynamicArguments: DynamicArgument[];
};
/** State threaded through the tree walk. */
type WalkState = {
// NOTE(review): presumably the current nesting depth of wrapper payloads — its use is outside this excerpt.
wrapperPayloadDepth: number;
// Offset applied to node spans so they refer to the outermost source text.
spanBase: SpanBase;
};
// NOTE(review): presumably the maximum wrapper-payload nesting that is re-parsed — confirm at the use site.
const MAX_WRAPPER_PAYLOAD_DEPTH = 2;
// Copy of the POSIX shell wrapper names as a string set.
const PARSEABLE_SHELL_WRAPPERS = new Set<string>(POSIX_SHELL_WRAPPERS);
// Commands that may prefix a shell invocation (use site is outside this excerpt).
const SHELL_CARRIER_EXECUTABLES = new Set(["sudo", "doas", "env", "command", "builtin"]);
// The two spellings of the shell `source` builtin.
const SOURCE_EXECUTABLES = new Set([".", "source"]);
/** Origin (byte index and row/column) that parsed spans are translated against. */
type SpanBase = {
startIndex: number;
startPosition: SourceSpan["startPosition"];
};
/** Zero offset: spans translated against it are unchanged. Default base for `spanFromNode`. */
const ROOT_SPAN_BASE: SpanBase = {
startIndex: 0,
startPosition: { row: 0, column: 0 },
};
/**
 * Lists every direct child of `node`, named and anonymous alike.
 *
 * @param node - Syntax node whose children are wanted.
 * @returns The children in index order, with `null` slots dropped.
 */
function children(node: TreeSitterNode): TreeSitterNode[] {
  const total = node.childCount;
  const found: TreeSitterNode[] = [];
  for (let position = 0; position < total; position += 1) {
    const candidate = node.child(position);
    if (candidate !== null) {
      found.push(candidate);
    }
  }
  return found;
}
/**
 * Lists the direct named children of `node`.
 *
 * @param node - Syntax node whose named children are wanted.
 * @returns The named children in index order, with `null` slots dropped.
 */
function namedChildren(node: TreeSitterNode): TreeSitterNode[] {
  const total = node.namedChildCount;
  const found: TreeSitterNode[] = [];
  for (let position = 0; position < total; position += 1) {
    const candidate = node.namedChild(position);
    if (candidate !== null) {
      found.push(candidate);
    }
  }
  return found;
}
/**
 * Reports whether any direct child of `node` has the given node type.
 *
 * @param node - Parent syntax node.
 * @param type - Node type to look for.
 * @returns `true` when at least one direct child matches.
 */
function hasDirectChildType(node: TreeSitterNode, type: string): boolean {
  for (const candidate of children(node)) {
    if (candidate.type === type) {
      return true;
    }
  }
  return false;
}
/**
 * Re-bases a row/column position that is relative to an embedded fragment onto
 * the coordinates of the text containing the fragment.
 *
 * @param position - Position relative to the fragment.
 * @param base - Position of the fragment's first character in the outer text.
 * @returns The position in outer coordinates.
 */
function translatePosition(
  position: SourceSpan["startPosition"],
  base: SourceSpan["startPosition"],
): SourceSpan["startPosition"] {
  // Only the fragment's first row shares a line with the base, so only there
  // does the base column shift the result.
  const onFirstRow = position.row === 0;
  const column = onFirstRow ? base.column + position.column : position.column;
  return { row: base.row + position.row, column };
}
/**
 * Shifts a span by a base offset, for both its indices and its positions.
 *
 * @param span - Span relative to the parsed fragment.
 * @param base - Origin of that fragment in the outer source.
 * @returns A new span in outer-source coordinates.
 */
function translateSpan(span: SourceSpan, base: SpanBase): SourceSpan {
  const { startIndex: offset, startPosition: origin } = base;
  const startPosition = translatePosition(span.startPosition, origin);
  const endPosition = translatePosition(span.endPosition, origin);
  return {
    startIndex: offset + span.startIndex,
    endIndex: offset + span.endIndex,
    startPosition,
    endPosition,
  };
}
/**
 * Builds a source span for a syntax node, translated against `base`.
 *
 * @param node - Syntax node to locate.
 * @param base - Origin to translate against; defaults to the zero offset.
 * @returns The node's span in outer-source coordinates.
 */
function spanFromNode(node: TreeSitterNode, base: SpanBase = ROOT_SPAN_BASE): SourceSpan {
  const begin = node.startPosition;
  const finish = node.endPosition;
  // Copy the positions into plain objects rather than reusing the node's own.
  const local = {
    startIndex: node.startIndex,
    endIndex: node.endIndex,
    startPosition: { row: begin.row, column: begin.column },
    endPosition: { row: finish.row, column: finish.column },
  };
  return translateSpan(local, base);
}
/**
 * Moves a row/column position forward over `text`.
 *
 * `\r\n`, a lone `\r` and a lone `\n` each count as one line break. Columns
 * count UTF-16 code units.
 *
 * @param position - Starting position.
 * @param text - Text to advance over.
 * @returns The position just after `text`.
 */
function advancePosition(
  position: SourceSpan["startPosition"],
  text: string,
): SourceSpan["startPosition"] {
  // `\r\n` is listed first so the pair is consumed as a single break.
  const segments = text.split(/\r\n|\r|\n/);
  const breaks = segments.length - 1;
  if (breaks === 0) {
    return { row: position.row, column: position.column + text.length };
  }
  const tail = segments[breaks] ?? "";
  return { row: position.row + breaks, column: tail.length };
}
/**
 * Number of leading characters in a node's text that are quoting syntax rather
 * than value.
 *
 * @param node - Argument node.
 * @returns 1 for `string` and `raw_string`, 2 for `ansi_c_string`, otherwise 0.
 */
function valuePrefixLength(node: TreeSitterNode): number {
  switch (node.type) {
    case "string":
    case "raw_string":
      return 1;
    case "ansi_c_string":
      return 2;
    default:
      return 0;
  }
}
/**
 * Builds the `CommandArgument` record for one argument node.
 *
 * @param index - Position the argument will take in argv.
 * @param node - Syntax node of the argument.
 * @param value - Already-computed word value for `node`.
 * @param base - Origin used to translate the node's span.
 * @returns The argument, including where its value begins after any opening quote.
 */
function argumentFromNode(
  index: number,
  node: TreeSitterNode,
  value: ShellWordValue,
  base: SpanBase,
): CommandArgument {
  const rawText = node.text;
  const span = spanFromNode(node, base);
  const quoteWidth = valuePrefixLength(node);
  const openingQuote = rawText.slice(0, quoteWidth);
  return {
    index,
    text: rawText,
    value: value.value,
    span,
    valueStartIndex: span.startIndex + quoteWidth,
    valueStartPosition: advancePosition(span.startPosition, openingQuote),
  };
}
/**
 * Result of evaluating a shell word: `literal` when the value is statically
 * known, `dynamic` when it is not. Both variants carry a `value` string.
 */
type ShellWordValue = { kind: "literal"; value: string } | { kind: "dynamic"; value: string };
// Node types that make a word dynamic wherever they appear inside it.
const DYNAMIC_WORD_NODE_TYPES = new Set([
"arithmetic_expansion",
"command_substitution",
"expansion",
"process_substitution",
"simple_expansion",
]);
// Node types the argv builders accept as command arguments; other named
// children are skipped when argv is assembled.
const COMMAND_ARGUMENT_NODE_TYPES = new Set([
"ansi_c_string",
"arithmetic_expansion",
"command_substitution",
"concatenation",
"expansion",
"number",
"process_substitution",
"raw_string",
"simple_expansion",
"string",
"word",
]);
/**
 * Reports whether `text` contains a backslash directly followed by a line
 * break (`\r\n`, `\r` or `\n`).
 *
 * @param text - Raw source text to inspect.
 * @returns `true` when a line continuation is present anywhere in `text`.
 */
function hasEscapedLineContinuation(text: string): boolean {
  const continuation = /\\(?:\r\n|[\r\n])/;
  return text.search(continuation) !== -1;
}
/**
 * Reports whether the first whitespace-free run of `text` ends in a backslash
 * followed by a line break, i.e. the continuation sits inside the first token.
 *
 * @param text - Raw source text of a command.
 * @returns `true` when the leading token contains a line continuation.
 */
function hasExecutableLineContinuation(text: string): boolean {
  const leadingContinuation = /^[^\s]*\\(?:\r\n|[\r\n])/;
  return text.search(leadingContinuation) !== -1;
}
/**
 * Scans `text` for unescaped pattern characters.
 *
 * A character preceded by a backslash is skipped. `*` and `?` always count.
 * `[` and `{` count only when the matching closer appears later with at least
 * one character in between, so `[]` and `{}` do not count.
 *
 * @param text - Raw word text.
 * @returns `true` when such a pattern character is found.
 */
function hasUnescapedDynamicPattern(text: string): boolean {
  let cursor = 0;
  while (cursor < text.length) {
    switch (text[cursor]) {
      case "\\":
        // Skip the backslash and whatever it escapes.
        cursor += 2;
        continue;
      case "*":
      case "?":
        return true;
      case "[":
        if (text.indexOf("]", cursor + 1) > cursor + 1) {
          return true;
        }
        break;
      case "{":
        if (text.indexOf("}", cursor + 1) > cursor + 1) {
          return true;
        }
        break;
      default:
        break;
    }
    cursor += 1;
  }
  return false;
}
/**
 * Decodes backslash escapes in unquoted shell text.
 *
 * A backslash followed by a line break (`\r\n`, `\r` or `\n`) is removed with
 * the break. A backslash followed by any other character yields that
 * character. A trailing backslash is kept as-is.
 *
 * @param text - Raw unquoted word text.
 * @returns The decoded text.
 */
function decodeUnquotedShellText(text: string): string {
  // `\r\n` is tried first so the pair is consumed together; `[\s\S]` then
  // matches exactly one following code unit of any kind.
  return text.replace(/\\(\r\n|[\s\S])/g, (_whole: string, escaped: string) => {
    const isLineBreak = escaped === "\r\n" || escaped === "\n" || escaped === "\r";
    return isLineBreak ? "" : escaped;
  });
}
/**
 * Decodes the body of a double-quoted shell string.
 *
 * Surrounding double quotes are stripped when both are present. Inside, a
 * backslash is an escape only before `\`, `"`, `$`, a backtick or a line
 * break; a backslash before any other character is kept literally.
 *
 * @param text - Raw text, with or without the surrounding quotes.
 * @returns The decoded text.
 */
function decodeDoubleQuotedText(text: string): string {
  const isWrapped = text.startsWith('"') && text.endsWith('"');
  const body = isWrapped ? text.slice(1, -1) : text;
  let decoded = "";
  let cursor = 0;
  while (cursor < body.length) {
    const current = body[cursor];
    const following = body[cursor + 1];
    if (current !== "\\" || following === undefined) {
      decoded += current;
      cursor += 1;
      continue;
    }
    // Line continuation: drop the backslash together with the line break.
    if (following === "\r" && body[cursor + 2] === "\n") {
      cursor += 3;
      continue;
    }
    if (following === "\n" || following === "\r") {
      cursor += 2;
      continue;
    }
    if (following === "\\" || following === '"' || following === "$" || following === "`") {
      decoded += following;
      cursor += 2;
      continue;
    }
    // Not an escape: keep the backslash and let the next pass handle `following`.
    decoded += current;
    cursor += 1;
  }
  return decoded;
}
/** Single-character escapes recognised inside `$'...'`, mapped to their decoded value. */
const ANSI_C_SIMPLE_ESCAPES: ReadonlyMap<string, string> = new Map([
  ["'", "'"],
  ['"', '"'],
  ["?", "?"],
  ["\\", "\\"],
  ["a", "\u0007"],
  ["b", "\b"],
  ["e", "\u001B"],
  ["E", "\u001B"],
  ["f", "\f"],
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
  ["v", "\v"],
]);
/** Digits accepted after `\x` (one or two hex digits). */
const ANSI_C_HEX_BYTE_DIGITS = /^[0-9A-Fa-f]{1,2}/;
/** Digits accepted after `\u` (up to four hex digits). */
const ANSI_C_UNICODE_SHORT_DIGITS = /^[0-9A-Fa-f]{1,4}/;
/** Digits accepted after `\U` (up to eight hex digits). */
const ANSI_C_UNICODE_LONG_DIGITS = /^[0-9A-Fa-f]{1,8}/;
/** Digits accepted for an octal escape (one to three octal digits). */
const ANSI_C_OCTAL_DIGITS = /^[0-7]{1,3}/;

/**
 * Decodes a Bash ANSI-C quoted string (`$'...'`).
 *
 * Handles the single-character escapes, `\cX` control characters, `\xHH`,
 * `\uHHHH`, `\UHHHHHHHH` and octal `\NNN`. Any other escaped character is
 * emitted without its backslash, and a trailing lone backslash is kept.
 *
 * @param text - Raw text, with or without the surrounding `$'` and `'`.
 * @returns The decoded string.
 */
function decodeAnsiCString(text: string): string {
  const body = text.startsWith("$'") && text.endsWith("'") ? text.slice(2, -1) : text;
  let output = "";
  for (let index = 0; index < body.length; index += 1) {
    const ch = body[index];
    if (ch !== "\\") {
      output += ch;
      continue;
    }
    const next = body[index + 1];
    if (next === undefined) {
      output += "\\";
      continue;
    }
    const simple = ANSI_C_SIMPLE_ESCAPES.get(next);
    if (simple !== undefined) {
      output += simple;
      index += 1;
      continue;
    }
    if (next === "c") {
      // `\cX` is the control-X character: DEL for `?`, otherwise the low five
      // bits of the upper-cased character (so `\cA` and `\ca` are both U+0001).
      const target = body[index + 2];
      if (target !== undefined) {
        const controlCode = target === "?" ? 0x7f : (target.toUpperCase().charCodeAt(0) & 0x1f);
        output += String.fromCharCode(controlCode);
        // In `$'...'` the control target may itself be written as an escaped
        // backslash (`\c\\`); consume both backslashes in that case.
        const escapedBackslash = target === "\\" && body[index + 3] === "\\";
        index += escapedBackslash ? 3 : 2;
        continue;
      }
      // A trailing `\c` with no target falls through to the generic handling below.
    }
    if (next === "x") {
      const hex = body.slice(index + 2).match(ANSI_C_HEX_BYTE_DIGITS)?.[0] ?? "";
      if (hex) {
        output += String.fromCodePoint(Number.parseInt(hex, 16));
        index += 1 + hex.length;
        continue;
      }
    }
    if (next === "u" || next === "U") {
      const digits = next === "u" ? ANSI_C_UNICODE_SHORT_DIGITS : ANSI_C_UNICODE_LONG_DIGITS;
      const hex = body.slice(index + 2).match(digits)?.[0] ?? "";
      if (hex) {
        const codePoint = Number.parseInt(hex, 16);
        try {
          output += String.fromCodePoint(codePoint);
        } catch {
          // Out-of-range code point: keep the escape text instead of failing.
          output += `\\${next}${hex}`;
        }
        index += 1 + hex.length;
        continue;
      }
    }
    if (/^[0-7]$/.test(next)) {
      const octal = body.slice(index + 1).match(ANSI_C_OCTAL_DIGITS)?.[0] ?? "";
      if (octal) {
        output += String.fromCodePoint(Number.parseInt(octal, 8));
        index += octal.length;
        continue;
      }
    }
    // Unrecognised escape: emit the escaped character on its own.
    output += next;
    index += 1;
  }
  return output;
}
/**
 * Reports whether `node` or any named descendant has a node type listed in
 * `DYNAMIC_WORD_NODE_TYPES`.
 *
 * @param node - Root of the subtree to inspect.
 * @returns `true` when a dynamic part is found.
 */
function hasDynamicWordPart(node: TreeSitterNode): boolean {
  if (DYNAMIC_WORD_NODE_TYPES.has(node.type)) {
    return true;
  }
  for (const part of namedChildren(node)) {
    if (hasDynamicWordPart(part)) {
      return true;
    }
  }
  return false;
}
/**
 * Evaluates a shell word node into a value plus a literal/dynamic verdict.
 *
 * A word is `dynamic` when it is, or contains, one of the node types in
 * `DYNAMIC_WORD_NODE_TYPES`, or when its unquoted text contains an unescaped
 * pattern character. Otherwise it is `literal` and `value` is the decoded text.
 *
 * @param node - Word-like syntax node (command name, word, string, concatenation, ...).
 * @returns The decoded value together with its kind.
 */
function shellWordValue(node: TreeSitterNode): ShellWordValue {
// The node itself is an expansion or substitution: report its raw text.
if (DYNAMIC_WORD_NODE_TYPES.has(node.type)) {
return { kind: "dynamic", value: node.text };
}
// Other node types that contain a dynamic part anywhere below them.
// command_name and concatenation are excluded here because their cases
// below build the value part by part.
if (
node.type !== "command_name" &&
node.type !== "concatenation" &&
namedChildren(node).some((child) => hasDynamicWordPart(child))
) {
return {
kind: "dynamic",
value: node.type === "string" ? decodeDoubleQuotedText(node.text) : node.text,
};
}
switch (node.type) {
case "command_name": {
const parts = namedChildren(node);
// No named parts: classify and decode the raw text directly.
if (parts.length === 0) {
return hasUnescapedDynamicPattern(node.text)
? { kind: "dynamic", value: decodeUnquotedShellText(node.text) }
: { kind: "literal", value: decodeUnquotedShellText(node.text) };
}
let value = "";
for (const part of parts) {
const partValue = shellWordValue(part);
value += partValue.value;
// Stop at the first non-literal part; the value returned includes that part but nothing after it.
if (partValue.kind !== "literal") {
return { kind: "dynamic", value };
}
}
return { kind: "literal", value };
}
case "word":
return hasUnescapedDynamicPattern(node.text)
? { kind: "dynamic", value: decodeUnquotedShellText(node.text) }
: { kind: "literal", value: decodeUnquotedShellText(node.text) };
case "raw_string":
// Single-quoted text: strip the quotes, no escape processing.
return { kind: "literal", value: node.text.slice(1, -1) };
case "string":
return { kind: "literal", value: decodeDoubleQuotedText(node.text) };
case "ansi_c_string":
return { kind: "literal", value: decodeAnsiCString(node.text) };
case "concatenation": {
// The pattern check runs over the whole raw text, quoted pieces included.
if (hasUnescapedDynamicPattern(node.text)) {
return { kind: "dynamic", value: decodeUnquotedShellText(node.text) };
}
let value = "";
let dynamic = false;
// Unlike command_name, every child is concatenated even after a dynamic one is seen.
for (const child of namedChildren(node)) {
const childValue = shellWordValue(child);
value += childValue.value;
if (childValue.kind !== "literal") {
dynamic = true;
}
}
return dynamic ? { kind: "dynamic", value } : { kind: "literal", value };
}
default:
// Any other node type: dynamic if a direct named child is, value is the decoded raw text.
return namedChildren(node).some((child) => shellWordValue(child).kind === "dynamic")
? { kind: "dynamic", value: decodeUnquotedShellText(node.text) }
: { kind: "literal", value: decodeUnquotedShellText(node.text) };
}
}
/**
 * Finds the node that names the executable of a command.
 *
 * @param node - Command syntax node.
 * @returns The child in the `name` field if present, otherwise the first named
 *   child of type `command_name`, otherwise `null`.
 */
function commandNameNode(node: TreeSitterNode): TreeSitterNode | null {
  const byField = node.childForFieldName("name");
  if (byField !== null && byField !== undefined) {
    return byField;
  }
  for (const candidate of namedChildren(node)) {
    if (candidate.type === "command_name") {
      return candidate;
    }
  }
  return null;
}
/**
 * Builds argv for a `command` node.
 *
 * @param node - The command syntax node.
 * @param nameNode - The node naming the executable.
 * @param state - Walk state; its `spanBase` is used to translate argument spans.
 * @returns The argv, per-argument details and the dynamic subset, or `null`
 *   when the executable cannot be trusted as a static name (a line
 *   continuation in the name or leading token, or a non-literal name).
 */
function argvFromCommand(
  node: TreeSitterNode,
  nameNode: TreeSitterNode,
  state: WalkState,
): CommandArgv | null {
  if (hasEscapedLineContinuation(nameNode.text) || hasExecutableLineContinuation(node.text)) {
    return null;
  }
  const executable = shellWordValue(nameNode);
  if (executable.kind !== "literal") {
    return null;
  }
  // Track the name node and its parts by numeric node id rather than by object
  // identity. NOTE(review): web-tree-sitter appears to create a new wrapper
  // object on each child lookup, in which case an identity-based Set would
  // never match — confirm against the library version in use.
  const skippedIds = new Set<number>([
    nameNode.id,
    ...namedChildren(nameNode).map((part) => part.id),
  ]);
  const argv = [executable.value];
  const argumentsList: CommandArgument[] = [];
  const dynamicArguments: DynamicArgument[] = [];
  for (const child of namedChildren(node)) {
    if (
      skippedIds.has(child.id) ||
      child.type === "command_name" ||
      child.type === "variable_assignment" ||
      !COMMAND_ARGUMENT_NODE_TYPES.has(child.type)
    ) {
      continue;
    }
    const value = shellWordValue(child);
    // The argument's index is its position in argv, which already holds the executable.
    const argument = argumentFromNode(argv.length, child, value, state.spanBase);
    argumentsList.push(argument);
    if (value.kind === "dynamic") {
      dynamicArguments.push({
        index: argument.index,
        text: argument.text,
        value: argument.value,
        span: argument.span,
      });
    }
    argv.push(value.value);
  }
  return { argv, arguments: argumentsList, dynamicArguments };
}
/**
 * Extracts the first whitespace-delimited token of `text`.
 *
 * @param text - Raw source text.
 * @returns The first run of non-whitespace characters after leading
 *   whitespace, or an empty string when there is none.
 */
function firstShellToken(text: string): string {
  const found = /^\S+/.exec(text.trimStart());
  return found === null ? "" : found[0];
}
/**
 * Builds argv for a declaration command node.
 *
 * The executable is the first whitespace-delimited token of the node's text.
 * Variable assignments are kept as arguments alongside the ordinary argument
 * node types.
 *
 * @param node - The declaration command syntax node.
 * @param state - Walk state; its `spanBase` is used to translate argument spans.
 * @returns The argv with argument details, or `null` when no leading token exists.
 */
function argvFromDeclarationCommand(node: TreeSitterNode, state: WalkState): CommandArgv | null {
  const keyword = firstShellToken(node.text);
  if (keyword === "") {
    return null;
  }
  const result: CommandArgv = { argv: [keyword], arguments: [], dynamicArguments: [] };
  const operands = namedChildren(node).filter(
    (child) => child.type === "variable_assignment" || COMMAND_ARGUMENT_NODE_TYPES.has(child.type),
  );
  for (const operand of operands) {
    const word = shellWordValue(operand);
    const argument = argumentFromNode(result.argv.length, operand, word, state.spanBase);
    result.arguments.push(argument);
    if (word.kind === "dynamic") {
      const { index, text, value, span } = argument;
      result.dynamicArguments.push({ index, text, value, span });
    }
    result.argv.push(word.value);
  }
  return result;
}
/**
 * Depth-first collection of the arguments of a test command.
 *
 * A `test_operator` node or any argument node type is recorded as one argument and not
 * descended into; every other node is only traversed through its named children.
 * `argv`, `argumentsList` and `dynamicArguments` are mutated in place.
 */
function appendTestCommandArguments(
  node: TreeSitterNode,
  argv: string[],
  argumentsList: CommandArgument[],
  dynamicArguments: DynamicArgument[],
  state: WalkState,
): void {
  const isArgument = node.type === "test_operator" || COMMAND_ARGUMENT_NODE_TYPES.has(node.type);
  if (!isArgument) {
    for (const child of namedChildren(node)) {
      appendTestCommandArguments(child, argv, argumentsList, dynamicArguments, state);
    }
    return;
  }
  const word = shellWordValue(node);
  // argv.length is the index this argument is about to occupy.
  const argument = argumentFromNode(argv.length, node, word, state.spanBase);
  argumentsList.push(argument);
  if (word.kind === "dynamic") {
    const { index, text, value, span } = argument;
    dynamicArguments.push({ index, text, value, span });
  }
  argv.push(word.value);
}
/**
 * Builds the argv for a `test_command` node.
 *
 * The executable is `[[` or `[`, chosen from how the node text starts after leading whitespace.
 *
 * @returns The parsed argv, or `null` when the text starts with neither bracket form.
 */
function argvFromTestCommand(node: TreeSitterNode, state: WalkState): CommandArgv | null {
  const text = node.text.trimStart();
  let executable: string;
  if (text.startsWith("[[")) {
    executable = "[[";
  } else if (text.startsWith("[")) {
    executable = "[";
  } else {
    return null;
  }
  const argv = [executable];
  const parsedArguments: CommandArgument[] = [];
  const dynamicArguments: DynamicArgument[] = [];
  namedChildren(node).forEach((child) => {
    appendTestCommandArguments(child, argv, parsedArguments, dynamicArguments, state);
  });
  return { argv, arguments: parsedArguments, dynamicArguments };
}
/** Reports whether `node` is one of the node types that are parsed into a command argv. */
function isCommandLikeNode(node: TreeSitterNode): boolean {
  switch (node.type) {
    case "command":
    case "declaration_command":
    case "test_command":
      return true;
    default:
      return false;
  }
}
/**
 * Adds the structural shapes implied by `node` to `output.shapes`.
 *
 * - `sequence`: a `program` or `list` node with a direct `;` child or more than one
 *   command-like named child.
 * - `background`: any node with a direct `&` child.
 * - Remaining shapes are keyed off the node type (`list` contributes `and` / `or` depending on
 *   its direct `&&` / `||` children).
 */
function recordShape(node: TreeSitterNode, output: MutableExplanation): void {
  const nodeType = node.type;
  const { shapes } = output;

  if (nodeType === "program" || nodeType === "list") {
    const isSequence =
      hasDirectChildType(node, ";") || namedChildren(node).filter(isCommandLikeNode).length > 1;
    if (isSequence) {
      shapes.add("sequence");
    }
  }
  if (hasDirectChildType(node, "&")) {
    shapes.add("background");
  }

  switch (nodeType) {
    case "pipeline":
      shapes.add("pipeline");
      break;
    case "list":
      if (hasDirectChildType(node, "&&")) {
        shapes.add("and");
      }
      if (hasDirectChildType(node, "||")) {
        shapes.add("or");
      }
      break;
    case "if_statement":
      shapes.add("if");
      break;
    case "for_statement":
      shapes.add("for");
      break;
    case "while_statement":
      shapes.add("while");
      break;
    case "case_statement":
      shapes.add("case");
      break;
    case "subshell":
      shapes.add("subshell");
      break;
    case "compound_statement":
      shapes.add("group");
      break;
    default:
      break;
  }
}
/**
 * Finds the first argument that tells a shell to run an inline command.
 *
 * The shell is taken from the token just before `startIndex` (falling back to `argv[0]`).
 * Scanning starts at `startIndex`, skips empty tokens and stops at a bare `--`.
 *
 * - `cmd`: `/c` or `/k`.
 * - `powershell` / `pwsh`: the command, encoded-command and file flags listed below.
 * - Anything else: `-c`, `--command`, or any single-dash token containing a `c` after the
 *   dash (for example `-lc`).
 *
 * Matching is case-insensitive; the returned `flag` keeps the trimmed original spelling.
 *
 * @returns The flag and its index in `argv`, or `null` when none is found.
 */
function shellCommandFlag(
  argv: string[],
  startIndex: number,
): { flag: string; index: number } | null {
  const shell = normalizeExecutableToken(argv[startIndex - 1] ?? argv[0] ?? "");
  const powershellFlags = new Set([
    "-c",
    "-command",
    "--command",
    "-encodedcommand",
    "-enc",
    "-e",
    "-f",
    "-file",
  ]);
  const isCommandFlag = (token: string): boolean => {
    const lower = token.toLowerCase();
    if (shell === "cmd") {
      return lower === "/c" || lower === "/k";
    }
    if (shell === "powershell" || shell === "pwsh") {
      return powershellFlags.has(lower);
    }
    if (lower === "-c" || lower === "--command") {
      return true;
    }
    return token.startsWith("-") && !token.startsWith("--") && lower.slice(1).includes("c");
  };

  for (let index = startIndex; index < argv.length; index += 1) {
    const token = argv[index]?.trim();
    if (!token) {
      continue;
    }
    if (token === "--") {
      break;
    }
    if (isCommandFlag(token)) {
      return { flag: token, index };
    }
  }
  return null;
}
/**
 * Decides whether a shell wrapper's payload should be re-parsed as shell source.
 *
 * Requires the normalized `transportArgv[0]` to be in `PARSEABLE_SHELL_WRAPPERS` and the flag
 * to be `-c`, `--command`, or a single-dash flag cluster containing `c` (case-insensitive).
 * A `null` flag never qualifies.
 */
function canParseShellWrapperPayload(transportArgv: string[], commandFlag: string | null): boolean {
  if (!PARSEABLE_SHELL_WRAPPERS.has(normalizeExecutableToken(transportArgv[0] ?? ""))) {
    return false;
  }
  const flag = (commandFlag ?? "").toLowerCase();
  if (["-c", "--command"].includes(flag)) {
    return true;
  }
  return /^-[^-]*c[^-]*$/i.test(flag);
}
/** Reports whether `payload` is exactly the value of one of the dynamic arguments. */
function isDynamicPayload(payload: string, dynamicArguments: DynamicArgument[]): boolean {
  for (const argument of dynamicArguments) {
    if (argument.value === payload) {
      return true;
    }
  }
  return false;
}
/**
 * Computes where `payload` starts, given the argument whose value contains it.
 *
 * The start index is the argument's value start plus the offset of the first occurrence of
 * `payload` in the value; the start position is the value start position advanced over the
 * text that precedes the payload.
 *
 * @returns The span base, or `null` when `payload` does not occur in the argument value.
 */
function payloadBaseFromArgument(argument: CommandArgument, payload: string): SpanBase | null {
  const { value, valueStartIndex, valueStartPosition } = argument;
  const offset = value.indexOf(payload);
  if (offset === -1) {
    return null;
  }
  const leadingText = value.slice(0, offset);
  return {
    startIndex: valueStartIndex + offset,
    startPosition: advancePosition(valueStartPosition, leadingText),
  };
}
/**
 * Finds the span base of `payload` among a command's arguments.
 *
 * An argument whose value equals `payload` exactly takes precedence; otherwise the first
 * argument whose value contains `payload` is used.
 *
 * @returns The span base, or `null` when no argument contains the payload.
 */
function payloadBaseFromArguments(
  payload: string,
  argumentsList: CommandArgument[],
): SpanBase | null {
  const exactMatch = argumentsList.find((candidate) => candidate.value === payload);
  // With an exact match only that argument is considered; otherwise every argument is tried.
  const candidates = exactMatch ? [exactMatch] : argumentsList;
  for (const candidate of candidates) {
    const base = payloadBaseFromArgument(candidate, payload);
    if (base) {
      return base;
    }
  }
  return null;
}
/**
 * Extracts a shell wrapper's inline command when it is safe to re-parse as shell source.
 *
 * @returns The payload and the span base locating it, or `null` when `argv` is not a shell
 *   wrapper with a command, the payload equals a dynamic argument value, the payload cannot be
 *   located in any argument, or the wrapper / flag combination is not parseable.
 */
function shellWrapperPayloadForParsing(
  argv: string[],
  argumentsList: CommandArgument[],
  dynamicArguments: DynamicArgument[],
): { command: string; spanBase: SpanBase } | null {
  const wrapper = extractShellWrapperCommand(argv);
  if (!wrapper.isWrapper || !wrapper.command) {
    return null;
  }
  const payload = wrapper.command;
  if (isDynamicPayload(payload, dynamicArguments)) {
    return null;
  }
  const spanBase = payloadBaseFromArguments(payload, argumentsList);
  if (!spanBase) {
    return null;
  }
  const transportArgv = resolveShellWrapperTransportArgv(argv) ?? argv;
  // Prefer the flag found in the transport argv; fall back to the original argv.
  const flagHit = shellCommandFlag(transportArgv, 1) ?? shellCommandFlag(argv, 1);
  const parseable = canParseShellWrapperPayload(transportArgv, flagHit?.flag ?? null);
  return parseable ? { command: payload, spanBase } : null;
}
/** A successful (non-nullish) result of `detectInterpreterInlineEvalArgv`. */
type InlineEvalHit = NonNullable<ReturnType<typeof detectInterpreterInlineEvalArgv>>;
/**
 * Detects interpreter inline-eval usage that is reached through another command.
 *
 * If `argv` unwraps as a known dispatch wrapper, detection runs on the unwrapped argv and that
 * result is returned as is. Otherwise, when the executable is a shell carrier, every suffix of
 * `argv` starting at index 1 is tried in order and the first hit wins.
 *
 * @returns The first inline-eval hit, or `null` when there is none.
 */
function detectCarrierInlineEvalArgv(argv: string[]): InlineEvalHit | null {
  const unwrapped = unwrapKnownDispatchWrapperInvocation(argv);
  if (unwrapped.kind === "unwrapped") {
    return detectInterpreterInlineEvalArgv(unwrapped.argv);
  }
  if (!SHELL_CARRIER_EXECUTABLES.has(normalizeExecutableToken(argv[0] ?? ""))) {
    return null;
  }
  let offset = 1;
  while (offset < argv.length) {
    const hit = detectInterpreterInlineEvalArgv(argv.slice(offset));
    if (hit) {
      return hit;
    }
    offset += 1;
  }
  return null;
}
/**
 * Detects the split-string option of `env` anywhere after the executable.
 *
 * Recognizes `-S`, `-S<value>`, `--split-string` and `--split-string=<value>` (after trimming
 * each argument) and reports the canonical flag without any attached value.
 *
 * @returns `"-S"` or `"--split-string"`, or `null` when `argv` is not an `env` invocation or
 *   carries no such option.
 */
function envSplitStringFlag(argv: string[]): string | null {
  if (normalizeExecutableToken(argv[0] ?? "") !== "env") {
    return null;
  }
  for (const [position, arg] of argv.entries()) {
    if (position === 0) {
      continue;
    }
    const token = arg.trim();
    if (token === "--split-string" || token.startsWith("--split-string=")) {
      return "--split-string";
    }
    // Covers both the bare `-S` and the attached `-S<value>` spelling.
    if (token.startsWith("-S")) {
      return "-S";
    }
  }
  return null;
}
/**
 * Appends an `inline-eval` risk to `output.risks`, using the hit's normalized executable as the
 * command and the hit's flag.
 */
function recordInlineEvalRisk(
  inlineEval: InlineEvalHit,
  text: string,
  span: SourceSpan,
  output: MutableExplanation,
): void {
  const { normalizedExecutable: command, flag } = inlineEval;
  output.risks.push({ kind: "inline-eval", command, flag, text, span });
}
/**
 * Appends one `dynamic-argument` risk per dynamic argument, in order, to `output.risks`.
 *
 * @param command - Command name reported on every risk.
 */
function recordDynamicArgumentRisks(
  command: string,
  dynamicArguments: DynamicArgument[],
  output: MutableExplanation,
): void {
  for (const { index: argumentIndex, text, span } of dynamicArguments) {
    output.risks.push({ kind: "dynamic-argument", command, argumentIndex, text, span });
  }
}
/**
 * Appends every risk derivable from one parsed command to `output.risks`.
 *
 * Risks are pushed in a fixed order: dynamic arguments, inline eval, shell wrapper, `find`
 * exec-style flags, `xargs`, `env` split-string, `eval`, source-like commands, `alias`, and
 * finally shells / `eval` / source-like commands reached through a carrier executable.
 *
 * @param argv - The command's argv; nothing is recorded when `argv[0]` is missing or empty.
 * @param dynamicArguments - Arguments of this command whose value is dynamic.
 * @param text - Source text attached to each risk (dynamic-argument risks use their own text).
 * @param span - Source span attached to each risk (dynamic-argument risks use their own span).
 * @param output - Explanation being built; only `risks` is appended to.
 */
function recordCommandRisks(
argv: string[],
dynamicArguments: DynamicArgument[],
text: string,
span: SourceSpan,
output: MutableExplanation,
): void {
const executable = argv[0];
if (!executable) {
return;
}
const normalizedExecutable = normalizeExecutableToken(executable);
recordDynamicArgumentRisks(normalizedExecutable, dynamicArguments, output);
// Direct interpreter inline eval is checked first, then inline eval reached via a carrier.
const inlineEval = detectInterpreterInlineEvalArgv(argv) ?? detectCarrierInlineEvalArgv(argv);
if (inlineEval) {
recordInlineEvalRisk(inlineEval, text, span, output);
}
const shellWrapper = extractShellWrapperCommand(argv);
if (shellWrapper.isWrapper && shellWrapper.command) {
const transportArgv = resolveShellWrapperTransportArgv(argv) ?? argv;
const shellExecutable = transportArgv[0] ?? executable;
const commandFlag = shellCommandFlag(transportArgv, 1) ?? shellCommandFlag(argv, 1);
// A wrapper whose own executable is a shell is reported with its payload; otherwise the shell
// is being reached through some other command and only the carrier is reported.
if (isShellWrapperExecutable(executable)) {
output.risks.push({
kind: "shell-wrapper",
executable: shellExecutable,
// Falls back to "-c" when no explicit command flag was located.
flag: commandFlag?.flag ?? "-c",
payload: shellWrapper.command,
text,
span,
});
} else {
output.risks.push({
kind: "shell-wrapper-through-carrier",
command: normalizedExecutable,
text,
span,
});
}
}
if (normalizedExecutable === "find") {
// Only the first exec-style flag in argv is reported.
const flag = argv.find((arg) => ["-exec", "-execdir", "-ok", "-okdir"].includes(arg));
if (flag) {
// NOTE(review): this variant reports the raw `executable`, while the xargs and env variants
// below report `normalizedExecutable` — confirm that the difference is intentional.
output.risks.push({ kind: "command-carrier", command: executable, flag, text, span });
}
}
if (normalizedExecutable === "xargs") {
output.risks.push({ kind: "command-carrier", command: normalizedExecutable, text, span });
}
const splitStringFlag = envSplitStringFlag(argv);
if (splitStringFlag) {
output.risks.push({
kind: "command-carrier",
command: normalizedExecutable,
flag: splitStringFlag,
text,
span,
});
}
if (normalizedExecutable === "eval") {
output.risks.push({ kind: "eval", text, span });
}
if (SOURCE_EXECUTABLES.has(normalizedExecutable)) {
output.risks.push({ kind: "source", command: normalizedExecutable, text, span });
}
if (normalizedExecutable === "alias") {
output.risks.push({ kind: "alias", text, span });
}
// Carrier executables that were not recognized as a shell wrapper above: look through their
// arguments for a shell followed by a command flag, or for eval / source-like commands.
if (!shellWrapper.isWrapper && SHELL_CARRIER_EXECUTABLES.has(normalizedExecutable)) {
const shellIndex = argv.findIndex((arg) => isShellWrapperExecutable(arg));
if (shellIndex >= 0 && shellCommandFlag(argv, shellIndex + 1)) {
output.risks.push({
kind: "shell-wrapper-through-carrier",
command: normalizedExecutable,
text,
span,
});
}
// Only the first carried eval / source-like argument is reported.
const carriedCommand = argv.slice(1).find((arg) => {
const normalized = normalizeExecutableToken(arg);
return normalized === "eval" || SOURCE_EXECUTABLES.has(normalized);
});
const normalizedCarriedCommand = carriedCommand
? normalizeExecutableToken(carriedCommand)
: undefined;
if (normalizedCarriedCommand === "eval") {
output.risks.push({ kind: "eval", text, span });
} else if (normalizedCarriedCommand && SOURCE_EXECUTABLES.has(normalizedCarriedCommand)) {
output.risks.push({
kind: "source",
command: normalizedCarriedCommand,
text,
span,
});
}
}
}
/**
 * Recursively visits `node` and its named children, accumulating shapes, commands and risks
 * into `output`.
 *
 * @param node - Syntax node to visit.
 * @param output - Explanation being built; mutated in place.
 * @param context - Context recorded on any command step found at this node.
 * @param state - Wrapper-payload nesting depth and the span base used to position spans.
 * @throws Whatever `parseBashForCommandExplanation` throws while re-parsing a wrapper payload.
 */
async function walk(
node: TreeSitterNode,
output: MutableExplanation,
context: CommandContext,
state: WalkState,
): Promise<void> {
recordShape(node, output);
const span = spanFromNode(node, state.spanBase);
// Context handed to children; switched below when this node opens a nested context.
let childContext = context;
if (node.type === "program" && hasEscapedLineContinuation(node.text)) {
output.risks.push({ kind: "line-continuation", text: node.text, span });
}
if (node.type === "function_definition") {
const nameNode = node.childForFieldName("name");
output.risks.push({
kind: "function-definition",
name: nameNode?.text ?? "",
text: node.text,
span,
});
childContext = "function-definition";
} else if (node.type === "command_substitution") {
output.risks.push({ kind: "command-substitution", text: node.text, span });
childContext = "command-substitution";
} else if (node.type === "process_substitution") {
output.risks.push({ kind: "process-substitution", text: node.text, span });
childContext = "process-substitution";
} else if (node.type === "heredoc_redirect") {
output.risks.push({ kind: "heredoc", text: node.text, span });
} else if (node.type === "herestring_redirect") {
output.risks.push({ kind: "here-string", text: node.text, span });
} else if (node.type === "file_redirect") {
output.risks.push({ kind: "redirect", text: node.text, span });
} else if (node.type === "ERROR") {
output.risks.push({ kind: "syntax-error", text: node.text, span });
}
if (
node.type === "command" ||
node.type === "declaration_command" ||
node.type === "test_command"
) {
const nameNode = node.type === "command" ? commandNameNode(node) : null;
// Each command-like node type has its own argv builder; a `command` without a name node
// yields no argv at all.
const parsed =
node.type === "command"
? nameNode
? argvFromCommand(node, nameNode, state)
: null
: node.type === "declaration_command"
? argvFromDeclarationCommand(node, state)
: argvFromTestCommand(node, state);
if (node.type === "command" && nameNode && !parsed) {
// A named command whose argv could not be built is reported as a dynamic executable and
// is not recorded as a command step.
output.risks.push({
kind: "dynamic-executable",
text: nameNode.text,
span: spanFromNode(nameNode, state.spanBase),
});
} else if (parsed) {
// The step carries the incoming `context`, not `childContext`.
const step: CommandStep = {
context,
executable: parsed.argv[0] ?? "",
argv: parsed.argv,
text: node.text,
span,
};
if (step.executable) {
output.commands.push(step);
recordCommandRisks(parsed.argv, parsed.dynamicArguments, node.text, span, output);
const wrapperPayload = shellWrapperPayloadForParsing(
parsed.argv,
parsed.arguments,
parsed.dynamicArguments,
);
// Re-parse the inline payload of a shell wrapper, bounded by MAX_WRAPPER_PAYLOAD_DEPTH.
if (wrapperPayload && state.wrapperPayloadDepth < MAX_WRAPPER_PAYLOAD_DEPTH) {
const wrapperTree = await parseBashForCommandExplanation(wrapperPayload.command);
try {
if (wrapperTree.rootNode.hasError) {
output.hasParseError = true;
output.risks.push({
kind: "syntax-error",
text: wrapperPayload.command,
span: spanFromNode(wrapperTree.rootNode, wrapperPayload.spanBase),
});
}
// Spans inside the payload are positioned with the payload's own span base.
await walk(wrapperTree.rootNode, output, "wrapper-payload", {
wrapperPayloadDepth: state.wrapperPayloadDepth + 1,
spanBase: wrapperPayload.spanBase,
});
} finally {
// The payload tree is owned here and released even if the nested walk throws.
wrapperTree.delete();
}
}
}
}
}
for (const child of namedChildren(node)) {
await walk(child, output, childContext, state);
}
}
/**
 * Parses `source` as bash and summarizes its structure, commands and risks.
 *
 * Commands found in the `top-level` context are returned separately from commands found in any
 * other context. `ok` is `false` when the source, or a re-parsed wrapper payload, had a parse
 * error. The parse tree is always released before returning.
 *
 * @throws Whatever `parseBashForCommandExplanation` throws for this source or a nested payload.
 */
export async function explainShellCommand(source: string): Promise<CommandExplanation> {
  const tree = await parseBashForCommandExplanation(source);
  try {
    const root = tree.rootNode;
    const output: MutableExplanation = {
      shapes: new Set(),
      commands: [],
      risks: [],
      hasParseError: root.hasError,
    };
    await walk(root, output, "top-level", { wrapperPayloadDepth: 0, spanBase: ROOT_SPAN_BASE });

    // Partition in one pass; relative order within each bucket is preserved.
    const topLevelCommands: CommandStep[] = [];
    const nestedCommands: CommandStep[] = [];
    for (const command of output.commands) {
      const bucket = command.context === "top-level" ? topLevelCommands : nestedCommands;
      bucket.push(command);
    }
    return {
      ok: !output.hasParseError,
      source,
      shapes: [...output.shapes],
      topLevelCommands,
      nestedCommands,
      risks: output.risks,
    };
  } finally {
    tree.delete();
  }
}

View File

@@ -0,0 +1,9 @@
export { explainShellCommand } from "./extract.js";
export type {
CommandContext,
CommandExplanation,
CommandRisk,
CommandShape,
CommandStep,
SourceSpan,
} from "./types.js";

View File

@@ -0,0 +1,70 @@
import fs from "node:fs";
import { createRequire } from "node:module";
import path from "node:path";
import * as TreeSitter from "web-tree-sitter";
// CommonJS-style `require` anchored at this module, used for `require.resolve` lookups.
const require = createRequire(import.meta.url);
// Memoized parser initialization shared by every caller; `null` until first requested.
let parserPromise: Promise<TreeSitter.Parser> | null = null;
// Upper bound on `source.length` accepted by parseBashForCommandExplanation.
const MAX_COMMAND_EXPLANATION_SOURCE_CHARS = 128 * 1024;
// Parse time budget in milliseconds, measured with `performance.now()`.
const MAX_COMMAND_EXPLANATION_PARSE_MS = 500;
/**
 * Locates `fileName` inside an installed package.
 *
 * Starting at the directory of the package's resolved entry point, checks that directory and
 * then up to four ancestors for an existing `fileName`, stopping early at the filesystem root.
 *
 * @returns The first existing candidate, or the (possibly non-existent) path next to the entry
 *   point when none is found.
 * @throws If `require.resolve` cannot resolve `packageName`.
 */
function resolvePackageFile(packageName: string, fileName: string): string {
  const entryDirectory = path.dirname(require.resolve(packageName));
  let directory = entryDirectory;
  let remainingLevels = 5;
  while (remainingLevels > 0) {
    const candidate = path.join(directory, fileName);
    if (fs.existsSync(candidate)) {
      return candidate;
    }
    const parent = path.dirname(directory);
    if (parent === directory) {
      // Reached the filesystem root.
      break;
    }
    directory = parent;
    remainingLevels -= 1;
  }
  return path.join(entryDirectory, fileName);
}
/** Resolves a file shipped with the `web-tree-sitter` package to a filesystem path. */
function resolveWebTreeSitterFile(fileName: string): string {
  const packageName = "web-tree-sitter";
  return resolvePackageFile(packageName, fileName);
}
/** Resolves the filesystem path of the bash grammar WASM from the `tree-sitter-bash` package. */
function resolveBashWasmPath(): string {
  const packageName = "tree-sitter-bash";
  const wasmFileName = "tree-sitter-bash.wasm";
  return resolvePackageFile(packageName, wasmFileName);
}
/**
 * Initializes web-tree-sitter, loads the bash grammar and returns a parser configured for it.
 *
 * Runtime files requested during initialization are resolved from the `web-tree-sitter`
 * package.
 */
async function loadParser(): Promise<TreeSitter.Parser> {
  await TreeSitter.Parser.init({
    locateFile: (fileName: string) => resolveWebTreeSitterFile(fileName),
  });
  const bashLanguage = await TreeSitter.Language.load(resolveBashWasmPath());
  const bashParser = new TreeSitter.Parser();
  bashParser.setLanguage(bashLanguage);
  return bashParser;
}
/**
 * Returns the shared bash parser, initializing it on first use.
 *
 * The initialization promise is memoized so concurrent callers share one load. If the load
 * rejects, the memoized promise is cleared so that a later call retries instead of replaying
 * the same failure forever; callers of the failed attempt still observe the rejection.
 */
export function getBashParserForCommandExplanation(): Promise<TreeSitter.Parser> {
  const cached = parserPromise;
  if (cached) {
    return cached;
  }
  const pending = loadParser();
  parserPromise = pending;
  // Drop a failed initialization from the cache, but only if it is still the cached attempt.
  void pending.catch(() => {
    if (parserPromise === pending) {
      parserPromise = null;
    }
  });
  return pending;
}
/**
 * Low-level parser access for tests and parser diagnostics.
 * Callers own the returned Tree and must call tree.delete().
 * Prefer explainShellCommand for normal command-explainer use.
 *
 * Sources longer than MAX_COMMAND_EXPLANATION_SOURCE_CHARS are rejected up front, and the parse
 * is given a budget of MAX_COMMAND_EXPLANATION_PARSE_MS via the progress callback.
 *
 * @throws Error when the source is too long or when the parser yields no tree (the parser is
 *   reset before throwing in the latter case).
 */
export async function parseBashForCommandExplanation(source: string): Promise<TreeSitter.Tree> {
  if (source.length > MAX_COMMAND_EXPLANATION_SOURCE_CHARS) {
    throw new Error("Shell command is too large to explain");
  }
  const parser = await getBashParserForCommandExplanation();
  const deadlineMs = performance.now() + MAX_COMMAND_EXPLANATION_PARSE_MS;
  // Reports `true` once the deadline has passed — presumably this makes tree-sitter abandon
  // the parse; confirm against the web-tree-sitter ParseOptions documentation.
  const pastDeadline = (): boolean => performance.now() > deadlineMs;
  const tree = parser.parse(source, null, { progressCallback: pastDeadline });
  if (tree) {
    return tree;
  }
  parser.reset();
  throw new Error("tree-sitter-bash returned no parse tree");
}

View File

@@ -0,0 +1,75 @@
/**
 * Where in the script a command step was found.
 *
 * `top-level` is the context the explainer starts in; the substitution and function-definition
 * contexts apply to commands nested under those constructs, and `wrapper-payload` applies to
 * commands found by re-parsing the inline command of a shell wrapper.
 */
export type CommandContext =
| "top-level"
| "command-substitution"
| "process-substitution"
| "function-definition"
| "wrapper-payload";
/** Structural constructs that can be reported for a script (control flow, grouping, chaining). */
export type CommandShape =
| "pipeline"
| "and"
| "or"
| "sequence"
| "if"
| "for"
| "while"
| "case"
| "subshell"
| "group"
| "background";
/**
 * Location of a piece of source text, as a start/end index pair plus start/end row and column.
 *
 * NOTE(review): presumably indices and positions refer to the original source string passed to
 * the explainer, and follow tree-sitter's conventions for end-exclusivity — confirm.
 */
export type SourceSpan = {
startIndex: number;
endIndex: number;
startPosition: { row: number; column: number };
endPosition: { row: number; column: number };
};
/** One command found in the script. */
export type CommandStep = {
// Context the command was found in.
context: CommandContext;
// First element of `argv`.
executable: string;
// Executable followed by its argument values.
argv: string[];
// Source text of the command node.
text: string;
// Location of the command node.
span: SourceSpan;
};
/**
 * A risk reported for a script, discriminated by `kind`.
 *
 * Every variant carries the source `text` and `span` it refers to; the remaining fields depend
 * on the kind.
 */
export type CommandRisk =
| { kind: "inline-eval"; command: string; flag: string; text: string; span: SourceSpan }
| {
kind: "shell-wrapper";
executable: string;
flag: string;
// The inline command passed to the shell.
payload: string;
text: string;
span: SourceSpan;
}
| { kind: "shell-wrapper-through-carrier"; command: string; text: string; span: SourceSpan }
| { kind: "command-carrier"; command: string; flag?: string; text: string; span: SourceSpan }
| { kind: "command-substitution"; text: string; span: SourceSpan }
| { kind: "process-substitution"; text: string; span: SourceSpan }
| { kind: "dynamic-executable"; text: string; span: SourceSpan }
| {
kind: "dynamic-argument";
command: string;
// Position of the argument within the command's argv.
argumentIndex: number;
text: string;
span: SourceSpan;
}
| { kind: "eval"; text: string; span: SourceSpan }
| { kind: "source"; command: string; text: string; span: SourceSpan }
| { kind: "alias"; text: string; span: SourceSpan }
| { kind: "function-definition"; name: string; text: string; span: SourceSpan }
| { kind: "line-continuation"; text: string; span: SourceSpan }
| { kind: "heredoc"; text: string; span: SourceSpan }
| { kind: "here-string"; text: string; span: SourceSpan }
| { kind: "redirect"; text: string; span: SourceSpan }
| { kind: "syntax-error"; text: string; span: SourceSpan };
/** Result of explaining a shell command. */
export type CommandExplanation = {
// False when a parse error was seen in the source or in a re-parsed wrapper payload.
ok: boolean;
// The input that was explained, unchanged.
source: string;
shapes: CommandShape[];
// Commands whose context is "top-level".
topLevelCommands: CommandStep[];
// Commands found in any other context.
nestedCommands: CommandStep[];
risks: CommandRisk[];
};