// openclaw/src/infra/command-explainer/extract.test.ts

import { afterEach, describe, expect, it, vi } from "vitest";
import type { Parser } from "web-tree-sitter";
import { explainShellCommand } from "./extract.js";
import {
  getBashParserForCommandExplanation,
  parseBashForCommandExplanation,
  resolvePackageFileForCommandExplanation,
  setBashParserLoaderForCommandExplanationForTest,
} from "./tree-sitter-runtime.js";

// Tracks whether the current test replaced the bash parser loader, so the
// afterEach hook only resets the loader when a test actually overrode it.
let parserLoaderOverridden = false;

/**
 * Installs a replacement bash parser loader for the current test and records
 * that an override is active.
 *
 * @param loader - Async factory producing the parser the test wants to be used.
 */
function setParserLoaderForTest(loader: () => Promise<Parser>): void {
  // The flag is set first, so it is already recorded if installing the loader throws.
  parserLoaderOverridden = true;
  setBashParserLoaderForCommandExplanationForTest(loader);
}

// Per-test cleanup: undo a parser loader override (if any) and restore vitest mocks.
afterEach(() => {
  if (parserLoaderOverridden) {
    // NOTE(review): calling the setter without an argument presumably restores
    // the default loader — confirm against tree-sitter-runtime.
    setBashParserLoaderForCommandExplanationForTest();
    parserLoaderOverridden = false;
  }
  // Runs unconditionally so spies created with vi.spyOn (e.g. on performance.now) are restored.
  vi.restoreAllMocks();
});

describe("command explainer tree-sitter runtime", () => {
  // Smoke test: a two-stage pipeline parses into a `program` root that contains a pipeline node.
  it("loads tree-sitter bash and parses a simple command", async () => {
    const parsed = await parseBashForCommandExplanation("ls | grep stuff");

    try {
      const { rootNode } = parsed;
      expect(rootNode.type).toBe("program");
      expect(rootNode.toString()).toContain("pipeline");
    } finally {
      // Release the tree even when one of the assertions above fails.
      parsed.delete();
    }
  });

  // Input of 128 * 1024 + 1 characters must be rejected with the size error.
  it("rejects oversized parser input before parsing", async () => {
    const oversizedCommand = "x".repeat(128 * 1024 + 1);
    const pendingParse = parseBashForCommandExplanation(oversizedCommand);

    await expect(pendingParse).rejects.toThrow("Shell command is too large to explain");
  });

  // A rejected parser load must not stick: the next request has to invoke the loader again.
  it("retries parser initialization after a loader rejection", async () => {
    const stubParser = {} as Parser;
    let loadAttempts = 0;
    const flakyLoader = async (): Promise<Parser> => {
      loadAttempts += 1;
      // Only the very first attempt fails; every later attempt yields the stub.
      if (loadAttempts > 1) {
        return stubParser;
      }
      throw new Error("transient parser load failure");
    };
    setParserLoaderForTest(flakyLoader);

    const firstAttempt = getBashParserForCommandExplanation();
    await expect(firstAttempt).rejects.toThrow("transient parser load failure");

    const secondAttempt = getBashParserForCommandExplanation();
    await expect(secondAttempt).resolves.toBe(stubParser);

    expect(loadAttempts).toBe(2);
  });

  // Both failure modes (unknown package, unknown file in a known package) must
  // throw messages that name what could not be found.
  it("reports missing parser packages and wasm files with explainer context", () => {
    const resolveFromMissingPackage = () =>
      resolvePackageFileForCommandExplanation(
        "definitely-missing-openclaw-parser-package",
        "parser.wasm",
      );
    expect(resolveFromMissingPackage).toThrow(
      "Unable to resolve definitely-missing-openclaw-parser-package",
    );

    const resolveMissingFile = () =>
      resolvePackageFileForCommandExplanation("web-tree-sitter", "missing-openclaw-parser.wasm");
    expect(resolveMissingFile).toThrow(
      "Unable to locate missing-openclaw-parser.wasm in web-tree-sitter",
    );
  });

  // The stub parser calls the progress callback once and returns null. With the
  // mocked clock below, the parse is expected to reject with the timeout message
  // and the parser is expected to be reset exactly once.
  it("reports parser progress cancellation as a timeout", async () => {
    const resetSpy = vi.fn();
    const stalledParser = {
      parse(
        _source: string,
        _oldTree: unknown,
        options?: { progressCallback?: (state: unknown) => boolean },
      ) {
        options?.progressCallback?.({ currentOffset: 0, hasError: false });
        return null;
      },
      reset: resetSpy,
    } as unknown as Parser;
    // The first clock read yields 0, every later read yields 501.
    const clock = vi.spyOn(performance, "now");
    clock.mockReturnValueOnce(0).mockReturnValue(501);
    setParserLoaderForTest(async () => stalledParser);

    const pendingParse = parseBashForCommandExplanation("echo hi");
    await expect(pendingParse).rejects.toThrow(
      "tree-sitter-bash timed out after 500ms while parsing shell command",
    );
    expect(resetSpy).toHaveBeenCalledOnce();
  });

  // Three-stage pipeline whose last stage runs inline Python: checks the pipeline
  // shape, the top-level steps, the decoded argv, and the inline-eval risk.
  it("explains a pipeline with python inline eval", async () => {
    const explanation = await explainShellCommand('ls | grep "stuff" | python -c \'print("hi")\'');

    expect(explanation.ok).toBe(true);
    expect(explanation.shapes).toContain("pipeline");

    const executables = explanation.topLevelCommands.map(({ executable }) => executable);
    expect(executables).toEqual(["ls", "grep", "python"]);

    const pythonStep = explanation.topLevelCommands[2];
    expect(pythonStep?.argv).toEqual(["python", "-c", 'print("hi")']);
    expect(explanation.nestedCommands).toEqual([]);

    const numericSpan = expect.objectContaining({
      startIndex: expect.any(Number),
      endIndex: expect.any(Number),
    });
    expect(pythonStep?.span).toEqual(numericSpan);

    const inlineEvalRisk = expect.objectContaining({
      kind: "inline-eval",
      command: "python",
      flag: "-c",
      text: "python -c 'print(\"hi\")'",
    });
    expect(explanation.risks).toContainEqual(inlineEvalRisk);
  });

  // `$(...)` used as an argument: the outer command stays top-level, the inner
  // command is reported as nested, and a command-substitution risk is raised.
  it("separates command substitution in an argument", async () => {
    const { topLevelCommands, nestedCommands, risks } = await explainShellCommand("echo $(whoami)");

    const executables = topLevelCommands.map(({ executable }) => executable);
    expect(executables).toEqual(["echo"]);

    const nestedWhoami = expect.objectContaining({
      context: "command-substitution",
      executable: "whoami",
    });
    expect(nestedCommands).toEqual([nestedWhoami]);

    const substitutionRisk = expect.objectContaining({
      kind: "command-substitution",
      text: "$(whoami)",
    });
    expect(risks).toContainEqual(substitutionRisk);
  });

  // `$(...)` in executable position: no top-level command is listed and the
  // substitution is flagged as a dynamic executable.
  it("marks command substitution in executable position as dynamic", async () => {
    const { topLevelCommands, nestedCommands, risks } = await explainShellCommand(
      "$(whoami) --help",
    );

    expect(topLevelCommands).toEqual([]);

    const nestedWhoami = expect.objectContaining({
      context: "command-substitution",
      executable: "whoami",
    });
    expect(nestedCommands).toEqual([nestedWhoami]);

    const dynamicExecutableRisk = expect.objectContaining({
      kind: "dynamic-executable",
      text: "$(whoami)",
    });
    expect(risks).toContainEqual(dynamicExecutableRisk);
  });

  // Each `<(...)` body is reported as a nested process-substitution command.
  it("separates process substitution commands", async () => {
    const explanation = await explainShellCommand("diff <(ls a) <(ls b)");

    const executables = explanation.topLevelCommands.map(({ executable }) => executable);
    expect(executables).toEqual(["diff"]);

    const nestedLabels = explanation.nestedCommands.map(
      ({ context, executable }) => `${context}:${executable}`,
    );
    expect(nestedLabels).toEqual(["process-substitution:ls", "process-substitution:ls"]);

    const riskKinds = explanation.risks.map(({ kind }) => kind);
    expect(riskKinds).toContain("process-substitution");
  });

  // `&&`, `||` and `;` in one line: all three shapes are reported and the four
  // commands are listed in source order.
  it("detects AND OR and sequence shapes", async () => {
    const { shapes, topLevelCommands } = await explainShellCommand(
      "pnpm test && pnpm build || echo failed; pwd",
    );

    const expectedShapes = expect.arrayContaining(["and", "or", "sequence"]);
    expect(shapes).toEqual(expectedShapes);

    const executables = topLevelCommands.map(({ executable }) => executable);
    expect(executables).toEqual(["pnpm", "pnpm", "echo", "pwd"]);
  });

  // A newline separates commands like `;` does, and `&` adds the background shape.
  it("detects newline sequences and background commands", async () => {
    const afterNewline = await explainShellCommand("echo a\necho b");
    expect(afterNewline.shapes).toContain("sequence");
    const newlineExecutables = afterNewline.topLevelCommands.map(({ executable }) => executable);
    expect(newlineExecutables).toEqual(["echo", "echo"]);

    const backgrounded = await explainShellCommand("echo a & echo b");
    const backgroundShapes = expect.arrayContaining(["background", "sequence"]);
    expect(backgrounded.shapes).toEqual(backgroundShapes);
    const backgroundExecutables = backgrounded.topLevelCommands.map(
      ({ executable }) => executable,
    );
    expect(backgroundExecutables).toEqual(["echo", "echo"]);
  });

  // An if/then/else statement yields the `if` shape and lists the condition and
  // both branch commands as top-level steps.
  it("detects conditionals", async () => {
    const conditional = "if test -f package.json; then pnpm test; else echo missing; fi";
    const { shapes, topLevelCommands } = await explainShellCommand(conditional);

    expect(shapes).toContain("if");

    const executables = topLevelCommands.map(({ executable }) => executable);
    expect(executables).toEqual(["test", "pnpm", "echo"]);
  });

  // Declarations (`export`) and both bracket test forms are reported as commands
  // with the keyword or bracket as their executable.
  it("detects declaration and test command forms", async () => {
    const exported = await explainShellCommand("export A=$(whoami)");

    const exportStep = expect.objectContaining({
      executable: "export",
      argv: ["export", "A=$(whoami)"],
    });
    expect(exported.topLevelCommands).toEqual([exportStep]);
    const nestedWhoami = expect.objectContaining({
      context: "command-substitution",
      executable: "whoami",
    });
    expect(exported.nestedCommands).toEqual([nestedWhoami]);

    const singleBracket = await explainShellCommand("[ -f package.json ]");
    const singleBracketStep = expect.objectContaining({
      executable: "[",
      argv: ["[", "-f", "package.json"],
    });
    expect(singleBracket.topLevelCommands).toEqual([singleBracketStep]);

    const doubleBracket = await explainShellCommand("[[ -f package.json ]]");
    const doubleBracketStep = expect.objectContaining({
      executable: "[[",
      argv: ["[[", "-f", "package.json"],
    });
    expect(doubleBracket.topLevelCommands).toEqual([doubleBracketStep]);
  });

  // Covers shell-wrapper detection: `bash -c` style payloads (separate, combined
  // and attached flags), PowerShell `-Command`, dynamic and invalid payloads, and
  // wrappers that are reached through a carrier command.
  it("detects shell wrappers", async () => {
    // Static bash payload: the pipeline inside the quotes is reported as nested commands.
    const explanation = await explainShellCommand('bash -lc "echo hi | wc -c"');

    expect(explanation.topLevelCommands.map((step) => step.executable)).toEqual(["bash"]);
    expect(explanation.nestedCommands).toEqual([
      expect.objectContaining({ context: "wrapper-payload", executable: "echo" }),
      expect.objectContaining({ context: "wrapper-payload", executable: "wc" }),
    ]);
    // Nested spans must index into the original source string, not into the payload.
    const [wrappedEcho, wrappedWc] = explanation.nestedCommands;
    expect(explanation.source.slice(wrappedEcho?.span.startIndex, wrappedEcho?.span.endIndex)).toBe(
      "echo hi",
    );
    expect(explanation.source.slice(wrappedWc?.span.startIndex, wrappedWc?.span.endIndex)).toBe(
      "wc -c",
    );
    expect(explanation.shapes).toContain("pipeline");
    expect(explanation.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "bash",
        flag: "-lc",
        payload: "echo hi | wc -c",
        text: 'bash -lc "echo hi | wc -c"',
      }),
    );

    // `c` combined with other short flags in a single word.
    const combinedFlags = await explainShellCommand('bash -euxc "echo hi"');
    expect(combinedFlags.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "bash",
        flag: "-euxc",
        payload: "echo hi",
      }),
    );

    // Payload attached directly to `-c` without whitespace; no flag value is asserted here.
    const combinedInline = await explainShellCommand('bash -c"echo hi"');
    expect(combinedInline.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "bash",
        payload: "echo hi",
      }),
    );

    // PowerShell wrapper with `-Command` as the first option.
    const powershell = await explainShellCommand('pwsh -Command "Get-ChildItem"');
    expect(powershell.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "pwsh",
        flag: "-Command",
        payload: "Get-ChildItem",
      }),
    );

    // PowerShell wrapper where other options precede `-Command`.
    const powershellWithOptions = await explainShellCommand(
      "pwsh -ExecutionPolicy Bypass -Command Get-ChildItem",
    );
    expect(powershellWithOptions.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "pwsh",
        flag: "-Command",
        payload: "Get-ChildItem",
      }),
    );

    // A payload that is only a variable reference yields no nested commands but is still a wrapper risk.
    const dynamicPayload = await explainShellCommand('bash -lc "$CMD"');
    expect(dynamicPayload.nestedCommands).toEqual([]);
    expect(dynamicPayload.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "bash",
        flag: "-lc",
        payload: "$CMD",
      }),
    );

    // A syntactically broken payload marks the whole explanation as not ok.
    const invalidPayload = await explainShellCommand("bash -lc 'echo &&'");
    expect(invalidPayload.ok).toBe(false);
    expect(invalidPayload.risks).toContainEqual(expect.objectContaining({ kind: "syntax-error" }));

    // PowerShell payloads are reported verbatim and produce no nested commands.
    const powershellPipeline = await explainShellCommand(
      'pwsh -Command "Get-ChildItem | Select Name"',
    );
    expect(powershellPipeline.nestedCommands).toEqual([]);
    expect(powershellPipeline.risks).toContainEqual(
      expect.objectContaining({
        kind: "shell-wrapper",
        executable: "pwsh",
        flag: "-Command",
        payload: "Get-ChildItem | Select Name",
      }),
    );

    // Wrappers launched through a carrier: each tuple is [command line, expected carrier name].
    for (const [command, carrier] of [
      ["time bash -lc 'id'", "time"],
      ["nice bash -lc 'id'", "nice"],
      ["timeout 1 bash -lc 'id'", "timeout"],
      ["caffeinate -d -w 42 bash -lc 'id'", "caffeinate"],
    ] as const) {
      const wrapped = await explainShellCommand(command);
      expect(wrapped.risks).toContainEqual(
        expect.objectContaining({
          kind: "shell-wrapper-through-carrier",
          command: carrier,
        }),
      );
      expect(wrapped.nestedCommands).toContainEqual(
        expect.objectContaining({ context: "wrapper-payload", executable: "id" }),
      );
      // The nested span must still point at `id` inside the full carrier command line.
      const wrappedId = wrapped.nestedCommands.find((step) => step.executable === "id");
      expect(wrapped.source.slice(wrappedId?.span.startIndex, wrappedId?.span.endIndex)).toBe("id");
    }
  });

  // The payload contains escaped quotes: the nested step text is the decoded
  // form, while its span must still cover the escaped form in the source.
  it("maps decoded shell-wrapper payload spans back to original source escapes", async () => {
    const explanation = await explainShellCommand('bash -lc "printf \\"hi\\" | wc -c"');
    const { nestedCommands, source } = explanation;

    const printfStep = nestedCommands.find(({ executable }) => executable === "printf");
    const wcStep = nestedCommands.find(({ executable }) => executable === "wc");

    const decodedPrintf = expect.objectContaining({
      context: "wrapper-payload",
      text: 'printf "hi"',
    });
    expect(printfStep).toEqual(decodedPrintf);

    const printfSource = source.slice(printfStep?.span.startIndex, printfStep?.span.endIndex);
    expect(printfSource).toBe('printf \\"hi\\"');

    const wcSource = source.slice(wcStep?.span.startIndex, wcStep?.span.endIndex);
    expect(wcSource).toBe("wc -c");
  });

  // Quoting, backslash escapes and ANSI-C strings that are fully static are
  // decoded before the executable and arguments are classified.
  it("normalizes static shell words before classifying commands", async () => {
    const quoted = await explainShellCommand("e'c'ho a\\ b \"c d\"");
    const quotedEcho = expect.objectContaining({
      executable: "echo",
      argv: ["echo", "a b", "c d"],
    });
    expect(quoted.topLevelCommands).toEqual([quotedEcho]);

    const ansiC = await explainShellCommand("$'ec\\x68o' hi");
    const ansiCEcho = expect.objectContaining({ executable: "echo", argv: ["echo", "hi"] });
    expect(ansiC.topLevelCommands).toEqual([ansiCEcho]);

    // A quoted-up `bash` must still be recognised as a shell wrapper.
    const { risks: wrapperRisks } = await explainShellCommand("b'a'sh -lc 'echo hi'");
    const bashWrapperRisk = expect.objectContaining({
      kind: "shell-wrapper",
      executable: "bash",
      flag: "-lc",
      payload: "echo hi",
    });
    expect(wrapperRisks).toContainEqual(bashWrapperRisk);
  });

  // Words whose value depends on expansion (variables, globs, brace expansion,
  // line continuations inside the executable) must not be resolved to a concrete
  // executable; they are reported as dynamic-executable or dynamic-argument risks.
  it("does not normalize dynamic executable names into trusted commands", async () => {
    // Variable expansion embedded in the middle of the executable word.
    const dynamicPrefix = await explainShellCommand("e${CMD}ho hi");
    expect(dynamicPrefix.topLevelCommands).toEqual([]);
    expect(dynamicPrefix.risks).toContainEqual(
      expect.objectContaining({ kind: "dynamic-executable", text: "e${CMD}ho" }),
    );

    // Executable that is entirely a quoted variable expansion.
    const dynamicQuoted = await explainShellCommand('"${CMD}" hi');
    expect(dynamicQuoted.topLevelCommands).toEqual([]);
    expect(dynamicQuoted.risks).toContainEqual(
      expect.objectContaining({ kind: "dynamic-executable", text: '"${CMD}"' }),
    );

    // Glob in executable position.
    const dynamicGlob = await explainShellCommand("./ec* hi");
    expect(dynamicGlob.topLevelCommands).toEqual([]);
    expect(dynamicGlob.risks).toContainEqual(
      expect.objectContaining({ kind: "dynamic-executable", text: "./ec*" }),
    );

    // Brace expansion in executable position.
    const dynamicBraceExpansion = await explainShellCommand("./{echo,printf} hi");
    expect(dynamicBraceExpansion.topLevelCommands).toEqual([]);
    expect(dynamicBraceExpansion.risks).toContainEqual(
      expect.objectContaining({ kind: "dynamic-executable", text: "./{echo,printf}" }),
    );

    // A glob as an argument keeps the command, with the raw glob text in argv, and adds a dynamic-argument risk.
    const dynamicArgument = await explainShellCommand("echo ./ec*");
    expect(dynamicArgument.topLevelCommands).toEqual([
      expect.objectContaining({ executable: "echo", argv: ["echo", "./ec*"] }),
    ]);
    expect(dynamicArgument.risks).toContainEqual(
      expect.objectContaining({
        kind: "dynamic-argument",
        command: "echo",
        argumentIndex: 1,
        text: "./ec*",
      }),
    );

    // A variable where a shell's flags would be is reported as a dynamic argument of that shell.
    const dynamicShellFlag = await explainShellCommand("bash $FLAGS id");
    expect(dynamicShellFlag.risks).toContainEqual(
      expect.objectContaining({
        kind: "dynamic-argument",
        command: "bash",
        argumentIndex: 1,
        text: "$FLAGS",
      }),
    );

    // A line continuation splitting the executable word yields both a line-continuation
    // and a dynamic-executable risk, and no top-level command.
    const lineContinuation = await explainShellCommand("ec\\\nho hi");
    expect(lineContinuation.topLevelCommands).toEqual([]);
    expect(lineContinuation.risks).toContainEqual(
      expect.objectContaining({ kind: "line-continuation" }),
    );
    expect(lineContinuation.risks).toContainEqual(
      expect.objectContaining({ kind: "dynamic-executable" }),
    );

    // A line continuation between arguments keeps the command intact but is still flagged.
    const continuedArgument = await explainShellCommand("pnpm test \\\n --filter foo");
    expect(continuedArgument.topLevelCommands).toEqual([
      expect.objectContaining({
        executable: "pnpm",
        argv: ["pnpm", "test", "--filter", "foo"],
      }),
    ]);
    expect(continuedArgument.risks).toContainEqual(
      expect.objectContaining({ kind: "line-continuation" }),
    );

    // Unbalanced quoting is a syntax error rather than a normalized command.
    const invalidObfuscation = await explainShellCommand("e'c'h'o hi");
    expect(invalidObfuscation.ok).toBe(false);
    expect(invalidObfuscation.risks).toContainEqual(
      expect.objectContaining({ kind: "syntax-error" }),
    );
  });

  // Commands that run other commands (`find -exec`, `xargs`, `env -S`) are flagged
  // as carriers, and inline eval is still detected behind env/sudo/command.
  it("detects command carriers", async () => {
    const viaFind = await explainShellCommand('find . -name "*.ts" -exec grep -n TODO {} +');
    const findCarrier = expect.objectContaining({
      kind: "command-carrier",
      command: "find",
      flag: "-exec",
    });
    expect(viaFind.risks).toContainEqual(findCarrier);

    const viaXargs = await explainShellCommand('printf "%s\\n" a b | xargs -I{} sh -c "echo {}"');
    const xargsCarrier = expect.objectContaining({ kind: "command-carrier", command: "xargs" });
    expect(viaXargs.risks).toContainEqual(xargsCarrier);

    const viaEnvSplit = await explainShellCommand("env -S 'sh -c \"id\"'");
    const envCarrier = expect.objectContaining({
      kind: "command-carrier",
      command: "env",
      flag: "-S",
    });
    expect(viaEnvSplit.risks).toContainEqual(envCarrier);

    const prefixedPythonCommands = [
      'env python -c "print(1)"',
      'sudo python -c "print(1)"',
      'command python -c "print(1)"',
    ];
    const pythonInlineEval = expect.objectContaining({
      kind: "inline-eval",
      command: "python",
      flag: "-c",
    });
    for (const command of prefixedPythonCommands) {
      const { risks } = await explainShellCommand(command);
      expect(risks).toContainEqual(pythonInlineEval);
    }
  });

  // Risk kinds for eval, `.` sourcing and alias definitions, plus shell wrappers
  // that are started through a carrier such as sudo or command.
  it("detects eval, source, aliases, and carrier shell wrappers", async () => {
    // Both a direct eval and one routed through `builtin` must be reported.
    for (const command of ['eval "$OPENCLAW_CMD"', "builtin eval 'echo hi'"]) {
      const { risks } = await explainShellCommand(command);
      expect(risks).toContainEqual(expect.objectContaining({ kind: "eval" }));
    }

    const sourced = await explainShellCommand(". ./some-script.sh");
    const sourceRisk = expect.objectContaining({ kind: "source", command: "." });
    expect(sourced.risks).toContainEqual(sourceRisk);

    const aliased = await explainShellCommand("alias ll='ls -l'");
    const aliasRisk = expect.objectContaining({ kind: "alias" });
    expect(aliased.risks).toContainEqual(aliasRisk);

    // Each tuple is [command line, carrier expected on the risk].
    const carrierCases = [
      ['sudo sh -c "id && whoami"', "sudo"],
      ["command bash -lc 'id && whoami'", "command"],
      ['sudo bash -euxc "id && whoami"', "sudo"],
    ] as const;
    for (const [command, carrier] of carrierCases) {
      const { risks } = await explainShellCommand(command);
      expect(risks).toContainEqual(
        expect.objectContaining({ kind: "shell-wrapper-through-carrier", command: carrier }),
      );
    }
  });

  // A function named like a real command: its body is nested context, the later
  // call stays top-level, and the definition itself is reported as a risk.
  it("treats function bodies as nested command context", async () => {
    const { topLevelCommands, nestedCommands, risks } = await explainShellCommand(
      "ls() { echo hi; }; ls /tmp",
    );

    const topLevelLs = expect.objectContaining({
      context: "top-level",
      executable: "ls",
      argv: ["ls", "/tmp"],
    });
    expect(topLevelCommands).toEqual([topLevelLs]);

    const bodyEcho = expect.objectContaining({
      context: "function-definition",
      executable: "echo",
    });
    expect(nestedCommands).toEqual([bodyEcho]);

    const definitionRisk = expect.objectContaining({ kind: "function-definition", name: "ls" });
    expect(risks).toContainEqual(definitionRisk);
  });

  // A `;` that is only data (quoted, or inside a heredoc body) must not create a sequence shape.
  it("does not treat literal operator text as command shapes", async () => {
    const { shapes: quotedShapes } = await explainShellCommand('echo ";"');
    expect(quotedShapes).not.toContain("sequence");

    const { shapes: heredocShapes } = await explainShellCommand("cat <<EOF\n;\nEOF");
    expect(heredocShapes).not.toContain("sequence");
  });

  // Output redirects, heredocs and here-strings each surface as their own risk kind.
  it("marks redirects heredocs and here-strings as risks", async () => {
    const redirected = await explainShellCommand("echo hi > out.txt");
    const redirectRisks = redirected.risks.filter(({ kind }) => kind === "redirect");
    // Exactly one redirect risk, carrying the operator together with its target.
    expect(redirectRisks).toEqual([expect.objectContaining({ text: "> out.txt" })]);

    // Each tuple is [command line, risk kind expected for it].
    const inputCases = [
      ["cat <<EOF\nhello\nEOF", "heredoc"],
      ['cat <<< "hello"', "here-string"],
    ] as const;
    for (const [command, kind] of inputCases) {
      const { risks } = await explainShellCommand(command);
      expect(risks).toContainEqual(expect.objectContaining({ kind }));
    }
  });

  // An unterminated quote makes the explanation not ok and yields a syntax-error
  // risk that carries numeric span offsets.
  it("reports syntax errors with source spans", async () => {
    const { ok, risks } = await explainShellCommand("echo 'unterminated");

    expect(ok).toBe(false);

    const numericSpan = expect.objectContaining({
      startIndex: expect.any(Number),
      endIndex: expect.any(Number),
    });
    const syntaxErrorRisk = expect.objectContaining({ kind: "syntax-error", span: numericSpan });
    expect(risks).toContainEqual(syntaxErrorRisk);
  });

  // Replays a small corpus several times and requires every pass to produce the
  // same extraction summary as the first one. Checking only that each result is
  // non-empty would not reveal state leaking from one parse into the next.
  it("parses and extracts a repeated approval-sized corpus without parser state leakage", async () => {
    const corpus = [
      'ls | grep "stuff" | python -c \'print("hi")\'',
      "echo $(whoami)",
      "diff <(ls a) <(ls b)",
      'find . -name "*.ts" -exec grep -n TODO {} +',
      'bash -lc "echo hi | wc -c"',
    ];
    const iterations = 10;
    // One entry per pass; each pass holds one summary per corpus command, in corpus order.
    const passes: unknown[][] = [];
    for (let index = 0; index < iterations; index += 1) {
      const pass: unknown[] = [];
      // Commands are awaited one at a time so every pass parses them in the same order.
      for (const command of corpus) {
        const explanation = await explainShellCommand(command);
        expect(explanation.risks.length + explanation.topLevelCommands.length).toBeGreaterThan(0);
        pass.push({
          command,
          ok: explanation.ok,
          shapes: explanation.shapes,
          topLevel: explanation.topLevelCommands.map((step) => step.executable),
          nested: explanation.nestedCommands.map((step) => `${step.context}:${step.executable}`),
          riskKinds: explanation.risks.map((risk) => risk.kind),
        });
      }
      passes.push(pass);
    }

    // Any divergence between passes means an earlier parse influenced a later one.
    const [firstPass, ...laterPasses] = passes;
    for (const pass of laterPasses) {
      expect(pass).toEqual(firstPass);
    }
  });
});