diff --git a/CHANGELOG.md b/CHANGELOG.md index 79590fdadf8..255a89dd550 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai - Zalo/webhook: scope replay-dedupe cache key to path and account using `JSON.stringify` so multi-account deployments do not silently drop events due to cross-account cache poisoning. (#59387) Thanks @pgondhi987. - Plugins/Google: separate OAuth CSRF state from PKCE code verifier during Gemini browser sign-in so state validation and token exchange use independent values. (#59116) Thanks @eleqtrizit. - Exec/Windows: reject malformed drive-less rooted executable paths like `:\Users\...` so approval and allowlist candidate resolution no longer treat them as cwd-relative commands. (#58040) Thanks @SnowSky1. +- Exec/preflight: fail closed on complex interpreter invocations that would otherwise skip script-content validation, and correctly inspect quoted script paths before host execution. Thanks @pgondhi987. ## 2026.4.2-beta.1 diff --git a/extensions/discord/src/components.ts b/extensions/discord/src/components.ts index 552649d754b..18f0c951427 100644 --- a/extensions/discord/src/components.ts +++ b/extensions/discord/src/components.ts @@ -28,7 +28,6 @@ import { buildDiscordModalCustomId as buildDiscordModalCustomIdImpl, parseDiscordModalCustomIdForCarbon as parseDiscordModalCustomIdForCarbonImpl, } from "./component-custom-id.js"; - // Some test-only module graphs partially mock `@buape/carbon` and can drop `Modal`. // Keep dynamic form definitions loadable instead of crashing unrelated suites. const ModalBase: typeof Modal = (Modal ?? class {}) as typeof Modal; diff --git a/extensions/discord/src/monitor/agent-components.wildcard.test.ts b/extensions/discord/src/monitor/agent-components.wildcard.test.ts index 04d484cc022..575389241b6 100644 --- a/extensions/discord/src/monitor/agent-components.wildcard.test.ts +++ b/extensions/discord/src/monitor/agent-components.wildcard.test.ts @@ -1,7 +1,7 @@ -import { beforeAll, describe, expect, it, vi } from "vitest"; +import { beforeAll, describe, expect, it } from "vitest"; -let buildDiscordComponentCustomId: typeof import("../components.js").buildDiscordComponentCustomId; -let buildDiscordModalCustomId: typeof import("../components.js").buildDiscordModalCustomId; +let buildDiscordComponentCustomId: typeof import("../component-custom-id.js").buildDiscordComponentCustomId; +let buildDiscordModalCustomId: typeof import("../component-custom-id.js").buildDiscordModalCustomId; let createDiscordComponentButton: typeof import("./agent-components.js").createDiscordComponentButton; let createDiscordComponentChannelSelect: typeof import("./agent-components.js").createDiscordComponentChannelSelect; let createDiscordComponentMentionableSelect: typeof import("./agent-components.js").createDiscordComponentMentionableSelect; @@ -11,7 +11,8 @@ let createDiscordComponentStringSelect: typeof import("./agent-components.js").c let createDiscordComponentUserSelect: typeof import("./agent-components.js").createDiscordComponentUserSelect; beforeAll(async () => { - ({ buildDiscordComponentCustomId, buildDiscordModalCustomId } = await import("../components.js")); + ({ buildDiscordComponentCustomId, buildDiscordModalCustomId } = + await import("../component-custom-id.js")); ({ createDiscordComponentButton, createDiscordComponentChannelSelect, diff --git a/src/agents/bash-tools.exec.script-preflight.test.ts b/src/agents/bash-tools.exec.script-preflight.test.ts index c5544887ad9..86bf7d33c28 100644 --- a/src/agents/bash-tools.exec.script-preflight.test.ts +++ b/src/agents/bash-tools.exec.script-preflight.test.ts @@ -7,6 +7,7 @@ import { createExecTool } from "./bash-tools.exec.js"; const isWin = process.platform === "win32"; const describeNonWin = isWin ? describe.skip : describe; +const describeWin = isWin ? describe : describe.skip; describeNonWin("exec script preflight", () => { it("blocks shell env var injection tokens in python scripts before execution", async () => { @@ -58,18 +59,194 @@ describeNonWin("exec script preflight", () => { }); }); - it("skips preflight when script token is quoted and unresolved by fast parser", async () => { + it("blocks shell env var injection when script path is quoted", async () => { await withTempDir("openclaw-exec-preflight-", async (tmp) => { const jsPath = path.join(tmp, "bad.js"); await fs.writeFile(jsPath, "const value = $DM_JSON;", "utf-8"); const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); - const result = await tool.execute("call-quoted", { - command: 'node "bad.js"', - workdir: tmp, - }); - const text = result.content.find((block) => block.type === "text")?.text ?? ""; - expect(text).not.toMatch(/exec preflight:/); + await expect( + tool.execute("call-quoted", { + command: 'node "bad.js"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates python scripts when interpreter is prefixed with env", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-env-python", { + command: "env python bad.py", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates python scripts when interpreter is prefixed with path-qualified env", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-abs-env-python", { + command: "/usr/bin/env python bad.py", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates node scripts when interpreter is prefixed with env", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const jsPath = path.join(tmp, "bad.js"); + await fs.writeFile(jsPath, "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-env-node", { + command: "env node bad.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates the first positional python script operand when extra args follow", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.py"), "payload = $DM_JSON", "utf-8"); + await fs.writeFile(path.join(tmp, "ghost.py"), "print('ok')", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-python-first-script", { + command: "python bad.py ghost.py", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates python script operand even when trailing option values look like scripts", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "script.py"), "payload = $DM_JSON", "utf-8"); + await fs.writeFile(path.join(tmp, "out.py"), "print('ok')", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-python-trailing-option-value", { + command: "python script.py --output out.py", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates the first positional node script operand when extra args follow", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "app.js"), "const value = $DM_JSON;", "utf-8"); + await fs.writeFile(path.join(tmp, "config.js"), "console.log('ok')", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-first-script", { + command: "node app.js config.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("still resolves node script when --require consumes a preceding .js option value", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bootstrap.js"), "console.log('bootstrap')", "utf-8"); + await fs.writeFile(path.join(tmp, "app.js"), "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-require-script", { + command: "node --require bootstrap.js app.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates node --require preload modules before a benign entry script", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad-preload.js"), "const value = $DM_JSON;", "utf-8"); + await fs.writeFile(path.join(tmp, "app.js"), "console.log('ok')", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-preload-before-entry", { + command: "node --require bad-preload.js app.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates node --require preload modules when no entry script is provided", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.js"), "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-require-only", { + command: "node --require bad.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates node --import preload modules when no entry script is provided", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.js"), "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-import-only", { + command: "node --import bad.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates node --require preload modules even when -e is present", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.js"), "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-require-with-eval", { + command: 'node --require bad.js -e "console.log(123)"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("validates node --import preload modules even when -e is present", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.js"), "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-node-import-with-eval", { + command: 'node --import bad.js -e "console.log(123)"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); }); }); @@ -90,4 +267,519 @@ describeNonWin("exec script preflight", () => { expect(text).not.toMatch(/exec preflight:/); }); }); + + it("fails closed for piped interpreter commands that bypass direct script parsing", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-pipe", { + command: "cat bad.py | python", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for top-level interpreter invocations inside shell control-flow", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-top-level-control-flow", { + command: "if true; then python bad.py; fi", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for multiline top-level control-flow interpreter invocations", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-top-level-control-flow-multiline", { + command: "if true; then\npython bad.py\nfi", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations with quoted script paths", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-quoted-script", { + command: `bash -c "python '${path.basename(pyPath)}'"`, + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for top-level control-flow with quoted interpreter script paths", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-top-level-control-flow-quoted-script", { + command: 'if true; then python "bad.py"; fi', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap", { + command: 'bash -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("does not fail closed for shell-wrapped payloads that only echo interpreter words", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-shell-wrap-echo-text", { + command: 'bash -c "echo python"', + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("python"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations inside control-flow payloads", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-control-flow", { + command: 'bash -c "if true; then python bad.py; fi"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for env-prefixed shell-wrapped interpreter invocations", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-env-shell-wrap", { + command: 'env bash -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations via absolute shell paths", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-abs-path", { + command: '/bin/bash -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations when long options take separate values", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + await fs.writeFile(path.join(tmp, "shell.rc"), "# rc", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-long-option-value", { + command: 'bash --rcfile shell.rc -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations with leading long options", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-long-options", { + command: 'bash --noprofile --norc -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations with combined shell flags", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-combined", { + command: 'bash -xc "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations when -O consumes a separate value", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-short-option-O-value", { + command: 'bash -O extglob -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations when -o consumes a separate value", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-short-option-o-value", { + command: 'bash -o errexit -c "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for shell-wrapped interpreter invocations when -c is not the trailing short flag", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-shell-wrap-short-flags", { + command: 'bash -ceu "python bad.py"', + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("fails closed for process-substitution interpreter invocations", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const pyPath = path.join(tmp, "bad.py"); + await fs.writeFile(pyPath, "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + await expect( + tool.execute("call-process-substitution", { + command: "python <(cat bad.py)", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: complex interpreter invocation detected/); + }); + }); + + it("allows direct inline interpreter commands with no script file hint", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-inline", { + command: 'node -e "console.log(123)"', + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("123"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed when interpreter and script hints only appear in echoed text", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-echo-text", { + command: "echo 'python bad.py | python'", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("python bad.py | python"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed when shell keyword-like text appears only as echo arguments", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-echo-keyword-like-text", { + command: "echo time python bad.py; cat", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("time python bad.py"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed for pipelines that only contain interpreter words as plain text", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-echo-pipe-text", { + command: "echo python | cat", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("python"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed for non-executing pipelines that only print interpreter words", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-printf-pipe-text", { + command: "printf node | wc -c", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("4"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed when script-like text is in a separate command segment", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-separate-script-hint-segment", { + command: "echo bad.py; python --version", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("bad.py"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed when script hints appear outside the interpreter segment with &&", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "sample.py"), "print('ok')", "utf-8"); + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-interpreter-version-and-list", { + command: "node --version && ls *.py", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("sample.py"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed for piped interpreter version commands with script-like upstream text", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-piped-interpreter-version", { + command: "echo bad.py | node --version", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toMatch(/v\d+/); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed for piped node -c syntax-check commands with script-like upstream text", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + await fs.writeFile(path.join(tmp, "ok.js"), "console.log('ok')", "utf-8"); + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-piped-node-check", { + command: "echo bad.py | node -c ok.js", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed for piped node -e commands when inline code contains script-like text", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-piped-node-e-inline-script-hint", { + command: "node -e \"console.log('bad.py')\" | cat", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("bad.py"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed when shell operator characters are escaped", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-echo-escaped-operator", { + command: "echo python bad.py \\| node", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("python bad.py | node"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed when escaped semicolons appear with interpreter hints", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-echo-escaped-semicolon", { + command: "echo python bad.py \\; node", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("python bad.py ; node"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); + + it("does not fail closed for node -e when .py appears inside quoted inline code", async () => { + await withTempDir("openclaw-exec-preflight-", async (tmp) => { + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + + const result = await tool.execute("call-inline-script-hint", { + command: "node -e \"console.log('bad.py')\"", + workdir: tmp, + }); + const text = result.content.find((block) => block.type === "text")?.text ?? ""; + expect(text).toContain("bad.py"); + expect(text).not.toMatch(/exec preflight:/); + }); + }); +}); + +describeWin("exec script preflight on windows path syntax", () => { + it("preserves windows-style python relative path separators during script extraction", async () => { + await withTempDir("openclaw-exec-preflight-win-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.py"), "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-win-python-relative", { + command: "python .\\bad.py", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("preserves windows-style node relative path separators during script extraction", async () => { + await withTempDir("openclaw-exec-preflight-win-", async (tmp) => { + await fs.writeFile(path.join(tmp, "bad.js"), "const value = $DM_JSON;", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-win-node-relative", { + command: "node .\\bad.js", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("preserves windows-style python absolute drive paths during script extraction", async () => { + await withTempDir("openclaw-exec-preflight-win-", async (tmp) => { + const absPath = path.join(tmp, "bad.py"); + await fs.writeFile(absPath, "payload = $DM_JSON", "utf-8"); + const winAbsPath = absPath.replaceAll("/", "\\"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-win-python-absolute", { + command: `python "${winAbsPath}"`, + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); + + it("preserves windows-style nested relative path separators during script extraction", async () => { + await withTempDir("openclaw-exec-preflight-win-", async (tmp) => { + await fs.mkdir(path.join(tmp, "subdir"), { recursive: true }); + await fs.writeFile(path.join(tmp, "subdir", "bad.py"), "payload = $DM_JSON", "utf-8"); + + const tool = createExecTool({ host: "gateway", security: "full", ask: "off" }); + await expect( + tool.execute("call-win-python-subdir-relative", { + command: "python subdir\\bad.py", + workdir: tmp, + }), + ).rejects.toThrow(/exec preflight: detected likely shell variable injection \(\$DM_JSON\)/); + }); + }); }); diff --git a/src/agents/bash-tools.exec.ts b/src/agents/bash-tools.exec.ts index 605b5273f24..d3e99a16b3a 100644 --- a/src/agents/bash-tools.exec.ts +++ b/src/agents/bash-tools.exec.ts @@ -86,99 +86,765 @@ function buildExecForegroundResult(params: { }); } +const PREFLIGHT_ENV_OPTIONS_WITH_VALUES = new Set([ + "-C", + "-S", + "-u", + "--argv0", + "--block-signal", + "--chdir", + "--default-signal", + "--ignore-signal", + "--split-string", + "--unset", +]); + +function isShellEnvAssignmentToken(token: string): boolean { + return /^[A-Za-z_][A-Za-z0-9_]*=.*$/u.test(token); +} + +function isEnvExecutableToken(token: string | undefined): boolean { + if (!token) { + return false; + } + const base = token.split(/[\\/]/u).at(-1)?.toLowerCase() ?? ""; + const normalizedBase = base.endsWith(".exe") ? base.slice(0, -4) : base; + return normalizedBase === "env"; +} + +function stripPreflightEnvPrefix(argv: string[]): string[] { + if (argv.length === 0) { + return argv; + } + let idx = 0; + while (idx < argv.length && isShellEnvAssignmentToken(argv[idx])) { + idx += 1; + } + if (!isEnvExecutableToken(argv[idx])) { + return argv; + } + idx += 1; + while (idx < argv.length) { + const token = argv[idx]; + if (token === "--") { + idx += 1; + break; + } + if (isShellEnvAssignmentToken(token)) { + idx += 1; + continue; + } + if (!token.startsWith("-") || token === "-") { + break; + } + idx += 1; + const option = token.split("=", 1)[0]; + if ( + PREFLIGHT_ENV_OPTIONS_WITH_VALUES.has(option) && + !token.includes("=") && + idx < argv.length + ) { + idx += 1; + } + } + return argv.slice(idx); +} + function extractScriptTargetFromCommand( command: string, -): { kind: "python"; relOrAbsPath: string } | { kind: "node"; relOrAbsPath: string } | null { +): { kind: "python"; relOrAbsPaths: string[] } | { kind: "node"; relOrAbsPaths: string[] } | null { const raw = command.trim(); - if (!raw) { + const splitShellArgsPreservingBackslashes = (value: string): string[] | null => { + const tokens: string[] = []; + let buf = ""; + let inSingle = false; + let inDouble = false; + + const pushToken = () => { + if (buf.length > 0) { + tokens.push(buf); + buf = ""; + } + }; + + for (let i = 0; i < value.length; i += 1) { + const ch = value[i]; + if (inSingle) { + if (ch === "'") { + inSingle = false; + } else { + buf += ch; + } + continue; + } + if (inDouble) { + if (ch === '"') { + inDouble = false; + } else { + buf += ch; + } + continue; + } + if (ch === "'") { + inSingle = true; + continue; + } + if (ch === '"') { + inDouble = true; + continue; + } + if (/\s/.test(ch)) { + pushToken(); + continue; + } + buf += ch; + } + + if (inSingle || inDouble) { + return null; + } + pushToken(); + return tokens; + }; + const shouldUseWindowsPathTokenizer = + process.platform === "win32" && + /(?:^|[\s"'`])(?:[A-Za-z]:\\|\\\\|[^\s"'`|&;()<>]+\\[^\s"'`|&;()<>]+)/.test(raw); + const candidateArgv = shouldUseWindowsPathTokenizer + ? [splitShellArgsPreservingBackslashes(raw)] + : [splitShellArgs(raw)]; + + const findFirstPythonScriptArg = (tokens: string[]): string | null => { + const optionsWithSeparateValue = new Set(["-W", "-X", "-Q", "--check-hash-based-pycs"]); + for (let i = 0; i < tokens.length; i += 1) { + const token = tokens[i]; + if (token === "--") { + const next = tokens[i + 1]; + return next?.toLowerCase().endsWith(".py") ? next : null; + } + if (token === "-") { + return null; + } + if (token === "-c" || token === "-m") { + return null; + } + if ((token.startsWith("-c") || token.startsWith("-m")) && token.length > 2) { + return null; + } + if (optionsWithSeparateValue.has(token)) { + i += 1; + continue; + } + if (token.startsWith("-")) { + continue; + } + return token.toLowerCase().endsWith(".py") ? token : null; + } + return null; + }; + const findNodeScriptArgs = (tokens: string[]): string[] => { + const optionsWithSeparateValue = new Set(["-r", "--require", "--import"]); + const preloadScripts: string[] = []; + let entryScript: string | null = null; + let hasInlineEvalOrPrint = false; + for (let i = 0; i < tokens.length; i += 1) { + const token = tokens[i]; + if (token === "--") { + if (!hasInlineEvalOrPrint && !entryScript) { + const next = tokens[i + 1]; + if (next?.toLowerCase().endsWith(".js")) { + entryScript = next; + } + } + break; + } + if ( + token === "-e" || + token === "-p" || + token === "--eval" || + token === "--print" || + token.startsWith("--eval=") || + token.startsWith("--print=") || + ((token.startsWith("-e") || token.startsWith("-p")) && token.length > 2) + ) { + hasInlineEvalOrPrint = true; + if (token === "-e" || token === "-p" || token === "--eval" || token === "--print") { + i += 1; + } + continue; + } + if (optionsWithSeparateValue.has(token)) { + const next = tokens[i + 1]; + if (next?.toLowerCase().endsWith(".js")) { + preloadScripts.push(next); + } + i += 1; + continue; + } + if ( + (token.startsWith("-r") && token.length > 2) || + token.startsWith("--require=") || + token.startsWith("--import=") + ) { + const inlineValue = token.startsWith("-r") + ? token.slice(2) + : token.slice(token.indexOf("=") + 1); + if (inlineValue.toLowerCase().endsWith(".js")) { + preloadScripts.push(inlineValue); + } + continue; + } + if (token.startsWith("-")) { + continue; + } + if (!hasInlineEvalOrPrint && !entryScript && token.toLowerCase().endsWith(".js")) { + entryScript = token; + } + break; + } + const targets = [...preloadScripts]; + if (entryScript) { + targets.push(entryScript); + } + return targets; + }; + const extractTargetFromArgv = ( + argv: string[] | null, + ): + | { kind: "python"; relOrAbsPaths: string[] } + | { kind: "node"; relOrAbsPaths: string[] } + | null => { + if (!argv || argv.length === 0) { + return null; + } + let commandIdx = 0; + while (commandIdx < argv.length && /^[A-Za-z_][A-Za-z0-9_]*=.*$/u.test(argv[commandIdx])) { + commandIdx += 1; + } + const executable = argv[commandIdx]?.toLowerCase(); + if (!executable) { + return null; + } + const args = argv.slice(commandIdx + 1); + if (/^python(?:3(?:\.\d+)?)?$/i.test(executable)) { + const script = findFirstPythonScriptArg(args); + if (script) { + return { kind: "python", relOrAbsPaths: [script] }; + } + return null; + } + if (executable === "node") { + const scripts = findNodeScriptArgs(args); + if (scripts.length > 0) { + return { kind: "node", relOrAbsPaths: scripts }; + } + return null; + } + return null; + }; + + for (const argv of candidateArgv) { + const attempts = [argv, argv ? stripPreflightEnvPrefix(argv) : null]; + for (const attempt of attempts) { + const target = extractTargetFromArgv(attempt); + if (target) { + return target; + } + } + } + return null; +} + +function extractUnquotedShellText(raw: string): string | null { + let out = ""; + let inSingle = false; + let inDouble = false; + let escaped = false; + + for (let i = 0; i < raw.length; i += 1) { + const ch = raw[i]; + if (escaped) { + if (!inSingle && !inDouble) { + // Preserve escapes outside quotes so downstream heuristics can distinguish + // escaped literals (e.g. `\|`) from executable shell operators. + out += `\\${ch}`; + } + escaped = false; + continue; + } + if (!inSingle && ch === "\\") { + escaped = true; + continue; + } + if (inSingle) { + if (ch === "'") { + inSingle = false; + } + continue; + } + if (inDouble) { + const next = raw[i + 1]; + if (ch === "\\" && next && /[\\'"$`\n\r]/.test(next)) { + i += 1; + continue; + } + if (ch === '"') { + inDouble = false; + } + continue; + } + if (ch === "'") { + inSingle = true; + continue; + } + if (ch === '"') { + inDouble = true; + continue; + } + out += ch; + } + + if (escaped || inSingle || inDouble) { return null; } + return out; +} - // Intentionally simple parsing: we only support common forms like - // python file.py - // python3 -u file.py - // node --experimental-something file.js - // If the command is more complex (pipes, heredocs, quoted paths with spaces), skip preflight. - const pythonMatch = raw.match(/^\s*(python3?|python)\s+(?:-[^\s]+\s+)*([^\s]+\.py)\b/i); - if (pythonMatch?.[2]) { - return { kind: "python", relOrAbsPath: pythonMatch[2] }; - } - const nodeMatch = raw.match(/^\s*(node)\s+(?:--[^\s]+\s+)*([^\s]+\.js)\b/i); - if (nodeMatch?.[2]) { - return { kind: "node", relOrAbsPath: nodeMatch[2] }; - } +function analyzeInterpreterHeuristicsFromUnquoted(raw: string): { + hasPython: boolean; + hasNode: boolean; + hasComplexSyntax: boolean; + hasProcessSubstitution: boolean; + hasScriptHint: boolean; +} { + const hasPython = + /(?:^|\s|(?\n\r`$])/i.test( + raw, + ); + const hasNode = + /(?:^|\s|(?\n\r`$])/i.test( + raw, + ); + const hasProcessSubstitution = /(?\(/u.test(raw); + const hasComplexSyntax = + /(?])[^"'`\s|&;()<>]+\.(?:py|js)(?=$|[\s|&;()<>])/i.test(raw); + return { hasPython, hasNode, hasComplexSyntax, hasProcessSubstitution, hasScriptHint }; +} + +function extractShellWrappedCommandPayload( + executable: string | undefined, + args: string[], +): string | null { + if (!executable) { + return null; + } + const executableBase = executable.split(/[\\/]/u).at(-1)?.toLowerCase() ?? ""; + const normalizedExecutable = executableBase.endsWith(".exe") + ? executableBase.slice(0, -4) + : executableBase; + if (!/^(?:bash|dash|fish|ksh|sh|zsh)$/i.test(normalizedExecutable)) { + return null; + } + const shortOptionsWithSeparateValue = new Set(["-O", "-o"]); + for (let i = 0; i < args.length; i += 1) { + const arg = args[i]; + if (arg === "--") { + return null; + } + if (arg === "-c") { + return args[i + 1] ?? null; + } + if (/^-[A-Za-z]+$/u.test(arg)) { + if (arg.includes("c")) { + return args[i + 1] ?? null; + } + if (shortOptionsWithSeparateValue.has(arg)) { + i += 1; + } + continue; + } + if (/^--[A-Za-z0-9][A-Za-z0-9-]*(?:=.*)?$/u.test(arg)) { + if (!arg.includes("=")) { + const next = args[i + 1]; + if (next && next !== "--" && !next.startsWith("-")) { + i += 1; + } + } + continue; + } + return null; + } return null; } +function shouldFailClosedInterpreterPreflight(command: string): { + hasInterpreterInvocation: boolean; + hasComplexSyntax: boolean; + hasProcessSubstitution: boolean; + hasInterpreterSegmentScriptHint: boolean; + hasInterpreterPipelineScriptHint: boolean; + isDirectInterpreterCommand: boolean; +} { + const raw = command.trim(); + const rawArgv = splitShellArgs(raw); + const argv = rawArgv ? stripPreflightEnvPrefix(rawArgv) : null; + let commandIdx = 0; + while ( + argv && + commandIdx < argv.length && + /^[A-Za-z_][A-Za-z0-9_]*=.*$/u.test(argv[commandIdx]) + ) { + commandIdx += 1; + } + const directExecutable = argv?.[commandIdx]?.toLowerCase(); + const args = argv ? argv.slice(commandIdx + 1) : []; + + const isDirectPythonExecutable = Boolean( + directExecutable && /^python(?:3(?:\.\d+)?)?$/i.test(directExecutable), + ); + const isDirectNodeExecutable = directExecutable === "node"; + const isDirectInterpreterCommand = isDirectPythonExecutable || isDirectNodeExecutable; + + const unquotedRaw = extractUnquotedShellText(raw) ?? raw; + const topLevel = analyzeInterpreterHeuristicsFromUnquoted(unquotedRaw); + + const shellWrappedPayload = extractShellWrappedCommandPayload(directExecutable, args); + const nestedUnquoted = shellWrappedPayload + ? (extractUnquotedShellText(shellWrappedPayload) ?? shellWrappedPayload) + : ""; + const nested = shellWrappedPayload + ? analyzeInterpreterHeuristicsFromUnquoted(nestedUnquoted) + : { + hasPython: false, + hasNode: false, + hasComplexSyntax: false, + hasProcessSubstitution: false, + hasScriptHint: false, + }; + const splitShellSegmentsOutsideQuotes = ( + rawText: string, + params: { splitPipes: boolean }, + ): string[] => { + const segments: string[] = []; + let buf = ""; + let inSingle = false; + let inDouble = false; + let escaped = false; + + const pushSegment = () => { + if (buf.trim().length > 0) { + segments.push(buf); + } + buf = ""; + }; + + for (let i = 0; i < rawText.length; i += 1) { + const ch = rawText[i]; + const next = rawText[i + 1]; + + if (escaped) { + buf += ch; + escaped = false; + continue; + } + + if (!inSingle && ch === "\\") { + buf += ch; + escaped = true; + continue; + } + + if (inSingle) { + buf += ch; + if (ch === "'") { + inSingle = false; + } + continue; + } + + if (inDouble) { + buf += ch; + if (ch === '"') { + inDouble = false; + } + continue; + } + + if (ch === "'") { + inSingle = true; + buf += ch; + continue; + } + + if (ch === '"') { + inDouble = true; + buf += ch; + continue; + } + + if (ch === "\n" || ch === "\r") { + pushSegment(); + continue; + } + if (ch === ";") { + pushSegment(); + continue; + } + if (ch === "&" && next === "&") { + pushSegment(); + i += 1; + continue; + } + if (ch === "|" && next === "|") { + pushSegment(); + i += 1; + continue; + } + if (params.splitPipes && ch === "|") { + pushSegment(); + continue; + } + + buf += ch; + } + pushSegment(); + return segments; + }; + const hasInterpreterInvocationInSegment = (rawSegment: string): boolean => { + const segment = extractUnquotedShellText(rawSegment) ?? rawSegment; + return /^\s*(?:(?:if|then|do|elif|else|while|until|time)\s+)?(?:[A-Za-z_][A-Za-z0-9_]*=.*\s+)*(?:python(?:3(?:\.\d+)?)?|node)(?=$|[\s|&;()<>\n\r`$])/i.test( + segment, + ); + }; + const isScriptExecutingInterpreterCommand = (rawCommand: string): boolean => { + const argv = splitShellArgs(rawCommand.trim()); + if (!argv || argv.length === 0) { + return false; + } + const withoutLeadingKeyword = /^(?:if|then|do|elif|else|while|until|time)$/i.test(argv[0] ?? "") + ? argv.slice(1) + : argv; + if (withoutLeadingKeyword.length === 0) { + return false; + } + const normalizedArgv = stripPreflightEnvPrefix(withoutLeadingKeyword); + let commandIdx = 0; + while ( + commandIdx < normalizedArgv.length && + /^[A-Za-z_][A-Za-z0-9_]*=.*$/u.test(normalizedArgv[commandIdx] ?? "") + ) { + commandIdx += 1; + } + const executable = normalizedArgv[commandIdx]?.toLowerCase(); + if (!executable) { + return false; + } + const args = normalizedArgv.slice(commandIdx + 1); + + if (/^python(?:3(?:\.\d+)?)?$/i.test(executable)) { + const pythonInfoOnlyFlags = new Set(["-V", "--version", "-h", "--help"]); + if (args.some((arg) => pythonInfoOnlyFlags.has(arg))) { + return false; + } + if ( + args.some( + (arg) => + arg === "-c" || + arg === "-m" || + arg.startsWith("-c") || + arg.startsWith("-m") || + arg === "--check-hash-based-pycs", + ) + ) { + return false; + } + return true; + } + + if (executable === "node") { + const nodeInfoOnlyFlags = new Set(["-v", "--version", "-h", "--help", "-c", "--check"]); + if (args.some((arg) => nodeInfoOnlyFlags.has(arg))) { + return false; + } + if ( + args.some( + (arg) => + arg === "-e" || + arg === "-p" || + arg === "--eval" || + arg === "--print" || + arg.startsWith("--eval=") || + arg.startsWith("--print=") || + ((arg.startsWith("-e") || arg.startsWith("-p")) && arg.length > 2), + ) + ) { + return false; + } + return true; + } + + return false; + }; + const hasScriptHintInSegment = (segment: string): boolean => + /(?:^|[\s()<>])(?:"[^"\n\r`|&;()<>]*\.(?:py|js)"|'[^'\n\r`|&;()<>]*\.(?:py|js)'|[^"'`\s|&;()<>]+\.(?:py|js))(?=$|[\s()<>])/i.test( + segment, + ); + const hasInterpreterAndScriptHintInSameSegment = (rawText: string): boolean => { + const segments = splitShellSegmentsOutsideQuotes(rawText, { splitPipes: true }); + return segments.some((segment) => { + if (!isScriptExecutingInterpreterCommand(segment)) { + return false; + } + return hasScriptHintInSegment(segment); + }); + }; + const hasInterpreterPipelineScriptHintInSameSegment = (rawText: string): boolean => { + const commandSegments = splitShellSegmentsOutsideQuotes(rawText, { splitPipes: false }); + return commandSegments.some((segment) => { + const pipelineCommands = splitShellSegmentsOutsideQuotes(segment, { splitPipes: true }); + const hasScriptExecutingPipedInterpreter = pipelineCommands + .slice(1) + .some((pipelineCommand) => isScriptExecutingInterpreterCommand(pipelineCommand)); + if (!hasScriptExecutingPipedInterpreter) { + return false; + } + return hasScriptHintInSegment(segment); + }); + }; + const hasInterpreterSegmentScriptHint = + hasInterpreterAndScriptHintInSameSegment(raw) || + (shellWrappedPayload !== null && hasInterpreterAndScriptHintInSameSegment(shellWrappedPayload)); + const hasInterpreterPipelineScriptHint = + hasInterpreterPipelineScriptHintInSameSegment(raw) || + (shellWrappedPayload !== null && + hasInterpreterPipelineScriptHintInSameSegment(shellWrappedPayload)); + const hasShellWrappedInterpreterSegmentScriptHint = + shellWrappedPayload !== null && hasInterpreterAndScriptHintInSameSegment(shellWrappedPayload); + const hasShellWrappedInterpreterInvocation = + (nested.hasPython || nested.hasNode) && + (hasShellWrappedInterpreterSegmentScriptHint || + nested.hasScriptHint || + nested.hasComplexSyntax || + nested.hasProcessSubstitution); + const hasTopLevelInterpreterInvocation = splitShellSegmentsOutsideQuotes(raw, { + splitPipes: true, + }).some((segment) => hasInterpreterInvocationInSegment(segment)); + const hasInterpreterInvocation = + isDirectInterpreterCommand || + hasShellWrappedInterpreterInvocation || + hasTopLevelInterpreterInvocation; + + return { + hasInterpreterInvocation, + hasComplexSyntax: topLevel.hasComplexSyntax || hasShellWrappedInterpreterInvocation, + hasProcessSubstitution: topLevel.hasProcessSubstitution || nested.hasProcessSubstitution, + hasInterpreterSegmentScriptHint, + hasInterpreterPipelineScriptHint, + isDirectInterpreterCommand, + }; +} + async function validateScriptFileForShellBleed(params: { command: string; workdir: string; }): Promise { const target = extractScriptTargetFromCommand(params.command); if (!target) { - return; - } - - const absPath = path.isAbsolute(target.relOrAbsPath) - ? path.resolve(target.relOrAbsPath) - : path.resolve(params.workdir, target.relOrAbsPath); - - // Best-effort: only validate if file exists and is reasonably small. - let stat: { isFile(): boolean; size: number }; - try { - await assertSandboxPath({ - filePath: absPath, - cwd: params.workdir, - root: params.workdir, - }); - stat = await fs.stat(absPath); - } catch { - return; - } - if (!stat.isFile()) { - return; - } - if (stat.size > 512 * 1024) { - return; - } - - const content = await fs.readFile(absPath, "utf-8"); - - // Common failure mode: shell env var syntax leaking into Python/JS. - // We deliberately match all-caps/underscore vars to avoid false positives with `$` as a JS identifier. - const envVarRegex = /\$[A-Z_][A-Z0-9_]{1,}/g; - const first = envVarRegex.exec(content); - if (first) { - const idx = first.index; - const before = content.slice(0, idx); - const line = before.split("\n").length; - const token = first[0]; - throw new Error( - [ - `exec preflight: detected likely shell variable injection (${token}) in ${target.kind} script: ${path.basename( - absPath, - )}:${line}.`, - target.kind === "python" - ? `In Python, use os.environ.get(${JSON.stringify(token.slice(1))}) instead of raw ${token}.` - : `In Node.js, use process.env[${JSON.stringify(token.slice(1))}] instead of raw ${token}.`, - "(If this is inside a string literal on purpose, escape it or restructure the code.)", - ].join("\n"), - ); - } - - // Another recurring pattern from the issue: shell commands accidentally emitted as JS. - if (target.kind === "node") { - const firstNonEmpty = content - .split(/\r?\n/) - .map((l) => l.trim()) - .find((l) => l.length > 0); - if (firstNonEmpty && /^NODE\b/.test(firstNonEmpty)) { + const { + hasInterpreterInvocation, + hasComplexSyntax, + hasProcessSubstitution, + hasInterpreterSegmentScriptHint, + hasInterpreterPipelineScriptHint, + isDirectInterpreterCommand, + } = shouldFailClosedInterpreterPreflight(params.command); + if ( + hasInterpreterInvocation && + hasComplexSyntax && + (hasInterpreterSegmentScriptHint || + hasInterpreterPipelineScriptHint || + (hasProcessSubstitution && isDirectInterpreterCommand)) + ) { + // Fail closed when interpreter-driven script execution is ambiguous; otherwise + // attackers can route script content through forms our fast parser cannot validate. throw new Error( - `exec preflight: JS file starts with shell syntax (${firstNonEmpty}). ` + - `This looks like a shell command, not JavaScript.`, + "exec preflight: complex interpreter invocation detected; refusing to run without script preflight validation. " + + "Use a direct `python .py` or `node .js` command.", ); } + return; + } + + for (const relOrAbsPath of target.relOrAbsPaths) { + const absPath = path.isAbsolute(relOrAbsPath) + ? path.resolve(relOrAbsPath) + : path.resolve(params.workdir, relOrAbsPath); + + // Best-effort: only validate if file exists and is reasonably small. + let stat: { isFile(): boolean; size: number }; + try { + await assertSandboxPath({ + filePath: absPath, + cwd: params.workdir, + root: params.workdir, + }); + stat = await fs.stat(absPath); + } catch { + continue; + } + if (!stat.isFile()) { + continue; + } + if (stat.size > 512 * 1024) { + continue; + } + + const content = await fs.readFile(absPath, "utf-8"); + + // Common failure mode: shell env var syntax leaking into Python/JS. + // We deliberately match all-caps/underscore vars to avoid false positives with `$` as a JS identifier. + const envVarRegex = /\$[A-Z_][A-Z0-9_]{1,}/g; + const first = envVarRegex.exec(content); + if (first) { + const idx = first.index; + const before = content.slice(0, idx); + const line = before.split("\n").length; + const token = first[0]; + throw new Error( + [ + `exec preflight: detected likely shell variable injection (${token}) in ${target.kind} script: ${path.basename( + absPath, + )}:${line}.`, + target.kind === "python" + ? `In Python, use os.environ.get(${JSON.stringify(token.slice(1))}) instead of raw ${token}.` + : `In Node.js, use process.env[${JSON.stringify(token.slice(1))}] instead of raw ${token}.`, + "(If this is inside a string literal on purpose, escape it or restructure the code.)", + ].join("\n"), + ); + } + + // Another recurring pattern from the issue: shell commands accidentally emitted as JS. + if (target.kind === "node") { + const firstNonEmpty = content + .split(/\r?\n/) + .map((l) => l.trim()) + .find((l) => l.length > 0); + if (firstNonEmpty && /^NODE\b/.test(firstNonEmpty)) { + throw new Error( + `exec preflight: JS file starts with shell syntax (${firstNonEmpty}). ` + + `This looks like a shell command, not JavaScript.`, + ); + } + } } }