diff --git a/docs/tools/exec.md b/docs/tools/exec.md index 4b44ed6cff9..e4a152dfa16 100644 --- a/docs/tools/exec.md +++ b/docs/tools/exec.md @@ -109,7 +109,7 @@ Notes: - YOLO comes from the host-policy defaults (`security=full`, `ask=off`), not from `host=auto`. If you want to force gateway or node routing, set `tools.exec.host` or use `/exec host=...`. - In `security=full` plus `ask=off` mode, host exec follows the configured policy directly; there is no extra heuristic command-obfuscation prefilter or script-preflight rejection layer. - `tools.exec.node` (default: unset) -- `tools.exec.strictInlineEval` (default: false): when true, inline interpreter eval forms such as `python -c`, `node -e`, `ruby -e`, `perl -e`, `php -r`, `lua -e`, and `osascript -e` require reviewer or explicit approval. In `mode=auto`, the native auto reviewer may allow a clearly low-risk one-off command; if the reviewer asks, the request goes to a human. `allow-always` can still persist benign interpreter/script invocations, but inline-eval forms do not become durable allow rules. +- `tools.exec.strictInlineEval` (default: false): when true, inline interpreter eval forms such as `python -c`, `node -e`, `ruby -e`, `perl -e`, `php -r`, `lua -e`, and `osascript -e` require reviewer or explicit approval. In `mode=auto`, the normal exec approval path may let the native auto reviewer allow a clearly low-risk one-off command; direct node-host `system.run` calls still require an explicit approval because they cannot hand the command to a human approval route. If the reviewer asks, the request goes to a human. `allow-always` can still persist benign interpreter/script invocations, but inline-eval forms do not become durable allow rules. - `tools.exec.commandHighlighting` (default: false): when true, approval prompts can highlight parser-derived command spans in the command text. Set to `true` globally or per agent to enable command text highlighting without changing exec approval policy. - `tools.exec.pathPrepend`: list of directories to prepend to `PATH` for exec runs (gateway + sandbox only). - `tools.exec.safeBins`: stdin-only safe binaries that can run without explicit allowlist entries. For behavior details, see [Safe bins](/tools/exec-approvals-advanced#safe-bins-stdin-only). diff --git a/src/agents/bash-tools.exec-host-node-phases.ts b/src/agents/bash-tools.exec-host-node-phases.ts index bf0535ef48a..ca6e1db5d22 100644 --- a/src/agents/bash-tools.exec-host-node-phases.ts +++ b/src/agents/bash-tools.exec-host-node-phases.ts @@ -6,6 +6,7 @@ import { import { detectPolicyInlineEval } from "../infra/command-analysis/policy.js"; import { type ExecApprovalsFile, + type ExecAllowlistEntry, type ExecAsk, type ExecSecurity, type SystemRunApprovalPlan, @@ -19,7 +20,11 @@ import { parsePreparedSystemRunPayload, type PreparedRunExecPolicy, } from "../infra/system-run-approval-context.js"; -import { formatExecCommand, resolveSystemRunCommandRequest } from "../infra/system-run-command.js"; +import { + extractShellCommandFromArgv, + formatExecCommand, + resolveSystemRunCommandRequest, +} from "../infra/system-run-command.js"; import { normalizeNullableString } from "../shared/string-coerce.js"; import { resolveSafeTimeoutDelayMs } from "../utils/timer-delay.js"; import type { ExecuteNodeHostCommandParams } from "./bash-tools.exec-host-node.types.js"; @@ -85,6 +90,47 @@ function resolveNodeRunTimeoutMs(runTimeoutSec: number): number { : 0; } +type NodePolicyCommandEval = { + command: string; + cwd: string | undefined; + allowlistEval: ReturnType; +}; + +function hasExactCommandDurableApproval(params: { + allowlist: readonly ExecAllowlistEntry[]; + commandText: string; +}): boolean { + const normalizedCommand = params.commandText.trim(); + if (!normalizedCommand) { + return false; + } + const commandPattern = `=command:${crypto + .createHash("sha256") + .update(normalizedCommand) + .digest("hex") + .slice(0, 16)}`; + return params.allowlist.some( + (entry) => + entry.source === "allow-always" && + (entry.pattern === commandPattern || + (typeof entry.commandText === "string" && entry.commandText.trim() === normalizedCommand)), + ); +} + +function extractPreparedNodeShellPayload(argv: readonly string[]): string | null { + const extracted = extractShellCommandFromArgv([...argv]); + if (extracted) { + return extracted; + } + const executable = argv[0]?.split(/[\\/]/).pop()?.toLowerCase(); + const flag = argv[1]?.trim(); + const payload = argv[2]?.trim(); + if (argv.length === 3 && executable === "sh" && flag === "-lc" && payload) { + return payload; + } + return null; +} + export function shouldSkipNodeApprovalPrepare(params: { hostSecurity: ExecSecurity; hostAsk: ExecAsk; @@ -332,22 +378,76 @@ export async function analyzeNodeApprovalRequirement(params: { hostSecurity: ExecSecurity; hostAsk: ExecAsk; }): Promise { + const approvalCommand = params.prepared.rawCommand; + const approvalCwd = params.prepared.cwd ?? params.request.workdir; const baseAllowlistEval = evaluateShellAllowlist({ - command: params.request.command, + command: approvalCommand, allowlist: [], safeBins: new Set(), - cwd: params.request.workdir, + cwd: approvalCwd, env: params.request.env, platform: params.target.platform, trustedSafeBinDirs: params.request.trustedSafeBinDirs, }); + const bindingCommandEvals: NodePolicyCommandEval[] = [ + { + command: approvalCommand, + cwd: approvalCwd, + allowlistEval: baseAllowlistEval, + }, + ]; + const addCommandEval = ( + entries: NodePolicyCommandEval[], + command: string | null | undefined, + cwd: string | undefined, + ) => { + const normalizedCommand = command?.trim(); + if (!normalizedCommand) { + return; + } + if (entries.some((entry) => entry.command.trim() === normalizedCommand && entry.cwd === cwd)) { + return; + } + entries.push({ + command: normalizedCommand, + cwd, + allowlistEval: evaluateShellAllowlist({ + command: normalizedCommand, + allowlist: [], + safeBins: new Set(), + cwd, + env: params.request.env, + platform: params.target.platform, + trustedSafeBinDirs: params.request.trustedSafeBinDirs, + }), + }); + }; + const preparedCommand = resolveSystemRunCommandRequest({ + command: params.prepared.argv, + rawCommand: params.prepared.rawCommand, + }); + const preparedShellPayload = + extractPreparedNodeShellPayload(params.prepared.argv) ?? + (preparedCommand.ok ? preparedCommand.shellPayload : null); + addCommandEval(bindingCommandEvals, preparedShellPayload, approvalCwd); + const autoReviewBindingCommand = preparedShellPayload?.trim() || approvalCommand; + const autoReviewBindingEval = + bindingCommandEvals.find( + (entry) => + entry.command.trim() === autoReviewBindingCommand.trim() && entry.cwd === approvalCwd, + )?.allowlistEval ?? baseAllowlistEval; + const policyCommandEvals = [...bindingCommandEvals]; + addCommandEval(policyCommandEvals, params.prepared.plan.commandPreview, approvalCwd); + addCommandEval(policyCommandEvals, params.request.command, params.request.workdir); let analysisOk = baseAllowlistEval.analysisOk; let allowlistSatisfied = false; let durableApprovalSatisfied = false; let nodeApprovalsFileKnown = false; const inlineEvalHit = params.request.strictInlineEval === true - ? detectPolicyInlineEval(baseAllowlistEval.segments) + ? (policyCommandEvals + .map((entry) => detectPolicyInlineEval(entry.allowlistEval.segments)) + .find((hit) => hit !== null) ?? null) : null; if (inlineEvalHit) { params.request.warnings.push( @@ -356,13 +456,21 @@ export async function analyzeNodeApprovalRequirement(params: { )}.`, ); } + const suppressionCommandEvals = + preparedShellPayload && preparedShellPayload.trim().length > 0 + ? policyCommandEvals.filter( + (entry) => entry.command.trim() !== approvalCommand.trim() || entry.cwd !== approvalCwd, + ) + : policyCommandEvals; const requiresSecurityAuditSuppressionApproval = - commandRequiresSecurityAuditSuppressionApproval({ - command: params.request.command, - cwd: params.request.workdir, - env: params.request.env, - segments: baseAllowlistEval.segments, - }) && !(params.hostSecurity === "full" && params.hostAsk === "off"); + suppressionCommandEvals.some((entry) => + commandRequiresSecurityAuditSuppressionApproval({ + command: entry.command, + cwd: entry.cwd, + env: params.request.env, + segments: entry.allowlistEval.segments, + }), + ) && !(params.hostSecurity === "full" && params.hostAsk === "off"); if ( (params.hostAsk === "always" || params.hostSecurity === "allowlist" || @@ -383,27 +491,48 @@ export async function analyzeNodeApprovalRequirement(params: { nodeApprovalsFileKnown = true; const resolved = resolveExecApprovalsFromFile({ file: approvalsFile as ExecApprovalsFile, - agentId: params.request.agentId, + agentId: params.prepared.agentId, overrides: { security: "full" }, }); // Allowlist-only precheck; safe bins are node-local and may diverge. - const allowlistEval = evaluateShellAllowlist({ - command: params.request.command, - allowlist: resolved.allowlist, - safeBins: new Set(), - cwd: params.request.workdir, - env: params.request.env, - platform: params.target.platform, - trustedSafeBinDirs: params.request.trustedSafeBinDirs, + // POSIX node transport wraps commands, so mirror node policy by + // accepting either the prepared wrapper or its semantic inner command. + const allowlistEvals = bindingCommandEvals.map((entry) => { + const allowlistEval = evaluateShellAllowlist({ + command: entry.command, + allowlist: resolved.allowlist, + safeBins: new Set(), + cwd: entry.cwd, + env: params.request.env, + platform: params.target.platform, + trustedSafeBinDirs: params.request.trustedSafeBinDirs, + }); + return { + command: entry.command, + allowlistEligible: + !preparedShellPayload || entry.command.trim() === preparedShellPayload.trim(), + exactDurableApprovalSatisfied: hasExactCommandDurableApproval({ + allowlist: resolved.allowlist, + commandText: entry.command, + }), + allowlistEval, + durableApprovalSatisfied: hasDurableExecApproval({ + analysisOk: allowlistEval.analysisOk, + segmentAllowlistEntries: allowlistEval.segmentAllowlistEntries, + allowlist: resolved.allowlist, + commandText: entry.command, + }), + }; }); - durableApprovalSatisfied = hasDurableExecApproval({ - analysisOk: allowlistEval.analysisOk, - segmentAllowlistEntries: allowlistEval.segmentAllowlistEntries, - allowlist: resolved.allowlist, - commandText: params.prepared.rawCommand, - }); - allowlistSatisfied = allowlistEval.allowlistSatisfied; - analysisOk = allowlistEval.analysisOk; + durableApprovalSatisfied = allowlistEvals.some( + (entry) => + entry.durableApprovalSatisfied && + (entry.allowlistEligible || entry.exactDurableApprovalSatisfied), + ); + allowlistSatisfied = allowlistEvals.some( + (entry) => entry.allowlistEligible && entry.allowlistEval.allowlistSatisfied, + ); + analysisOk = allowlistEvals.some((entry) => entry.allowlistEval.analysisOk); } } catch { // Fall back to requiring approval if node approvals cannot be fetched. @@ -419,10 +548,10 @@ export async function analyzeNodeApprovalRequirement(params: { inlineEvalHit, requiresSecurityAuditSuppressionApproval, autoReviewArgv: - baseAllowlistEval.segments.length === 1 && - (baseAllowlistEval.segments[0]?.raw === undefined || - baseAllowlistEval.segments[0].raw.trim() === params.request.command.trim()) - ? baseAllowlistEval.segments[0].argv + autoReviewBindingEval.segments.length === 1 && + (autoReviewBindingEval.segments[0]?.raw === undefined || + autoReviewBindingEval.segments[0].raw.trim() === autoReviewBindingCommand.trim()) + ? autoReviewBindingEval.segments[0].argv : undefined, }; } diff --git a/src/agents/bash-tools.exec-host-node.test.ts b/src/agents/bash-tools.exec-host-node.test.ts index 9ebfee5991c..d8351ff6df5 100644 --- a/src/agents/bash-tools.exec-host-node.test.ts +++ b/src/agents/bash-tools.exec-host-node.test.ts @@ -66,6 +66,7 @@ const resolveExecApprovalsFromFileMock = vi.hoisted(() => })), ); const requiresExecApprovalMock = vi.hoisted(() => vi.fn(() => true)); +const hasDurableExecApprovalMock = vi.hoisted(() => vi.fn(() => false)); const resolveExecHostApprovalContextMock = vi.hoisted(() => vi.fn(() => ({ approvals: { allowlist: [], file: { version: 1, agents: {} } }, @@ -117,7 +118,7 @@ vi.mock("../infra/exec-approvals.js", () => ({ evaluateShellAllowlist: evaluateShellAllowlistMock, commandRequiresSecurityAuditSuppressionApproval: commandRequiresSecurityAuditSuppressionApprovalMock, - hasDurableExecApproval: vi.fn(() => false), + hasDurableExecApproval: hasDurableExecApprovalMock, requiresExecApproval: requiresExecApprovalMock, resolveExecApprovalAllowedDecisions: vi.fn(() => ["allow-once", "allow-always", "deny"]), resolveExecApprovalsFromFile: resolveExecApprovalsFromFileMock, @@ -318,6 +319,8 @@ describe("executeNodeHostCommand", () => { }); requiresExecApprovalMock.mockReset(); requiresExecApprovalMock.mockReturnValue(true); + hasDurableExecApprovalMock.mockReset(); + hasDurableExecApprovalMock.mockReturnValue(false); resolveExecHostApprovalContextMock.mockReset(); resolveExecHostApprovalContextMock.mockReturnValue({ approvals: { allowlist: [], file: { version: 1, agents: {} } }, @@ -425,6 +428,10 @@ describe("executeNodeHostCommand", () => { hostAsk: "on-miss", askFallback: "deny", }); + requiresExecApprovalMock.mockImplementation( + (params?: { allowlistSatisfied?: boolean; durableApprovalSatisfied?: boolean }) => + params?.allowlistSatisfied !== true && params?.durableApprovalSatisfied !== true, + ); const result = await executeNodeHostCommand({ command: "bun ./script.ts", @@ -466,6 +473,614 @@ describe("executeNodeHostCommand", () => { ); }); + it("reviews the prepared node plan before suppressing human approval", async () => { + const divergentPlan = { + argv: ["rm", "-rf", "/tmp/work"], + cwd: "/tmp/work", + commandText: "rm -rf /tmp/work", + commandPreview: "./scripts/check_mail.sh --limit 5", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: divergentPlan, + execPolicy: { security: "full", ask: "off" }, + }); + const nodeAllowlist = [{ pattern: "./scripts/check_mail.sh" }]; + resolveExecApprovalsFromFileMock.mockReturnValue({ + allowlist: nodeAllowlist, + file: { version: 1, agents: {} }, + agent: { + security: "full", + ask: "off", + askFallback: "deny", + autoAllowSkills: false, + }, + }); + evaluateShellAllowlistMock.mockImplementation( + (params?: { command?: string; allowlist?: unknown[] }) => { + const command = params?.command ?? ""; + const hasNodeAllowlist = Array.isArray(params?.allowlist) && params.allowlist.length > 0; + const previewMatch = command === "./scripts/check_mail.sh --limit 5"; + return { + allowlistMatches: previewMatch && hasNodeAllowlist ? [{}] : [], + analysisOk: true, + allowlistSatisfied: previewMatch && hasNodeAllowlist, + segments: [ + previewMatch + ? { + resolution: null, + argv: ["./scripts/check_mail.sh", "--limit", "5"], + raw: "./scripts/check_mail.sh --limit 5", + } + : { + resolution: null, + argv: ["rm", "-rf", "/tmp/work"], + raw: "rm -rf /tmp/work", + }, + ], + segmentAllowlistEntries: previewMatch && hasNodeAllowlist ? [{}] : [], + }; + }, + ); + const autoReviewer = vi.fn(async (input) => + input.command.includes("rm -rf") + ? { + decision: "ask", + risk: "high", + rationale: "destructive prepared plan", + } + : { + decision: "allow-once", + risk: "low", + rationale: "safe requested text", + }, + ); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + requiresExecApprovalMock.mockImplementation( + (params?: { allowlistSatisfied?: boolean; durableApprovalSatisfied?: boolean }) => + params?.allowlistSatisfied !== true && params?.durableApprovalSatisfied !== true, + ); + + const result = await executeNodeHostCommand({ + command: "echo SAFE", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + autoReviewer, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("approval-pending"); + expect(autoReviewer).toHaveBeenCalledWith( + expect.objectContaining({ + command: "rm -rf /tmp/work", + argv: ["rm", "-rf", "/tmp/work"], + agent: { + id: "prepared-agent", + sessionKey: "prepared-session", + }, + }), + ); + expect(createAndRegisterDefaultExecApprovalRequestMock).toHaveBeenCalled(); + expect(callGatewayToolMock).not.toHaveBeenCalledWith( + "exec.approval.resolve", + expect.anything(), + expect.anything(), + expect.anything(), + ); + }); + + it("honors node allowlist matches on prepared POSIX shell payloads", async () => { + const wrapperPlan = { + argv: ["/bin/sh", "-lc", "./scripts/check_mail.sh --limit 5"], + cwd: "/tmp/work", + commandText: `/bin/sh -lc "./scripts/check_mail.sh --limit 5"`, + commandPreview: "./scripts/check_mail.sh --limit 5", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: wrapperPlan, + execPolicy: { security: "full", ask: "off" }, + }); + const nodeAllowlist = [{ pattern: "./scripts/check_mail.sh" }]; + resolveExecApprovalsFromFileMock.mockReturnValue({ + allowlist: nodeAllowlist, + file: { version: 1, agents: {} }, + agent: { + security: "full", + ask: "off", + askFallback: "deny", + autoAllowSkills: false, + }, + }); + evaluateShellAllowlistMock.mockImplementation( + (params?: { command?: string; allowlist?: unknown[] }) => { + const command = params?.command ?? ""; + const hasNodeAllowlist = Array.isArray(params?.allowlist) && params.allowlist.length > 0; + const semanticMatch = command === "./scripts/check_mail.sh --limit 5"; + return { + allowlistMatches: semanticMatch && hasNodeAllowlist ? [{}] : [], + analysisOk: true, + allowlistSatisfied: semanticMatch && hasNodeAllowlist, + segments: [ + command.startsWith("/bin/sh") + ? { + resolution: null, + argv: ["/bin/sh", "-lc", "./scripts/check_mail.sh --limit 5"], + raw: `/bin/sh -lc "./scripts/check_mail.sh --limit 5"`, + } + : { + resolution: null, + argv: ["./scripts/check_mail.sh", "--limit", "5"], + raw: "./scripts/check_mail.sh --limit 5", + }, + ], + segmentAllowlistEntries: semanticMatch && hasNodeAllowlist ? [{}] : [], + }; + }, + ); + const autoReviewer = vi.fn(async () => ({ + decision: "ask", + risk: "medium", + rationale: "should not be needed", + })); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + requiresExecApprovalMock.mockImplementation( + (params?: { allowlistSatisfied?: boolean; durableApprovalSatisfied?: boolean }) => + params?.allowlistSatisfied !== true && params?.durableApprovalSatisfied !== true, + ); + + const result = await executeNodeHostCommand({ + command: "./scripts/check_mail.sh --limit 5", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + autoReviewer, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("completed"); + expect(autoReviewer).not.toHaveBeenCalled(); + expect(createAndRegisterDefaultExecApprovalRequestMock).not.toHaveBeenCalled(); + expect(resolveExecApprovalsFromFileMock).toHaveBeenCalledWith( + expect.objectContaining({ agentId: "prepared-agent" }), + ); + expectSystemRunInvoke({ invokeTimeoutMs: 35_000, runTimeoutMs: 30_000 }); + }); + + it("does not let transport wrapper allowlist matches approve shell payloads", async () => { + const wrapperPlan = { + argv: ["/bin/sh", "-lc", "./scripts/untrusted.sh"], + cwd: "/tmp/work", + commandText: `/bin/sh -lc "./scripts/untrusted.sh"`, + commandPreview: "./scripts/untrusted.sh", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: wrapperPlan, + execPolicy: { security: "full", ask: "off" }, + }); + const nodeAllowlist = [{ pattern: "/bin/sh" }]; + resolveExecApprovalsFromFileMock.mockReturnValue({ + allowlist: nodeAllowlist, + file: { version: 1, agents: {} }, + agent: { + security: "full", + ask: "off", + askFallback: "deny", + autoAllowSkills: false, + }, + }); + evaluateShellAllowlistMock.mockImplementation( + (params?: { command?: string; allowlist?: unknown[] }) => { + const command = params?.command ?? ""; + const hasNodeAllowlist = Array.isArray(params?.allowlist) && params.allowlist.length > 0; + const wrapperMatch = command.startsWith("/bin/sh") && hasNodeAllowlist; + return { + allowlistMatches: wrapperMatch ? [{}] : [], + analysisOk: true, + allowlistSatisfied: wrapperMatch, + segments: [ + command.startsWith("/bin/sh") + ? { + resolution: null, + argv: ["/bin/sh", "-lc", "./scripts/untrusted.sh"], + raw: `/bin/sh -lc "./scripts/untrusted.sh"`, + } + : { + resolution: null, + argv: ["./scripts/untrusted.sh"], + raw: "./scripts/untrusted.sh", + }, + ], + segmentAllowlistEntries: wrapperMatch ? [{}] : [], + }; + }, + ); + requiresExecApprovalMock.mockImplementation( + (params?: { allowlistSatisfied?: boolean; durableApprovalSatisfied?: boolean }) => + params?.allowlistSatisfied !== true && params?.durableApprovalSatisfied !== true, + ); + hasDurableExecApprovalMock.mockImplementation( + (params?: { segmentAllowlistEntries?: unknown[] }) => + Array.isArray(params?.segmentAllowlistEntries) && params.segmentAllowlistEntries.length > 0, + ); + const autoReviewer = vi.fn(async () => ({ + decision: "ask", + risk: "medium", + rationale: "inner payload is not allowlisted", + })); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + + const result = await executeNodeHostCommand({ + command: "./scripts/untrusted.sh", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + autoReviewer, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("approval-pending"); + expect(autoReviewer).toHaveBeenCalledWith( + expect.objectContaining({ + command: `/bin/sh -lc "./scripts/untrusted.sh"`, + argv: ["./scripts/untrusted.sh"], + }), + ); + expect(createAndRegisterDefaultExecApprovalRequestMock).toHaveBeenCalled(); + }); + + it("reuses exact durable approvals for prepared shell wrappers", async () => { + const wrapperPlan = { + argv: ["/bin/sh", "-lc", "cd ."], + cwd: "/tmp/work", + commandText: `/bin/sh -lc "cd ."`, + commandPreview: "cd .", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: wrapperPlan, + execPolicy: { security: "full", ask: "off" }, + }); + resolveExecApprovalsFromFileMock.mockReturnValue({ + allowlist: [ + { + pattern: "=command:placeholder", + source: "allow-always", + commandText: wrapperPlan.commandText, + }, + ], + file: { version: 1, agents: {} }, + agent: { + security: "full", + ask: "off", + askFallback: "deny", + autoAllowSkills: false, + }, + }); + evaluateShellAllowlistMock.mockImplementation((params?: { command?: string }) => { + const command = params?.command ?? ""; + return { + allowlistMatches: [], + analysisOk: true, + allowlistSatisfied: false, + segments: [ + command.startsWith("/bin/sh") + ? { + resolution: null, + argv: ["/bin/sh", "-lc", "cd ."], + raw: `/bin/sh -lc "cd ."`, + } + : { + resolution: null, + argv: ["cd", "."], + raw: "cd .", + }, + ], + segmentAllowlistEntries: [], + }; + }); + hasDurableExecApprovalMock.mockImplementation( + (params?: { commandText?: string | null }) => params?.commandText === wrapperPlan.commandText, + ); + requiresExecApprovalMock.mockImplementation( + (params?: { allowlistSatisfied?: boolean; durableApprovalSatisfied?: boolean }) => + params?.allowlistSatisfied !== true && params?.durableApprovalSatisfied !== true, + ); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + + const result = await executeNodeHostCommand({ + command: "cd .", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("completed"); + expect(createAndRegisterDefaultExecApprovalRequestMock).not.toHaveBeenCalled(); + expectSystemRunInvoke({ invokeTimeoutMs: 35_000, runTimeoutMs: 30_000 }); + }); + + it("keeps non-transport login shells approval-gated", async () => { + const loginPlan = { + argv: ["bash", "-lc", "./scripts/check_mail.sh --limit 5"], + cwd: "/tmp/work", + commandText: `bash -lc "./scripts/check_mail.sh --limit 5"`, + commandPreview: "./scripts/check_mail.sh --limit 5", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: loginPlan, + execPolicy: { security: "full", ask: "off" }, + }); + const nodeAllowlist = [{ pattern: "./scripts/check_mail.sh" }]; + resolveExecApprovalsFromFileMock.mockReturnValue({ + allowlist: nodeAllowlist, + file: { version: 1, agents: {} }, + agent: { + security: "full", + ask: "off", + askFallback: "deny", + autoAllowSkills: false, + }, + }); + evaluateShellAllowlistMock.mockImplementation( + (params?: { command?: string; allowlist?: unknown[] }) => { + const command = params?.command ?? ""; + const hasNodeAllowlist = Array.isArray(params?.allowlist) && params.allowlist.length > 0; + const semanticMatch = command === "./scripts/check_mail.sh --limit 5"; + return { + allowlistMatches: semanticMatch && hasNodeAllowlist ? [{}] : [], + analysisOk: true, + allowlistSatisfied: semanticMatch && hasNodeAllowlist, + segments: [ + command.startsWith("bash") + ? { + resolution: null, + argv: ["bash", "-lc", "./scripts/check_mail.sh --limit 5"], + raw: `bash -lc "./scripts/check_mail.sh --limit 5"`, + } + : { + resolution: null, + argv: ["./scripts/check_mail.sh", "--limit", "5"], + raw: "./scripts/check_mail.sh --limit 5", + }, + ], + segmentAllowlistEntries: semanticMatch && hasNodeAllowlist ? [{}] : [], + }; + }, + ); + requiresExecApprovalMock.mockImplementation( + (params?: { allowlistSatisfied?: boolean; durableApprovalSatisfied?: boolean }) => + params?.allowlistSatisfied !== true && params?.durableApprovalSatisfied !== true, + ); + const autoReviewer = vi.fn(async () => ({ + decision: "ask", + risk: "medium", + rationale: "login shell needs human approval", + })); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + + const result = await executeNodeHostCommand({ + command: "./scripts/check_mail.sh --limit 5", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + autoReviewer, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("approval-pending"); + expect(autoReviewer).toHaveBeenCalledWith( + expect.objectContaining({ + command: `bash -lc "./scripts/check_mail.sh --limit 5"`, + argv: ["bash", "-lc", "./scripts/check_mail.sh --limit 5"], + }), + ); + expect(createAndRegisterDefaultExecApprovalRequestMock).toHaveBeenCalled(); + }); + + it("requires human approval when prepared shell payload has multiple commands", async () => { + const chainPlan = { + argv: ["/bin/sh", "-lc", "openclaw status; id"], + cwd: "/tmp/work", + commandText: `/bin/sh -lc "openclaw status; id"`, + commandPreview: "openclaw status; id", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: chainPlan, + execPolicy: { security: "full", ask: "off" }, + }); + evaluateShellAllowlistMock.mockImplementation((params?: { command?: string }) => { + const command = params?.command ?? ""; + return { + allowlistMatches: [], + analysisOk: true, + allowlistSatisfied: false, + segments: command.startsWith("/bin/sh") + ? [ + { + resolution: null, + argv: ["/bin/sh", "-lc", "openclaw status; id"], + raw: `/bin/sh -lc "openclaw status; id"`, + }, + ] + : [ + { + resolution: null, + argv: ["openclaw", "status"], + raw: "openclaw status", + }, + { + resolution: null, + argv: ["id"], + raw: "id", + }, + ], + segmentAllowlistEntries: [], + }; + }); + const autoReviewer = vi.fn(async () => ({ + decision: "allow-once", + risk: "low", + rationale: "test reviewer would allow it", + })); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + + const result = await executeNodeHostCommand({ + command: "openclaw status; id", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + autoReviewer, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("approval-pending"); + expect(autoReviewer).not.toHaveBeenCalled(); + expect(createAndRegisterDefaultExecApprovalRequestMock).toHaveBeenCalled(); + }); + + it("does not treat read-only suppression inspections as wrapper writes", async () => { + const wrapperPlan = { + argv: ["/bin/sh", "-lc", "openclaw config get security.audit.suppressions"], + cwd: "/tmp/work", + commandText: `/bin/sh -lc "openclaw config get security.audit.suppressions"`, + commandPreview: "openclaw config get security.audit.suppressions", + agentId: "prepared-agent", + sessionKey: "prepared-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: wrapperPlan, + execPolicy: { security: "full", ask: "off" }, + }); + evaluateShellAllowlistMock.mockImplementation((params?: { command?: string }) => { + const command = params?.command ?? ""; + return { + allowlistMatches: [], + analysisOk: true, + allowlistSatisfied: true, + segments: [ + command.startsWith("/bin/sh") + ? { + resolution: null, + argv: ["/bin/sh", "-lc", "openclaw config get security.audit.suppressions"], + raw: `/bin/sh -lc "openclaw config get security.audit.suppressions"`, + } + : { + resolution: null, + argv: ["openclaw", "config", "get", "security.audit.suppressions"], + raw: "openclaw config get security.audit.suppressions", + }, + ], + segmentAllowlistEntries: [], + }; + }); + commandRequiresSecurityAuditSuppressionApprovalMock.mockImplementation( + (params?: { command?: string }) => params?.command?.startsWith("/bin/sh") === true, + ); + requiresExecApprovalMock.mockReturnValue(false); + resolveExecHostApprovalContextMock.mockReturnValue({ + approvals: { allowlist: [], file: { version: 1, agents: {} } }, + hostSecurity: "allowlist", + hostAsk: "on-miss", + askFallback: "deny", + }); + + const result = await executeNodeHostCommand({ + command: "openclaw config get security.audit.suppressions", + workdir: "/tmp/work", + env: {}, + security: "allowlist", + ask: "on-miss", + autoReview: true, + defaultTimeoutSec: 30, + approvalRunningNoticeMs: 0, + warnings: [], + agentId: "requested-agent", + sessionKey: "requested-session", + }); + + expect(result.details?.status).toBe("completed"); + expect(createAndRegisterDefaultExecApprovalRequestMock).not.toHaveBeenCalled(); + expectSystemRunInvoke({ invokeTimeoutMs: 35_000, runTimeoutMs: 30_000 }); + }); + it("requests human approval when node auto-review asks on an approval miss", async () => { const autoReviewer = vi.fn(async () => ({ decision: "ask", @@ -744,24 +1359,46 @@ describe("executeNodeHostCommand", () => { }); it("auto-reviews strict inline-eval commands before asking a human", async () => { + const inlinePlan = { + argv: ["/bin/sh", "-lc", "python3 -c 'print(1)'"], + cwd: "/tmp/work", + commandText: `/bin/sh -lc "python3 -c 'print(1)'"`, + commandPreview: "python3 -c 'print(1)'", + agentId: "requested-agent", + sessionKey: "requested-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: inlinePlan, + execPolicy: { security: "full", ask: "off" }, + }); const autoReviewer = vi.fn(async () => ({ decision: "allow-once", risk: "low", rationale: "safe inline eval", })); - detectInterpreterInlineEvalArgvMock.mockReturnValue(INLINE_EVAL_HIT); - evaluateShellAllowlistMock.mockReturnValue({ - allowlistMatches: [], - analysisOk: true, - allowlistSatisfied: false, - segments: [ - { - resolution: null, - argv: ["python3", "-c", "print(1)"], - raw: "python3 -c 'print(1)'", - }, - ], - segmentAllowlistEntries: [], + detectInterpreterInlineEvalArgvMock.mockImplementation((argv?: unknown) => + Array.isArray(argv) && argv[0] === "python3" ? INLINE_EVAL_HIT : null, + ); + evaluateShellAllowlistMock.mockImplementation((params?: { command?: string }) => { + const command = params?.command ?? ""; + const segment = command.startsWith("/bin/sh") + ? { + resolution: null, + argv: ["/bin/sh", "-lc", "python3 -c 'print(1)'"], + raw: `/bin/sh -lc "python3 -c 'print(1)'"`, + } + : { + resolution: null, + argv: ["python3", "-c", "print(1)"], + raw: "python3 -c 'print(1)'", + }; + return { + allowlistMatches: [], + analysisOk: true, + allowlistSatisfied: false, + segments: [segment], + segmentAllowlistEntries: [], + }; }); resolveExecHostApprovalContextMock.mockReturnValue({ approvals: { allowlist: [], file: { version: 1, agents: {} } }, @@ -790,7 +1427,7 @@ describe("executeNodeHostCommand", () => { expect(result.details?.status).toBe("completed"); expect(autoReviewer).toHaveBeenCalledWith( expect.objectContaining({ - command: "python3 -c 'print(1)'", + command: `/bin/sh -lc "python3 -c 'print(1)'"`, argv: ["python3", "-c", "print(1)"], host: "node", reason: "strict-inline-eval", @@ -1273,6 +1910,18 @@ describe("executeNodeHostCommand", () => { }); it("auto-reviews strict inline-eval commands with full/off host policy when node policy is available", async () => { + const inlinePlan = { + argv: ["python3", "-c", "print(1)"], + cwd: "/tmp/work", + commandText: "python3 -c 'print(1)'", + commandPreview: null, + agentId: "requested-agent", + sessionKey: "requested-session", + }; + parsePreparedSystemRunPayloadMock.mockReturnValue({ + plan: inlinePlan, + execPolicy: { security: "full", ask: "off" }, + }); const autoReviewer = vi.fn(async () => ({ decision: "allow-once", risk: "low", diff --git a/src/agents/bash-tools.exec-host-node.ts b/src/agents/bash-tools.exec-host-node.ts index 4f1132b727e..faafc173af2 100644 --- a/src/agents/bash-tools.exec-host-node.ts +++ b/src/agents/bash-tools.exec-host-node.ts @@ -190,7 +190,7 @@ export async function executeNodeHostCommand( ) { const reviewer = params.autoReviewer ?? defaultExecAutoReviewer; const decision = await reviewer({ - command: params.command, + command: prepared.rawCommand, argv: autoReviewArgv, cwd: prepared.cwd, envKeys: Object.keys(params.requestedEnv ?? {}).toSorted(), @@ -209,8 +209,8 @@ export async function executeNodeHostCommand( inlineEval: inlineEvalHit !== null, }, agent: { - id: params.agentId, - sessionKey: params.sessionKey, + id: prepared.agentId, + sessionKey: prepared.sessionKey, }, }); if (decision.decision === "allow-once") {