Agents/Tools: preflight exec script files for shell var injection (#18457)

* fix(agents): don't force store=true for codex responses

* test: stabilize respawn + subagent usage assertions

* Agents/Tools: preflight exec to detect shell variable injection in scripts

* Changelog: fix merge marker formatting
This commit is contained in:
Vignesh
2026-02-16 10:34:29 -08:00
committed by GitHub
parent 9b70849567
commit b0a01fe482
4 changed files with 187 additions and 0 deletions

View File

@@ -1,5 +1,7 @@
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
import crypto from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
import type { BashSandboxConfig } from "./bash-tools.shared.js";
import {
type ExecAsk,
@@ -118,6 +120,97 @@ export type ExecToolDetails =
nodeId?: string;
};
function extractScriptTargetFromCommand(
command: string,
): { kind: "python"; relOrAbsPath: string } | { kind: "node"; relOrAbsPath: string } | null {
const raw = command.trim();
if (!raw) {
return null;
}
// Intentionally simple parsing: we only support common forms like
// python file.py
// python3 -u file.py
// node --experimental-something file.js
// If the command is more complex (pipes, heredocs, quoted paths with spaces), skip preflight.
const pythonMatch = raw.match(/^\s*(python3?|python)\s+(?:-[^\s]+\s+)*([^\s]+\.py)\b/i);
if (pythonMatch?.[2]) {
return { kind: "python", relOrAbsPath: pythonMatch[2] };
}
const nodeMatch = raw.match(/^\s*(node)\s+(?:--[^\s]+\s+)*([^\s]+\.js)\b/i);
if (nodeMatch?.[2]) {
return { kind: "node", relOrAbsPath: nodeMatch[2] };
}
return null;
}
async function validateScriptFileForShellBleed(params: {
command: string;
workdir: string;
}): Promise<void> {
const target = extractScriptTargetFromCommand(params.command);
if (!target) {
return;
}
const absPath = path.isAbsolute(target.relOrAbsPath)
? path.resolve(target.relOrAbsPath)
: path.resolve(params.workdir, target.relOrAbsPath);
// Best-effort: only validate if file exists and is reasonably small.
let stat: { isFile(): boolean; size: number };
try {
stat = await fs.stat(absPath);
} catch {
return;
}
if (!stat.isFile()) {
return;
}
if (stat.size > 512 * 1024) {
return;
}
const content = await fs.readFile(absPath, "utf-8");
// Common failure mode: shell env var syntax leaking into Python/JS.
// We deliberately match all-caps/underscore vars to avoid false positives with `$` as a JS identifier.
const envVarRegex = /\$[A-Z_][A-Z0-9_]{1,}/g;
const first = envVarRegex.exec(content);
if (first) {
const idx = first.index;
const before = content.slice(0, idx);
const line = before.split("\n").length;
const token = first[0];
throw new Error(
[
`exec preflight: detected likely shell variable injection (${token}) in ${target.kind} script: ${path.basename(
absPath,
)}:${line}.`,
target.kind === "python"
? `In Python, use os.environ.get(${JSON.stringify(token.slice(1))}) instead of raw ${token}.`
: `In Node.js, use process.env[${JSON.stringify(token.slice(1))}] instead of raw ${token}.`,
"(If this is inside a string literal on purpose, escape it or restructure the code.)",
].join("\n"),
);
}
// Another recurring pattern from the issue: shell commands accidentally emitted as JS.
if (target.kind === "node") {
const firstNonEmpty = content
.split(/\r?\n/)
.map((l) => l.trim())
.find((l) => l.length > 0);
if (firstNonEmpty && /^NODE\b/.test(firstNonEmpty)) {
throw new Error(
`exec preflight: JS file starts with shell syntax (${firstNonEmpty}). ` +
`This looks like a shell command, not JavaScript.`,
);
}
}
}
export function createExecTool(
defaults?: ExecToolDefaults,
// oxlint-disable-next-line typescript/no-explicit-any
@@ -874,6 +967,11 @@ export function createExecTool(
typeof params.timeout === "number" ? params.timeout : defaultTimeoutSec;
const getWarningText = () => (warnings.length ? `${warnings.join("\n")}\n\n` : "");
const usePty = params.pty === true && !sandbox;
// Preflight: catch a common model failure mode (shell syntax leaking into Python/JS sources)
// before we execute and burn tokens in cron loops.
await validateScriptFileForShellBleed({ command: params.command, workdir });
const run = await runExecProcess({
command: params.command,
execCommand: execCommandOverride,