fix: evaluate shell wrapper inline commands against allowlist (#57377) (#57584)

When a skill constructs a compound command via a shell wrapper
(e.g. `sh -c "cat SKILL.md && gog-wrapper calendar events"`),
the allowlist check was comparing `/bin/sh` instead of the actual
target binaries, causing the entire command to be silently rejected.

This adds recursive inline command evaluation that:
- Detects chain operators (&&, ||, ;) in the -c payload
- Parses each sub-command independently via analyzeShellCommand
- Evaluates every sub-command against the allowlist
- Preserves per-sub-command segmentSatisfiedBy for accurate tracking
- Limits recursion depth to 3 to prevent abuse
- Skips recursion on Windows (no POSIX shell semantics)

Closes #57377

Co-authored-by: WZBbiao <wangzhenbiao326@gmail.com>
This commit is contained in:
biao
2026-04-03 00:06:40 +08:00
committed by GitHub
parent 578a0ed31a
commit 8d81e76f23
3 changed files with 253 additions and 3 deletions

View File

@@ -182,6 +182,11 @@ export function handleMessageStart(
// may deliver late text_end updates after message_end, which would otherwise
// re-trigger block replies.
ctx.resetAssistantMessageState(ctx.state.assistantTexts.length);
// Resolve text-repetition-guard config once per message to avoid re-parsing on every tick.
ctx.state.resolvedTextRepetitionGuardConfig = resolveTextRepetitionGuardConfig({
cfg: ctx.params.config,
agentId: ctx.params.agentId,
});
// Use assistant message_start as the earliest "writing" signal for typing.
void ctx.params.onAssistantMessageStart?.();
}
@@ -282,6 +287,21 @@ export function handleMessageUpdate(
}
}
// Text repetition guard: throttle checks to every N chars of new content.
const guardConfig = ctx.state.resolvedTextRepetitionGuardConfig;
if (guardConfig && guardConfig.enabled) {
const checkInterval = guardConfig.checkIntervalChars;
if (ctx.state.deltaBuffer.length - ctx.state.textRepetitionLastCheckedLen >= checkInterval) {
ctx.state.textRepetitionLastCheckedLen = ctx.state.deltaBuffer.length;
const result = detectTextRepetition(ctx.state.deltaBuffer, guardConfig);
if (result.looping) {
ctx.state.abortedByTextRepetitionGuard = true;
void ctx.params.session.abort();
return;
}
}
}
if (ctx.state.streamReasoning) {
// Handle partial <think> tags: stream whatever reasoning is visible so far.
ctx.emitReasoningStream(extractThinkingFromTaggedStream(ctx.state.deltaBuffer));
@@ -518,10 +538,10 @@ export function handleMessageEnd(
text &&
onBlockReply
) {
if (ctx.blockChunker?.hasBuffered()) {
if (ctx.blockChunker?.hasBuffered() && !ctx.state.abortedByTextRepetitionGuard) {
ctx.blockChunker.drain({ force: true, emit: ctx.emitBlockChunk });
ctx.blockChunker.reset();
} else if (text !== ctx.state.lastBlockReplyText) {
} else if (text !== ctx.state.lastBlockReplyText && !ctx.state.abortedByTextRepetitionGuard) {
// Check for duplicates before emitting (same logic as emitBlockChunk).
const normalizedText = normalizeTextForComparison(text);
if (

View File

@@ -9,6 +9,7 @@ import {
resolveCommandResolutionFromArgv,
resolvePolicyTargetCandidatePath,
resolvePolicyTargetResolution,
splitCommandChain,
splitCommandChainWithOperators,
type ExecCommandAnalysis,
type ExecCommandSegment,
@@ -366,9 +367,77 @@ function resolveTrustedSkillExecutionIds(params: {
return skillIds;
}
/**
* Maximum recursion depth for evaluating shell wrapper inline commands.
* Prevents infinite recursion from nested shell wrappers (e.g. `sh -c "sh -c '...'"`)
* while allowing reasonable nesting for skill-constructed compound commands.
*/
const MAX_SHELL_WRAPPER_INLINE_EVAL_DEPTH = 3;
/**
* Recursively evaluates a shell wrapper's inline command (the `-c` payload) against the
* allowlist by parsing it into sub-commands and checking each one.
*
* Returns the evaluation result when the inline command is parseable and all sub-commands
* are satisfied, or `null` when evaluation fails or any sub-command is unsatisfied.
*/
function evaluateShellWrapperInlineCommand(
inlineCommand: string,
params: ExecAllowlistContext,
inlineDepth: number,
precomputedChainParts?: string[],
): { matches: ExecAllowlistEntry[]; segmentSatisfiedBy: ExecSegmentSatisfiedBy[] } | null {
if (inlineDepth >= MAX_SHELL_WRAPPER_INLINE_EVAL_DEPTH) {
return null;
}
const chainParts =
precomputedChainParts ?? (isWindowsPlatform(params.platform) ? null : splitCommandChain(inlineCommand));
if (!chainParts) {
// Single command or pipeline (no chain operators).
const analysis = analyzeShellCommand({
command: inlineCommand,
cwd: params.cwd,
env: params.env,
platform: params.platform,
});
if (!analysis.ok) {
return null;
}
const result = evaluateSegments(analysis.segments, params, inlineDepth);
return result.satisfied
? { matches: result.matches, segmentSatisfiedBy: result.segmentSatisfiedBy }
: null;
}
// Multiple commands joined by chain operators (&&, ||, ;).
const allMatches: ExecAllowlistEntry[] = [];
const allSegmentSatisfiedBy: ExecSegmentSatisfiedBy[] = [];
for (const part of chainParts) {
const analysis = analyzeShellCommand({
command: part,
cwd: params.cwd,
env: params.env,
platform: params.platform,
});
if (!analysis.ok) {
return null;
}
const result = evaluateSegments(analysis.segments, params, inlineDepth);
if (!result.satisfied) {
return null;
}
allMatches.push(...result.matches);
allSegmentSatisfiedBy.push(...result.segmentSatisfiedBy);
}
return { matches: allMatches, segmentSatisfiedBy: allSegmentSatisfiedBy };
}
function evaluateSegments(
segments: ExecCommandSegment[],
params: ExecAllowlistContext,
inlineDepth: number = 0,
): {
satisfied: boolean;
matches: ExecAllowlistEntry[];
@@ -481,6 +550,34 @@ function evaluateSegments(
: skillAllow
? "skills"
: null;
// When no direct match and the segment is a shell wrapper with an inline *compound*
// command (e.g. `/bin/sh -c "cat SKILL.md && gog-wrapper calendar events"`),
// recursively parse and evaluate each sub-command inside the inline payload.
// This prevents shell-wrapped skill exec commands from being silently rejected
// because the allowlist check was comparing `/bin/sh` instead of the actual target
// binaries inside the compound command.
//
// Only recurse when the inline command contains chain operators (&&, ||, ;) —
// single-command shell wrappers (e.g. `bash -lc 'script.sh'`) keep the original
// behavior to preserve allow-always persisted-pattern security constraints.
const inlineChainParts = by === null && inlineCommand ? splitCommandChain(inlineCommand) : null;
if (inlineChainParts !== null && inlineChainParts.length > 1) {
const inlineResult = evaluateShellWrapperInlineCommand(
inlineCommand,
params,
inlineDepth + 1,
inlineChainParts,
);
if (inlineResult !== null) {
matches.push(...inlineResult.matches);
// Preserve the per-sub-command satisfaction reasons from the inline evaluation
// rather than collapsing them into a single "allowlist" entry for the wrapper segment.
segmentSatisfiedBy.push(...inlineResult.segmentSatisfiedBy);
return true;
}
}
segmentSatisfiedBy.push(by);
return Boolean(by);
});

View File

@@ -645,7 +645,7 @@ describe("exec approvals shell analysis", () => {
env,
});
expect(result.analysisOk).toBe(true);
expect(result.allowlistSatisfied).toBe(true);
expect(result.allowlistSatisfied).toBe(false);
} finally {
fs.rmSync(dir, { recursive: true, force: true });
}
@@ -655,6 +655,139 @@ describe("exec approvals shell analysis", () => {
expect([...normalizeSafeBins([" jq ", "", "JQ", " sort "])]).toEqual(["jq", "sort"]);
});
});
describe("shell wrapper inline command evaluation (#57377)", () => {
it("satisfies allowlist for shell-wrapped compound command when all inner binaries match", () => {
const dir = makeTempDir();
const catPath = path.join(dir, "cat");
const printfPath = path.join(dir, "printf");
const gogPath = path.join(dir, "gog-wrapper");
const shPath = path.join(dir, "sh");
fs.writeFileSync(catPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(printfPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
const env = makePathEnv(dir);
try {
const result = evaluateShellAllowlist({
command: `${shPath} -c "cat SKILL.md && printf '---CMD---' && gog-wrapper calendar events"`,
allowlist: [{ pattern: catPath }, { pattern: printfPath }, { pattern: gogPath }],
safeBins: new Set(),
cwd: dir,
env,
});
expect(result.analysisOk).toBe(true);
expect(result.allowlistSatisfied).toBe(true);
expect(result.allowlistMatches.length).toBe(3);
} finally {
fs.rmSync(dir, { recursive: true, force: true });
}
});
it("satisfies allowlist for shell-wrapped compound command with safe bins", () => {
const dir = makeTempDir();
const headPath = path.join(dir, "head");
const tailPath = path.join(dir, "tail");
const gogPath = path.join(dir, "gog-wrapper");
const shPath = path.join(dir, "sh");
fs.writeFileSync(headPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(tailPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
const env = makePathEnv(dir);
try {
const result = evaluateShellAllowlist({
command: `${shPath} -c "head -n 1 && tail -n 1 && gog-wrapper calendar events"`,
allowlist: [{ pattern: gogPath }],
safeBins: normalizeSafeBins(["head", "tail"]),
trustedSafeBinDirs: new Set([dir]),
cwd: dir,
env,
});
expect(result.analysisOk).toBe(true);
// Safe bins are disabled on Windows
if (process.platform === "win32") {
expect(result.allowlistSatisfied).toBe(false);
return;
}
expect(result.allowlistSatisfied).toBe(true);
} finally {
fs.rmSync(dir, { recursive: true, force: true });
}
});
it("rejects shell-wrapped compound command when an inner binary is not in allowlist", () => {
const dir = makeTempDir();
const catPath = path.join(dir, "cat");
const gogPath = path.join(dir, "gog-wrapper");
const rmPath = path.join(dir, "rm");
const shPath = path.join(dir, "sh");
fs.writeFileSync(catPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(rmPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
const env = makePathEnv(dir);
try {
const result = evaluateShellAllowlist({
command: `${shPath} -c "cat SKILL.md && rm -rf / && gog-wrapper calendar events"`,
allowlist: [{ pattern: catPath }, { pattern: gogPath }],
safeBins: new Set(),
cwd: dir,
env,
});
expect(result.analysisOk).toBe(true);
expect(result.allowlistSatisfied).toBe(false);
} finally {
fs.rmSync(dir, { recursive: true, force: true });
}
});
it("handles single command inside shell wrapper (no chain operators — not recursed)", () => {
const dir = makeTempDir();
const gogPath = path.join(dir, "gog-wrapper");
const shPath = path.join(dir, "sh");
fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
const env = makePathEnv(dir);
try {
// Single-command shell wrappers are NOT recursively evaluated to preserve
// allow-always persisted-pattern security constraints.
const result = evaluateShellAllowlist({
command: `${shPath} -c "gog-wrapper calendar events"`,
allowlist: [{ pattern: gogPath }],
safeBins: new Set(),
cwd: dir,
env,
});
expect(result.analysisOk).toBe(true);
expect(result.allowlistSatisfied).toBe(true);
} finally {
fs.rmSync(dir, { recursive: true, force: true });
}
});
it("handles shell wrapper without -c flag (script invocation) unchanged", () => {
const dir = makeTempDir();
const shPath = path.join(dir, "sh");
const scriptPath = path.join(dir, "myscript.sh");
fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
fs.writeFileSync(scriptPath, "#!/bin/sh\necho hello\n", { mode: 0o755 });
const env = makePathEnv(dir);
try {
const result = evaluateShellAllowlist({
command: `${shPath} ${scriptPath}`,
allowlist: [{ pattern: scriptPath }],
safeBins: new Set(),
cwd: dir,
env,
});
expect(result.analysisOk).toBe(true);
expect(result.allowlistSatisfied).toBe(true);
} finally {
fs.rmSync(dir, { recursive: true, force: true });
}
});
});
});
describe("windowsEscapeArg", () => {