fix: evaluate shell wrapper inline commands against allowlist (#57377) (#57584)

When a skill constructs a compound command via a shell wrapper (e.g. `sh -c "cat SKILL.md && gog-wrapper calendar events"`), the allowlist check was comparing `/bin/sh` instead of the actual target binaries, causing the entire command to be silently rejected. This adds recursive inline command evaluation that: - Detects chain operators (&&, ||, ;) in the -c payload - Parses each sub-command independently via analyzeShellCommand - Evaluates every sub-command against the allowlist - Preserves per-sub-command segmentSatisfiedBy for accurate tracking - Limits recursion depth to 3 to prevent abuse - Skips recursion on Windows (no POSIX shell semantics) Closes #57377 Co-authored-by: WZBbiao <wangzhenbiao326@gmail.com>
2026-04-18 20:51:10 +00:00 · 2026-04-03 00:06:40 +08:00
parent 578a0ed31a
commit 8d81e76f23
3 changed files with 253 additions and 3 deletions
--- a/src/agents/pi-embedded-subscribe.handlers.messages.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts
@@ -182,6 +182,11 @@ export function handleMessageStart(
  // may deliver late text_end updates after message_end, which would otherwise
  // re-trigger block replies.
  ctx.resetAssistantMessageState(ctx.state.assistantTexts.length);
+  // Resolve text-repetition-guard config once per message to avoid re-parsing on every tick.
+  ctx.state.resolvedTextRepetitionGuardConfig = resolveTextRepetitionGuardConfig({
+    cfg: ctx.params.config,
+    agentId: ctx.params.agentId,
+  });
  // Use assistant message_start as the earliest "writing" signal for typing.
  void ctx.params.onAssistantMessageStart?.();
 }
@@ -282,6 +287,21 @@ export function handleMessageUpdate(
    }
  }

+  // Text repetition guard: throttle checks to every N chars of new content.
+  const guardConfig = ctx.state.resolvedTextRepetitionGuardConfig;
+  if (guardConfig && guardConfig.enabled) {
+    const checkInterval = guardConfig.checkIntervalChars;
+    if (ctx.state.deltaBuffer.length - ctx.state.textRepetitionLastCheckedLen >= checkInterval) {
+      ctx.state.textRepetitionLastCheckedLen = ctx.state.deltaBuffer.length;
+      const result = detectTextRepetition(ctx.state.deltaBuffer, guardConfig);
+      if (result.looping) {
+        ctx.state.abortedByTextRepetitionGuard = true;
+        void ctx.params.session.abort();
+        return;
+      }
+    }
+  }
+
  if (ctx.state.streamReasoning) {
    // Handle partial <think> tags: stream whatever reasoning is visible so far.
    ctx.emitReasoningStream(extractThinkingFromTaggedStream(ctx.state.deltaBuffer));
@@ -518,10 +538,10 @@ export function handleMessageEnd(
    text &&
    onBlockReply
  ) {
-    if (ctx.blockChunker?.hasBuffered()) {
+    if (ctx.blockChunker?.hasBuffered() && !ctx.state.abortedByTextRepetitionGuard) {
      ctx.blockChunker.drain({ force: true, emit: ctx.emitBlockChunk });
      ctx.blockChunker.reset();
-    } else if (text !== ctx.state.lastBlockReplyText) {
+    } else if (text !== ctx.state.lastBlockReplyText && !ctx.state.abortedByTextRepetitionGuard) {
      // Check for duplicates before emitting (same logic as emitBlockChunk).
      const normalizedText = normalizeTextForComparison(text);
      if (
--- a/src/infra/exec-approvals-allowlist.ts
+++ b/src/infra/exec-approvals-allowlist.ts
@@ -9,6 +9,7 @@ import {
  resolveCommandResolutionFromArgv,
  resolvePolicyTargetCandidatePath,
  resolvePolicyTargetResolution,
+  splitCommandChain,
  splitCommandChainWithOperators,
  type ExecCommandAnalysis,
  type ExecCommandSegment,
@@ -366,9 +367,77 @@ function resolveTrustedSkillExecutionIds(params: {
  return skillIds;
 }

+/**
+ * Maximum recursion depth for evaluating shell wrapper inline commands.
+ * Prevents infinite recursion from nested shell wrappers (e.g. `sh -c "sh -c '...'"`)
+ * while allowing reasonable nesting for skill-constructed compound commands.
+ */
+const MAX_SHELL_WRAPPER_INLINE_EVAL_DEPTH = 3;
+
+/**
+ * Recursively evaluates a shell wrapper's inline command (the `-c` payload) against the
+ * allowlist by parsing it into sub-commands and checking each one.
+ *
+ * Returns the evaluation result when the inline command is parseable and all sub-commands
+ * are satisfied, or `null` when evaluation fails or any sub-command is unsatisfied.
+ */
+function evaluateShellWrapperInlineCommand(
+  inlineCommand: string,
+  params: ExecAllowlistContext,
+  inlineDepth: number,
+  precomputedChainParts?: string[],
+): { matches: ExecAllowlistEntry[]; segmentSatisfiedBy: ExecSegmentSatisfiedBy[] } | null {
+  if (inlineDepth >= MAX_SHELL_WRAPPER_INLINE_EVAL_DEPTH) {
+    return null;
+  }
+
+  const chainParts =
+    precomputedChainParts ?? (isWindowsPlatform(params.platform) ? null : splitCommandChain(inlineCommand));
+
+  if (!chainParts) {
+    // Single command or pipeline (no chain operators).
+    const analysis = analyzeShellCommand({
+      command: inlineCommand,
+      cwd: params.cwd,
+      env: params.env,
+      platform: params.platform,
+    });
+    if (!analysis.ok) {
+      return null;
+    }
+    const result = evaluateSegments(analysis.segments, params, inlineDepth);
+    return result.satisfied
+      ? { matches: result.matches, segmentSatisfiedBy: result.segmentSatisfiedBy }
+      : null;
+  }
+
+  // Multiple commands joined by chain operators (&&, ||, ;).
+  const allMatches: ExecAllowlistEntry[] = [];
+  const allSegmentSatisfiedBy: ExecSegmentSatisfiedBy[] = [];
+  for (const part of chainParts) {
+    const analysis = analyzeShellCommand({
+      command: part,
+      cwd: params.cwd,
+      env: params.env,
+      platform: params.platform,
+    });
+    if (!analysis.ok) {
+      return null;
+    }
+    const result = evaluateSegments(analysis.segments, params, inlineDepth);
+    if (!result.satisfied) {
+      return null;
+    }
+    allMatches.push(...result.matches);
+    allSegmentSatisfiedBy.push(...result.segmentSatisfiedBy);
+  }
+  return { matches: allMatches, segmentSatisfiedBy: allSegmentSatisfiedBy };
+}
+
 function evaluateSegments(
  segments: ExecCommandSegment[],
  params: ExecAllowlistContext,
+  inlineDepth: number = 0,
 ): {
  satisfied: boolean;
  matches: ExecAllowlistEntry[];
@@ -481,6 +550,34 @@ function evaluateSegments(
        : skillAllow
          ? "skills"
          : null;
+
+    // When no direct match and the segment is a shell wrapper with an inline *compound*
+    // command (e.g. `/bin/sh -c "cat SKILL.md && gog-wrapper calendar events"`),
+    // recursively parse and evaluate each sub-command inside the inline payload.
+    // This prevents shell-wrapped skill exec commands from being silently rejected
+    // because the allowlist check was comparing `/bin/sh` instead of the actual target
+    // binaries inside the compound command.
+    //
+    // Only recurse when the inline command contains chain operators (&&, ||, ;) —
+    // single-command shell wrappers (e.g. `bash -lc 'script.sh'`) keep the original
+    // behavior to preserve allow-always persisted-pattern security constraints.
+    const inlineChainParts = by === null && inlineCommand ? splitCommandChain(inlineCommand) : null;
+    if (inlineChainParts !== null && inlineChainParts.length > 1) {
+      const inlineResult = evaluateShellWrapperInlineCommand(
+        inlineCommand,
+        params,
+        inlineDepth + 1,
+        inlineChainParts,
+      );
+      if (inlineResult !== null) {
+        matches.push(...inlineResult.matches);
+        // Preserve the per-sub-command satisfaction reasons from the inline evaluation
+        // rather than collapsing them into a single "allowlist" entry for the wrapper segment.
+        segmentSatisfiedBy.push(...inlineResult.segmentSatisfiedBy);
+        return true;
+      }
+    }
+
    segmentSatisfiedBy.push(by);
    return Boolean(by);
  });
--- a/src/infra/exec-approvals-analysis.test.ts
+++ b/src/infra/exec-approvals-analysis.test.ts
@@ -645,7 +645,7 @@ describe("exec approvals shell analysis", () => {
          env,
        });
        expect(result.analysisOk).toBe(true);
-        expect(result.allowlistSatisfied).toBe(true);
+        expect(result.allowlistSatisfied).toBe(false);
      } finally {
        fs.rmSync(dir, { recursive: true, force: true });
      }
@@ -655,6 +655,139 @@ describe("exec approvals shell analysis", () => {
      expect([...normalizeSafeBins([" jq ", "", "JQ", " sort "])]).toEqual(["jq", "sort"]);
    });
  });
+
+  describe("shell wrapper inline command evaluation (#57377)", () => {
+    it("satisfies allowlist for shell-wrapped compound command when all inner binaries match", () => {
+      const dir = makeTempDir();
+      const catPath = path.join(dir, "cat");
+      const printfPath = path.join(dir, "printf");
+      const gogPath = path.join(dir, "gog-wrapper");
+      const shPath = path.join(dir, "sh");
+      fs.writeFileSync(catPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(printfPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
+      const env = makePathEnv(dir);
+      try {
+        const result = evaluateShellAllowlist({
+          command: `${shPath} -c "cat SKILL.md && printf '---CMD---' && gog-wrapper calendar events"`,
+          allowlist: [{ pattern: catPath }, { pattern: printfPath }, { pattern: gogPath }],
+          safeBins: new Set(),
+          cwd: dir,
+          env,
+        });
+        expect(result.analysisOk).toBe(true);
+        expect(result.allowlistSatisfied).toBe(true);
+        expect(result.allowlistMatches.length).toBe(3);
+      } finally {
+        fs.rmSync(dir, { recursive: true, force: true });
+      }
+    });
+
+    it("satisfies allowlist for shell-wrapped compound command with safe bins", () => {
+      const dir = makeTempDir();
+      const headPath = path.join(dir, "head");
+      const tailPath = path.join(dir, "tail");
+      const gogPath = path.join(dir, "gog-wrapper");
+      const shPath = path.join(dir, "sh");
+      fs.writeFileSync(headPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(tailPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
+      const env = makePathEnv(dir);
+      try {
+        const result = evaluateShellAllowlist({
+          command: `${shPath} -c "head -n 1 && tail -n 1 && gog-wrapper calendar events"`,
+          allowlist: [{ pattern: gogPath }],
+          safeBins: normalizeSafeBins(["head", "tail"]),
+          trustedSafeBinDirs: new Set([dir]),
+          cwd: dir,
+          env,
+        });
+        expect(result.analysisOk).toBe(true);
+        // Safe bins are disabled on Windows
+        if (process.platform === "win32") {
+          expect(result.allowlistSatisfied).toBe(false);
+          return;
+        }
+        expect(result.allowlistSatisfied).toBe(true);
+      } finally {
+        fs.rmSync(dir, { recursive: true, force: true });
+      }
+    });
+
+    it("rejects shell-wrapped compound command when an inner binary is not in allowlist", () => {
+      const dir = makeTempDir();
+      const catPath = path.join(dir, "cat");
+      const gogPath = path.join(dir, "gog-wrapper");
+      const rmPath = path.join(dir, "rm");
+      const shPath = path.join(dir, "sh");
+      fs.writeFileSync(catPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(rmPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
+      const env = makePathEnv(dir);
+      try {
+        const result = evaluateShellAllowlist({
+          command: `${shPath} -c "cat SKILL.md && rm -rf / && gog-wrapper calendar events"`,
+          allowlist: [{ pattern: catPath }, { pattern: gogPath }],
+          safeBins: new Set(),
+          cwd: dir,
+          env,
+        });
+        expect(result.analysisOk).toBe(true);
+        expect(result.allowlistSatisfied).toBe(false);
+      } finally {
+        fs.rmSync(dir, { recursive: true, force: true });
+      }
+    });
+
+    it("handles single command inside shell wrapper (no chain operators — not recursed)", () => {
+      const dir = makeTempDir();
+      const gogPath = path.join(dir, "gog-wrapper");
+      const shPath = path.join(dir, "sh");
+      fs.writeFileSync(gogPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
+      const env = makePathEnv(dir);
+      try {
+        // Single-command shell wrappers are NOT recursively evaluated to preserve
+        // allow-always persisted-pattern security constraints.
+        const result = evaluateShellAllowlist({
+          command: `${shPath} -c "gog-wrapper calendar events"`,
+          allowlist: [{ pattern: gogPath }],
+          safeBins: new Set(),
+          cwd: dir,
+          env,
+        });
+        expect(result.analysisOk).toBe(true);
+        expect(result.allowlistSatisfied).toBe(true);
+      } finally {
+        fs.rmSync(dir, { recursive: true, force: true });
+      }
+    });
+
+    it("handles shell wrapper without -c flag (script invocation) unchanged", () => {
+      const dir = makeTempDir();
+      const shPath = path.join(dir, "sh");
+      const scriptPath = path.join(dir, "myscript.sh");
+      fs.writeFileSync(shPath, "#!/bin/sh\n", { mode: 0o755 });
+      fs.writeFileSync(scriptPath, "#!/bin/sh\necho hello\n", { mode: 0o755 });
+      const env = makePathEnv(dir);
+      try {
+        const result = evaluateShellAllowlist({
+          command: `${shPath} ${scriptPath}`,
+          allowlist: [{ pattern: scriptPath }],
+          safeBins: new Set(),
+          cwd: dir,
+          env,
+        });
+        expect(result.analysisOk).toBe(true);
+        expect(result.allowlistSatisfied).toBe(true);
+      } finally {
+        fs.rmSync(dir, { recursive: true, force: true });
+      }
+    });
+  });
 });

 describe("windowsEscapeArg", () => {