fix(pi-embedded-runner): gate silent-error retry on replay safety

Per @steipete review on #68310: the silent-error retry must not fire when the failed attempt already recorded potential side effects (messaging tool sent, cron add, or a mutating tool call that wasn't round-tripped as replay-safe). Otherwise resubmission can duplicate those actions. Adds `!attempt.replayMetadata.hadPotentialSideEffects` to the retry condition, mirroring the gate used by resolveEmptyResponseRetryInstruction and the planning-only / reasoning-only retry resolvers in run/incomplete-turn.ts. Adds a new negative regression test: "does not retry when the failed attempt recorded side effects" which reproduces the reviewer's repro — stopReason=error + output=0 + empty content, but replayMetadata={hadPotentialSideEffects: true, replaySafe: false}. Expected: no retry, surfaces incomplete-turn error. Confirmed locally.
2026-05-06 07:10:43 +00:00 · 2026-04-19 19:18:54 -07:00
parent 5fb302ebf1
commit dcb525de50
2 changed files with 42 additions and 0 deletions
--- a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
+++ b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
@@ -153,4 +153,38 @@ describe("runEmbeddedPiAgent silent-error retry", () => {
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
    expect(result.payloads?.[0]?.isError).toBeFalsy();
  });
+
+  it("does not retry when the failed attempt recorded side effects", async () => {
+    // If the errored turn already sent a message / added a cron / ran a
+    // mutating tool whose result wasn't captured as replay-safe,
+    // resubmission would duplicate those actions. Mirror the gate used by
+    // the other retry resolvers (resolveEmptyResponseRetryInstruction et al.)
+    // and surface the incomplete-turn error instead of retrying blind.
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: [],
+        lastAssistant: {
+          stopReason: "error",
+          provider: "ollama",
+          model: "glm-5.1:cloud",
+          content: [],
+          usage: { input: 100, output: 0, totalTokens: 100 },
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+        replayMetadata: {
+          hadPotentialSideEffects: true,
+          replaySafe: false,
+        },
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent({
+      ...overflowBaseRunParams,
+      provider: "ollama",
+      model: "glm-5.1:cloud",
+      runId: "run-empty-error-retry-skip-side-effects",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+    expect(result.payloads?.[0]?.isError).toBe(true);
+  });
 });
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -1933,6 +1933,13 @@ export async function runEmbeddedPiAgent(
          // Content-empty guard: a reasoning-only error (content has thinking
          // blocks) is a distinct failure mode handled elsewhere; only retry
          // when the assistant truly produced nothing.
+          //
+          // Side-effect guard: if the failed attempt already recorded potential
+          // side effects (messaging tool sent, cron add, mutating tool
+          // call that wasn't round-tripped as replay-safe), resubmission can
+          // duplicate those actions. Mirror the gate the other retry resolvers
+          // use (resolveEmptyResponseRetryInstruction, reasoning-only, planning-
+          // only), which short-circuit on attempt.replayMetadata.hadPotentialSideEffects.
          const silentErrorContent = sessionLastAssistant?.content as Array<unknown> | undefined;
          if (
            incompleteTurnText &&
@@ -1942,6 +1949,7 @@ export async function runEmbeddedPiAgent(
            sessionLastAssistant?.stopReason === "error" &&
            ((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 &&
            (silentErrorContent?.length ?? 0) === 0 &&
+            !attempt.replayMetadata.hadPotentialSideEffects &&
            emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
          ) {
            emptyErrorRetries += 1;