From dcb525de50ff96c80bc7929d3c7a46165243a0ee Mon Sep 17 00:00:00 2001 From: Watchtower Date: Sun, 19 Apr 2026 19:18:54 -0700 Subject: [PATCH] fix(pi-embedded-runner): gate silent-error retry on replay safety MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @steipete review on #68310: the silent-error retry must not fire when the failed attempt already recorded potential side effects (messaging tool sent, cron add, or a mutating tool call that wasn't round-tripped as replay-safe). Otherwise resubmission can duplicate those actions. Adds `!attempt.replayMetadata.hadPotentialSideEffects` to the retry condition, mirroring the gate used by resolveEmptyResponseRetryInstruction and the planning-only / reasoning-only retry resolvers in run/incomplete-turn.ts. Adds a new negative regression test: "does not retry when the failed attempt recorded side effects" which reproduces the reviewer's repro — stopReason=error + output=0 + empty content, but replayMetadata={hadPotentialSideEffects: true, replaySafe: false}. Expected: no retry, surfaces incomplete-turn error. Confirmed locally. --- .../run.empty-error-retry.test.ts | 34 +++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 8 +++++ 2 files changed, 42 insertions(+) diff --git a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts index 69911ff1527..a3c714bbb9d 100644 --- a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts +++ b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts @@ -153,4 +153,38 @@ describe("runEmbeddedPiAgent silent-error retry", () => { expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); expect(result.payloads?.[0]?.isError).toBeFalsy(); }); + + it("does not retry when the failed attempt recorded side effects", async () => { + // If the errored turn already sent a message / added a cron / ran a + // mutating tool whose result wasn't captured as replay-safe, + // resubmission would duplicate those actions. Mirror the gate used by + // the other retry resolvers (resolveEmptyResponseRetryInstruction et al.) + // and surface the incomplete-turn error instead of retrying blind. + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: { + stopReason: "error", + provider: "ollama", + model: "glm-5.1:cloud", + content: [], + usage: { input: 100, output: 0, totalTokens: 100 }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + replayMetadata: { + hadPotentialSideEffects: true, + replaySafe: false, + }, + }), + ); + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "ollama", + model: "glm-5.1:cloud", + runId: "run-empty-error-retry-skip-side-effects", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); + expect(result.payloads?.[0]?.isError).toBe(true); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 59352f1e080..9dd358a3df2 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1933,6 +1933,13 @@ export async function runEmbeddedPiAgent( // Content-empty guard: a reasoning-only error (content has thinking // blocks) is a distinct failure mode handled elsewhere; only retry // when the assistant truly produced nothing. + // + // Side-effect guard: if the failed attempt already recorded potential + // side effects (messaging tool sent, cron add, mutating tool + // call that wasn't round-tripped as replay-safe), resubmission can + // duplicate those actions. Mirror the gate the other retry resolvers + // use (resolveEmptyResponseRetryInstruction, reasoning-only, planning- + // only), which short-circuit on attempt.replayMetadata.hadPotentialSideEffects. const silentErrorContent = sessionLastAssistant?.content as Array | undefined; if ( incompleteTurnText && @@ -1942,6 +1949,7 @@ export async function runEmbeddedPiAgent( sessionLastAssistant?.stopReason === "error" && ((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 && (silentErrorContent?.length ?? 0) === 0 && + !attempt.replayMetadata.hadPotentialSideEffects && emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES ) { emptyErrorRetries += 1;