fix(pi-embedded-runner): gate silent-error retry on replay safety

Per @steipete review on #68310: the silent-error retry must not fire when the
failed attempt already recorded potential side effects (messaging tool sent,
cron add, or a mutating tool call that wasn't round-tripped as replay-safe).
Otherwise resubmission can duplicate those actions.

Adds `!attempt.replayMetadata.hadPotentialSideEffects` to the retry condition,
mirroring the gate used by resolveEmptyResponseRetryInstruction and the
planning-only / reasoning-only retry resolvers in run/incomplete-turn.ts.

Adds a new negative regression test:
  "does not retry when the failed attempt recorded side effects"
which reproduces the reviewer's repro — stopReason=error + output=0 + empty
content, but replayMetadata={hadPotentialSideEffects: true, replaySafe: false}.
Expected: no retry, surfaces incomplete-turn error. Confirmed locally.
This commit is contained in:
Watchtower
2026-04-19 19:18:54 -07:00
committed by Peter Steinberger
parent 5fb302ebf1
commit dcb525de50
2 changed files with 42 additions and 0 deletions

View File

@@ -153,4 +153,38 @@ describe("runEmbeddedPiAgent silent-error retry", () => {
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(result.payloads?.[0]?.isError).toBeFalsy();
});
it("does not retry when the failed attempt recorded side effects", async () => {
// If the errored turn already sent a message / added a cron / ran a
// mutating tool whose result wasn't captured as replay-safe,
// resubmission would duplicate those actions. Mirror the gate used by
// the other retry resolvers (resolveEmptyResponseRetryInstruction et al.)
// and surface the incomplete-turn error instead of retrying blind.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: {
stopReason: "error",
provider: "ollama",
model: "glm-5.1:cloud",
content: [],
usage: { input: 100, output: 0, totalTokens: 100 },
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
replayMetadata: {
hadPotentialSideEffects: true,
replaySafe: false,
},
}),
);
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
provider: "ollama",
model: "glm-5.1:cloud",
runId: "run-empty-error-retry-skip-side-effects",
});
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
expect(result.payloads?.[0]?.isError).toBe(true);
});
});

View File

@@ -1933,6 +1933,13 @@ export async function runEmbeddedPiAgent(
// Content-empty guard: a reasoning-only error (content has thinking
// blocks) is a distinct failure mode handled elsewhere; only retry
// when the assistant truly produced nothing.
//
// Side-effect guard: if the failed attempt already recorded potential
// side effects (messaging tool sent, cron add, mutating tool
// call that wasn't round-tripped as replay-safe), resubmission can
// duplicate those actions. Mirror the gate the other retry resolvers
// use (resolveEmptyResponseRetryInstruction, reasoning-only, planning-
// only), which short-circuit on attempt.replayMetadata.hadPotentialSideEffects.
const silentErrorContent = sessionLastAssistant?.content as Array<unknown> | undefined;
if (
incompleteTurnText &&
@@ -1942,6 +1949,7 @@ export async function runEmbeddedPiAgent(
sessionLastAssistant?.stopReason === "error" &&
((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 &&
(silentErrorContent?.length ?? 0) === 0 &&
!attempt.replayMetadata.hadPotentialSideEffects &&
emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
) {
emptyErrorRetries += 1;