From 5fb302ebf1bbfe9929cd46009b20b617f1360517 Mon Sep 17 00:00:00 2001 From: Watchtower Date: Fri, 17 Apr 2026 13:01:22 -0700 Subject: [PATCH] fix(pi-embedded-runner): retry silent stopReason=error turns (non-frontier models) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ollama/glm-5.1:cloud (and occasionally other models) can end a turn with stopReason="error", usage.output=0, and empty content[] after a successful tool-call sequence. The existing empty-response retry path in src/agents/pi-embedded-runner/run/incomplete-turn.ts is gated on isStrictAgenticSupportedProviderModel (gpt-5 family only), so non-frontier models fall through to "incomplete turn detected" with payloads=0 and no recovery. The user sees no reply and has to nudge. Add a narrow, model-agnostic resubmission inside the attempt loop, placed before the incompleteTurnText surface-to-user return: - stopReason === "error" - usage.output === 0 - content.length === 0 (excludes reasoning-only error turns) - bounded by MAX_EMPTY_ERROR_RETRIES = 3 No instruction injection, no model gating; same prompt, same session transcript (tool results already captured), just let the loop try again. New test file run.empty-error-retry.test.ts covers: 1. Retries for ollama/glm-5.1:cloud → succeeds on 2nd attempt. 2. Caps at 3 retries → 4 total attempts → surfaces incomplete-turn error. 3. Does NOT retry when output > 0 (preserve produced text). 4. Does NOT retry when stopReason=stop + output=0 (NO_REPLY path). 5. Retries for anthropic/claude-opus-4-7 too — model-agnostic. Relates to #68281. 
--- .../run.empty-error-retry.test.ts | 156 ++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 43 +++++ 2 files changed, 199 insertions(+) create mode 100644 src/agents/pi-embedded-runner/run.empty-error-retry.test.ts diff --git a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts new file mode 100644 index 00000000000..69911ff1527 --- /dev/null +++ b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts @@ -0,0 +1,156 @@ +import { beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; +import { + loadRunOverflowCompactionHarness, + mockedClassifyFailoverReason, + mockedGlobalHookRunner, + mockedRunEmbeddedAttempt, + overflowBaseRunParams, + resetRunOverflowCompactionHarnessMocks, +} from "./run.overflow-compaction.harness.js"; +import type { EmbeddedRunAttemptResult } from "./run/types.js"; + +// Regression coverage for the silent-error retry in runEmbeddedPiAgent. +// +// Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and +// zero output tokens after a successful tool-call sequence. The user sees no +// reply and has to nudge. The existing empty-response retry path is gated on +// the strict-agentic contract (gpt-5 only), so non-frontier models fell +// through to "incomplete turn detected". This suite locks in a narrower, +// model-agnostic resubmission. 
+ +let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; + +function emptyErrorAttempt( + provider: string, + model: string, + outputTokens = 0, +): EmbeddedRunAttemptResult { + return makeAttemptResult({ + assistantTexts: [], + lastAssistant: { + stopReason: "error", + provider, + model, + content: [], + usage: { input: 100, output: outputTokens, totalTokens: 100 + outputTokens }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }); +} + +function successAttempt(provider: string, model: string): EmbeddedRunAttemptResult { + return makeAttemptResult({ + assistantTexts: ["Done."], + lastAssistant: { + stopReason: "stop", + provider, + model, + content: [{ type: "text", text: "Done." }], + usage: { input: 100, output: 5, totalTokens: 105 }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }); +} + +describe("runEmbeddedPiAgent silent-error retry", () => { + beforeAll(async () => { + ({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness()); + }); + + beforeEach(() => { + resetRunOverflowCompactionHarnessMocks(); + mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); + mockedClassifyFailoverReason.mockReturnValue(null); + }); + + it("retries when a turn ends with stopReason=error and zero output tokens", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce(emptyErrorAttempt("ollama", "glm-5.1:cloud")); + mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("ollama", "glm-5.1:cloud")); + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "ollama", + model: "glm-5.1:cloud", + runId: "run-empty-error-retry-basic", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(result.payloads?.[0]?.isError).toBeFalsy(); + }); + + it("caps retries at MAX_EMPTY_ERROR_RETRIES and surfaces incomplete-turn error", async () => { + // 1 initial + 3 retries = 4 attempts, all returning empty-error. 
+ for (let i = 0; i < 4; i += 1) { + mockedRunEmbeddedAttempt.mockResolvedValueOnce(emptyErrorAttempt("ollama", "glm-5.1:cloud")); + } + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "ollama", + model: "glm-5.1:cloud", + runId: "run-empty-error-retry-exhausted", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4); + expect(result.payloads?.[0]?.isError).toBe(true); + }); + + it("does not retry when stopReason=error but output tokens > 0", async () => { + // Model produced something before erroring; surfacing that text is better + // than silent resubmission. + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + emptyErrorAttempt("ollama", "glm-5.1:cloud", 12), + ); + + await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "ollama", + model: "glm-5.1:cloud", + runId: "run-empty-error-retry-skip-with-output", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); + }); + + it("does not retry when stopReason=stop and output=0 (out of scope)", async () => { + // Clean stop with no output is a legitimate silent reply (e.g. NO_REPLY + // token path), not a crash. This retry must not trigger there. 
+ mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: { + stopReason: "stop", + provider: "ollama", + model: "glm-5.1:cloud", + content: [], + usage: { input: 100, output: 0, totalTokens: 100 }, + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + + await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "ollama", + model: "glm-5.1:cloud", + runId: "run-empty-error-retry-skip-clean-stop", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); + }); + + it("retries for frontier models too — the fix is model-agnostic", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + emptyErrorAttempt("anthropic", "claude-opus-4-7"), + ); + mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("anthropic", "claude-opus-4-7")); + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "anthropic", + model: "claude-opus-4-7", + runId: "run-empty-error-retry-frontier", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(result.payloads?.[0]?.isError).toBeFalsy(); + }); +}); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 927ce7f3998..59352f1e080 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -489,6 +489,13 @@ export async function runEmbeddedPiAgent( }); let rateLimitProfileRotations = 0; let timeoutCompactionAttempts = 0; + // Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can + // end a turn with stopReason="error" + zero output tokens, producing no + // user-visible text. The existing empty-response retry is gated on + // isStrictAgenticSupportedProviderModel (gpt-5 only). This is an + // orthogonal, model-agnostic resubmission. 
+ const MAX_EMPTY_ERROR_RETRIES = 3; + let emptyErrorRetries = 0; const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config); const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config); const rateLimitProfileRotationLimit = resolveRateLimitProfileRotationLimit(params.config); @@ -1911,6 +1918,42 @@ export async function runEmbeddedPiAgent( `provider=${activeErrorContext.provider}/${activeErrorContext.model} attempts=${emptyResponseRetryAttempts}/${maxEmptyResponseRetryAttempts} — surfacing incomplete-turn error`, ); } + // ── silent-error retry ──────────────────────────────────────────── + // Observed with ollama/glm-5.1: a turn can end with stopReason="error" + // and zero output tokens AND empty content after a successful + // tool-call sequence, producing no user-visible text at all. The + // existing empty-response retry path (resolveEmptyResponseRetryInstruction) + // is gated on the strict-agentic contract (gpt-5 only), so non-frontier + // models fall through to "incomplete turn detected" → silent gap + // until the user nudges. This is a narrower, model-agnostic + // resubmission: same prompt, same session transcript (tool results + // already captured), no instruction injection. Placed before the + // incompleteTurnText return so it actually gets a chance to fire. + // + // Content-empty guard: a reasoning-only error (content has thinking + // blocks) is a distinct failure mode handled elsewhere; only retry + // when the assistant truly produced nothing. + const silentErrorContent = sessionLastAssistant?.content as Array<unknown> | undefined; + if ( + incompleteTurnText && + !aborted && + !promptError && + !timedOut && + sessionLastAssistant?.stopReason === "error" && + ((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 && + (silentErrorContent?.length ?? 
0) === 0 && + emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES + ) { + emptyErrorRetries += 1; + log.warn( + `[empty-error-retry] stopReason=error output=0; resubmitting ` + + `attempt=${emptyErrorRetries}/${MAX_EMPTY_ERROR_RETRIES} ` + + `provider=${sessionLastAssistant?.provider ?? provider} ` + + `model=${sessionLastAssistant?.model ?? model.id} ` + + `sessionKey=${params.sessionKey ?? params.sessionId}`, + ); + continue; + } if (incompleteTurnText) { const replayInvalid = resolveReplayInvalidForAttempt(incompleteTurnText); const livenessState = resolveRunLivenessState({