fix(agents): retry thinking-only errored turns (#92191)

Retry replay-safe reasoning-only provider errors before assistant failover while preserving classified fallback and terminal-output ownership. Adds deterministic Anthropic gateway fault-injection coverage and focused regression tests.

Co-authored-by: ai-hpc <mail.speedy.hpc@hotmail.com>
2026-06-22 16:28:09 +00:00 · 2026-06-14 09:39:27 -07:00
parent 364461949d
commit ecaebfc51b
10 changed files with 739 additions and 58 deletions
--- a/extensions/qa-lab/src/providers/mock-openai/server.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.ts
@@ -149,6 +149,7 @@ const TINY_PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO7Z0nQAAAAASUVORK5CYII=";
 const QA_REASONING_ONLY_RECOVERY_PROMPT_RE = /reasoning-only continuation qa check/i;
 const QA_REASONING_ONLY_SIDE_EFFECT_PROMPT_RE = /reasoning-only after write safety check/i;
+const QA_ANTHROPIC_THINKING_ERROR_RECOVERY_PROMPT_RE = /anthropic thinking error qa check/i;
 const QA_THINKING_VISIBILITY_OFF_PROMPT_RE = /qa thinking visibility check off/i;
 const QA_THINKING_VISIBILITY_MAX_PROMPT_RE = /qa thinking visibility check max/i;
 const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa check/i;
@@ -189,6 +190,7 @@ const QA_GROUP_AUDIO_MIN_MULTIPART_BODY_CHARS = 48_000;
 const QA_MCP_CODE_MODE_API_FILE_PROMPT_RE = /mcp code mode api file qa check/i;

 type MockScenarioState = {
+  anthropicThinkingErrorPhase: number;
  subagentFanoutPhase: number;
  subagentHandoffSpawned: boolean;
 };
@@ -3128,6 +3130,90 @@ function buildAnthropicMessageResponse(params: {
  };
 }

+const QA_ANTHROPIC_THINKING_ERROR_TEXT =
+  "QA replay-safe read completed, but the provider stream failed after signed thinking.";
+const QA_ANTHROPIC_THINKING_ERROR_SIGNATURE = "qa_signed_thinking_block_91953";
+const QA_ANTHROPIC_THINKING_ERROR_MESSAGE = "QA injected provider stream failure";
+
+function buildAnthropicThinkingErrorResponse(params: { model: string }): Record<string, unknown> {
+  return {
+    type: "error",
+    error: {
+      type: "api_error",
+      message: QA_ANTHROPIC_THINKING_ERROR_MESSAGE,
+    },
+    model: params.model || "claude-opus-4-8",
+  };
+}
+
+function buildAnthropicThinkingErrorStreamEvents(params: {
+  model: string;
+}): AnthropicStreamEvent[] {
+  const messageId = `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`;
+  return [
+    {
+      type: "message_start",
+      message: {
+        id: messageId,
+        type: "message",
+        role: "assistant",
+        model: params.model || "claude-opus-4-8",
+        content: [],
+        stop_reason: null,
+        stop_sequence: null,
+        usage: {
+          input_tokens: 64,
+          output_tokens: 0,
+        },
+      },
+    },
+    {
+      type: "content_block_start",
+      index: 0,
+      content_block: {
+        type: "thinking",
+        thinking: "",
+        signature: "",
+      },
+    },
+    {
+      type: "content_block_delta",
+      index: 0,
+      delta: {
+        type: "thinking_delta",
+        thinking: QA_ANTHROPIC_THINKING_ERROR_TEXT,
+      },
+    },
+    {
+      type: "content_block_delta",
+      index: 0,
+      delta: {
+        type: "signature_delta",
+        signature: QA_ANTHROPIC_THINKING_ERROR_SIGNATURE,
+      },
+    },
+    {
+      type: "content_block_stop",
+      index: 0,
+    },
+    {
+      type: "message_delta",
+      delta: {},
+      usage: {
+        input_tokens: 64,
+        output_tokens: 1120,
+      },
+    },
+    {
+      type: "error",
+      error: {
+        type: "api_error",
+        message: QA_ANTHROPIC_THINKING_ERROR_MESSAGE,
+      },
+    },
+  ];
+}
+
 function buildAnthropicMessageStreamEvents(params: {
  model: string;
  extracted: ExtractedAssistantOutput;
@@ -3254,6 +3340,35 @@ async function buildMessagesPayload(
    stream: false,
    ...(Array.isArray(body.tools) ? { tools: body.tools } : {}),
  };
+  const allInputText = extractAllRequestTexts(input, dispatchBody);
+  if (QA_ANTHROPIC_THINKING_ERROR_RECOVERY_PROMPT_RE.test(allInputText)) {
+    const toolOutput = extractToolOutput(input);
+    const shouldEmitThinkingError =
+      toolOutput.length > 0 && scenarioState.anthropicThinkingErrorPhase === 0;
+    const events =
+      toolOutput.length === 0
+        ? buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" })
+        : shouldEmitThinkingError
+          ? (() => {
+              scenarioState.anthropicThinkingErrorPhase = 1;
+              return buildAssistantEvents("");
+            })()
+          : buildAssistantEvents("ANTHROPIC-THINKING-ERROR-RECOVERED-OK");
+    const extracted = extractFinalAssistantOutputFromEvents(events);
+    const responseBody = shouldEmitThinkingError
+      ? buildAnthropicThinkingErrorResponse({ model: normalizedModel })
+      : buildAnthropicMessageResponse({
+          model: normalizedModel,
+          extracted,
+        });
+    const streamEvents = shouldEmitThinkingError
+      ? buildAnthropicThinkingErrorStreamEvents({ model: normalizedModel })
+      : buildAnthropicMessageStreamEvents({
+          model: normalizedModel,
+          extracted,
+        });
+    return { events, input, extracted, responseBody, streamEvents, model: normalizedModel };
+  }
  const events = await buildResponsesPayload(dispatchBody, scenarioState);
  const extracted = extractFinalAssistantOutputFromEvents(events);
  const responseBody = buildAnthropicMessageResponse({
@@ -3270,6 +3385,7 @@ async function buildMessagesPayload(
 export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) {
  const host = params?.host ?? "127.0.0.1";
  const scenarioState: MockScenarioState = {
+    anthropicThinkingErrorPhase: 0,
    subagentFanoutPhase: 0,
    subagentHandoffSpawned: false,
  };
--- a/qa/scenarios/runtime/anthropic-thinking-error-recovery-replay-safe-read.md
+++ b/qa/scenarios/runtime/anthropic-thinking-error-recovery-replay-safe-read.md
@@ -0,0 +1,99 @@
+# Anthropic thinking error recovery after replay-safe read
+
+```yaml qa-scenario
+id: anthropic-thinking-error-recovery-replay-safe-read
+title: Anthropic thinking error recovery after replay-safe read
+surface: runtime
+coverage:
+  primary:
+    - runtime.anthropic-thinking-error-recovery
+  secondary:
+    - runtime.retry-policy
+gatewayConfigPatch:
+  agents:
+    defaults:
+      models:
+        anthropic/claude-opus-4-8:
+          params: {}
+objective: Verify an Anthropic stream error after signed thinking and a replay-safe read retries the same prompt into a visible answer.
+successCriteria:
+  - Scenario is mock-openai only so live lanes do not pick it up implicitly.
+  - The agent performs a replay-safe read before the Anthropic stream error.
+  - The runtime retries the same prompt without injecting the visible-answer continuation instruction.
+  - The final visible reply contains the exact recovery marker.
+docsRefs:
+  - docs/help/testing.md
+codeRefs:
+  - extensions/qa-lab/src/providers/mock-openai/server.ts
+  - src/agents/embedded-agent-runner/run/incomplete-turn.ts
+execution:
+  kind: flow
+  summary: Verify Anthropic stream errors after signed thinking recover after a replay-safe read.
+  config:
+    requiredProviderMode: mock-openai
+    anthropicModelRef: anthropic/claude-opus-4-8
+    promptSnippet: Anthropic thinking error QA check
+    prompt: "Anthropic thinking error QA check: read QA_KICKOFF_TASK.md, then answer with exactly ANTHROPIC-THINKING-ERROR-RECOVERED-OK."
+    expectedReply: ANTHROPIC-THINKING-ERROR-RECOVERED-OK
+    visibleAnswerRetryNeedle: The previous attempt did not produce a user-visible answer.
+```
+
+```yaml qa-flow
+steps:
+  - name: retries a thinking-only Anthropic error after a replay-safe read
+    actions:
+      - assert:
+          expr: "env.providerMode === 'mock-openai'"
+          message: this seeded scenario is mock-openai only
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: reset
+      - set: requestCountBefore
+        value:
+          expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
+      - set: sessionKey
+        value:
+          expr: "`agent:qa:anthropic-thinking-error:${randomUUID().slice(0, 8)}`"
+      - set: modelAck
+        value:
+          expr: "await env.gateway.call('sessions.patch', { key: sessionKey, model: config.anthropicModelRef }, { timeoutMs: liveTurnTimeoutMs(env, 45000) })"
+      - call: runAgentPrompt
+        args:
+          - ref: env
+          - sessionKey:
+              ref: sessionKey
+            message:
+              expr: config.prompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 45000)
+      - call: waitForOutboundMessage
+        saveAs: outbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)"
+          - expr: liveTurnTimeoutMs(env, 30000)
+      - assert:
+          expr: "outbound.text.includes(config.expectedReply)"
+          message:
+            expr: "`missing Anthropic thinking-error recovery marker: ${outbound.text}`"
+      - if:
+          expr: "Boolean(env.mock)"
+          then:
+            - set: scenarioRequests
+              value:
+                expr: "(await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBefore)"
+            - assert:
+                expr: "scenarioRequests.some((request) => String(request.allInputText ?? '').includes(config.promptSnippet) && request.providerVariant === 'anthropic' && request.plannedToolName === 'read')"
+                message: expected replay-safe read request on the Anthropic mock route
+            - assert:
+                expr: "scenarioRequests.filter((request) => String(request.allInputText ?? '').includes(config.promptSnippet) && request.providerVariant === 'anthropic').length >= 3"
+                message: expected initial read, terminal-error attempt, and same-prompt retry
+            - assert:
+                expr: "!scenarioRequests.some((request) => String(request.allInputText ?? '').includes(config.visibleAnswerRetryNeedle))"
+                message: expected same-prompt retry, not visible-answer continuation retry
+    detailsExpr: "env.mock ? `${outbound.text}\\nrequests=${String(scenarioRequests?.length ?? 0)}` : outbound.text"
+```
--- a/src/agents/embedded-agent-helpers.ts
+++ b/src/agents/embedded-agent-helpers.ts
@@ -38,6 +38,7 @@ export {
  isLikelyContextOverflowError,
  isFailoverAssistantError,
  isFailoverErrorMessage,
+  isGenericUnknownStreamErrorMessage,
  isImageDimensionErrorMessage,
  isImageSizeError,
  isOverloadedErrorMessage,
--- a/src/agents/embedded-agent-helpers/errors.ts
+++ b/src/agents/embedded-agent-helpers/errors.ts
@@ -960,7 +960,7 @@ function isBilling429MessageForProvider(raw: string, provider: string | undefine
 // stream ends with stopReason === "aborted" | "error" without specific info. Treat
 // it as a transient transport failure so the configured fallback chain rotates
 // instead of returning the bare string to the user (#71620).
-function isGenericUnknownStreamError(raw: string): boolean {
+export function isGenericUnknownStreamErrorMessage(raw: string): boolean {
  return /^\s*an unknown error occurred\.?\s*$/i.test(raw);
 }

@@ -1064,7 +1064,7 @@ function classifyFailoverClassificationFromMessage(
  if (isAuthErrorMessage(raw)) {
    return toReasonClassification("auth");
  }
-  if (isGenericUnknownStreamError(raw)) {
+  if (isGenericUnknownStreamErrorMessage(raw)) {
    return toReasonClassification("timeout");
  }
  if (isOpenRouterProviderReturnedError(raw, provider)) {
--- a/src/agents/embedded-agent-runner/run.empty-error-retry.test.ts
+++ b/src/agents/embedded-agent-runner/run.empty-error-retry.test.ts
@@ -3,6 +3,7 @@ import { beforeAll, beforeEach, describe, expect, it } from "vitest";
 import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
 import {
  loadRunOverflowCompactionHarness,
+  mockedClassifyAssistantFailoverReason,
  mockedClassifyFailoverReason,
  mockedGlobalHookRunner,
  mockedRunEmbeddedAttempt,
@@ -13,21 +14,27 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js";

 let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;

+type AssistantContent = NonNullable<EmbeddedRunAttemptResult["lastAssistant"]>["content"];
+
 function emptyErrorAttempt(
  provider: string,
  model: string,
  outputTokens = 0,
+  content: AssistantContent = [],
+  errorMessage?: string,
 ): EmbeddedRunAttemptResult {
  // Models can report stopReason=error with no output after tool activity; that
  // is replay-safe only when the attempt metadata records no side effects.
  return makeAttemptResult({
    assistantTexts: [],
    lastAssistant: {
+      role: "assistant",
      stopReason: "error",
      provider,
      model,
-      content: [],
+      content,
      usage: { input: 100, output: outputTokens, totalTokens: 100 + outputTokens },
+      ...(errorMessage ? { errorMessage } : {}),
    } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
  });
 }
@@ -36,6 +43,7 @@ function successAttempt(provider: string, model: string): EmbeddedRunAttemptResu
  return makeAttemptResult({
    assistantTexts: ["Done."],
    lastAssistant: {
+      role: "assistant",
      stopReason: "stop",
      provider,
      model,
@@ -71,6 +79,118 @@ describe("runEmbeddedAgent silent-error retry", () => {
    expect(result.payloads).toBeUndefined();
  });

+  it("retries when stopReason=error emitted only thinking blocks and output tokens", async () => {
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      emptyErrorAttempt("anthropic", "claude-opus-4-8", 1120, [
+        {
+          type: "thinking",
+          thinking: "internal reasoning before provider error",
+          thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
+        },
+      ]),
+    );
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("anthropic", "claude-opus-4-8"));
+
+    const result = await runEmbeddedAgent({
+      ...overflowBaseRunParams,
+      provider: "anthropic",
+      model: "claude-opus-4-8",
+      runId: "run-empty-error-retry-thinking-only",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    expect(result.payloads).toBeUndefined();
+  });
+
+  it("retries thinking-only unknown provider errors before assistant failover", async () => {
+    mockedClassifyFailoverReason.mockReturnValue("timeout");
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      emptyErrorAttempt(
+        "anthropic",
+        "claude-opus-4-8",
+        1120,
+        [
+          {
+            type: "thinking",
+            thinking: "internal reasoning before provider error",
+            thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
+          },
+        ],
+        "An unknown error occurred",
+      ),
+    );
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("anthropic", "claude-opus-4-8"));
+
+    const result = await runEmbeddedAgent({
+      ...overflowBaseRunParams,
+      provider: "anthropic",
+      model: "claude-opus-4-8",
+      runId: "run-empty-error-retry-before-assistant-failover",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    expect(result.payloads).toBeUndefined();
+  });
+
+  it.each([
+    ["timeout", "LLM request timed out."],
+    ["server_error", "Internal server error"],
+  ] as const)("does not intercept recognized %s failover errors", async (reason, errorMessage) => {
+    mockedClassifyAssistantFailoverReason.mockReturnValue(reason);
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      emptyErrorAttempt(
+        "anthropic",
+        "claude-opus-4-8",
+        1120,
+        [
+          {
+            type: "thinking",
+            thinking: "internal reasoning before provider error",
+            thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
+          },
+        ],
+        errorMessage,
+      ),
+    );
+
+    await runEmbeddedAgent({
+      ...overflowBaseRunParams,
+      provider: "anthropic",
+      model: "claude-opus-4-8",
+      runId: `run-empty-error-retry-${reason}`,
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+  });
+
+  it("does not intercept concrete non-transient failover errors", async () => {
+    mockedClassifyFailoverReason.mockReturnValue("model_not_found");
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      emptyErrorAttempt(
+        "anthropic",
+        "missing-model",
+        1120,
+        [
+          {
+            type: "thinking",
+            thinking: "internal reasoning before provider error",
+            thinkingSignature: JSON.stringify({ id: "rs_missing_model", type: "reasoning" }),
+          },
+        ],
+        "model not found",
+      ),
+    );
+
+    await runEmbeddedAgent({
+      ...overflowBaseRunParams,
+      provider: "anthropic",
+      model: "missing-model",
+      runId: "run-empty-error-retry-non-transient",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+  });
+
  it("caps retries at MAX_EMPTY_ERROR_RETRIES and surfaces incomplete-turn error", async () => {
    // 1 initial + 3 retries = 4 attempts, all returning empty-error.
    for (let i = 0; i < 4; i += 1) {
@@ -113,6 +233,7 @@ describe("runEmbeddedAgent silent-error retry", () => {
      makeAttemptResult({
        assistantTexts: [],
        lastAssistant: {
+          role: "assistant",
          stopReason: "stop",
          provider: "plain-provider",
          model: "plain-model",
@@ -156,6 +277,7 @@ describe("runEmbeddedAgent silent-error retry", () => {
      makeAttemptResult({
        assistantTexts: [],
        lastAssistant: {
+          role: "assistant",
          stopReason: "error",
          provider: "ollama",
          model: "glm-5.1:cloud",
@@ -179,4 +301,57 @@ describe("runEmbeddedAgent silent-error retry", () => {
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
    expect(result.payloads?.[0]?.isError).toBe(true);
  });
+
+  it.each([
+    [
+      "client tool calls",
+      { clientToolCalls: [{ name: "browser", params: { url: "https://example.com" } }] },
+    ],
+    ["yield", { yieldDetected: true }],
+    ["approval prompts", { didSendDeterministicApprovalPrompt: true }],
+    [
+      "heartbeat responses",
+      {
+        heartbeatToolResponse: {
+          outcome: "progress",
+          notify: false,
+          summary: "Still working",
+        },
+      },
+    ],
+    ["tool media", { toolMediaUrls: ["file:///tmp/render.png"] }],
+    ["voice media", { toolAudioAsVoice: true }],
+    ["trusted local media", { toolTrustedLocalMedia: true }],
+    [
+      "source reply payloads",
+      { messagingToolSourceReplyPayloads: [{ text: "Delivered through the source reply." }] },
+    ],
+    ["delivered source replies", { didDeliverSourceReplyViaMessageTool: true }],
+    ["tool errors", { lastToolError: { toolName: "read", error: "read failed" } }],
+  ] satisfies Array<[string, Partial<EmbeddedRunAttemptResult>]>)(
+    "does not retry after terminal %s",
+    async (_label, attemptState) => {
+      mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+        makeAttemptResult({
+          ...emptyErrorAttempt("anthropic", "claude-opus-4-8", 1120, [
+            {
+              type: "thinking",
+              thinking: "internal reasoning before provider error",
+              thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
+            },
+          ]),
+          ...attemptState,
+        }),
+      );
+
+      await runEmbeddedAgent({
+        ...overflowBaseRunParams,
+        provider: "anthropic",
+        model: "claude-opus-4-8",
+        runId: `run-empty-error-retry-terminal-${_label.replaceAll(" ", "-")}`,
+      });
+
+      expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+    },
+  );
 });
--- a/src/agents/embedded-agent-runner/run.incomplete-turn.test.ts
+++ b/src/agents/embedded-agent-runner/run.incomplete-turn.test.ts
@@ -41,6 +41,7 @@ import {
  resolveRunLivenessState,
  resolveSilentToolResultReplyPayload,
  shouldRetryMissingAssistantTurn,
+  shouldRetrySilentErrorAssistantTurn,
  shouldTreatEmptyAssistantReplyAsSilent,
 } from "./run/incomplete-turn.js";
 import type { EmbeddedRunAttemptResult } from "./run/types.js";
@@ -693,7 +694,7 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
    expect(result.payloads).toBeUndefined();
  });

-  it("does not retry reasoning-only turns when the assistant ended in error", async () => {
+  it("retries reasoning-only turns when the assistant ended in error", async () => {
    mockedClassifyFailoverReason.mockReturnValue(null);
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
      makeAttemptResult({
@@ -714,6 +715,18 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
      }),
    );
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: ["Recovered."],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "stop",
+          provider: "openai",
+          model: "gpt-5.4",
+          content: [{ type: "text", text: "Recovered." }],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );

    const result = await runEmbeddedAgent({
      ...overflowBaseRunParams,
@@ -722,9 +735,8 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
      runId: "run-reasoning-only-assistant-error",
    });

-    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
-    expect(result.payloads?.[0]?.isError).toBe(true);
-    expect(result.payloads?.[0]?.text).toContain("Please try again");
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    expect(result.payloads).toBeUndefined();
  });

  it("does not retry reasoning-only turns for non-strict-agentic providers", async () => {
@@ -2529,6 +2541,191 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
    expect(retryInstruction).toBeNull();
  });

+  it("surfaces incomplete-turn text for errored signed-thinking-only turns with payloads", () => {
+    const incompleteTurnText = resolveIncompleteTurnPayloadText({
+      payloadCount: 1,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: [],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "error",
+          provider: "anthropic",
+          model: "claude-opus-4-8",
+          content: [
+            {
+              type: "thinking",
+              thinking: "internal reasoning before provider error",
+              thinkingSignature: JSON.stringify({ id: "rs_error_payload", type: "reasoning" }),
+            },
+          ],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    });
+
+    expect(incompleteTurnText).toContain("couldn't generate a response");
+  });
+
+  it.each([
+    [
+      "heartbeat responses",
+      {
+        heartbeatToolResponse: {
+          outcome: "progress" as const,
+          notify: false,
+          summary: "Still working",
+        },
+      },
+    ],
+    ["tool media", { toolMediaUrls: ["file:///tmp/render.png"] }],
+    ["voice media", { toolAudioAsVoice: true }],
+    ["trusted local media", { toolTrustedLocalMedia: true }],
+    [
+      "source reply payloads",
+      { messagingToolSourceReplyPayloads: [{ text: "Delivered through the source reply." }] },
+    ],
+    ["delivered source replies", { didDeliverSourceReplyViaMessageTool: true }],
+  ] satisfies Array<[string, Partial<EmbeddedRunAttemptResult>]>)(
+    "does not replace terminal %s with an incomplete-turn warning",
+    (_label, attemptState) => {
+      const incompleteTurnText = resolveIncompleteTurnPayloadText({
+        payloadCount: 1,
+        aborted: false,
+        timedOut: false,
+        attempt: makeAttemptResult({
+          assistantTexts: [],
+          ...attemptState,
+          lastAssistant: {
+            role: "assistant",
+            stopReason: "error",
+            provider: "anthropic",
+            model: "claude-opus-4-8",
+            content: [
+              {
+                type: "thinking",
+                thinking: "internal reasoning before provider error",
+                thinkingSignature: JSON.stringify({
+                  id: "rs_terminal_payload",
+                  type: "reasoning",
+                }),
+              },
+            ],
+          } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+        }),
+      });
+
+      expect(incompleteTurnText).toBeNull();
+    },
+  );
+
+  it("retries replay-safe errored turns that only emitted thinking blocks", () => {
+    const assistant = {
+      role: "assistant",
+      stopReason: "error",
+      provider: "anthropic",
+      model: "claude-opus-4-8",
+      content: [
+        {
+          type: "thinking",
+          thinking: "internal reasoning before provider error",
+          thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
+        },
+        { type: "redacted_thinking", data: "opaque" },
+        { type: "text", text: " " },
+      ],
+      usage: { input: 100, output: 1120, totalTokens: 1220 },
+    } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
+    expect(
+      shouldRetrySilentErrorAssistantTurn({
+        attempt: makeAttemptResult({ assistantTexts: [], lastAssistant: assistant }),
+        assistant,
+      }),
+    ).toBe(true);
+  });
+
+  it("does not retry errored empty turns when non-zero output may indicate progress", () => {
+    const assistant = {
+      role: "assistant",
+      stopReason: "error",
+      provider: "ollama",
+      model: "glm-5.1:cloud",
+      content: [],
+      usage: { input: 100, output: 12, totalTokens: 112 },
+    } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
+    expect(
+      shouldRetrySilentErrorAssistantTurn({
+        attempt: makeAttemptResult({ assistantTexts: [], lastAssistant: assistant }),
+        assistant,
+      }),
+    ).toBe(false);
+  });
+
+  it.each([
+    {
+      name: "visible text",
+      content: [
+        { type: "thinking", thinking: "internal", thinkingSignature: "sig" },
+        { type: "text", text: "partial answer" },
+      ],
+    },
+    {
+      name: "tool call",
+      content: [
+        { type: "thinking", thinking: "internal", thinkingSignature: "sig" },
+        { type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } },
+      ],
+    },
+    {
+      name: "unknown block",
+      content: [{ type: "provider_metadata", value: "opaque" }],
+    },
+  ])("does not retry errored turns containing $name", ({ content }) => {
+    const assistant = {
+      role: "assistant",
+      stopReason: "error",
+      provider: "anthropic",
+      model: "claude-opus-4-8",
+      content,
+      usage: { input: 100, output: 1120, totalTokens: 1220 },
+    } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
+    expect(
+      shouldRetrySilentErrorAssistantTurn({
+        attempt: makeAttemptResult({ assistantTexts: [], lastAssistant: assistant }),
+        assistant,
+      }),
+    ).toBe(false);
+  });
+
+  it("does not retry errored thinking-only turns after side effects", () => {
+    const assistant = {
+      role: "assistant",
+      stopReason: "error",
+      provider: "anthropic",
+      model: "claude-opus-4-8",
+      content: [
+        {
+          type: "redacted_thinking",
+          data: "opaque",
+        },
+      ],
+      usage: { input: 100, output: 1120, totalTokens: 1220 },
+    } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
+    expect(
+      shouldRetrySilentErrorAssistantTurn({
+        attempt: makeAttemptResult({
+          assistantTexts: [],
+          replayMetadata: {
+            hadPotentialSideEffects: true,
+            replaySafe: false,
+          },
+          lastAssistant: assistant,
+        }),
+        assistant,
+      }),
+    ).toBe(false);
+  });
+
  it("detects empty openai-compatible stop turns with non-zero output usage", () => {
    const retryInstruction = resolveEmptyResponseRetryInstruction({
      provider: "llamacpp",
--- a/src/agents/embedded-agent-runner/run.overflow-compaction.harness.ts
+++ b/src/agents/embedded-agent-runner/run.overflow-compaction.harness.ts
@@ -225,6 +225,9 @@ export const mockedIsBillingAssistantError = vi.fn(() => false);
 export const mockedIsCompactionFailureError = vi.fn(() => false);
 export const mockedIsFailoverAssistantError = vi.fn<MockAssistantErrorProbe>(() => false);
 export const mockedIsFailoverErrorMessage = vi.fn(() => false);
+export const mockedIsGenericUnknownStreamErrorMessage = vi.fn((raw: string) =>
+  /^\s*an unknown error occurred\.?\s*$/i.test(raw),
+);
 export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
  const lower = normalizeLowercaseStringOrEmpty(msg ?? "");
  return (
@@ -412,6 +415,10 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
  mockedIsFailoverAssistantError.mockReturnValue(false);
  mockedIsFailoverErrorMessage.mockReset();
  mockedIsFailoverErrorMessage.mockReturnValue(false);
+  mockedIsGenericUnknownStreamErrorMessage.mockReset();
+  mockedIsGenericUnknownStreamErrorMessage.mockImplementation((raw: string) =>
+    /^\s*an unknown error occurred\.?\s*$/i.test(raw),
+  );
  mockedIsLikelyContextOverflowError.mockReset();
  mockedIsLikelyContextOverflowError.mockImplementation((msg?: string) => {
    const lower = normalizeLowercaseStringOrEmpty(msg ?? "");
@@ -642,6 +649,7 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
    isLikelyContextOverflowError: mockedIsLikelyContextOverflowError,
    isFailoverAssistantError: mockedIsFailoverAssistantError,
    isFailoverErrorMessage: mockedIsFailoverErrorMessage,
+    isGenericUnknownStreamErrorMessage: mockedIsGenericUnknownStreamErrorMessage,
    parseImageSizeError: mockedParseImageSizeError,
    parseImageDimensionError: mockedParseImageDimensionError,
    isRateLimitAssistantError: mockedIsRateLimitAssistantError,
--- a/src/agents/embedded-agent-runner/run.ts
+++ b/src/agents/embedded-agent-runner/run.ts
@@ -72,6 +72,7 @@ import {
  isCompactionFailureError,
  isFailoverAssistantError,
  isFailoverErrorMessage,
+  isGenericUnknownStreamErrorMessage,
  isLikelyContextOverflowError,
  isRateLimitAssistantError,
  parseImageDimensionError,
@@ -107,6 +108,7 @@ import {
  resolveSelectedOpenAIRuntimeProvider,
 } from "../openai-routing.js";
 import { resolveProviderIdForAuth } from "../provider-auth-aliases.js";
+import { hasOnlyAssistantReasoningContent } from "../replay-turn-classification.js";
 import { runAgentCleanupStep } from "../run-cleanup-timeout.js";
 import { buildAgentRuntimeAuthPlan } from "../runtime-plan/auth.js";
 import { buildAgentRuntimePlan } from "../runtime-plan/build.js";
@@ -195,6 +197,7 @@ import {
  resolveReplayInvalidFlag,
  resolveRunLivenessState,
  shouldRetryMissingAssistantTurn,
+  shouldRetrySilentErrorAssistantTurn,
  shouldTreatEmptyAssistantReplyAsSilent,
 } from "./run/incomplete-turn.js";
 import type { RunEmbeddedAgentParams } from "./run/params.js";
@@ -2936,6 +2939,43 @@ async function runEmbeddedAgentInternal(
          const imageDimensionError = parseImageDimensionError(
            assistantForFailover?.errorMessage ?? "",
          );
+          // The shared runtime wraps interrupted streams as a timeout. Retry that
+          // wrapper only for reasoning-only output so ordinary timeouts keep failover.
+          const genericUnknownReasoningError =
+            assistantFailoverReason === "timeout" &&
+            isGenericUnknownStreamErrorMessage(assistantForFailover?.errorMessage ?? "") &&
+            Boolean(assistantForFailover && hasOnlyAssistantReasoningContent(assistantForFailover));
+          const silentErrorRetryReason =
+            assistantFailoverReason === null ||
+            genericUnknownReasoningError ||
+            assistantFailoverReason === "no_error_details" ||
+            assistantFailoverReason === "unclassified" ||
+            assistantFailoverReason === "unknown";
+          // Retry replay-safe non-visible provider errors before assistant
+          // failover surfaces them as terminal provider failures.
+          if (
+            !authFailure &&
+            !rateLimitFailure &&
+            !billingFailure &&
+            !cloudCodeAssistFormatError &&
+            !imageDimensionError &&
+            !aborted &&
+            !promptError &&
+            !timedOut &&
+            silentErrorRetryReason &&
+            shouldRetrySilentErrorAssistantTurn({ attempt, assistant: assistantForFailover }) &&
+            emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
+          ) {
+            emptyErrorRetries += 1;
+            log.warn(
+              `[empty-error-retry] stopReason=error non-visible-output; resubmitting ` +
+                `attempt=${emptyErrorRetries}/${MAX_EMPTY_ERROR_RETRIES} ` +
+                `provider=${assistantForFailover?.provider ?? provider} ` +
+                `model=${assistantForFailover?.model ?? model.id} ` +
+                `sessionKey=${params.sessionKey ?? params.sessionId}`,
+            );
+            continue;
+          }
          // Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
          const failedAssistantProfileId = lastProfileId;
          const logAssistantFailoverDecision = createFailoverDecisionLogger({
@@ -3602,47 +3642,6 @@ async function runEmbeddedAgentInternal(
                `provider=${activeErrorContext.provider}/${activeErrorContext.model} attempts=${emptyResponseRetryAttempts}/${maxEmptyResponseRetryAttempts} — surfacing incomplete-turn error`,
            );
          }
-          // ── silent-error retry ────────────────────────────────────────────
-          // Observed with ollama/glm-5.1: a turn can end with stopReason="error"
-          // and zero output tokens AND empty content after a successful
-          // tool-call sequence, producing no user-visible text at all. This
-          // path is narrower than the empty-response continuation retry:
-          // same prompt, same session transcript (tool results already
-          // captured), no instruction injection. Placed before the
-          // incompleteTurnText return so it actually gets a chance to fire.
-          //
-          // Content-empty guard: a reasoning-only error (content has thinking
-          // blocks) is a distinct failure mode handled elsewhere; only retry
-          // when the assistant truly produced nothing.
-          //
-          // Side-effect guard: if the failed attempt already recorded potential
-          // side effects (messaging tool sent, cron add, mutating tool
-          // call that wasn't round-tripped as replay-safe), resubmission can
-          // duplicate those actions. Mirror the gate the other retry resolvers
-          // use (resolveEmptyResponseRetryInstruction, reasoning-only, planning-
-          // only), which short-circuit on attempt.replayMetadata.hadPotentialSideEffects.
-          const silentErrorContent = sessionLastAssistant?.content as Array<unknown> | undefined;
-          if (
-            incompleteTurnText &&
-            !aborted &&
-            !promptError &&
-            !timedOut &&
-            sessionLastAssistant?.stopReason === "error" &&
-            ((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 &&
-            (silentErrorContent?.length ?? 0) === 0 &&
-            (attempt.replayMetadata ? !attempt.replayMetadata.hadPotentialSideEffects : false) &&
-            emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
-          ) {
-            emptyErrorRetries += 1;
-            log.warn(
-              `[empty-error-retry] stopReason=error output=0; resubmitting ` +
-                `attempt=${emptyErrorRetries}/${MAX_EMPTY_ERROR_RETRIES} ` +
-                `provider=${sessionLastAssistant?.provider ?? provider} ` +
-                `model=${sessionLastAssistant?.model ?? model.id} ` +
-                `sessionKey=${params.sessionKey ?? params.sessionId}`,
-            );
-            continue;
-          }
          if (incompleteTurnText) {
            const replayInvalid = resolveReplayInvalidForAttempt(incompleteTurnText);
            const livenessState = resolveRunLivenessState({
--- a/src/agents/embedded-agent-runner/run/incomplete-turn.ts
+++ b/src/agents/embedded-agent-runner/run/incomplete-turn.ts
@@ -16,6 +16,7 @@ import {
  isStrictAgenticSupportedProviderModel,
  stripProviderPrefix,
 } from "../../execution-contract.js";
+import { hasOnlyAssistantReasoningContent } from "../../replay-turn-classification.js";
 import type { AgentMessage } from "../../runtime/index.js";
 import { isLikelyMutatingToolName } from "../../tool-mutation.js";
 import {
@@ -44,6 +45,12 @@ type IncompleteTurnAttempt = Pick<
  | "currentAttemptAssistant"
  | "yieldDetected"
  | "didSendDeterministicApprovalPrompt"
+  | "heartbeatToolResponse"
+  | "toolMediaUrls"
+  | "toolAudioAsVoice"
+  | "toolTrustedLocalMedia"
+  | "didDeliverSourceReplyViaMessageTool"
+  | "messagingToolSourceReplyPayloads"
  | "didSendViaMessagingTool"
  | "messagingToolSentTexts"
  | "messagingToolSentMediaUrls"
@@ -262,6 +269,35 @@ export function resolveAttemptReplayMetadata(attempt: {
  return attempt.replayMetadata ?? REPLAY_UNSAFE_FALLBACK_METADATA;
 }

+type TerminalAttemptState = Pick< // Subset of attempt-result fields that hasAttemptTerminalState inspects.
+  EmbeddedRunAttemptResult,
+  | "clientToolCalls"
+  | "yieldDetected"
+  | "didSendDeterministicApprovalPrompt"
+  | "heartbeatToolResponse"
+  | "lastToolError"
+  | "toolMediaUrls"
+  | "toolAudioAsVoice"
+  | "toolTrustedLocalMedia"
+  | "didDeliverSourceReplyViaMessageTool"
+  | "messagingToolSourceReplyPayloads"
+>;
+
+function hasAttemptTerminalState(attempt: TerminalAttemptState): boolean { // Returns true when any attempt field checked below is set (truthy).
+  return Boolean(
+    attempt.clientToolCalls ||
+    attempt.yieldDetected ||
+    attempt.didSendDeterministicApprovalPrompt ||
+    attempt.heartbeatToolResponse ||
+    attempt.lastToolError ||
+    attempt.toolMediaUrls?.some((url) => url.trim().length > 0) || // Blank or whitespace-only URLs do not count.
+    attempt.toolAudioAsVoice ||
+    attempt.toolTrustedLocalMedia ||
+    attempt.didDeliverSourceReplyViaMessageTool ||
+    attempt.messagingToolSourceReplyPayloads?.length, // A missing or empty payload list is falsy.
+  );
+}
+
 /**
 * Builds the user-visible incomplete-turn warning when a terminal attempt did
 * not produce a safe final assistant response and no committed delivery/progress
@@ -281,16 +317,17 @@ export function resolveIncompleteTurnPayloadText(params: {
  // produced. (#76477)
  const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
  const assistant = params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant;
-  // Unsigned thinking payloads count toward payloadCount but carry no user-visible
-  // content; bypass the visible-text guard when unsigned thinking was the only output
-  // so that incomplete-turn stall detection fires below. (#89787)
-  const unsignedThinkingOnlyTerminal =
+  // Thinking payloads can count toward payloadCount but carry no user-visible
+  // content; bypass the visible-text guard when thinking was the only output
+  // so that incomplete-turn stall detection fires below. (#89787, #91953)
+  const thinkingOnlyTerminal =
    params.payloadCount !== 0 &&
    !joinAssistantTexts(params.attempt.assistantTexts).length &&
-    isUnsignedThinkingOnlyAssistantTurn(assistant);
+    !hasAttemptTerminalState(params.attempt) &&
+    Boolean(assistant && hasOnlyAssistantReasoningContent(assistant));

  if (
-    (params.payloadCount !== 0 && !toolUseTerminal && !unsignedThinkingOnlyTerminal) ||
+    (params.payloadCount !== 0 && !toolUseTerminal && !thinkingOnlyTerminal) ||
    (params.aborted && params.externalAbort) ||
    params.timedOut ||
    params.attempt.clientToolCalls ||
@@ -330,7 +367,7 @@ export function resolveIncompleteTurnPayloadText(params: {
  if (
    !incompleteTerminalAssistant &&
    !reasoningOnlyAssistant &&
-    !unsignedThinkingOnlyTerminal &&
+    !thinkingOnlyTerminal &&
    !emptyResponseAssistant &&
    stopReason !== "error"
  ) {
@@ -555,6 +592,50 @@ function isUnsignedThinkingOnlyAssistantTurn(message: unknown): boolean {
  return assessLastAssistantMessage(message as AgentMessage) === "incomplete-thinking";
 }

+export function shouldRetrySilentErrorAssistantTurn(params: { // Returns true only when every guard below passes for an errored assistant turn.
+  attempt: Pick<
+    EmbeddedRunAttemptResult,
+    | "assistantTexts"
+    | "clientToolCalls"
+    | "yieldDetected"
+    | "didSendDeterministicApprovalPrompt"
+    | "heartbeatToolResponse"
+    | "lastToolError"
+    | "toolMediaUrls"
+    | "toolAudioAsVoice"
+    | "toolTrustedLocalMedia"
+    | "didDeliverSourceReplyViaMessageTool"
+    | "messagingToolSourceReplyPayloads"
+    | "replayMetadata"
+  >;
+  assistant: EmbeddedRunAttemptResult["lastAssistant"] | null | undefined;
+}): boolean {
+  if (joinAssistantTexts(params.attempt.assistantTexts).length > 0) { // Joined assistant texts are non-empty: never retry.
+    return false;
+  }
+  if (hasAttemptTerminalState(params.attempt)) { // Some terminal-state field on the attempt is set: never retry.
+    return false;
+  }
+  if (resolveAttemptReplayMetadata(params.attempt).hadPotentialSideEffects) { // Missing replayMetadata resolves to REPLAY_UNSAFE_FALLBACK_METADATA.
+    return false;
+  }
+
+  const assistant = params.assistant;
+  if (!assistant || assistant.stopReason !== "error") { // Only an existing turn with stopReason "error" qualifies.
+    return false;
+  }
+
+  const content = (assistant as { content?: unknown }).content;
+  if (!Array.isArray(content)) { // Non-array content is never retried.
+    return false;
+  }
+  if (content.length === 0) { // Empty content: retry only when no positive output token usage is reported.
+    return !hasPositiveOutputTokenUsage(assistant);
+  }
+
+  return hasOnlyAssistantReasoningContent(assistant); // Non-empty content: retry only when the reasoning-only check passes.
+}
+
 function isEmptyResponseAssistantTurn(params: {
  payloadCount: number;
  attempt: Pick<
--- a/src/agents/replay-turn-classification.ts
+++ b/src/agents/replay-turn-classification.ts
@@ -4,9 +4,9 @@ type AssistantTurnLike = {
  content?: unknown;
 };

-/** Returns true when a token-limited turn contains only incomplete provider reasoning. */
-export function isReasoningOnlyLengthAssistantTurn(message: AssistantTurnLike): boolean {
-  if (message.role !== "assistant" || message.stopReason !== "length") {
+/** Returns true when an assistant turn contains only provider reasoning and blank text. */
+export function hasOnlyAssistantReasoningContent(message: AssistantTurnLike): boolean {
+  if (message.role !== "assistant") {
    return false;
  }
  const content = Array.isArray(message.content)
@@ -31,3 +31,8 @@ export function isReasoningOnlyLengthAssistantTurn(message: AssistantTurnLike):
  }
  return hasThinking;
 }
+
+/** Returns true when a turn has stopReason "length" and hasOnlyAssistantReasoningContent accepts it. */
+export function isReasoningOnlyLengthAssistantTurn(message: AssistantTurnLike): boolean {
+  return message.stopReason === "length" && hasOnlyAssistantReasoningContent(message);
+}