mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-22 16:28:09 +00:00
fix(agents): retry thinking-only errored turns (#92191)
Retry replay-safe reasoning-only provider errors before assistant failover while preserving classified fallback and terminal-output ownership. Adds deterministic Anthropic gateway fault-injection coverage and focused regression tests.

Co-authored-by: ai-hpc <mail.speedy.hpc@hotmail.com>
This commit is contained in:
@@ -149,6 +149,7 @@ const TINY_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO7Z0nQAAAAASUVORK5CYII=";
|
||||
const QA_REASONING_ONLY_RECOVERY_PROMPT_RE = /reasoning-only continuation qa check/i;
|
||||
const QA_REASONING_ONLY_SIDE_EFFECT_PROMPT_RE = /reasoning-only after write safety check/i;
|
||||
const QA_ANTHROPIC_THINKING_ERROR_RECOVERY_PROMPT_RE = /anthropic thinking error qa check/i;
|
||||
const QA_THINKING_VISIBILITY_OFF_PROMPT_RE = /qa thinking visibility check off/i;
|
||||
const QA_THINKING_VISIBILITY_MAX_PROMPT_RE = /qa thinking visibility check max/i;
|
||||
const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa check/i;
|
||||
@@ -189,6 +190,7 @@ const QA_GROUP_AUDIO_MIN_MULTIPART_BODY_CHARS = 48_000;
|
||||
const QA_MCP_CODE_MODE_API_FILE_PROMPT_RE = /mcp code mode api file qa check/i;
|
||||
|
||||
/** Mutable state that the mock server threads through its scenario handlers. */
type MockScenarioState = {
  /**
   * Progress marker for the Anthropic thinking-error scenario. It starts at 0
   * and is set to 1 when the injected thinking-only stream error is emitted, so
   * the next matching request returns the recovery reply instead.
   */
  anthropicThinkingErrorPhase: number;
  // NOTE(review): presumably a step counter for the subagent fan-out scenario;
  // only its initial value (0) is visible here.
  subagentFanoutPhase: number;
  // NOTE(review): presumably records that the handoff scenario already spawned
  // its subagent; only its initial value (false) is visible here.
  subagentHandoffSpawned: boolean;
};
|
||||
@@ -3128,6 +3130,90 @@ function buildAnthropicMessageResponse(params: {
|
||||
};
|
||||
}
|
||||
|
||||
const QA_ANTHROPIC_THINKING_ERROR_TEXT =
|
||||
"QA replay-safe read completed, but the provider stream failed after signed thinking.";
|
||||
const QA_ANTHROPIC_THINKING_ERROR_SIGNATURE = "qa_signed_thinking_block_91953";
|
||||
const QA_ANTHROPIC_THINKING_ERROR_MESSAGE = "QA injected provider stream failure";
|
||||
|
||||
/**
 * Builds the JSON body used for the injected Anthropic provider failure.
 *
 * The result is an `error` envelope carrying an `api_error` with the shared QA
 * failure message, plus the model the request was addressed to.
 *
 * @param params.model Model id to echo back; an empty value falls back to
 *   "claude-opus-4-8".
 * @returns The error response body as a plain record.
 */
function buildAnthropicThinkingErrorResponse(params: { model: string }): Record<string, unknown> {
  const model = params.model || "claude-opus-4-8";
  const error = {
    type: "api_error",
    message: QA_ANTHROPIC_THINKING_ERROR_MESSAGE,
  };
  return { type: "error", error, model };
}
|
||||
|
||||
/**
 * Builds the stream event sequence for the injected Anthropic failure: a
 * message that opens, emits exactly one signed thinking block, reports output
 * token usage, and then terminates with an `api_error` event instead of any
 * visible text.
 *
 * @param params.model Model id to report in `message_start`; an empty value
 *   falls back to "claude-opus-4-8".
 * @returns The ordered list of stream events.
 */
function buildAnthropicThinkingErrorStreamEvents(params: {
  model: string;
}): AnthropicStreamEvent[] {
  const model = params.model || "claude-opus-4-8";
  // Random hex suffix keeps mock message ids distinct between requests.
  const messageId = `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`;
  // The only content block in this stream is the thinking block at index 0.
  const thinkingIndex = 0;
  const inputTokens = 64;
  const outputTokens = 1120;

  return [
    // 1. Open the assistant message with no content and zero output so far.
    {
      type: "message_start",
      message: {
        id: messageId,
        type: "message",
        role: "assistant",
        model,
        content: [],
        stop_reason: null,
        stop_sequence: null,
        usage: { input_tokens: inputTokens, output_tokens: 0 },
      },
    },
    // 2. Open an empty thinking block, then fill it: text first, signature second.
    {
      type: "content_block_start",
      index: thinkingIndex,
      content_block: { type: "thinking", thinking: "", signature: "" },
    },
    {
      type: "content_block_delta",
      index: thinkingIndex,
      delta: { type: "thinking_delta", thinking: QA_ANTHROPIC_THINKING_ERROR_TEXT },
    },
    {
      type: "content_block_delta",
      index: thinkingIndex,
      delta: { type: "signature_delta", signature: QA_ANTHROPIC_THINKING_ERROR_SIGNATURE },
    },
    { type: "content_block_stop", index: thinkingIndex },
    // 3. Report non-zero output usage even though nothing visible was produced.
    {
      type: "message_delta",
      delta: {},
      usage: { input_tokens: inputTokens, output_tokens: outputTokens },
    },
    // 4. Fail the stream; no message_stop follows.
    {
      type: "error",
      error: { type: "api_error", message: QA_ANTHROPIC_THINKING_ERROR_MESSAGE },
    },
  ];
}
|
||||
|
||||
function buildAnthropicMessageStreamEvents(params: {
|
||||
model: string;
|
||||
extracted: ExtractedAssistantOutput;
|
||||
@@ -3254,6 +3340,35 @@ async function buildMessagesPayload(
|
||||
stream: false,
|
||||
...(Array.isArray(body.tools) ? { tools: body.tools } : {}),
|
||||
};
|
||||
const allInputText = extractAllRequestTexts(input, dispatchBody);
|
||||
if (QA_ANTHROPIC_THINKING_ERROR_RECOVERY_PROMPT_RE.test(allInputText)) {
|
||||
const toolOutput = extractToolOutput(input);
|
||||
const shouldEmitThinkingError =
|
||||
toolOutput.length > 0 && scenarioState.anthropicThinkingErrorPhase === 0;
|
||||
const events =
|
||||
toolOutput.length === 0
|
||||
? buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" })
|
||||
: shouldEmitThinkingError
|
||||
? (() => {
|
||||
scenarioState.anthropicThinkingErrorPhase = 1;
|
||||
return buildAssistantEvents("");
|
||||
})()
|
||||
: buildAssistantEvents("ANTHROPIC-THINKING-ERROR-RECOVERED-OK");
|
||||
const extracted = extractFinalAssistantOutputFromEvents(events);
|
||||
const responseBody = shouldEmitThinkingError
|
||||
? buildAnthropicThinkingErrorResponse({ model: normalizedModel })
|
||||
: buildAnthropicMessageResponse({
|
||||
model: normalizedModel,
|
||||
extracted,
|
||||
});
|
||||
const streamEvents = shouldEmitThinkingError
|
||||
? buildAnthropicThinkingErrorStreamEvents({ model: normalizedModel })
|
||||
: buildAnthropicMessageStreamEvents({
|
||||
model: normalizedModel,
|
||||
extracted,
|
||||
});
|
||||
return { events, input, extracted, responseBody, streamEvents, model: normalizedModel };
|
||||
}
|
||||
const events = await buildResponsesPayload(dispatchBody, scenarioState);
|
||||
const extracted = extractFinalAssistantOutputFromEvents(events);
|
||||
const responseBody = buildAnthropicMessageResponse({
|
||||
@@ -3270,6 +3385,7 @@ async function buildMessagesPayload(
|
||||
export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) {
|
||||
const host = params?.host ?? "127.0.0.1";
|
||||
const scenarioState: MockScenarioState = {
|
||||
anthropicThinkingErrorPhase: 0,
|
||||
subagentFanoutPhase: 0,
|
||||
subagentHandoffSpawned: false,
|
||||
};
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
# Anthropic thinking error recovery after replay-safe read
|
||||
|
||||
```yaml qa-scenario
|
||||
id: anthropic-thinking-error-recovery-replay-safe-read
|
||||
title: Anthropic thinking error recovery after replay-safe read
|
||||
surface: runtime
|
||||
coverage:
|
||||
primary:
|
||||
- runtime.anthropic-thinking-error-recovery
|
||||
secondary:
|
||||
- runtime.retry-policy
|
||||
gatewayConfigPatch:
|
||||
agents:
|
||||
defaults:
|
||||
models:
|
||||
anthropic/claude-opus-4-8:
|
||||
params: {}
|
||||
objective: Verify an Anthropic stream error after signed thinking and a replay-safe read retries the same prompt into a visible answer.
|
||||
successCriteria:
|
||||
- Scenario is mock-openai only so live lanes do not pick it up implicitly.
|
||||
- The agent performs a replay-safe read before the Anthropic stream error.
|
||||
- The runtime retries the same prompt without injecting the visible-answer continuation instruction.
|
||||
- The final visible reply contains the exact recovery marker.
|
||||
docsRefs:
|
||||
- docs/help/testing.md
|
||||
codeRefs:
|
||||
- extensions/qa-lab/src/providers/mock-openai/server.ts
|
||||
- src/agents/embedded-agent-runner/run/incomplete-turn.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Verify Anthropic stream errors after signed thinking recover after a replay-safe read.
|
||||
config:
|
||||
requiredProviderMode: mock-openai
|
||||
anthropicModelRef: anthropic/claude-opus-4-8
|
||||
promptSnippet: Anthropic thinking error QA check
|
||||
prompt: "Anthropic thinking error QA check: read QA_KICKOFF_TASK.md, then answer with exactly ANTHROPIC-THINKING-ERROR-RECOVERED-OK."
|
||||
expectedReply: ANTHROPIC-THINKING-ERROR-RECOVERED-OK
|
||||
visibleAnswerRetryNeedle: The previous attempt did not produce a user-visible answer.
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: retries a thinking-only Anthropic error after a replay-safe read
|
||||
actions:
|
||||
- assert:
|
||||
expr: "env.providerMode === 'mock-openai'"
|
||||
message: this seeded scenario is mock-openai only
|
||||
- call: waitForGatewayHealthy
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: reset
|
||||
- set: requestCountBefore
|
||||
value:
|
||||
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
|
||||
- set: sessionKey
|
||||
value:
|
||||
expr: "`agent:qa:anthropic-thinking-error:${randomUUID().slice(0, 8)}`"
|
||||
- set: modelAck
|
||||
value:
|
||||
expr: "await env.gateway.call('sessions.patch', { key: sessionKey, model: config.anthropicModelRef }, { timeoutMs: liveTurnTimeoutMs(env, 45000) })"
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
- sessionKey:
|
||||
ref: sessionKey
|
||||
message:
|
||||
expr: config.prompt
|
||||
timeoutMs:
|
||||
expr: liveTurnTimeoutMs(env, 45000)
|
||||
- call: waitForOutboundMessage
|
||||
saveAs: outbound
|
||||
args:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)"
|
||||
- expr: liveTurnTimeoutMs(env, 30000)
|
||||
- assert:
|
||||
expr: "outbound.text.includes(config.expectedReply)"
|
||||
message:
|
||||
expr: "`missing Anthropic thinking-error recovery marker: ${outbound.text}`"
|
||||
- if:
|
||||
expr: "Boolean(env.mock)"
|
||||
then:
|
||||
- set: scenarioRequests
|
||||
value:
|
||||
expr: "(await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBefore)"
|
||||
- assert:
|
||||
expr: "scenarioRequests.some((request) => String(request.allInputText ?? '').includes(config.promptSnippet) && request.providerVariant === 'anthropic' && request.plannedToolName === 'read')"
|
||||
message: expected replay-safe read request on the Anthropic mock route
|
||||
- assert:
|
||||
expr: "scenarioRequests.filter((request) => String(request.allInputText ?? '').includes(config.promptSnippet) && request.providerVariant === 'anthropic').length >= 3"
|
||||
message: expected initial read, terminal-error attempt, and same-prompt retry
|
||||
- assert:
|
||||
expr: "!scenarioRequests.some((request) => String(request.allInputText ?? '').includes(config.visibleAnswerRetryNeedle))"
|
||||
message: expected same-prompt retry, not visible-answer continuation retry
|
||||
detailsExpr: "env.mock ? `${outbound.text}\\nrequests=${String(scenarioRequests?.length ?? 0)}` : outbound.text"
|
||||
```
|
||||
@@ -38,6 +38,7 @@ export {
|
||||
isLikelyContextOverflowError,
|
||||
isFailoverAssistantError,
|
||||
isFailoverErrorMessage,
|
||||
isGenericUnknownStreamErrorMessage,
|
||||
isImageDimensionErrorMessage,
|
||||
isImageSizeError,
|
||||
isOverloadedErrorMessage,
|
||||
|
||||
@@ -960,7 +960,7 @@ function isBilling429MessageForProvider(raw: string, provider: string | undefine
|
||||
// stream ends with stopReason === "aborted" | "error" without specific info. Treat
|
||||
// it as a transient transport failure so the configured fallback chain rotates
|
||||
// instead of returning the bare string to the user (#71620).
|
||||
/**
 * Reports whether `raw` is exactly the generic "An unknown error occurred"
 * placeholder, with nothing else attached.
 *
 * The match is case-insensitive, tolerates surrounding whitespace and one
 * optional trailing period, and is anchored at both ends, so messages that
 * carry extra detail after the phrase do not match.
 *
 * @param raw Error message text to inspect.
 * @returns `true` only for the bare generic placeholder.
 */
export function isGenericUnknownStreamErrorMessage(raw: string): boolean {
  return /^\s*an unknown error occurred\.?\s*$/i.test(raw);
}
|
||||
|
||||
@@ -1064,7 +1064,7 @@ function classifyFailoverClassificationFromMessage(
|
||||
if (isAuthErrorMessage(raw)) {
|
||||
return toReasonClassification("auth");
|
||||
}
|
||||
if (isGenericUnknownStreamError(raw)) {
|
||||
if (isGenericUnknownStreamErrorMessage(raw)) {
|
||||
return toReasonClassification("timeout");
|
||||
}
|
||||
if (isOpenRouterProviderReturnedError(raw, provider)) {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
|
||||
import {
|
||||
loadRunOverflowCompactionHarness,
|
||||
mockedClassifyAssistantFailoverReason,
|
||||
mockedClassifyFailoverReason,
|
||||
mockedGlobalHookRunner,
|
||||
mockedRunEmbeddedAttempt,
|
||||
@@ -13,21 +14,27 @@ import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
|
||||
let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;
|
||||
|
||||
type AssistantContent = NonNullable<EmbeddedRunAttemptResult["lastAssistant"]>["content"];
|
||||
|
||||
/**
 * Builds an attempt result whose last assistant message ended with
 * `stopReason: "error"` and produced no assistant text.
 *
 * @param provider Provider id recorded on the assistant message.
 * @param model Model id recorded on the assistant message.
 * @param outputTokens Output token count to report in usage (default 0); input
 *   is fixed at 100 and `totalTokens` is their sum.
 * @param content Assistant content blocks (default: none), e.g. thinking-only
 *   blocks for the reasoning-only retry cases.
 * @param errorMessage Optional provider error message; the `errorMessage` key
 *   is omitted entirely when this is not provided or empty.
 * @returns The fixture produced by `makeAttemptResult`.
 */
function emptyErrorAttempt(
  provider: string,
  model: string,
  outputTokens = 0,
  content: AssistantContent = [],
  errorMessage?: string,
): EmbeddedRunAttemptResult {
  // Models can report stopReason=error with no output after tool activity; that
  // is replay-safe only when the attempt metadata records no side effects.
  return makeAttemptResult({
    assistantTexts: [],
    lastAssistant: {
      role: "assistant",
      stopReason: "error",
      provider,
      model,
      // Single `content` entry: the caller-supplied blocks, not a fixed empty list.
      content,
      usage: { input: 100, output: outputTokens, totalTokens: 100 + outputTokens },
      ...(errorMessage ? { errorMessage } : {}),
    } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
  });
}
|
||||
@@ -36,6 +43,7 @@ function successAttempt(provider: string, model: string): EmbeddedRunAttemptResu
|
||||
return makeAttemptResult({
|
||||
assistantTexts: ["Done."],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
provider,
|
||||
model,
|
||||
@@ -71,6 +79,118 @@ describe("runEmbeddedAgent silent-error retry", () => {
|
||||
expect(result.payloads).toBeUndefined();
|
||||
});
|
||||
|
||||
it("retries when stopReason=error emitted only thinking blocks and output tokens", async () => {
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
emptyErrorAttempt("anthropic", "claude-opus-4-8", 1120, [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
|
||||
},
|
||||
]),
|
||||
);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("anthropic", "claude-opus-4-8"));
|
||||
|
||||
const result = await runEmbeddedAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
runId: "run-empty-error-retry-thinking-only",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(result.payloads).toBeUndefined();
|
||||
});
|
||||
|
||||
it("retries thinking-only unknown provider errors before assistant failover", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue("timeout");
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
emptyErrorAttempt(
|
||||
"anthropic",
|
||||
"claude-opus-4-8",
|
||||
1120,
|
||||
[
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
|
||||
},
|
||||
],
|
||||
"An unknown error occurred",
|
||||
),
|
||||
);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("anthropic", "claude-opus-4-8"));
|
||||
|
||||
const result = await runEmbeddedAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
runId: "run-empty-error-retry-before-assistant-failover",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(result.payloads).toBeUndefined();
|
||||
});
|
||||
|
||||
it.each([
|
||||
["timeout", "LLM request timed out."],
|
||||
["server_error", "Internal server error"],
|
||||
] as const)("does not intercept recognized %s failover errors", async (reason, errorMessage) => {
|
||||
mockedClassifyAssistantFailoverReason.mockReturnValue(reason);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
emptyErrorAttempt(
|
||||
"anthropic",
|
||||
"claude-opus-4-8",
|
||||
1120,
|
||||
[
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
|
||||
},
|
||||
],
|
||||
errorMessage,
|
||||
),
|
||||
);
|
||||
|
||||
await runEmbeddedAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
runId: `run-empty-error-retry-${reason}`,
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("does not intercept concrete non-transient failover errors", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue("model_not_found");
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
emptyErrorAttempt(
|
||||
"anthropic",
|
||||
"missing-model",
|
||||
1120,
|
||||
[
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({ id: "rs_missing_model", type: "reasoning" }),
|
||||
},
|
||||
],
|
||||
"model not found",
|
||||
),
|
||||
);
|
||||
|
||||
await runEmbeddedAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "missing-model",
|
||||
runId: "run-empty-error-retry-non-transient",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("caps retries at MAX_EMPTY_ERROR_RETRIES and surfaces incomplete-turn error", async () => {
|
||||
// 1 initial + 3 retries = 4 attempts, all returning empty-error.
|
||||
for (let i = 0; i < 4; i += 1) {
|
||||
@@ -113,6 +233,7 @@ describe("runEmbeddedAgent silent-error retry", () => {
|
||||
makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
provider: "plain-provider",
|
||||
model: "plain-model",
|
||||
@@ -156,6 +277,7 @@ describe("runEmbeddedAgent silent-error retry", () => {
|
||||
makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
@@ -179,4 +301,57 @@ describe("runEmbeddedAgent silent-error retry", () => {
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
});
|
||||
|
||||
// Each row pairs a label with attempt state that already represents a terminal
// outcome; layered on top of a thinking-only errored attempt, it must stop the
// silent-error retry from resubmitting.
it.each([
  [
    "client tool calls",
    { clientToolCalls: [{ name: "browser", params: { url: "https://example.com" } }] },
  ],
  ["yield", { yieldDetected: true }],
  ["approval prompts", { didSendDeterministicApprovalPrompt: true }],
  [
    "heartbeat responses",
    {
      heartbeatToolResponse: {
        outcome: "progress",
        notify: false,
        summary: "Still working",
      },
    },
  ],
  ["tool media", { toolMediaUrls: ["file:///tmp/render.png"] }],
  ["voice media", { toolAudioAsVoice: true }],
  ["trusted local media", { toolTrustedLocalMedia: true }],
  [
    "source reply payloads",
    { messagingToolSourceReplyPayloads: [{ text: "Delivered through the source reply." }] },
  ],
  ["delivered source replies", { didDeliverSourceReplyViaMessageTool: true }],
  ["tool errors", { lastToolError: { toolName: "read", error: "read failed" } }],
] satisfies Array<[string, Partial<EmbeddedRunAttemptResult>]>)(
  "does not retry after terminal %s",
  async (label, terminalState) => {
    // Base fixture: errored turn that produced only a signed thinking block.
    const thinkingOnlyError = emptyErrorAttempt("anthropic", "claude-opus-4-8", 1120, [
      {
        type: "thinking",
        thinking: "internal reasoning before provider error",
        thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
      },
    ]);
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
      makeAttemptResult({ ...thinkingOnlyError, ...terminalState }),
    );

    const runSlug = label.replaceAll(" ", "-");
    await runEmbeddedAgent({
      ...overflowBaseRunParams,
      provider: "anthropic",
      model: "claude-opus-4-8",
      runId: `run-empty-error-retry-terminal-${runSlug}`,
    });

    // Exactly one attempt: the terminal state suppressed the retry.
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
  },
);
|
||||
});
|
||||
|
||||
@@ -41,6 +41,7 @@ import {
|
||||
resolveRunLivenessState,
|
||||
resolveSilentToolResultReplyPayload,
|
||||
shouldRetryMissingAssistantTurn,
|
||||
shouldRetrySilentErrorAssistantTurn,
|
||||
shouldTreatEmptyAssistantReplyAsSilent,
|
||||
} from "./run/incomplete-turn.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
@@ -693,7 +694,7 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
|
||||
expect(result.payloads).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not retry reasoning-only turns when the assistant ended in error", async () => {
|
||||
it("retries reasoning-only turns when the assistant ended in error", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue(null);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
@@ -714,6 +715,18 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
assistantTexts: ["Recovered."],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
content: [{ type: "text", text: "Recovered." }],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedAgent({
|
||||
...overflowBaseRunParams,
|
||||
@@ -722,9 +735,8 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
|
||||
runId: "run-reasoning-only-assistant-error",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
expect(result.payloads?.[0]?.text).toContain("Please try again");
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(result.payloads).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not retry reasoning-only turns for non-strict-agentic providers", async () => {
|
||||
@@ -2529,6 +2541,191 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
|
||||
expect(retryInstruction).toBeNull();
|
||||
});
|
||||
|
||||
it("surfaces incomplete-turn text for errored signed-thinking-only turns with payloads", () => {
|
||||
const incompleteTurnText = resolveIncompleteTurnPayloadText({
|
||||
payloadCount: 1,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({ id: "rs_error_payload", type: "reasoning" }),
|
||||
},
|
||||
],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(incompleteTurnText).toContain("couldn't generate a response");
|
||||
});
|
||||
|
||||
it.each([
|
||||
[
|
||||
"heartbeat responses",
|
||||
{
|
||||
heartbeatToolResponse: {
|
||||
outcome: "progress" as const,
|
||||
notify: false,
|
||||
summary: "Still working",
|
||||
},
|
||||
},
|
||||
],
|
||||
["tool media", { toolMediaUrls: ["file:///tmp/render.png"] }],
|
||||
["voice media", { toolAudioAsVoice: true }],
|
||||
["trusted local media", { toolTrustedLocalMedia: true }],
|
||||
[
|
||||
"source reply payloads",
|
||||
{ messagingToolSourceReplyPayloads: [{ text: "Delivered through the source reply." }] },
|
||||
],
|
||||
["delivered source replies", { didDeliverSourceReplyViaMessageTool: true }],
|
||||
] satisfies Array<[string, Partial<EmbeddedRunAttemptResult>]>)(
|
||||
"does not replace terminal %s with an incomplete-turn warning",
|
||||
(_label, attemptState) => {
|
||||
const incompleteTurnText = resolveIncompleteTurnPayloadText({
|
||||
payloadCount: 1,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
...attemptState,
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({
|
||||
id: "rs_terminal_payload",
|
||||
type: "reasoning",
|
||||
}),
|
||||
},
|
||||
],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(incompleteTurnText).toBeNull();
|
||||
},
|
||||
);
|
||||
|
||||
it("retries replay-safe errored turns that only emitted thinking blocks", () => {
|
||||
const assistant = {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal reasoning before provider error",
|
||||
thinkingSignature: JSON.stringify({ id: "rs_error", type: "reasoning" }),
|
||||
},
|
||||
{ type: "redacted_thinking", data: "opaque" },
|
||||
{ type: "text", text: " " },
|
||||
],
|
||||
usage: { input: 100, output: 1120, totalTokens: 1220 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"];
|
||||
expect(
|
||||
shouldRetrySilentErrorAssistantTurn({
|
||||
attempt: makeAttemptResult({ assistantTexts: [], lastAssistant: assistant }),
|
||||
assistant,
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("does not retry errored empty turns when non-zero output may indicate progress", () => {
|
||||
const assistant = {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
content: [],
|
||||
usage: { input: 100, output: 12, totalTokens: 112 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"];
|
||||
expect(
|
||||
shouldRetrySilentErrorAssistantTurn({
|
||||
attempt: makeAttemptResult({ assistantTexts: [], lastAssistant: assistant }),
|
||||
assistant,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it.each([
|
||||
{
|
||||
name: "visible text",
|
||||
content: [
|
||||
{ type: "thinking", thinking: "internal", thinkingSignature: "sig" },
|
||||
{ type: "text", text: "partial answer" },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "tool call",
|
||||
content: [
|
||||
{ type: "thinking", thinking: "internal", thinkingSignature: "sig" },
|
||||
{ type: "toolCall", id: "call_1", name: "read", arguments: { path: "README.md" } },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "unknown block",
|
||||
content: [{ type: "provider_metadata", value: "opaque" }],
|
||||
},
|
||||
])("does not retry errored turns containing $name", ({ content }) => {
|
||||
const assistant = {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
content,
|
||||
usage: { input: 100, output: 1120, totalTokens: 1220 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"];
|
||||
expect(
|
||||
shouldRetrySilentErrorAssistantTurn({
|
||||
attempt: makeAttemptResult({ assistantTexts: [], lastAssistant: assistant }),
|
||||
assistant,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not retry errored thinking-only turns after side effects", () => {
|
||||
const assistant = {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
content: [
|
||||
{
|
||||
type: "redacted_thinking",
|
||||
data: "opaque",
|
||||
},
|
||||
],
|
||||
usage: { input: 100, output: 1120, totalTokens: 1220 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"];
|
||||
expect(
|
||||
shouldRetrySilentErrorAssistantTurn({
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
replayMetadata: {
|
||||
hadPotentialSideEffects: true,
|
||||
replaySafe: false,
|
||||
},
|
||||
lastAssistant: assistant,
|
||||
}),
|
||||
assistant,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("detects empty openai-compatible stop turns with non-zero output usage", () => {
|
||||
const retryInstruction = resolveEmptyResponseRetryInstruction({
|
||||
provider: "llamacpp",
|
||||
|
||||
@@ -225,6 +225,9 @@ export const mockedIsBillingAssistantError = vi.fn(() => false);
|
||||
export const mockedIsCompactionFailureError = vi.fn(() => false);
|
||||
export const mockedIsFailoverAssistantError = vi.fn<MockAssistantErrorProbe>(() => false);
|
||||
export const mockedIsFailoverErrorMessage = vi.fn(() => false);
|
||||
export const mockedIsGenericUnknownStreamErrorMessage = vi.fn((raw: string) =>
|
||||
/^\s*an unknown error occurred\.?\s*$/i.test(raw),
|
||||
);
|
||||
export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
|
||||
const lower = normalizeLowercaseStringOrEmpty(msg ?? "");
|
||||
return (
|
||||
@@ -412,6 +415,10 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
|
||||
mockedIsFailoverAssistantError.mockReturnValue(false);
|
||||
mockedIsFailoverErrorMessage.mockReset();
|
||||
mockedIsFailoverErrorMessage.mockReturnValue(false);
|
||||
mockedIsGenericUnknownStreamErrorMessage.mockReset();
|
||||
mockedIsGenericUnknownStreamErrorMessage.mockImplementation((raw: string) =>
|
||||
/^\s*an unknown error occurred\.?\s*$/i.test(raw),
|
||||
);
|
||||
mockedIsLikelyContextOverflowError.mockReset();
|
||||
mockedIsLikelyContextOverflowError.mockImplementation((msg?: string) => {
|
||||
const lower = normalizeLowercaseStringOrEmpty(msg ?? "");
|
||||
@@ -642,6 +649,7 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
|
||||
isLikelyContextOverflowError: mockedIsLikelyContextOverflowError,
|
||||
isFailoverAssistantError: mockedIsFailoverAssistantError,
|
||||
isFailoverErrorMessage: mockedIsFailoverErrorMessage,
|
||||
isGenericUnknownStreamErrorMessage: mockedIsGenericUnknownStreamErrorMessage,
|
||||
parseImageSizeError: mockedParseImageSizeError,
|
||||
parseImageDimensionError: mockedParseImageDimensionError,
|
||||
isRateLimitAssistantError: mockedIsRateLimitAssistantError,
|
||||
|
||||
@@ -72,6 +72,7 @@ import {
|
||||
isCompactionFailureError,
|
||||
isFailoverAssistantError,
|
||||
isFailoverErrorMessage,
|
||||
isGenericUnknownStreamErrorMessage,
|
||||
isLikelyContextOverflowError,
|
||||
isRateLimitAssistantError,
|
||||
parseImageDimensionError,
|
||||
@@ -107,6 +108,7 @@ import {
|
||||
resolveSelectedOpenAIRuntimeProvider,
|
||||
} from "../openai-routing.js";
|
||||
import { resolveProviderIdForAuth } from "../provider-auth-aliases.js";
|
||||
import { hasOnlyAssistantReasoningContent } from "../replay-turn-classification.js";
|
||||
import { runAgentCleanupStep } from "../run-cleanup-timeout.js";
|
||||
import { buildAgentRuntimeAuthPlan } from "../runtime-plan/auth.js";
|
||||
import { buildAgentRuntimePlan } from "../runtime-plan/build.js";
|
||||
@@ -195,6 +197,7 @@ import {
|
||||
resolveReplayInvalidFlag,
|
||||
resolveRunLivenessState,
|
||||
shouldRetryMissingAssistantTurn,
|
||||
shouldRetrySilentErrorAssistantTurn,
|
||||
shouldTreatEmptyAssistantReplyAsSilent,
|
||||
} from "./run/incomplete-turn.js";
|
||||
import type { RunEmbeddedAgentParams } from "./run/params.js";
|
||||
@@ -2936,6 +2939,43 @@ async function runEmbeddedAgentInternal(
|
||||
const imageDimensionError = parseImageDimensionError(
|
||||
assistantForFailover?.errorMessage ?? "",
|
||||
);
|
||||
// The shared runtime wraps interrupted streams as a timeout. Retry that
|
||||
// wrapper only for reasoning-only output so ordinary timeouts keep failover.
|
||||
const genericUnknownReasoningError =
|
||||
assistantFailoverReason === "timeout" &&
|
||||
isGenericUnknownStreamErrorMessage(assistantForFailover?.errorMessage ?? "") &&
|
||||
Boolean(assistantForFailover && hasOnlyAssistantReasoningContent(assistantForFailover));
|
||||
const silentErrorRetryReason =
|
||||
assistantFailoverReason === null ||
|
||||
genericUnknownReasoningError ||
|
||||
assistantFailoverReason === "no_error_details" ||
|
||||
assistantFailoverReason === "unclassified" ||
|
||||
assistantFailoverReason === "unknown";
|
||||
// Retry replay-safe non-visible provider errors before assistant
|
||||
// failover surfaces them as terminal provider failures.
|
||||
if (
|
||||
!authFailure &&
|
||||
!rateLimitFailure &&
|
||||
!billingFailure &&
|
||||
!cloudCodeAssistFormatError &&
|
||||
!imageDimensionError &&
|
||||
!aborted &&
|
||||
!promptError &&
|
||||
!timedOut &&
|
||||
silentErrorRetryReason &&
|
||||
shouldRetrySilentErrorAssistantTurn({ attempt, assistant: assistantForFailover }) &&
|
||||
emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
|
||||
) {
|
||||
emptyErrorRetries += 1;
|
||||
log.warn(
|
||||
`[empty-error-retry] stopReason=error non-visible-output; resubmitting ` +
|
||||
`attempt=${emptyErrorRetries}/${MAX_EMPTY_ERROR_RETRIES} ` +
|
||||
`provider=${assistantForFailover?.provider ?? provider} ` +
|
||||
`model=${assistantForFailover?.model ?? model.id} ` +
|
||||
`sessionKey=${params.sessionKey ?? params.sessionId}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
|
||||
const failedAssistantProfileId = lastProfileId;
|
||||
const logAssistantFailoverDecision = createFailoverDecisionLogger({
|
||||
@@ -3602,47 +3642,6 @@ async function runEmbeddedAgentInternal(
|
||||
`provider=${activeErrorContext.provider}/${activeErrorContext.model} attempts=${emptyResponseRetryAttempts}/${maxEmptyResponseRetryAttempts} — surfacing incomplete-turn error`,
|
||||
);
|
||||
}
|
||||
// ── silent-error retry ────────────────────────────────────────────
|
||||
// Observed with ollama/glm-5.1: a turn can end with stopReason="error"
|
||||
// and zero output tokens AND empty content after a successful
|
||||
// tool-call sequence, producing no user-visible text at all. This
|
||||
// path is narrower than the empty-response continuation retry:
|
||||
// same prompt, same session transcript (tool results already
|
||||
// captured), no instruction injection. Placed before the
|
||||
// incompleteTurnText return so it actually gets a chance to fire.
|
||||
//
|
||||
// Content-empty guard: a reasoning-only error (content has thinking
|
||||
// blocks) is a distinct failure mode handled elsewhere; only retry
|
||||
// when the assistant truly produced nothing.
|
||||
//
|
||||
// Side-effect guard: if the failed attempt already recorded potential
|
||||
// side effects (messaging tool sent, cron add, mutating tool
|
||||
// call that wasn't round-tripped as replay-safe), resubmission can
|
||||
// duplicate those actions. Mirror the gate the other retry resolvers
|
||||
// use (resolveEmptyResponseRetryInstruction, reasoning-only, planning-
|
||||
// only), which short-circuit on attempt.replayMetadata.hadPotentialSideEffects.
|
||||
const silentErrorContent = sessionLastAssistant?.content as Array<unknown> | undefined;
|
||||
if (
|
||||
incompleteTurnText &&
|
||||
!aborted &&
|
||||
!promptError &&
|
||||
!timedOut &&
|
||||
sessionLastAssistant?.stopReason === "error" &&
|
||||
((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 &&
|
||||
(silentErrorContent?.length ?? 0) === 0 &&
|
||||
(attempt.replayMetadata ? !attempt.replayMetadata.hadPotentialSideEffects : false) &&
|
||||
emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
|
||||
) {
|
||||
emptyErrorRetries += 1;
|
||||
log.warn(
|
||||
`[empty-error-retry] stopReason=error output=0; resubmitting ` +
|
||||
`attempt=${emptyErrorRetries}/${MAX_EMPTY_ERROR_RETRIES} ` +
|
||||
`provider=${sessionLastAssistant?.provider ?? provider} ` +
|
||||
`model=${sessionLastAssistant?.model ?? model.id} ` +
|
||||
`sessionKey=${params.sessionKey ?? params.sessionId}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if (incompleteTurnText) {
|
||||
const replayInvalid = resolveReplayInvalidForAttempt(incompleteTurnText);
|
||||
const livenessState = resolveRunLivenessState({
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
isStrictAgenticSupportedProviderModel,
|
||||
stripProviderPrefix,
|
||||
} from "../../execution-contract.js";
|
||||
import { hasOnlyAssistantReasoningContent } from "../../replay-turn-classification.js";
|
||||
import type { AgentMessage } from "../../runtime/index.js";
|
||||
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
|
||||
import {
|
||||
@@ -44,6 +45,12 @@ type IncompleteTurnAttempt = Pick<
|
||||
| "currentAttemptAssistant"
|
||||
| "yieldDetected"
|
||||
| "didSendDeterministicApprovalPrompt"
|
||||
| "heartbeatToolResponse"
|
||||
| "toolMediaUrls"
|
||||
| "toolAudioAsVoice"
|
||||
| "toolTrustedLocalMedia"
|
||||
| "didDeliverSourceReplyViaMessageTool"
|
||||
| "messagingToolSourceReplyPayloads"
|
||||
| "didSendViaMessagingTool"
|
||||
| "messagingToolSentTexts"
|
||||
| "messagingToolSentMediaUrls"
|
||||
@@ -262,6 +269,35 @@ export function resolveAttemptReplayMetadata(attempt: {
|
||||
return attempt.replayMetadata ?? REPLAY_UNSAFE_FALLBACK_METADATA;
|
||||
}
|
||||
|
||||
/**
 * The slice of an attempt result that `hasAttemptTerminalState` inspects to
 * decide whether the attempt already carries a terminal signal.
 */
type TerminalAttemptState = Pick<
  EmbeddedRunAttemptResult,
  // Tool-call, yield, approval, heartbeat, and tool-error signals.
  | "clientToolCalls"
  | "yieldDetected"
  | "didSendDeterministicApprovalPrompt"
  | "heartbeatToolResponse"
  | "lastToolError"
  // Media produced by tools.
  | "toolMediaUrls"
  | "toolAudioAsVoice"
  | "toolTrustedLocalMedia"
  // Replies delivered through the message tool.
  | "didDeliverSourceReplyViaMessageTool"
  | "messagingToolSourceReplyPayloads"
>;
|
||||
|
||||
/**
 * Reports whether any terminal-state field on the attempt is set.
 *
 * Fields are checked in a fixed order and the first truthy one wins. A
 * `toolMediaUrls` list only counts when at least one URL is non-blank after
 * trimming, and `messagingToolSourceReplyPayloads` only counts when non-empty.
 *
 * @param attempt - Attempt fields to inspect.
 * @returns `true` when at least one terminal-state field is truthy.
 */
function hasAttemptTerminalState(attempt: TerminalAttemptState): boolean {
  if (attempt.clientToolCalls || attempt.yieldDetected) {
    return true;
  }
  if (attempt.didSendDeterministicApprovalPrompt || attempt.heartbeatToolResponse) {
    return true;
  }
  if (attempt.lastToolError) {
    return true;
  }

  // Blank or whitespace-only URLs do not count as delivered media.
  const hasNonBlankMediaUrl = attempt.toolMediaUrls?.some((url) => url.trim().length > 0);
  if (hasNonBlankMediaUrl || attempt.toolAudioAsVoice || attempt.toolTrustedLocalMedia) {
    return true;
  }

  if (attempt.didDeliverSourceReplyViaMessageTool) {
    return true;
  }
  return Boolean(attempt.messagingToolSourceReplyPayloads?.length);
}
|
||||
|
||||
/**
|
||||
* Builds the user-visible incomplete-turn warning when a terminal attempt did
|
||||
* not produce a safe final assistant response and no committed delivery/progress
|
||||
@@ -281,16 +317,17 @@ export function resolveIncompleteTurnPayloadText(params: {
|
||||
// produced. (#76477)
|
||||
const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
|
||||
const assistant = params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant;
|
||||
// Unsigned thinking payloads count toward payloadCount but carry no user-visible
|
||||
// content; bypass the visible-text guard when unsigned thinking was the only output
|
||||
// so that incomplete-turn stall detection fires below. (#89787)
|
||||
const unsignedThinkingOnlyTerminal =
|
||||
// Thinking payloads can count toward payloadCount but carry no user-visible
|
||||
// content; bypass the visible-text guard when thinking was the only output
|
||||
// so that incomplete-turn stall detection fires below. (#89787, #91953)
|
||||
const thinkingOnlyTerminal =
|
||||
params.payloadCount !== 0 &&
|
||||
!joinAssistantTexts(params.attempt.assistantTexts).length &&
|
||||
isUnsignedThinkingOnlyAssistantTurn(assistant);
|
||||
!hasAttemptTerminalState(params.attempt) &&
|
||||
Boolean(assistant && hasOnlyAssistantReasoningContent(assistant));
|
||||
|
||||
if (
|
||||
(params.payloadCount !== 0 && !toolUseTerminal && !unsignedThinkingOnlyTerminal) ||
|
||||
(params.payloadCount !== 0 && !toolUseTerminal && !thinkingOnlyTerminal) ||
|
||||
(params.aborted && params.externalAbort) ||
|
||||
params.timedOut ||
|
||||
params.attempt.clientToolCalls ||
|
||||
@@ -330,7 +367,7 @@ export function resolveIncompleteTurnPayloadText(params: {
|
||||
if (
|
||||
!incompleteTerminalAssistant &&
|
||||
!reasoningOnlyAssistant &&
|
||||
!unsignedThinkingOnlyTerminal &&
|
||||
!thinkingOnlyTerminal &&
|
||||
!emptyResponseAssistant &&
|
||||
stopReason !== "error"
|
||||
) {
|
||||
@@ -555,6 +592,50 @@ function isUnsignedThinkingOnlyAssistantTurn(message: unknown): boolean {
|
||||
return assessLastAssistantMessage(message as AgentMessage) === "incomplete-thinking";
|
||||
}
|
||||
|
||||
/**
 * Decides whether an errored assistant turn that produced no visible output
 * qualifies for a silent resubmission.
 *
 * The answer is `false` when the attempt:
 * - produced any joined assistant text,
 * - carries terminal state (see `hasAttemptTerminalState`), or
 * - has replay metadata flagged with potential side effects.
 *
 * Otherwise the assistant message must have `stopReason === "error"` and an
 * array `content`. Empty content qualifies only when
 * `hasPositiveOutputTokenUsage` is false for the message; non-empty content
 * qualifies only when `hasOnlyAssistantReasoningContent` accepts it.
 *
 * @param params.attempt - Attempt fields used for the visibility, terminal-state,
 *   and replay-safety guards.
 * @param params.assistant - The errored assistant message to classify, if any.
 * @returns `true` when the turn qualifies for a silent retry.
 */
export function shouldRetrySilentErrorAssistantTurn(params: {
  attempt: Pick<
    EmbeddedRunAttemptResult,
    | "assistantTexts"
    | "clientToolCalls"
    | "yieldDetected"
    | "didSendDeterministicApprovalPrompt"
    | "heartbeatToolResponse"
    | "lastToolError"
    | "toolMediaUrls"
    | "toolAudioAsVoice"
    | "toolTrustedLocalMedia"
    | "didDeliverSourceReplyViaMessageTool"
    | "messagingToolSourceReplyPayloads"
    | "replayMetadata"
  >;
  assistant: EmbeddedRunAttemptResult["lastAssistant"] | null | undefined;
}): boolean {
  // Attempt-level guards: visible text, terminal state, or possible side
  // effects each rule out a retry.
  const blockedByAttempt =
    joinAssistantTexts(params.attempt.assistantTexts).length > 0 ||
    hasAttemptTerminalState(params.attempt) ||
    resolveAttemptReplayMetadata(params.attempt).hadPotentialSideEffects;
  if (blockedByAttempt) {
    return false;
  }

  const failedTurn = params.assistant;
  if (!failedTurn || failedTurn.stopReason !== "error") {
    return false;
  }

  const blocks = (failedTurn as { content?: unknown }).content;
  if (!Array.isArray(blocks)) {
    return false;
  }

  // No content blocks: retry only when no output tokens were reported.
  // Otherwise: retry only when every block is reasoning or blank text.
  return blocks.length === 0
    ? !hasPositiveOutputTokenUsage(failedTurn)
    : hasOnlyAssistantReasoningContent(failedTurn);
}
|
||||
|
||||
function isEmptyResponseAssistantTurn(params: {
|
||||
payloadCount: number;
|
||||
attempt: Pick<
|
||||
|
||||
@@ -4,9 +4,9 @@ type AssistantTurnLike = {
|
||||
content?: unknown;
|
||||
};
|
||||
|
||||
/** Returns true when a token-limited turn contains only incomplete provider reasoning. */
|
||||
export function isReasoningOnlyLengthAssistantTurn(message: AssistantTurnLike): boolean {
|
||||
if (message.role !== "assistant" || message.stopReason !== "length") {
|
||||
/** Returns true when an assistant turn contains only provider reasoning and blank text. */
|
||||
export function hasOnlyAssistantReasoningContent(message: AssistantTurnLike): boolean {
|
||||
if (message.role !== "assistant") {
|
||||
return false;
|
||||
}
|
||||
const content = Array.isArray(message.content)
|
||||
@@ -31,3 +31,8 @@ export function isReasoningOnlyLengthAssistantTurn(message: AssistantTurnLike):
|
||||
}
|
||||
return hasThinking;
|
||||
}
|
||||
|
||||
/**
 * Returns true when the turn stopped with `stopReason === "length"` and
 * `hasOnlyAssistantReasoningContent` accepts the message.
 */
export function isReasoningOnlyLengthAssistantTurn(message: AssistantTurnLike): boolean {
  if (message.stopReason !== "length") {
    return false;
  }
  return hasOnlyAssistantReasoningContent(message);
}
|
||||
|
||||
Reference in New Issue
Block a user