diff --git a/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts b/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts index c34527f7455..7c4a7aaa892 100644 --- a/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts +++ b/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts @@ -1,5 +1,6 @@ // Coverage for deciding when embedded run results should trigger model fallback. import { describe, expect, it } from "vitest"; +import { GENERIC_EXTERNAL_RUN_FAILURE_TEXT } from "../../auto-reply/reply/agent-runner-failure-copy.js"; import { classifyEmbeddedAgentRunResultForModelFallback } from "./result-fallback-classifier.js"; describe("classifyEmbeddedAgentRunResultForModelFallback", () => { @@ -49,6 +50,119 @@ describe("classifyEmbeddedAgentRunResultForModelFallback", () => { }); }); + it("classifies generic external runner failure text as fallback-worthy", () => { + const result = classifyEmbeddedAgentRunResultForModelFallback({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + result: { + payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }], + meta: { + durationMs: 42, + }, + }, + }); + + expect(result).toEqual({ + message: + "claude-cli/claude-sonnet-4-6 ended with a generic external runner failure: " + + GENERIC_EXTERNAL_RUN_FAILURE_TEXT, + reason: "format", + code: "generic_external_run_failure", + rawError: GENERIC_EXTERNAL_RUN_FAILURE_TEXT, + }); + }); + + it("does not classify normal visible assistant output as fallback-worthy", () => { + const result = classifyEmbeddedAgentRunResultForModelFallback({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + result: { + payloads: [{ text: "Here is the requested answer." }], + meta: { + durationMs: 42, + }, + }, + }); + + expect(result).toBeNull(); + }); + + it("does not retry generic external runner failure text mixed with non-text visible content", () => { + const result = classifyEmbeddedAgentRunResultForModelFallback({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + result: { + payloads: [ + { + text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT, + mediaUrl: "https://example.com/failure-screenshot.png", + channelData: { delivered: true }, + }, + ], + meta: { + durationMs: 42, + }, + }, + }); + + expect(result).toBeNull(); + }); + + it("does not retry generic external runner failure text mixed with interactive content", () => { + const result = classifyEmbeddedAgentRunResultForModelFallback({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + result: { + payloads: [ + { + text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT, + interactive: { type: "button", label: "Retry" }, + }, + ], + meta: { + durationMs: 42, + }, + }, + }); + + expect(result).toBeNull(); + }); + + it("does not retry generic external runner failure text after committed delivery", () => { + const result = classifyEmbeddedAgentRunResultForModelFallback({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + result: { + payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }], + messagingToolSentTexts: ["already delivered"], + meta: { + durationMs: 42, + }, + }, + }); + + expect(result).toBeNull(); + }); + + it("preserves hook block results with generic external runner failure text", () => { + const result = classifyEmbeddedAgentRunResultForModelFallback({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + result: { + payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }], + meta: { + durationMs: 42, + error: { + kind: "hook_block", + message: GENERIC_EXTERNAL_RUN_FAILURE_TEXT, + }, + }, + }, + }); + + expect(result).toBeNull(); + }); + it("preserves hook block results with auth-like error payload text", () => { // Hook policy blocks are intentional local decisions, not provider failures // that should rotate models. diff --git a/src/agents/embedded-agent-runner/result-fallback-classifier.ts b/src/agents/embedded-agent-runner/result-fallback-classifier.ts index 36bbde3cfb2..d536f170e13 100644 --- a/src/agents/embedded-agent-runner/result-fallback-classifier.ts +++ b/src/agents/embedded-agent-runner/result-fallback-classifier.ts @@ -1,6 +1,7 @@ /** * Classifies embedded-agent run results for model fallback decisions. */ +import { GENERIC_EXTERNAL_RUN_FAILURE_TEXT } from "../../auto-reply/reply/agent-runner-failure-copy.js"; import { isSilentReplyPayloadText } from "../../auto-reply/tokens.js"; import { classifyFailoverReason } from "../embedded-agent-helpers/errors.js"; import type { FailoverReason } from "../embedded-agent-helpers/types.js"; @@ -15,8 +16,9 @@ import type { EmbeddedAgentRunResult } from "./types.js"; /** * Classifies embedded-agent terminal results for model fallback decisions. * - * The classifier only flags failed invisible outcomes; delivered messages, deliberate silent - * replies, hook blocks, and aborts must not trigger another model attempt. + * The classifier only flags failed invisible outcomes or exact generic external-runner failure + * copy; delivered messages, deliberate silent replies, hook blocks, and aborts must not trigger + * another model attempt. */ function isEmbeddedAgentRunResult(value: unknown): value is EmbeddedAgentRunResult { return Boolean( @@ -74,6 +76,47 @@ function hasDeliberateSilentTerminalReply(result: EmbeddedAgentRunResult): boole ); } +function hasNonTextVisiblePayloadContent( + payload: NonNullable[number], +): boolean { + const { text: _text, ...payloadWithoutText } = payload; + return hasVisibleAgentPayload( + { payloads: [payloadWithoutText] }, + { + includeErrorPayloads: false, + includeReasoningPayloads: false, + }, + ); +} + +function classifyGenericExternalRunFailurePayload(params: { + provider: string; + model: string; + result: EmbeddedAgentRunResult; +}): ModelFallbackResultClassification { + const payloads = params.result.payloads; + if (!Array.isArray(payloads) || payloads.length !== 1) { + return null; + } + const [payload] = payloads; + const text = payload?.text; + if ( + payload?.isError === true || + payload?.isReasoning === true || + typeof text !== "string" || + text.trim() !== GENERIC_EXTERNAL_RUN_FAILURE_TEXT || + hasNonTextVisiblePayloadContent(payload) + ) { + return null; + } + return { + message: `${params.provider}/${params.model} ended with a generic external runner failure: ${text}`, + reason: "format", + code: "generic_external_run_failure", + rawError: text, + }; +} + function classifyHarnessResult(params: { provider: string; model: string; @@ -136,11 +179,7 @@ export function classifyEmbeddedAgentRunResultForModelFallback(params: { if ( params.result.meta.aborted || params.hasDirectlySentBlockReply === true || - params.hasBlockReplyPipelineOutput === true || - hasVisibleAgentPayload(params.result, { - includeErrorPayloads: false, - includeReasoningPayloads: false, - }) + params.hasBlockReplyPipelineOutput === true ) { return null; } @@ -161,6 +200,22 @@ export function classifyEmbeddedAgentRunResultForModelFallback(params: { return null; } const payloads = params.result.payloads ?? []; + const genericExternalFailureClassification = classifyGenericExternalRunFailurePayload({ + provider: params.provider, + model: params.model, + result: params.result, + }); + if (genericExternalFailureClassification) { + return genericExternalFailureClassification; + } + if ( + hasVisibleAgentPayload(params.result, { + includeErrorPayloads: false, + includeReasoningPayloads: false, + }) + ) { + return null; + } if (fallbackSafeIncompleteTurn) { const terminalErrorText = payloads.find(