fix #95489: [Bug]: claude-cli out-of-credits error bypasses model fallback chain — error text delivered as final response (#95508)

* fix(agents): fallback on generic cli failure text

* fix(agents): guard generic cli failure payload visibility

Co-Authored-By: Claude <noreply@anthropic.com>

* fix(agents): use exported generic failure text

Signed-off-by: sallyom <somalley@redhat.com>

---------

Signed-off-by: sallyom <somalley@redhat.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: sallyom <somalley@redhat.com>
This commit is contained in:
mikasa
2026-06-23 10:55:35 +08:00
committed by GitHub
parent c48dd3cdd1
commit aa0bdb901f
2 changed files with 176 additions and 7 deletions

View File

@@ -1,5 +1,6 @@
// Coverage for deciding when embedded run results should trigger model fallback.
import { describe, expect, it } from "vitest";
import { GENERIC_EXTERNAL_RUN_FAILURE_TEXT } from "../../auto-reply/reply/agent-runner-failure-copy.js";
import { classifyEmbeddedAgentRunResultForModelFallback } from "./result-fallback-classifier.js";
describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
@@ -49,6 +50,119 @@ describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
});
});
it("classifies generic external runner failure text as fallback-worthy", () => {
  // A single payload whose text is exactly the generic failure copy must come
  // back as a classification describing the provider/model and the raw text.
  const expectedClassification = {
    message: `claude-cli/claude-sonnet-4-6 ended with a generic external runner failure: ${GENERIC_EXTERNAL_RUN_FAILURE_TEXT}`,
    reason: "format",
    code: "generic_external_run_failure",
    rawError: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
  };

  expect(
    classifyEmbeddedAgentRunResultForModelFallback({
      provider: "claude-cli",
      model: "claude-sonnet-4-6",
      result: {
        payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }],
        meta: { durationMs: 42 },
      },
    }),
  ).toEqual(expectedClassification);
});
it("does not classify normal visible assistant output as fallback-worthy", () => {
  // Ordinary assistant text is not the generic failure copy, so the
  // classifier is expected to return null.
  expect(
    classifyEmbeddedAgentRunResultForModelFallback({
      provider: "claude-cli",
      model: "claude-sonnet-4-6",
      result: {
        payloads: [{ text: "Here is the requested answer." }],
        meta: { durationMs: 42 },
      },
    }),
  ).toBeNull();
});
it("does not retry generic external runner failure text mixed with non-text visible content", () => {
  // The payload carries the generic failure copy together with a media URL
  // and channel data; the expected classification is null.
  expect(
    classifyEmbeddedAgentRunResultForModelFallback({
      provider: "claude-cli",
      model: "claude-sonnet-4-6",
      result: {
        payloads: [
          {
            text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
            mediaUrl: "https://example.com/failure-screenshot.png",
            channelData: { delivered: true },
          },
        ],
        meta: { durationMs: 42 },
      },
    }),
  ).toBeNull();
});
it("does not retry generic external runner failure text mixed with interactive content", () => {
  // The payload pairs the generic failure copy with an interactive element;
  // the expected classification is null.
  expect(
    classifyEmbeddedAgentRunResultForModelFallback({
      provider: "claude-cli",
      model: "claude-sonnet-4-6",
      result: {
        payloads: [
          {
            text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
            interactive: { type: "button", label: "Retry" },
          },
        ],
        meta: { durationMs: 42 },
      },
    }),
  ).toBeNull();
});
it("does not retry generic external runner failure text after committed delivery", () => {
  // messagingToolSentTexts already holds a sent text, so even a payload that
  // is exactly the generic failure copy is expected to classify as null.
  expect(
    classifyEmbeddedAgentRunResultForModelFallback({
      provider: "claude-cli",
      model: "claude-sonnet-4-6",
      result: {
        payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }],
        messagingToolSentTexts: ["already delivered"],
        meta: { durationMs: 42 },
      },
    }),
  ).toBeNull();
});
it("preserves hook block results with generic external runner failure text", () => {
  // meta.error marks the run as a hook block; the expected classification is
  // null even though the payload text equals the generic failure copy.
  expect(
    classifyEmbeddedAgentRunResultForModelFallback({
      provider: "claude-cli",
      model: "claude-sonnet-4-6",
      result: {
        payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }],
        meta: {
          durationMs: 42,
          error: {
            kind: "hook_block",
            message: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
          },
        },
      },
    }),
  ).toBeNull();
});
it("preserves hook block results with auth-like error payload text", () => {
// Hook policy blocks are intentional local decisions, not provider failures
// that should rotate models.

View File

@@ -1,6 +1,7 @@
/**
* Classifies embedded-agent run results for model fallback decisions.
*/
import { GENERIC_EXTERNAL_RUN_FAILURE_TEXT } from "../../auto-reply/reply/agent-runner-failure-copy.js";
import { isSilentReplyPayloadText } from "../../auto-reply/tokens.js";
import { classifyFailoverReason } from "../embedded-agent-helpers/errors.js";
import type { FailoverReason } from "../embedded-agent-helpers/types.js";
@@ -15,8 +16,9 @@ import type { EmbeddedAgentRunResult } from "./types.js";
/**
* Classifies embedded-agent terminal results for model fallback decisions.
*
* The classifier only flags failed invisible outcomes or exact generic external-runner failure
* copy; delivered messages, deliberate silent replies, hook blocks, and aborts must not trigger
* another model attempt.
*/
function isEmbeddedAgentRunResult(value: unknown): value is EmbeddedAgentRunResult {
return Boolean(
@@ -74,6 +76,47 @@ function hasDeliberateSilentTerminalReply(result: EmbeddedAgentRunResult): boole
);
}
/**
 * Reports whether a payload still counts as visible once its `text` field is
 * removed.
 *
 * Visibility is delegated to `hasVisibleAgentPayload`, called on a one-element
 * payload list with `includeErrorPayloads` and `includeReasoningPayloads` both
 * set to false.
 *
 * @param payload - A single entry from an embedded-agent run result's payloads.
 * @returns The result of `hasVisibleAgentPayload` for the text-less copy.
 */
function hasNonTextVisiblePayloadContent(
  payload: NonNullable<EmbeddedAgentRunResult["payloads"]>[number],
): boolean {
  // Strip only the text; every other field is carried over unchanged.
  const { text: _omittedText, ...nonTextFields } = payload;
  const visibilityOptions = {
    includeErrorPayloads: false,
    includeReasoningPayloads: false,
  };
  return hasVisibleAgentPayload({ payloads: [nonTextFields] }, visibilityOptions);
}
/**
 * Detects a run result that consists solely of the generic external-runner
 * failure copy and turns it into a fallback classification.
 *
 * A classification is produced only when all of the following hold:
 * - `result.payloads` is an array with exactly one entry;
 * - that entry is not flagged `isError` or `isReasoning`;
 * - its `text` is a string that, after trimming, equals
 *   `GENERIC_EXTERNAL_RUN_FAILURE_TEXT`;
 * - `hasNonTextVisiblePayloadContent` reports nothing else visible on it.
 *
 * @param params.provider - Provider name, used in the classification message.
 * @param params.model - Model name, used in the classification message.
 * @param params.result - The embedded-agent run result to inspect.
 * @returns The classification, or null when any condition above fails.
 */
function classifyGenericExternalRunFailurePayload(params: {
  provider: string;
  model: string;
  result: EmbeddedAgentRunResult;
}): ModelFallbackResultClassification {
  const candidates = params.result.payloads;
  if (!Array.isArray(candidates) || candidates.length !== 1) {
    return null;
  }

  const [onlyPayload] = candidates;
  const failureText = onlyPayload?.text;

  if (onlyPayload?.isError === true || onlyPayload?.isReasoning === true) {
    return null;
  }
  if (typeof failureText !== "string") {
    return null;
  }
  // The comparison is on the trimmed text; the untrimmed text is what gets reported.
  if (failureText.trim() !== GENERIC_EXTERNAL_RUN_FAILURE_TEXT) {
    return null;
  }
  if (hasNonTextVisiblePayloadContent(onlyPayload)) {
    return null;
  }

  const modelLabel = `${params.provider}/${params.model}`;
  return {
    message: `${modelLabel} ended with a generic external runner failure: ${failureText}`,
    reason: "format",
    code: "generic_external_run_failure",
    rawError: failureText,
  };
}
function classifyHarnessResult(params: {
provider: string;
model: string;
@@ -136,11 +179,7 @@ export function classifyEmbeddedAgentRunResultForModelFallback(params: {
if (
params.result.meta.aborted ||
params.hasDirectlySentBlockReply === true ||
params.hasBlockReplyPipelineOutput === true ||
hasVisibleAgentPayload(params.result, {
includeErrorPayloads: false,
includeReasoningPayloads: false,
})
params.hasBlockReplyPipelineOutput === true
) {
return null;
}
@@ -161,6 +200,22 @@ export function classifyEmbeddedAgentRunResultForModelFallback(params: {
return null;
}
const payloads = params.result.payloads ?? [];
const genericExternalFailureClassification = classifyGenericExternalRunFailurePayload({
provider: params.provider,
model: params.model,
result: params.result,
});
if (genericExternalFailureClassification) {
return genericExternalFailureClassification;
}
if (
hasVisibleAgentPayload(params.result, {
includeErrorPayloads: false,
includeReasoningPayloads: false,
})
) {
return null;
}
if (fallbackSafeIncompleteTurn) {
const terminalErrorText = payloads.find(