fix #95489: [Bug]: claude-cli out-of-credits error bypasses model fallback chain — error text delivered as final response (#95508)

* fix(agents): fallback on generic cli failure text
* fix(agents): guard generic cli failure payload visibility
  Co-Authored-By: Claude <noreply@anthropic.com>
* fix(agents): use exported generic failure text
  Signed-off-by: sallyom <somalley@redhat.com>

---------

Signed-off-by: sallyom <somalley@redhat.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: sallyom <somalley@redhat.com>
2026-06-23 12:48:10 +00:00 · 2026-06-23 10:55:35 +08:00
parent c48dd3cdd1
commit aa0bdb901f
2 changed files with 176 additions and 7 deletions
--- a/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts
+++ b/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts
@@ -1,5 +1,6 @@
 // Coverage for deciding when embedded run results should trigger model fallback.
 import { describe, expect, it } from "vitest";
+import { GENERIC_EXTERNAL_RUN_FAILURE_TEXT } from "../../auto-reply/reply/agent-runner-failure-copy.js";
 import { classifyEmbeddedAgentRunResultForModelFallback } from "./result-fallback-classifier.js";

 describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
@@ -49,6 +50,119 @@ describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
    });
  });

+  it("classifies generic external runner failure text as fallback-worthy", () => {
+    const result = classifyEmbeddedAgentRunResultForModelFallback({
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      result: {
+        payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }],
+        meta: {
+          durationMs: 42,
+        },
+      },
+    });
+
+    expect(result).toEqual({
+      message:
+        "claude-cli/claude-sonnet-4-6 ended with a generic external runner failure: " +
+        GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
+      reason: "format",
+      code: "generic_external_run_failure",
+      rawError: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
+    });
+  });
+
+  it("does not classify normal visible assistant output as fallback-worthy", () => {
+    const result = classifyEmbeddedAgentRunResultForModelFallback({
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      result: {
+        payloads: [{ text: "Here is the requested answer." }],
+        meta: {
+          durationMs: 42,
+        },
+      },
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry generic external runner failure text mixed with non-text visible content", () => {
+    const result = classifyEmbeddedAgentRunResultForModelFallback({
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      result: {
+        payloads: [
+          {
+            text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
+            mediaUrl: "https://example.com/failure-screenshot.png",
+            channelData: { delivered: true },
+          },
+        ],
+        meta: {
+          durationMs: 42,
+        },
+      },
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry generic external runner failure text mixed with interactive content", () => {
+    const result = classifyEmbeddedAgentRunResultForModelFallback({
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      result: {
+        payloads: [
+          {
+            text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
+            interactive: { type: "button", label: "Retry" },
+          },
+        ],
+        meta: {
+          durationMs: 42,
+        },
+      },
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry generic external runner failure text after committed delivery", () => {
+    const result = classifyEmbeddedAgentRunResultForModelFallback({
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      result: {
+        payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }],
+        messagingToolSentTexts: ["already delivered"],
+        meta: {
+          durationMs: 42,
+        },
+      },
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("preserves hook block results with generic external runner failure text", () => {
+    const result = classifyEmbeddedAgentRunResultForModelFallback({
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      result: {
+        payloads: [{ text: GENERIC_EXTERNAL_RUN_FAILURE_TEXT }],
+        meta: {
+          durationMs: 42,
+          error: {
+            kind: "hook_block",
+            message: GENERIC_EXTERNAL_RUN_FAILURE_TEXT,
+          },
+        },
+      },
+    });
+
+    expect(result).toBeNull();
+  });
+
  it("preserves hook block results with auth-like error payload text", () => {
    // Hook policy blocks are intentional local decisions, not provider failures
    // that should rotate models.
--- a/src/agents/embedded-agent-runner/result-fallback-classifier.ts
+++ b/src/agents/embedded-agent-runner/result-fallback-classifier.ts
@@ -1,6 +1,7 @@
 /**
 * Classifies embedded-agent run results for model fallback decisions.
 */
+import { GENERIC_EXTERNAL_RUN_FAILURE_TEXT } from "../../auto-reply/reply/agent-runner-failure-copy.js";
 import { isSilentReplyPayloadText } from "../../auto-reply/tokens.js";
 import { classifyFailoverReason } from "../embedded-agent-helpers/errors.js";
 import type { FailoverReason } from "../embedded-agent-helpers/types.js";
@@ -15,8 +16,9 @@ import type { EmbeddedAgentRunResult } from "./types.js";
 /**
 * Classifies embedded-agent terminal results for model fallback decisions.
 *
- * The classifier only flags failed invisible outcomes; delivered messages, deliberate silent
- * replies, hook blocks, and aborts must not trigger another model attempt.
+ * The classifier only flags failed invisible outcomes or exact generic external-runner failure
+ * copy; delivered messages, deliberate silent replies, hook blocks, and aborts must not trigger
+ * another model attempt.
 */
 function isEmbeddedAgentRunResult(value: unknown): value is EmbeddedAgentRunResult {
  return Boolean(
@@ -74,6 +76,47 @@ function hasDeliberateSilentTerminalReply(result: EmbeddedAgentRunResult): boole
  );
 }

+function hasNonTextVisiblePayloadContent(
+  payload: NonNullable<EmbeddedAgentRunResult["payloads"]>[number],
+): boolean {
+  const { text: _text, ...payloadWithoutText } = payload; // drop `text` so only the payload's remaining fields are evaluated
+  return hasVisibleAgentPayload( // true when the text-less copy alone still counts as visible (presumably media/interactive content; confirm against helper)
+    { payloads: [payloadWithoutText] },
+    {
+      includeErrorPayloads: false,
+      includeReasoningPayloads: false,
+    },
+  );
+}
+
+function classifyGenericExternalRunFailurePayload(params: {
+  provider: string;
+  model: string;
+  result: EmbeddedAgentRunResult;
+}): ModelFallbackResultClassification {
+  const payloads = params.result.payloads;
+  if (!Array.isArray(payloads) || payloads.length !== 1) { // only a result carrying exactly one payload can match
+    return null;
+  }
+  const [payload] = payloads;
+  const text = payload?.text;
+  if (
+    payload?.isError === true ||
+    payload?.isReasoning === true ||
+    typeof text !== "string" ||
+    text.trim() !== GENERIC_EXTERNAL_RUN_FAILURE_TEXT || // whole-text equality after trimming, not a substring match
+    hasNonTextVisiblePayloadContent(payload) // a payload that is still visible without its text is left alone
+  ) {
+    return null;
+  }
+  return {
+    message: `${params.provider}/${params.model} ended with a generic external runner failure: ${text}`, // NOTE(review): interpolates the untrimmed text although the guard compared text.trim()
+    reason: "format",
+    code: "generic_external_run_failure",
+    rawError: text, // NOTE(review): also untrimmed; confirm callers do not expect the exact constant here
+  };
+}
+
 function classifyHarnessResult(params: {
  provider: string;
  model: string;
@@ -136,11 +179,7 @@ export function classifyEmbeddedAgentRunResultForModelFallback(params: {
  if (
    params.result.meta.aborted ||
    params.hasDirectlySentBlockReply === true ||
-    params.hasBlockReplyPipelineOutput === true ||
-    hasVisibleAgentPayload(params.result, {
-      includeErrorPayloads: false,
-      includeReasoningPayloads: false,
-    })
+    params.hasBlockReplyPipelineOutput === true
  ) {
    return null;
  }
@@ -161,6 +200,22 @@ export function classifyEmbeddedAgentRunResultForModelFallback(params: {
    return null;
  }
  const payloads = params.result.payloads ?? [];
+  const genericExternalFailureClassification = classifyGenericExternalRunFailurePayload({
+    provider: params.provider,
+    model: params.model,
+    result: params.result,
+  });
+  if (genericExternalFailureClassification) {
+    return genericExternalFailureClassification;
+  }
+  if (
+    hasVisibleAgentPayload(params.result, {
+      includeErrorPayloads: false,
+      includeReasoningPayloads: false,
+    })
+  ) {
+    return null;
+  }

  if (fallbackSafeIncompleteTurn) {
    const terminalErrorText = payloads.find(