fix(agents): check billing errors before context overflow heuristics (#40409)

Merged via squash. Prepared head SHA: c88f89c462 Co-authored-by: ademczuk <5212682+ademczuk@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf
2026-03-12 07:20:45 +00:00 · 2026-03-11 19:08:55 +01:00
parent f417d78eef
commit 58634c9c65
5 changed files with 101 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1045,6 +1045,7 @@ Docs: https://docs.openclaw.ai
 - Browser/Navigate: resolve the correct `targetId` in navigate responses after renderer swaps. (#25326) Thanks @stone-jin and @vincentkoc.
 - FS/Sandbox workspace boundaries: add a dedicated `outside-workspace` safe-open error code for root-escape checks, and propagate specific outside-workspace messages across edit/browser/media consumers instead of generic not-found/invalid-path fallbacks. (#29715) Thanks @YuzuruS.
 - Diagnostics/Stuck session signal: add configurable stuck-session warning threshold via `diagnostics.stuckSessionWarnMs` (default 120000ms) to reduce false-positive warnings on long multi-tool turns. (#31032)
 - Agents/error classification: check billing errors before context overflow heuristics in the agent runner catch block so spend-limit and quota errors show the billing-specific message instead of being misclassified as "Context overflow: prompt too large". (#40409) Thanks @ademczuk.
 ## 2026.2.26
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
@@ -439,6 +439,18 @@ describe("isLikelyContextOverflowError", () => {
      expect(isLikelyContextOverflowError(sample)).toBe(false);
    }
  });
  it("excludes billing errors even when text matches context overflow patterns", () => {
    // Billing messages whose wording also mentions token limits or request size.
    // Each one must be classified as billing and must not be reported as a
    // likely context overflow.
    const billingMessagesWithOverflowWording = [
      "402 Payment Required: request token limit exceeded for this billing plan",
      "insufficient credits: request size exceeds your current plan limits",
      "Your credit balance is too low. Maximum request token limit exceeded.",
    ];
    billingMessagesWithOverflowWording.forEach((text) => {
      expect(isBillingErrorMessage(text)).toBe(true);
      expect(isLikelyContextOverflowError(text)).toBe(false);
    });
  });
 });
 describe("isTransientHttpError", () => {
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -138,6 +138,13 @@ export function isLikelyContextOverflowError(errorMessage?: string): boolean {
    return false;
  }
  // Billing/quota errors can contain patterns like "request size exceeds" or
  // "maximum token limit exceeded" that match the context overflow heuristic.
  // Billing is a more specific error class — exclude it early.
  if (isBillingErrorMessage(errorMessage)) {
    return false;
  }
  if (CONTEXT_WINDOW_TOO_SMALL_RE.test(errorMessage)) {
    return false;
  }
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -6,8 +6,10 @@ import { getCliSessionId } from "../../agents/cli-session.js";
 import { runWithModelFallback } from "../../agents/model-fallback.js";
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
  BILLING_ERROR_USER_MESSAGE,
  isCompactionFailureError,
  isContextOverflowError,
  isBillingErrorMessage,
  isLikelyContextOverflowError,
  isTransientHttpError,
  sanitizeUserFacingText,
@@ -514,8 +516,9 @@ export async function runAgentTurnWithFallback(params: {
      break;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
-      const isContextOverflow = isLikelyContextOverflowError(message);
+      const isBilling = isBillingErrorMessage(message);
-      const isCompactionFailure = isCompactionFailureError(message);
+      const isContextOverflow = !isBilling && isLikelyContextOverflowError(message);
      const isCompactionFailure = !isBilling && isCompactionFailureError(message);
      const isSessionCorruption = /function call turn comes immediately after/i.test(message);
      const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
      const isTransientHttp = isTransientHttpError(message);
@@ -610,11 +613,13 @@ export async function runAgentTurnWithFallback(params: {
        ? sanitizeUserFacingText(message, { errorContext: true })
        : message;
      const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
-      const fallbackText = isContextOverflow
+      const fallbackText = isBilling
-        ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+        ? BILLING_ERROR_USER_MESSAGE
-        : isRoleOrderingError
+        : isContextOverflow
-          ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+          ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-          : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
+          : isRoleOrderingError
            ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
            : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
      return {
        kind: "final",
--- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
+++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
@@ -1628,3 +1628,72 @@ describe("runReplyAgent transient HTTP retry", () => {
    expect(payload?.text).toContain("Recovered response");
  });
 });
 describe("runReplyAgent billing error classification", () => {
  // Regression guard for the catch block in runAgentTurnWithFallback, exercised
  // through runReplyAgent. Provider billing errors (OpenRouter is one example)
  // may use token/size wording that the context overflow heuristics also match.
  // The text returned to the user has to be the billing-specific message rather
  // than the "Context overflow" fallback.
  it("returns billing message for mixed-signal error (billing text + overflow patterns)", async () => {
    // A single rejection that mixes a billing signal with overflow-like wording.
    const mixedSignalError = new Error(
      "402 Payment Required: request token limit exceeded for this billing plan",
    );
    runEmbeddedPiAgentMock.mockRejectedValueOnce(mixedSignalError);

    const typingController = createMockTypingController();
    const templateContext = {
      Provider: "telegram",
      MessageSid: "msg",
    } as unknown as TemplateContext;
    const queueSettings = { mode: "interrupt" } as unknown as QueueSettings;

    // Minimal run settings; the double cast below stands in for a full FollowupRun.
    const runSettings = {
      sessionId: "session",
      sessionKey: "main",
      messageProvider: "telegram",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: "/tmp",
      config: {},
      skillsSnapshot: {},
      provider: "anthropic",
      model: "claude",
      thinkLevel: "low",
      verboseLevel: "off",
      elevatedLevel: "off",
      bashElevated: {
        enabled: false,
        allowed: false,
        defaultLevel: "off",
      },
      timeoutMs: 1_000,
      blockReplyBreak: "message_end",
    };
    const followupRun = {
      prompt: "hello",
      summaryLine: "hello",
      enqueuedAt: Date.now(),
      run: runSettings,
    } as unknown as FollowupRun;

    const result = await runReplyAgent({
      // Message and run under test.
      commandBody: "hello",
      followupRun,
      sessionCtx: templateContext,
      defaultModel: "anthropic/claude",
      // Queue handling.
      queueKey: "main",
      resolvedQueue: queueSettings,
      shouldSteer: false,
      shouldFollowup: false,
      // Session state flags.
      isActive: false,
      isStreaming: false,
      isNewSession: false,
      shouldInjectGroupIntro: false,
      // Typing, verbosity and block-streaming settings.
      typing: typingController,
      typingMode: "instant",
      resolvedVerboseLevel: "off",
      blockStreamingEnabled: false,
      resolvedBlockStreamingBreak: "message_end",
    });

    // runReplyAgent may hand back one payload or a list; inspect the first one.
    const replyText = (Array.isArray(result) ? result[0] : result)?.text;
    expect(replyText).toContain("billing error");
    expect(replyText).not.toContain("Context overflow");
  });
 });