fix: mid-turn 429 rate limit silent no-reply and context engine registration failure (#50930)

Merged via squash.

Prepared head SHA: eea7800df3
Co-authored-by: infichen <13826604+infichen@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
This commit is contained in:
chenxingzhen
2026-03-26 01:43:08 +08:00
committed by GitHub
parent e0972db7a2
commit 4ae4d1fabe
7 changed files with 294 additions and 12 deletions

View File

@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- Plugins/SDK: thread `moduleUrl` through plugin-sdk alias resolution so user-installed plugins outside the openclaw directory (e.g. `~/.openclaw/extensions/`) correctly resolve `openclaw/plugin-sdk/*` subpath imports, and gate `plugin-sdk:check-exports` in `release:check`. (#54283) Thanks @xieyongliang.
- Telegram/pairing: ignore self-authored DM `message` updates so bot-pinned status cards and similar service updates do not trigger bogus pairing requests or re-enter inbound dispatch. (#54530) Thanks @huntharo.
- iMessage: stop leaking inline `[[reply_to:...]]` tags into delivered text by sending `reply_to` as RPC metadata and stripping stray directive tags from outbound messages. (#39512) Thanks @mvanhorn.
- Agents/embedded replies: surface mid-turn 429 and overload failures when embedded runs end without a user-visible reply, while preserving successful media-only replies that still use legacy `mediaUrl`. (#50930) Thanks @infichen.
## 2026.3.24

View File

@@ -64,6 +64,7 @@ import {
type FailoverReason,
} from "../pi-embedded-helpers.js";
import { ensureRuntimePluginsLoaded } from "../runtime-plugins.js";
import { isLikelyMutatingToolName } from "../tool-mutation.js";
import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
import { buildEmbeddedCompactionRuntimeContext } from "./compaction-runtime-context.js";
@@ -1596,6 +1597,82 @@ export async function runEmbeddedPiAgent(
};
}
// Detect incomplete turns where prompt() resolved prematurely due to
// pi-agent-core's auto-retry timing issue: when a mid-turn 429/overload
// triggers an internal retry, waitForRetry() resolves on the next
// assistant message *before* tool execution completes in the retried
// loop (see #8643). The captured lastAssistant has a non-terminal
// stopReason (e.g. "toolUse") with no text content, producing empty
// payloads. Surface an error instead of silently dropping the reply.
//
// Exclusions:
// - didSendDeterministicApprovalPrompt: approval-prompt turns
// intentionally produce empty payloads with stopReason=toolUse
// - lastToolError: suppressed/recoverable tool failures also produce
// empty payloads with stopReason=toolUse; those are handled by
// buildEmbeddedRunPayloads' own warning policy
if (
payloads.length === 0 &&
!aborted &&
!timedOut &&
!attempt.clientToolCall &&
!attempt.yieldDetected &&
!attempt.didSendDeterministicApprovalPrompt &&
!attempt.lastToolError
) {
const incompleteStopReason = lastAssistant?.stopReason;
// Only trigger for non-terminal stop reasons (toolUse, etc.) to
// avoid false positives when the model legitimately produces no text.
// StopReason union: "aborted" | "error" | "length" | "toolUse"
// "toolUse" is the key signal that prompt() resolved mid-turn.
if (incompleteStopReason === "toolUse" || incompleteStopReason === "error") {
log.warn(
`incomplete turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
`stopReason=${incompleteStopReason} payloads=0 — surfacing error to user`,
);
// Mark the failing profile for cooldown so multi-profile setups
// rotate away from the exhausted credential on the next turn.
if (lastProfileId) {
const failoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: resolveAuthProfileFailureReason(failoverReason),
});
}
// Warn about potential side-effects when mutating tools executed
// before the turn was interrupted, so users don't blindly retry.
const hadMutatingTools = attempt.toolMetas.some((t) =>
isLikelyMutatingToolName(t.toolName),
);
const errorText = hadMutatingTools
? "⚠️ Agent couldn't generate a response. Note: some tool actions may have already been executed — please verify before retrying."
: "⚠️ Agent couldn't generate a response. Please try again.";
return {
payloads: [
{
text: errorText,
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
}
log.debug(
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
);

View File

@@ -61,6 +61,7 @@ export type EmbeddedPiRunResult = {
mediaUrls?: string[];
replyToId?: string;
isError?: boolean;
isReasoning?: boolean;
}>;
meta: EmbeddedPiRunMeta;
// True if a messaging tool (telegram, whatsapp, discord, slack, sessions_send)

View File

@@ -1,6 +1,9 @@
import crypto from "node:crypto";
import fs from "node:fs";
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
import {
hasOutboundReplyContent,
resolveSendableOutboundReplyParts,
} from "openclaw/plugin-sdk/reply-payload";
import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
import { runCliAgent } from "../../agents/cli-runner.js";
import { getCliSessionId } from "../../agents/cli-session.js";
@@ -12,6 +15,8 @@ import {
isContextOverflowError,
isBillingErrorMessage,
isLikelyContextOverflowError,
isOverloadedErrorMessage,
isRateLimitErrorMessage,
isTransientHttpError,
sanitizeUserFacingText,
} from "../../agents/pi-embedded-helpers.js";
@@ -680,13 +685,54 @@ export async function runAgentTurnWithFallback(params: {
// overflow errors were returned as embedded error payloads.
const finalEmbeddedError = runResult?.meta?.error;
const hasPayloadText = runResult?.payloads?.some((p) => p.text?.trim());
if (finalEmbeddedError && isContextOverflowError(finalEmbeddedError.message) && !hasPayloadText) {
return {
kind: "final",
payload: {
text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.",
},
};
if (finalEmbeddedError && !hasPayloadText) {
const errorMsg = finalEmbeddedError.message ?? "";
if (isContextOverflowError(errorMsg)) {
return {
kind: "final",
payload: {
text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.",
},
};
}
}
// Surface rate limit and overload errors that occur mid-turn (after tool
// calls) instead of silently returning an empty response. See #36142.
// Only applies when the assistant produced no valid (non-error) reply text,
// so tool-level rate-limit messages don't override a successful turn.
// Prioritize metaErrorMsg (raw upstream error) over errorPayloadText to
// avoid self-matching on pre-formatted "⚠️" messages from run.ts, and
// skip already-formatted payloads so tool-specific 429 errors (e.g.
// browser/search tool failures) are preserved rather than overwritten.
//
// Instead of early-returning kind:"final" (which would bypass
// buildReplyPayloads() filtering and session bookkeeping), inject the
// error payload into runResult so it flows through the normal
// kind:"success" path — preserving streaming dedup, message_send
// suppression, and usage/model metadata updates.
if (runResult) {
const hasNonErrorContent = runResult.payloads?.some(
(p) => !p.isError && !p.isReasoning && hasOutboundReplyContent(p, { trimText: true }),
);
if (!hasNonErrorContent) {
const metaErrorMsg = finalEmbeddedError?.message ?? "";
const rawErrorPayloadText =
runResult.payloads?.find((p) => p.isError && p.text?.trim() && !p.text.startsWith("⚠️"))
?.text ?? "";
const errorCandidate = metaErrorMsg || rawErrorPayloadText;
if (
errorCandidate &&
(isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
) {
runResult.payloads = [
{
text: "⚠️ API rate limit reached — the model couldn't generate a response. Please try again in a moment.",
isError: true,
},
];
}
}
}
return {

View File

@@ -1946,3 +1946,97 @@ describe("runReplyAgent billing error classification", () => {
expect(payload?.text).not.toContain("Context overflow");
});
});
// Regression coverage for #50930: a mid-turn 429/overload that leaves the
// run with no user-visible reply must surface an error payload instead of
// silently returning nothing.
describe("runReplyAgent mid-turn rate-limit fallback", () => {
// Drives runReplyAgent once with a minimal telegram-flavored fixture.
// NOTE(review): fixtures are cast through `unknown` because only the
// fields the code under test reads are populated.
function createRun() {
const typing = createMockTypingController();
const sessionCtx = {
Provider: "telegram",
MessageSid: "msg",
} as unknown as TemplateContext;
const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings;
const followupRun = {
prompt: "hello",
summaryLine: "hello",
enqueuedAt: Date.now(),
run: {
sessionId: "session",
sessionKey: "main",
messageProvider: "telegram",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp",
config: {},
skillsSnapshot: {},
provider: "anthropic",
model: "claude",
thinkLevel: "low",
verboseLevel: "off",
elevatedLevel: "off",
bashElevated: {
enabled: false,
allowed: false,
defaultLevel: "off",
},
timeoutMs: 1_000,
blockReplyBreak: "message_end",
},
} as unknown as FollowupRun;
return runReplyAgent({
commandBody: "hello",
followupRun,
queueKey: "main",
resolvedQueue,
shouldSteer: false,
shouldFollowup: false,
isActive: false,
isStreaming: false,
typing,
sessionCtx,
defaultModel: "anthropic/claude",
resolvedVerboseLevel: "off",
isNewSession: false,
blockStreamingEnabled: false,
resolvedBlockStreamingBreak: "message_end",
shouldInjectGroupIntro: false,
typingMode: "instant",
});
}
it("surfaces a final error when only reasoning preceded a mid-turn rate limit", async () => {
// Reasoning-only payloads are excluded from "non-error content", so the
// 429 carried in meta.error should become the user-facing reply text.
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "reasoning", isReasoning: true }],
meta: {
error: {
kind: "retry_limit",
message: "429 Too Many Requests: rate limit exceeded",
},
},
});
const result = await createRun();
const payload = Array.isArray(result) ? result[0] : result;
expect(payload?.text).toContain("API rate limit reached");
});
it("preserves successful media-only replies that use legacy mediaUrl", async () => {
// A legacy mediaUrl payload with no text is still a successful reply;
// the rate-limit fallback must not overwrite it even though meta.error
// reports a 429.
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ mediaUrl: "https://example.test/image.png" }],
meta: {
error: {
kind: "retry_limit",
message: "429 Too Many Requests: rate limit exceeded",
},
},
});
const result = await createRun();
const payload = Array.isArray(result) ? result[0] : result;
expect(payload).toMatchObject({
mediaUrl: "https://example.test/image.png",
});
expect(payload?.text).toBeUndefined();
});
});

View File

@@ -202,6 +202,8 @@ describe("registerPreActionHooks", () => {
}
it("handles debug mode and plugin-required command preaction", async () => {
const processTitleSetSpy = vi.spyOn(process, "title", "set");
await runPreAction({
parseArgv: ["status"],
processArgv: ["node", "openclaw", "status", "--debug"],
@@ -214,7 +216,7 @@ describe("registerPreActionHooks", () => {
commandPath: ["status"],
});
expect(ensurePluginRegistryLoadedMock).toHaveBeenCalledWith({ scope: "channels" });
expect(process.title).toBe("openclaw-status");
expect(processTitleSetSpy).toHaveBeenCalledWith("openclaw-status");
vi.clearAllMocks();
await runPreAction({
@@ -229,6 +231,7 @@ describe("registerPreActionHooks", () => {
commandPath: ["message", "send"],
});
expect(ensurePluginRegistryLoadedMock).toHaveBeenCalledWith({ scope: "all" });
processTitleSetSpy.mockRestore();
});
it("keeps setup alias and channels add manifest-first", async () => {

View File

@@ -7,18 +7,72 @@ import { NON_ENV_SECRETREF_MARKER } from "../agents/model-auth-markers.js";
import type { OpenClawConfig } from "../config/config.js";
import type { ModelDefinitionConfig } from "../config/types.models.js";
// Mock the auth-profiles barrel: pure helpers delegate to their real
// implementations, while ensureAuthProfileStore reads a JSON fixture
// straight from disk so each test case controls store contents.
vi.mock("../agents/auth-profiles.js", async () => {
const profiles = await vi.importActual<typeof import("../agents/auth-profiles/profiles.js")>(
"../agents/auth-profiles/profiles.js",
);
const order = await vi.importActual<typeof import("../agents/auth-profiles/order.js")>(
"../agents/auth-profiles/order.js",
);
const oauth = await vi.importActual<typeof import("../agents/auth-profiles/oauth.js")>(
"../agents/auth-profiles/oauth.js",
);
// Reads <agentDir>/auth-profiles.json synchronously; a missing agentDir or
// any read/parse failure falls back to an empty v1 store.
const readStore = (agentDir?: string) => {
if (!agentDir) {
return { version: 1, profiles: {} };
}
const authPath = path.join(agentDir, "auth-profiles.json");
try {
const parsed = JSON.parse(nodeFs.readFileSync(authPath, "utf8")) as {
version?: number;
profiles?: Record<string, unknown>;
order?: Record<string, string[]>;
lastGood?: Record<string, string>;
usageStats?: Record<string, unknown>;
};
// Optional sections are spread in only when present so the returned
// shape mirrors what was actually serialized in the fixture.
return {
version: parsed.version ?? 1,
profiles: parsed.profiles ?? {},
...(parsed.order ? { order: parsed.order } : {}),
...(parsed.lastGood ? { lastGood: parsed.lastGood } : {}),
...(parsed.usageStats ? { usageStats: parsed.usageStats } : {}),
};
} catch {
return { version: 1, profiles: {} };
}
};
return {
// Intentionally a no-op: tests re-read the fixture on every call, so
// there is no runtime snapshot to clear.
clearRuntimeAuthProfileStoreSnapshots: () => {},
ensureAuthProfileStore: (agentDir?: string) => readStore(agentDir),
dedupeProfileIds: profiles.dedupeProfileIds,
listProfilesForProvider: profiles.listProfilesForProvider,
resolveApiKeyForProfile: oauth.resolveApiKeyForProfile,
resolveAuthProfileOrder: order.resolveAuthProfileOrder,
};
});
// Plugin-based provider auth resolution is stubbed to resolve nothing.
// Mocked under both the .js and .ts specifiers so either import extension
// hits the same shared mock instance.
const resolveProviderUsageAuthWithPluginMock = vi.fn(async (..._args: unknown[]) => null);
vi.mock("../plugins/provider-runtime.js", () => ({
resolveProviderUsageAuthWithPlugin: resolveProviderUsageAuthWithPluginMock,
}));
vi.mock("../plugins/provider-runtime.ts", () => ({
resolveProviderUsageAuthWithPlugin: resolveProviderUsageAuthWithPluginMock,
}));
// External CLI credential readers return no cached credentials so tests
// never pick up real state from the developer's machine.
vi.mock("../agents/cli-credentials.js", () => ({
readCodexCliCredentialsCached: () => null,
readMiniMaxCliCredentialsCached: () => null,
readQwenCliCredentialsCached: () => null,
}));
vi.mock("../agents/auth-profiles/external-cli-sync.js", () => ({
syncExternalCliCredentials: () => false,
}));
// Lazily-bound imports so the modules load after the mocks above register.
// NOTE(review): presumably assigned in a beforeAll/setup hook — the actual
// import site is outside this view; confirm before relying on it.
let resolveProviderAuths: typeof import("./provider-usage.auth.js").resolveProviderAuths;
let clearRuntimeAuthProfileStoreSnapshots: typeof import("../agents/auth-profiles.js").clearRuntimeAuthProfileStoreSnapshots;
let clearConfigCache: typeof import("../config/config.js").clearConfigCache;
@@ -64,9 +118,15 @@ describe("resolveProviderAuths key normalization", () => {
// Runs `fn` against a freshly-provisioned fake HOME directory so that
// auth-profile state cannot bleed between test cases. Each call allocates
// a new `case-N` directory under the suite root, creates the expected
// `.openclaw` layout, and seeds an empty auth-profiles store file for the
// mocked ensureAuthProfileStore to read.
async function withSuiteHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
suiteCase += 1;
const home = path.join(suiteRoot, `case-${suiteCase}`);
nodeFs.mkdirSync(home, { recursive: true });
const stateDir = path.join(home, ".openclaw");
const sessionsDir = path.join(stateDir, "agents", "main", "sessions");
const agentDir = path.join(stateDir, "agents", "main", "agent");
for (const dir of [sessionsDir, agentDir]) {
nodeFs.mkdirSync(dir, { recursive: true });
}
// Seed an empty v1 store (pretty-printed, trailing newline) so store
// reads succeed without any profiles configured.
const emptyStore = `${JSON.stringify({ version: 1, profiles: {} }, null, 2)}\n`;
nodeFs.writeFileSync(path.join(agentDir, "auth-profiles.json"), emptyStore, "utf8");
return await fn(home);
}