fix: mid-turn 429 rate limit silent no-reply and context engine registration failure

- Fix legacy.ts: use registerContextEngineForOwner with core owner to bypass
  public-sdk protection on default slot
- Add incomplete turn detection in run.ts: surface an error when prompt()
  resolves prematurely during a mid-turn 429 retry, producing empty payloads
- Fix TS2367: use correct StopReason union members (toolUse|error) instead of
  non-existent end_turn|max_tokens

Fixes issues introduced by PR #47046 (5e293da)
2026-05-16 04:30:46 +00:00 · 2026-03-20 15:53:13 +08:00
parent e0972db7a2
commit ccbc0dc07b
2 changed files with 89 additions and 7 deletions
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -1596,6 +1596,56 @@ export async function runEmbeddedPiAgent(
            };
          }

+          // Detect incomplete turns where prompt() resolved prematurely due to
+          // pi-agent-core's auto-retry timing issue: when a mid-turn 429/overload
+          // triggers an internal retry, waitForRetry() resolves on the next
+          // assistant message *before* tool execution completes in the retried
+          // loop (see #8643). The captured lastAssistant has a non-terminal
+          // stopReason (e.g. "toolUse") with no text content, producing empty
+          // payloads. Surface an error instead of silently dropping the reply.
+          if (
+            payloads.length === 0 &&
+            !aborted &&
+            !timedOut &&
+            !attempt.didSendViaMessagingTool &&
+            !attempt.clientToolCall &&
+            !attempt.yieldDetected
+          ) {
+            const incompleteStopReason = lastAssistant?.stopReason;
+            // Only trigger when stopReason is "toolUse" or "error", to avoid
+            // false positives when the model legitimately produces no text.
+            // StopReason union: "aborted" | "error" | "length" | "toolUse"
+            // "toolUse" is the key signal that prompt() resolved mid-turn.
+            if (incompleteStopReason === "toolUse" || incompleteStopReason === "error") {
+              log.warn(
+                `incomplete turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
+                  `stopReason=${incompleteStopReason} payloads=0 — surfacing error to user`,
+              );
+              return {
+                payloads: [
+                  {
+                    text:
+                      "⚠️ API rate limit reached mid-turn — the model couldn't generate a response " +
+                      "after tool calls completed. Please try again in a moment.",
+                    isError: true,
+                  },
+                ],
+                meta: {
+                  durationMs: Date.now() - started,
+                  agentMeta,
+                  aborted,
+                  systemPromptReport: attempt.systemPromptReport,
+                },
+                didSendViaMessagingTool: attempt.didSendViaMessagingTool,
+                didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
+                messagingToolSentTexts: attempt.messagingToolSentTexts,
+                messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
+                messagingToolSentTargets: attempt.messagingToolSentTargets,
+                successfulCronAdds: attempt.successfulCronAdds,
+              };
+            }
+          }
+
          log.debug(
            `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
          );
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -12,6 +12,8 @@ import {
  isContextOverflowError,
  isBillingErrorMessage,
  isLikelyContextOverflowError,
+  isOverloadedErrorMessage,
+  isRateLimitErrorMessage,
  isTransientHttpError,
  sanitizeUserFacingText,
 } from "../../agents/pi-embedded-helpers.js";
@@ -680,13 +682,43 @@ export async function runAgentTurnWithFallback(params: {
  // overflow errors were returned as embedded error payloads.
  const finalEmbeddedError = runResult?.meta?.error;
  const hasPayloadText = runResult?.payloads?.some((p) => p.text?.trim());
-  if (finalEmbeddedError && isContextOverflowError(finalEmbeddedError.message) && !hasPayloadText) {
-    return {
-      kind: "final",
-      payload: {
-        text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.",
-      },
-    };
+  if (finalEmbeddedError && !hasPayloadText) {
+    const errorMsg = finalEmbeddedError.message ?? "";
+    if (isContextOverflowError(errorMsg)) {
+      return {
+        kind: "final",
+        payload: {
+          text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.",
+        },
+      };
+    }
+  }
+
+  // Surface rate limit and overload errors that occur mid-turn (after tool
+  // calls) instead of silently returning an empty response. See #36142.
+  // Only applies when the assistant produced no valid (non-error) reply text,
+  // so tool-level rate-limit messages don't override a successful turn.
+  {
+    const hasNonErrorContent = runResult?.payloads?.some(
+      (p) => !p.isError && (p.text?.trim() || (p.mediaUrls?.length ?? 0) > 0),
+    );
+    if (!hasNonErrorContent) {
+      const errorPayloadText =
+        runResult?.payloads?.find((p) => p.isError && p.text?.trim())?.text ?? "";
+      const metaErrorMsg = finalEmbeddedError?.message ?? "";
+      const errorCandidate = errorPayloadText || metaErrorMsg;
+      if (
+        errorCandidate &&
+        (isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
+      ) {
+        return {
+          kind: "final",
+          payload: {
+            text: "⚠️ API rate limit reached mid-turn — the model couldn't generate a response after tool calls completed. Please try again in a moment.",
+          },
+        };
+      }
+    }
  }

  return {