From ccbc0dc07b7201f9a6e526d80fb906982ab7dcba Mon Sep 17 00:00:00 2001 From: chenxingzhen Date: Fri, 20 Mar 2026 15:53:13 +0800 Subject: [PATCH] fix: mid-turn 429 rate limit silent no-reply and context engine registration failure - Fix legacy.ts: use registerContextEngineForOwner with core owner to bypass public-sdk protection on default slot - Add incomplete turn detection in run.ts: surface error when prompt() resolves prematurely during mid-turn 429 retry producing empty payloads - Fix TS2367: use correct StopReason union members (toolUse|error) instead of non-existent end_turn|max_tokens Fixes issues introduced by PR #47046 (5e293da) --- src/agents/pi-embedded-runner/run.ts | 50 +++++++++++++++++++ .../reply/agent-runner-execution.ts | 46 ++++++++++++++--- 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index e27cc1d8ae6..c8de170e7db 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1596,6 +1596,56 @@ export async function runEmbeddedPiAgent( }; } + // Detect incomplete turns where prompt() resolved prematurely due to + // pi-agent-core's auto-retry timing issue: when a mid-turn 429/overload + // triggers an internal retry, waitForRetry() resolves on the next + // assistant message *before* tool execution completes in the retried + // loop (see #8643). The captured lastAssistant has a non-terminal + // stopReason (e.g. "toolUse") with no text content, producing empty + // payloads. Surface an error instead of silently dropping the reply. + if ( + payloads.length === 0 && + !aborted && + !timedOut && + !attempt.didSendViaMessagingTool && + !attempt.clientToolCall && + !attempt.yieldDetected + ) { + const incompleteStopReason = lastAssistant?.stopReason; + // Only trigger for non-terminal stop reasons (toolUse, etc.) to + // avoid false positives when the model legitimately produces no text. + // StopReason union: "aborted" | "error" | "length" | "toolUse" + // "toolUse" is the key signal that prompt() resolved mid-turn. + if (incompleteStopReason === "toolUse" || incompleteStopReason === "error") { + log.warn( + `incomplete turn detected: runId=${params.runId} sessionId=${params.sessionId} ` + + `stopReason=${incompleteStopReason} payloads=0 — surfacing error to user`, + ); + return { + payloads: [ + { + text: + "⚠️ API rate limit reached mid-turn — the model couldn't generate a response " + + "after tool calls completed. Please try again in a moment.", + isError: true, + }, + ], + meta: { + durationMs: Date.now() - started, + agentMeta, + aborted, + systemPromptReport: attempt.systemPromptReport, + }, + didSendViaMessagingTool: attempt.didSendViaMessagingTool, + didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt, + messagingToolSentTexts: attempt.messagingToolSentTexts, + messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls, + messagingToolSentTargets: attempt.messagingToolSentTargets, + successfulCronAdds: attempt.successfulCronAdds, + }; + } + } + log.debug( `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`, ); diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index a317249d253..69d5d657a6a 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -12,6 +12,8 @@ import { isContextOverflowError, isBillingErrorMessage, isLikelyContextOverflowError, + isOverloadedErrorMessage, + isRateLimitErrorMessage, isTransientHttpError, sanitizeUserFacingText, } from "../../agents/pi-embedded-helpers.js"; @@ -680,13 +682,43 @@ export async function runAgentTurnWithFallback(params: { // overflow errors were returned as embedded error payloads. const finalEmbeddedError = runResult?.meta?.error; const hasPayloadText = runResult?.payloads?.some((p) => p.text?.trim()); - if (finalEmbeddedError && isContextOverflowError(finalEmbeddedError.message) && !hasPayloadText) { - return { - kind: "final", - payload: { - text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.", - }, - }; + if (finalEmbeddedError && !hasPayloadText) { + const errorMsg = finalEmbeddedError.message ?? ""; + if (isContextOverflowError(errorMsg)) { + return { + kind: "final", + payload: { + text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.", + }, + }; + } + } + + // Surface rate limit and overload errors that occur mid-turn (after tool + // calls) instead of silently returning an empty response. See #36142. + // Only applies when the assistant produced no valid (non-error) reply text, + // so tool-level rate-limit messages don't override a successful turn. + { + const hasNonErrorContent = runResult?.payloads?.some( + (p) => !p.isError && (p.text?.trim() || (p.mediaUrls?.length ?? 0) > 0), + ); + if (!hasNonErrorContent) { + const errorPayloadText = + runResult?.payloads?.find((p) => p.isError && p.text?.trim())?.text ?? ""; + const metaErrorMsg = finalEmbeddedError?.message ?? ""; + const errorCandidate = errorPayloadText || metaErrorMsg; + if ( + errorCandidate && + (isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate)) + ) { + return { + kind: "final", + payload: { + text: "⚠️ API rate limit reached mid-turn — the model couldn't generate a response after tool calls completed. Please try again in a moment.", + }, + }; + } + } } return {