fix: mid-turn 429 rate limit silent no-reply and context engine registration failure

- Fix legacy.ts: use registerContextEngineForOwner with core owner to
  bypass public-sdk protection on default slot
- Add incomplete-turn detection in run.ts: surface an error when prompt()
  resolves prematurely during a mid-turn 429 retry, producing empty payloads
- Fix TS2367: use correct StopReason union members (toolUse|error)
  instead of non-existent end_turn|max_tokens

Fixes issues introduced by PR #47046 (5e293da)
This commit is contained in:
chenxingzhen
2026-03-20 15:53:13 +08:00
committed by Josh Lehman
parent e0972db7a2
commit ccbc0dc07b
2 changed files with 89 additions and 7 deletions

View File

@@ -1596,6 +1596,56 @@ export async function runEmbeddedPiAgent(
};
}
// Detect incomplete turns where prompt() resolved prematurely due to
// pi-agent-core's auto-retry timing issue: when a mid-turn 429/overload
// triggers an internal retry, waitForRetry() resolves on the next
// assistant message *before* tool execution completes in the retried
// loop (see #8643). The captured lastAssistant has a non-terminal
// stopReason (e.g. "toolUse") with no text content, producing empty
// payloads. Surface an error instead of silently dropping the reply.
if (
payloads.length === 0 &&
!aborted &&
!timedOut &&
!attempt.didSendViaMessagingTool &&
!attempt.clientToolCall &&
!attempt.yieldDetected
) {
const incompleteStopReason = lastAssistant?.stopReason;
// Only trigger for stop reasons showing the turn did not finish normally
// ("toolUse" or "error") to avoid false positives when the model
// legitimately produces no text.
// StopReason union: "aborted" | "error" | "length" | "toolUse"
// "toolUse" is the key signal that prompt() resolved mid-turn.
if (incompleteStopReason === "toolUse" || incompleteStopReason === "error") {
log.warn(
`incomplete turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
`stopReason=${incompleteStopReason} payloads=0 — surfacing error to user`,
);
return {
payloads: [
{
text:
"⚠️ API rate limit reached mid-turn — the model couldn't generate a response " +
"after tool calls completed. Please try again in a moment.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
}
log.debug(
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
);

View File

@@ -12,6 +12,8 @@ import {
isContextOverflowError,
isBillingErrorMessage,
isLikelyContextOverflowError,
isOverloadedErrorMessage,
isRateLimitErrorMessage,
isTransientHttpError,
sanitizeUserFacingText,
} from "../../agents/pi-embedded-helpers.js";
@@ -680,13 +682,43 @@ export async function runAgentTurnWithFallback(params: {
// overflow errors were returned as embedded error payloads.
const finalEmbeddedError = runResult?.meta?.error;
const hasPayloadText = runResult?.payloads?.some((p) => p.text?.trim());
if (finalEmbeddedError && isContextOverflowError(finalEmbeddedError.message) && !hasPayloadText) {
return {
kind: "final",
payload: {
text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.",
},
};
if (finalEmbeddedError && !hasPayloadText) {
const errorMsg = finalEmbeddedError.message ?? "";
if (isContextOverflowError(errorMsg)) {
return {
kind: "final",
payload: {
text: "⚠️ Context overflow — this conversation is too large for the model. Use /new to start a fresh session.",
},
};
}
}
// Surface rate limit and overload errors that occur mid-turn (after tool
// calls) instead of silently returning an empty response. See #36142.
// Only applies when the assistant produced no valid (non-error) reply text
// or media, so tool-level rate-limit messages don't override a successful turn.
{
const hasNonErrorContent = runResult?.payloads?.some(
(p) => !p.isError && (p.text?.trim() || (p.mediaUrls?.length ?? 0) > 0),
);
if (!hasNonErrorContent) {
const errorPayloadText =
runResult?.payloads?.find((p) => p.isError && p.text?.trim())?.text ?? "";
const metaErrorMsg = finalEmbeddedError?.message ?? "";
const errorCandidate = errorPayloadText || metaErrorMsg;
if (
errorCandidate &&
(isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
) {
return {
kind: "final",
payload: {
text: "⚠️ API rate limit reached mid-turn — the model couldn't generate a response after tool calls completed. Please try again in a moment.",
},
};
}
}
}
return {