fix(agents): preserve Codex model capacity guidance

Vincent Koc
2026-04-23 14:27:48 -07:00
parent f0300253c1
commit f718ba6601
5 changed files with 130 additions and 18 deletions

View File

@@ -13,7 +13,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
-- Agents/OpenAI: surface selected-model capacity failures with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
+- Agents/OpenAI: surface selected-model capacity failures from PI, Codex, and auto-reply harness paths with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
 - Providers/OpenAI: stop advertising the removed `gpt-5.3-codex-spark` Codex model through fallback catalogs, and suppress stale rows with a GPT-5.5 recovery hint.
 - Plugins/QR: replace legacy `qrcode-terminal` QR rendering with bounded `qrcode-tui` helpers for plugin login/setup flows. (#65969) Thanks @vincentkoc.
 - Voice-call/realtime: wait for OpenAI session configuration before greeting or forwarding buffered audio, and reject non-allowlisted Twilio callers before stream setup. (#43501) Thanks @forrestblount.
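
The first fix above routes raw provider errors through a new shared helper, `formatRateLimitOrOverloadedErrorCopy`, which the hunks below export and consume. A minimal caller sketch in TypeScript, assuming the behavior shown in this commit's test mock (the import path and the generic fallback string are illustrative, not from the shipped code):

import { formatRateLimitOrOverloadedErrorCopy } from "../../agents/pi-embedded-helpers.js"; // path as mocked in the tests below

// Sketch: prefer the specific capacity / rate-limit / overloaded copy,
// and only fall back to a generic message when no pattern matches.
function userFacingRunError(raw: string): string {
  return (
    formatRateLimitOrOverloadedErrorCopy(raw) ??
    "⚠️ Agent run failed with an empty response." // hypothetical generic copy
  );
}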

View File

@@ -13,6 +13,7 @@ export {
   BILLING_ERROR_USER_MESSAGE,
   classifyProviderRuntimeFailureKind,
   formatBillingErrorMessage,
+  formatRateLimitOrOverloadedErrorCopy,
   classifyFailoverReason,
   classifyFailoverReasonFromHttpStatus,
   formatRawAssistantErrorForUi,

View File

@@ -53,6 +53,7 @@ import type { FailoverReason } from "./types.js";
 export {
   BILLING_ERROR_USER_MESSAGE,
   formatBillingErrorMessage,
+  formatRateLimitOrOverloadedErrorCopy,
   getApiErrorPayloadFingerprint,
   isRawApiErrorPayload,
   sanitizeUserFacingText,

View File

@@ -49,10 +49,23 @@ vi.mock("../../agents/bootstrap-budget.js", () => ({
vi.mock("../../agents/pi-embedded-helpers.js", () => ({
BILLING_ERROR_USER_MESSAGE: "billing",
formatRateLimitOrOverloadedErrorCopy: (message: string) => {
if (/model\s+(?:is\s+)?at capacity/i.test(message)) {
return "⚠️ Selected model is at capacity. Try a different model, or wait and retry.";
}
if (/rate.limit|too many requests|429/i.test(message)) {
return "⚠️ API rate limit reached. Please try again later.";
}
if (/overloaded/i.test(message)) {
return "The AI service is temporarily overloaded. Please try again in a moment.";
}
return undefined;
},
isCompactionFailureError: () => false,
isContextOverflowError: () => false,
isBillingErrorMessage: () => false,
isLikelyContextOverflowError: () => false,
isOverloadedErrorMessage: (message: string) => /overloaded|capacity/i.test(message),
isRateLimitErrorMessage: () => false,
isTransientHttpError: () => false,
sanitizeUserFacingText: (text?: string) => text ?? "",
@@ -410,6 +423,95 @@ describe("runAgentTurnWithFallback", () => {
     expect(onToolResult.mock.calls[0]?.[0]?.text).toBeUndefined();
   });
+  it("surfaces model capacity errors from no-text mid-turn failures", async () => {
+    state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
+      payloads: [{ text: "thinking", isReasoning: true }],
+      meta: {
+        error: {
+          kind: "server_overloaded",
+          message: "Selected model is at capacity. Please try a different model.",
+        },
+      },
+    });
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun: createFollowupRun(),
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+    expect(result.kind).toBe("success");
+    if (result.kind === "success") {
+      expect(result.runResult.payloads).toEqual([
+        {
+          text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.",
+          isError: true,
+        },
+      ]);
+    }
+  });
+  it("surfaces model capacity errors from pre-reply CLI failures", async () => {
+    state.runWithModelFallbackMock.mockRejectedValueOnce(
+      new Error("Selected model is at capacity. Please try a different model."),
+    );
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const followupRun = createFollowupRun();
+    followupRun.run.provider = "openai-codex";
+    followupRun.run.model = "gpt-5.5";
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun,
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+    expect(result).toEqual({
+      kind: "final",
+      payload: {
+        text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.",
+      },
+    });
+  });
   it("strips a glued leading NO_REPLY token from streamed tool results", async () => {
     const onToolResult = vi.fn();
     state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {

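Note on the mock above: the capacity pattern is matched before the broader rate-limit and overloaded patterns, so a capacity message earns the model-switch hint even though it also satisfies the mocked `isOverloadedErrorMessage` (`/overloaded|capacity/i`). Illustrative calls against that mock (a sketch of expected returns, not the shipped helper):

formatRateLimitOrOverloadedErrorCopy("Selected model is at capacity.");
// → "⚠️ Selected model is at capacity. Try a different model, or wait and retry."
formatRateLimitOrOverloadedErrorCopy("HTTP 429: too many requests");
// → "⚠️ API rate limit reached. Please try again later."
formatRateLimitOrOverloadedErrorCopy("upstream service overloaded");
// → "The AI service is temporarily overloaded. Please try again in a moment."
formatRateLimitOrOverloadedErrorCopy("socket hang up");
// → undefined, so callers keep their existing fallback text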
View File

@@ -16,6 +16,7 @@ import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
+  formatRateLimitOrOverloadedErrorCopy,
   isCompactionFailureError,
   isContextOverflowError,
   isBillingErrorMessage,
@@ -1516,24 +1517,34 @@ export async function runAgentTurnWithFallback(params: {
       // underlying error. FallbackSummaryError messages embed per-attempt
       // reason labels like `(rate_limit)`, so string-matching the summary text
       // would misclassify mixed-cause exhaustion as a pure transient cooldown.
-      const isRateLimit = isFallbackSummaryError(err)
+      const isFallbackSummary = isFallbackSummaryError(err);
+      const isPureTransientSummary = isFallbackSummary
         ? isPureTransientRateLimitSummary(err)
+        : false;
+      const isRateLimit = isFallbackSummary
+        ? isPureTransientSummary
         : isRateLimitErrorMessage(message);
+      const rateLimitOrOverloadedCopy =
+        !isFallbackSummary || isPureTransientSummary
+          ? formatRateLimitOrOverloadedErrorCopy(message)
+          : undefined;
       const safeMessage = isTransientHttp
         ? sanitizeUserFacingText(message, { errorContext: true })
         : message;
       const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
       const fallbackText = isBilling
         ? BILLING_ERROR_USER_MESSAGE
-        : isRateLimit
+        : isRateLimit && !isOverloadedErrorMessage(message)
           ? buildRateLimitCooldownMessage(err)
-          : isContextOverflow
-            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-            : isRoleOrderingError
-              ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
-              : shouldSurfaceToControlUi
-                ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
-                : buildExternalRunFailureText(message);
+          : rateLimitOrOverloadedCopy
+            ? rateLimitOrOverloadedCopy
+            : isContextOverflow
+              ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+              : isRoleOrderingError
+                ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+                : shouldSurfaceToControlUi
+                  ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
+                  : buildExternalRunFailureText(message);
       params.replyOperation?.fail("run_failed", err);
       return {
@@ -1590,16 +1601,13 @@ export async function runAgentTurnWithFallback(params: {
         (p) => p.isError && hasNonEmptyString(p.text) && !p.text.startsWith("⚠️"),
       )?.text ?? "";
       const errorCandidate = metaErrorMsg || rawErrorPayloadText;
-      if (
-        errorCandidate &&
-        (isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
-      ) {
-        const isOverloaded = isOverloadedErrorMessage(errorCandidate);
+      const formattedErrorCandidate = errorCandidate
+        ? formatRateLimitOrOverloadedErrorCopy(errorCandidate)
+        : undefined;
+      if (formattedErrorCandidate) {
         runResult.payloads = [
           {
-            text: isOverloaded
-              ? "⚠️ The AI service is temporarily overloaded. Please try again in a moment."
-              : "⚠️ API rate limit reached — the model couldn't generate a response. Please try again in a moment.",
+            text: formattedErrorCandidate,
             isError: true,
           },
         ];
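
For readers tracing the reworked `fallbackText` chain in the first hunk of this file, the same precedence restated as an if/else sketch (a readability aid under the names used in the diff, not the shipped code):

// Same decision order as the nested ternary above.
let fallbackText: string;
if (isBilling) {
  fallbackText = BILLING_ERROR_USER_MESSAGE;
} else if (isRateLimit && !isOverloadedErrorMessage(message)) {
  // Pure transient rate-limit exhaustion gets the cooldown message.
  fallbackText = buildRateLimitCooldownMessage(err);
} else if (rateLimitOrOverloadedCopy) {
  // Capacity / rate-limit / overloaded copy, including the model-switch hint.
  fallbackText = rateLimitOrOverloadedCopy;
} else if (isContextOverflow) {
  fallbackText = "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model.";
} else if (isRoleOrderingError) {
  fallbackText = "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session.";
} else if (shouldSurfaceToControlUi) {
  fallbackText = `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
} else {
  fallbackText = buildExternalRunFailureText(message);
}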