mirror of https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:30:42 +00:00
fix(agents): preserve Codex model capacity guidance
@@ -13,7 +13,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
-- Agents/OpenAI: surface selected-model capacity failures with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
+- Agents/OpenAI: surface selected-model capacity failures from PI, Codex, and auto-reply harness paths with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
 - Providers/OpenAI: stop advertising the removed `gpt-5.3-codex-spark` Codex model through fallback catalogs, and suppress stale rows with a GPT-5.5 recovery hint.
 - Plugins/QR: replace legacy `qrcode-terminal` QR rendering with bounded `qrcode-tui` helpers for plugin login/setup flows. (#65969) Thanks @vincentkoc.
 - Voice-call/realtime: wait for OpenAI session configuration before greeting or forwarding buffered audio, and reject non-allowlisted Twilio callers before stream setup. (#43501) Thanks @forrestblount.
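The model-switch hint is produced by one shared copy helper, `formatRateLimitOrOverloadedErrorCopy`. A minimal sketch of the contract the test mock further down encodes: raw provider error text in, user-facing copy out, `undefined` when nothing matches. The regexes and strings are lifted from that mock, so the shipped helper may differ in detail:

```ts
// Sketch of the copy-mapping contract (mirrors the test mock, not the real helper).
function formatRateLimitOrOverloadedErrorCopy(message: string): string | undefined {
  // Capacity errors get the model-switch hint instead of a generic failure.
  if (/model\s+(?:is\s+)?at capacity/i.test(message)) {
    return "⚠️ Selected model is at capacity. Try a different model, or wait and retry.";
  }
  // Plain rate limits keep the retry-later copy.
  if (/rate.limit|too many requests|429/i.test(message)) {
    return "⚠️ API rate limit reached. Please try again later.";
  }
  // Overload errors keep the transient-overload copy.
  if (/overloaded/i.test(message)) {
    return "The AI service is temporarily overloaded. Please try again in a moment.";
  }
  return undefined; // caller falls back to its generic error path
}
```

Callers treat `undefined` as "no special copy applies", which is what keeps unrecognized errors on the generic path.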
@@ -13,6 +13,7 @@ export {
  BILLING_ERROR_USER_MESSAGE,
  classifyProviderRuntimeFailureKind,
  formatBillingErrorMessage,
  formatRateLimitOrOverloadedErrorCopy,
  classifyFailoverReason,
  classifyFailoverReasonFromHttpStatus,
  formatRawAssistantErrorForUi,
@@ -53,6 +53,7 @@ import type { FailoverReason } from "./types.js";
export {
  BILLING_ERROR_USER_MESSAGE,
  formatBillingErrorMessage,
  formatRateLimitOrOverloadedErrorCopy,
  getApiErrorPayloadFingerprint,
  isRawApiErrorPayload,
  sanitizeUserFacingText,
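Re-exporting the helper from both barrels keeps the copy identical across the PI, Codex, and auto-reply call sites. A hypothetical consumer, with the import path and sample message assumed for illustration:

```ts
// Hypothetical call site; the relative import path is an assumption.
import { formatRateLimitOrOverloadedErrorCopy } from "./pi-embedded-helpers.js";

const raw = "Selected model is at capacity. Please try a different model.";
// Use the mapped hint when one exists; otherwise surface the raw text.
const userFacing = formatRateLimitOrOverloadedErrorCopy(raw) ?? raw;
```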
@@ -49,10 +49,23 @@ vi.mock("../../agents/bootstrap-budget.js", () => ({

vi.mock("../../agents/pi-embedded-helpers.js", () => ({
  BILLING_ERROR_USER_MESSAGE: "billing",
  formatRateLimitOrOverloadedErrorCopy: (message: string) => {
    if (/model\s+(?:is\s+)?at capacity/i.test(message)) {
      return "⚠️ Selected model is at capacity. Try a different model, or wait and retry.";
    }
    if (/rate.limit|too many requests|429/i.test(message)) {
      return "⚠️ API rate limit reached. Please try again later.";
    }
    if (/overloaded/i.test(message)) {
      return "The AI service is temporarily overloaded. Please try again in a moment.";
    }
    return undefined;
  },
  isCompactionFailureError: () => false,
  isContextOverflowError: () => false,
  isBillingErrorMessage: () => false,
  isLikelyContextOverflowError: () => false,
  isOverloadedErrorMessage: (message: string) => /overloaded|capacity/i.test(message),
  isRateLimitErrorMessage: () => false,
  isTransientHttpError: () => false,
  sanitizeUserFacingText: (text?: string) => text ?? "",
@@ -410,6 +423,95 @@ describe("runAgentTurnWithFallback", () => {
    expect(onToolResult.mock.calls[0]?.[0]?.text).toBeUndefined();
  });

  it("surfaces model capacity errors from no-text mid-turn failures", async () => {
    state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
      payloads: [{ text: "thinking", isReasoning: true }],
      meta: {
        error: {
          kind: "server_overloaded",
          message: "Selected model is at capacity. Please try a different model.",
        },
      },
    });

    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
    const result = await runAgentTurnWithFallback({
      commandBody: "hello",
      followupRun: createFollowupRun(),
      sessionCtx: {
        Provider: "whatsapp",
        MessageSid: "msg",
      } as unknown as TemplateContext,
      opts: {},
      typingSignals: createMockTypingSignaler(),
      blockReplyPipeline: null,
      blockStreamingEnabled: false,
      resolvedBlockStreamingBreak: "message_end",
      applyReplyToMode: (payload) => payload,
      shouldEmitToolResult: () => true,
      shouldEmitToolOutput: () => false,
      pendingToolTasks: new Set(),
      resetSessionAfterCompactionFailure: async () => false,
      resetSessionAfterRoleOrderingConflict: async () => false,
      isHeartbeat: false,
      sessionKey: "main",
      getActiveSessionEntry: () => undefined,
      resolvedVerboseLevel: "off",
    });

    expect(result.kind).toBe("success");
    if (result.kind === "success") {
      expect(result.runResult.payloads).toEqual([
        {
          text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.",
          isError: true,
        },
      ]);
    }
  });

  it("surfaces model capacity errors from pre-reply CLI failures", async () => {
    state.runWithModelFallbackMock.mockRejectedValueOnce(
      new Error("Selected model is at capacity. Please try a different model."),
    );

    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
    const followupRun = createFollowupRun();
    followupRun.run.provider = "openai-codex";
    followupRun.run.model = "gpt-5.5";

    const result = await runAgentTurnWithFallback({
      commandBody: "hello",
      followupRun,
      sessionCtx: {
        Provider: "whatsapp",
        MessageSid: "msg",
      } as unknown as TemplateContext,
      opts: {},
      typingSignals: createMockTypingSignaler(),
      blockReplyPipeline: null,
      blockStreamingEnabled: false,
      resolvedBlockStreamingBreak: "message_end",
      applyReplyToMode: (payload) => payload,
      shouldEmitToolResult: () => true,
      shouldEmitToolOutput: () => false,
      pendingToolTasks: new Set(),
      resetSessionAfterCompactionFailure: async () => false,
      resetSessionAfterRoleOrderingConflict: async () => false,
      isHeartbeat: false,
      sessionKey: "main",
      getActiveSessionEntry: () => undefined,
      resolvedVerboseLevel: "off",
    });

    expect(result).toEqual({
      kind: "final",
      payload: {
        text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.",
      },
    });
  });

  it("strips a glued leading NO_REPLY token from streamed tool results", async () => {
    const onToolResult = vi.fn();
    state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
@@ -16,6 +16,7 @@ import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model
import { isCliProvider } from "../../agents/model-selection.js";
import {
  BILLING_ERROR_USER_MESSAGE,
  formatRateLimitOrOverloadedErrorCopy,
  isCompactionFailureError,
  isContextOverflowError,
  isBillingErrorMessage,
@@ -1516,24 +1517,34 @@ export async function runAgentTurnWithFallback(params: {
     // underlying error. FallbackSummaryError messages embed per-attempt
     // reason labels like `(rate_limit)`, so string-matching the summary text
     // would misclassify mixed-cause exhaustion as a pure transient cooldown.
-    const isRateLimit = isFallbackSummaryError(err)
+    const isFallbackSummary = isFallbackSummaryError(err);
+    const isPureTransientSummary = isFallbackSummary
       ? isPureTransientRateLimitSummary(err)
+      : false;
+    const isRateLimit = isFallbackSummary
+      ? isPureTransientSummary
       : isRateLimitErrorMessage(message);
+    const rateLimitOrOverloadedCopy =
+      !isFallbackSummary || isPureTransientSummary
+        ? formatRateLimitOrOverloadedErrorCopy(message)
+        : undefined;
     const safeMessage = isTransientHttp
       ? sanitizeUserFacingText(message, { errorContext: true })
       : message;
     const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
     const fallbackText = isBilling
       ? BILLING_ERROR_USER_MESSAGE
-      : isRateLimit
+      : isRateLimit && !isOverloadedErrorMessage(message)
         ? buildRateLimitCooldownMessage(err)
-        : isContextOverflow
-          ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-          : isRoleOrderingError
-            ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
-            : shouldSurfaceToControlUi
-              ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
-              : buildExternalRunFailureText(message);
+        : rateLimitOrOverloadedCopy
+          ? rateLimitOrOverloadedCopy
+          : isContextOverflow
+            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+            : isRoleOrderingError
+              ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+              : shouldSurfaceToControlUi
+                ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
+                : buildExternalRunFailureText(message);

     params.replyOperation?.fail("run_failed", err);
     return {
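Unrolled into straight-line logic, the reworked `fallbackText` selection applies the precedence below. This is a reading aid that assumes the helpers behave as their names suggest; it is not the shipped implementation:

```ts
// Sketch: the new fallbackText precedence as an if/else chain (assumed helper behavior).
type FallbackInputs = {
  isBilling: boolean;
  isRateLimit: boolean;
  isContextOverflow: boolean;
  isRoleOrderingError: boolean;
  shouldSurfaceToControlUi: boolean;
  message: string;
  trimmedMessage: string;
  rateLimitOrOverloadedCopy: string | undefined; // formatRateLimitOrOverloadedErrorCopy(message)
  isOverloadedErrorMessage: (m: string) => boolean;
  buildRateLimitCooldownMessage: () => string;
  buildExternalRunFailureText: (m: string) => string;
  billingErrorUserMessage: string;
};

function pickFallbackText(i: FallbackInputs): string {
  if (i.isBilling) return i.billingErrorUserMessage;
  // Cooldown copy is now reserved for genuine rate limits; overload and
  // capacity messages fall through to the shared copy helper instead.
  if (i.isRateLimit && !i.isOverloadedErrorMessage(i.message)) {
    return i.buildRateLimitCooldownMessage();
  }
  // New branch: capacity/overload/rate-limit copy wins over the generic paths.
  if (i.rateLimitOrOverloadedCopy) return i.rateLimitOrOverloadedCopy;
  if (i.isContextOverflow) {
    return "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model.";
  }
  if (i.isRoleOrderingError) {
    return "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session.";
  }
  if (i.shouldSurfaceToControlUi) {
    return `⚠️ Agent failed before reply: ${i.trimmedMessage}.\nLogs: openclaw logs --follow`;
  }
  return i.buildExternalRunFailureText(i.message);
}
```

Note that `rateLimitOrOverloadedCopy` is only computed when the failure is not a mixed-cause fallback summary, which is what stops a multi-model exhaustion report from being misread as a single transient error.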
@@ -1590,16 +1601,13 @@ export async function runAgentTurnWithFallback(params: {
       (p) => p.isError && hasNonEmptyString(p.text) && !p.text.startsWith("⚠️"),
     )?.text ?? "";
   const errorCandidate = metaErrorMsg || rawErrorPayloadText;
-  if (
-    errorCandidate &&
-    (isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
-  ) {
-    const isOverloaded = isOverloadedErrorMessage(errorCandidate);
+  const formattedErrorCandidate = errorCandidate
+    ? formatRateLimitOrOverloadedErrorCopy(errorCandidate)
+    : undefined;
+  if (formattedErrorCandidate) {
     runResult.payloads = [
       {
-        text: isOverloaded
-          ? "⚠️ The AI service is temporarily overloaded. Please try again in a moment."
-          : "⚠️ API rate limit reached — the model couldn't generate a response. Please try again in a moment.",
+        text: formattedErrorCandidate,
         isError: true,
       },
     ];
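The net effect of this last hunk: when a run ends without usable text but the meta error (or a raw error payload) is recognized by the copy helper, the payloads are replaced with the mapped message; unrecognized errors now leave the payloads untouched instead of being forced into the old two-way rate-limit/overload copy. A simplified sketch with assumed payload shapes:

```ts
// Simplified payload shape for illustration; the real RunResult type is richer.
type Payload = { text?: string; isError?: boolean; isReasoning?: boolean };

function replaceWithMappedCopy(
  payloads: Payload[],
  errorCandidate: string,
  format: (m: string) => string | undefined,
): Payload[] {
  const copy = format(errorCandidate);
  // Only swap the payloads when the helper recognized the error text.
  return copy ? [{ text: copy, isError: true }] : payloads;
}
```

Fed the capacity message from the tests, this turns a reasoning-only result into a single `isError` payload carrying the model-switch hint, which is exactly what the first new test asserts.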