mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:40:44 +00:00
fix(agents): fail fast on terminal provider 429s
This commit is contained in:
@@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Agents/failover: forward embedded run abort signals into provider-owned model streams, cap implicit LLM idle watchdogs below long run timeouts, and mark 429 responses without usable retry timing as non-retryable so GitHub Copilot rate limits fail over or surface promptly instead of hanging until run timeout. Fixes #71120.
|
||||
- Browser/tool: keep explicit AI snapshots from inheriting the efficient role-snapshot default and preserve numeric Playwright AI refs, so `--format ai` remains a real AI snapshot path. Fixes #62550. Thanks @ly85206559.
|
||||
- Gateway/config: keep in-process config patch reload comparisons on the resolved source snapshot when `${VAR}` env refs are restored on disk, avoiding false full gateway restarts for unchanged gateway/plugin secrets. Fixes #71208. Thanks @robbiethompson18.
|
||||
- Slack/messages: serialize write-client requests and whole outbound sends per target so rapid multi-message Slack replies preserve send order. Fixes #69101. (#69105) Thanks @nightq and @ztexydt-cqh.
|
||||
|
||||
@@ -50,13 +50,22 @@ describe("resolveLlmIdleTimeoutMs", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("falls back to agents.defaults.timeoutSeconds when llm.idleTimeoutSeconds is not set", () => {
|
||||
it("caps agents.defaults.timeoutSeconds fallback at the default idle watchdog", () => {
|
||||
const cfg = { agents: { defaults: { timeoutSeconds: 300 } } } as OpenClawConfig;
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(300_000);
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("uses an explicit run timeout override when llm.idleTimeoutSeconds is not set", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ runTimeoutMs: 900_000 })).toBe(900_000);
|
||||
it("uses agents.defaults.timeoutSeconds when it is shorter than the default idle watchdog", () => {
|
||||
const cfg = { agents: { defaults: { timeoutSeconds: 30 } } } as OpenClawConfig;
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(30_000);
|
||||
});
|
||||
|
||||
it("caps an explicit run timeout override at the default idle watchdog", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ runTimeoutMs: 900_000 })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("uses an explicit run timeout override when shorter than the default idle watchdog", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ runTimeoutMs: 30_000 })).toBe(30_000);
|
||||
});
|
||||
|
||||
it("disables the idle watchdog when an explicit run timeout disables timeouts", () => {
|
||||
@@ -91,9 +100,9 @@ describe("resolveLlmIdleTimeoutMs", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(0);
|
||||
});
|
||||
|
||||
it("uses agents.defaults.timeoutSeconds for cron before disabling the default idle timeout", () => {
|
||||
it("caps agents.defaults.timeoutSeconds for cron before disabling the default idle timeout", () => {
|
||||
const cfg = { agents: { defaults: { timeoutSeconds: 300 } } } as OpenClawConfig;
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(300_000);
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("keeps an explicit cron idle timeout when configured", () => {
|
||||
|
||||
@@ -25,6 +25,8 @@ export function resolveLlmIdleTimeoutMs(params?: {
|
||||
runTimeoutMs?: number;
|
||||
}): number {
|
||||
const clampTimeoutMs = (valueMs: number) => Math.min(Math.floor(valueMs), MAX_SAFE_TIMEOUT_MS);
|
||||
const clampImplicitTimeoutMs = (valueMs: number) =>
|
||||
clampTimeoutMs(Math.min(valueMs, DEFAULT_LLM_IDLE_TIMEOUT_MS));
|
||||
const raw = params?.cfg?.agents?.defaults?.llm?.idleTimeoutSeconds;
|
||||
// 0 means explicitly disabled (no timeout).
|
||||
if (raw === 0) {
|
||||
@@ -39,7 +41,7 @@ export function resolveLlmIdleTimeoutMs(params?: {
|
||||
if (runTimeoutMs >= MAX_SAFE_TIMEOUT_MS) {
|
||||
return 0;
|
||||
}
|
||||
return clampTimeoutMs(runTimeoutMs);
|
||||
return clampImplicitTimeoutMs(runTimeoutMs);
|
||||
}
|
||||
|
||||
const agentTimeoutSeconds = params?.cfg?.agents?.defaults?.timeoutSeconds;
|
||||
@@ -48,7 +50,7 @@ export function resolveLlmIdleTimeoutMs(params?: {
|
||||
Number.isFinite(agentTimeoutSeconds) &&
|
||||
agentTimeoutSeconds > 0
|
||||
) {
|
||||
return clampTimeoutMs(agentTimeoutSeconds * 1000);
|
||||
return clampImplicitTimeoutMs(agentTimeoutSeconds * 1000);
|
||||
}
|
||||
|
||||
if (params?.trigger === "cron") {
|
||||
|
||||
@@ -153,4 +153,54 @@ describe("resolveEmbeddedAgentStreamFn", () => {
|
||||
expect(authStorage.getApiKey).not.toHaveBeenCalled();
|
||||
expect(providerStreamFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("forwards the run abort signal into provider-owned stream functions", async () => {
|
||||
const providerStreamFn = vi.fn(async (_model, _context, options) => options);
|
||||
const signal = new AbortController().signal;
|
||||
const streamFn = resolveEmbeddedAgentStreamFn({
|
||||
currentStreamFn: undefined,
|
||||
providerStreamFn,
|
||||
shouldUseWebSocketTransport: false,
|
||||
sessionId: "session-1",
|
||||
signal,
|
||||
model: {
|
||||
api: "openai-responses",
|
||||
provider: "github-copilot",
|
||||
id: "gpt-5.4",
|
||||
} as never,
|
||||
resolvedApiKey: "resolved-key",
|
||||
});
|
||||
|
||||
await expect(
|
||||
streamFn({ provider: "github-copilot", id: "gpt-5.4" } as never, {} as never, {}),
|
||||
).resolves.toMatchObject({
|
||||
signal,
|
||||
});
|
||||
});
|
||||
|
||||
it("does not overwrite an explicit provider-owned stream signal", async () => {
|
||||
const providerStreamFn = vi.fn(async (_model, _context, options) => options);
|
||||
const runSignal = new AbortController().signal;
|
||||
const explicitSignal = new AbortController().signal;
|
||||
const streamFn = resolveEmbeddedAgentStreamFn({
|
||||
currentStreamFn: undefined,
|
||||
providerStreamFn,
|
||||
shouldUseWebSocketTransport: false,
|
||||
sessionId: "session-1",
|
||||
signal: runSignal,
|
||||
model: {
|
||||
api: "openai-responses",
|
||||
provider: "github-copilot",
|
||||
id: "gpt-5.4",
|
||||
} as never,
|
||||
});
|
||||
|
||||
await expect(
|
||||
streamFn({ provider: "github-copilot", id: "gpt-5.4" } as never, {} as never, {
|
||||
signal: explicitSignal,
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
signal: explicitSignal,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -81,6 +81,10 @@ export function resolveEmbeddedAgentStreamFn(params: {
|
||||
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
|
||||
}
|
||||
: context;
|
||||
const mergeRunSignal = (options: Parameters<StreamFn>[2]) => {
|
||||
const signal = options?.signal ?? params.signal;
|
||||
return signal ? { ...options, signal } : options;
|
||||
};
|
||||
// Provider-owned transports bypass pi-coding-agent's default auth lookup,
|
||||
// so keep injecting the resolved runtime apiKey for streamSimple-compatible
|
||||
// transports that still read credentials from options.apiKey.
|
||||
@@ -93,12 +97,12 @@ export function resolveEmbeddedAgentStreamFn(params: {
|
||||
authStorage,
|
||||
});
|
||||
return inner(m, normalizeContext(context), {
|
||||
...options,
|
||||
...mergeRunSignal(options),
|
||||
apiKey: apiKey ?? options?.apiKey,
|
||||
});
|
||||
};
|
||||
}
|
||||
return (m, context, options) => inner(m, normalizeContext(context), options);
|
||||
return (m, context, options) => inner(m, normalizeContext(context), mergeRunSignal(options));
|
||||
}
|
||||
|
||||
const currentStreamFn = params.currentStreamFn ?? streamSimple;
|
||||
|
||||
@@ -213,6 +213,46 @@ describe("buildGuardedModelFetch", () => {
|
||||
expect(response.headers.get("x-should-retry")).toBe("false");
|
||||
});
|
||||
|
||||
it("injects x-should-retry:false for terminal 429 responses without retry-after", async () => {
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
response: new Response("Sorry, you've exceeded your weekly rate limit.", {
|
||||
status: 429,
|
||||
headers: { "content-type": "text/plain; charset=utf-8" },
|
||||
}),
|
||||
finalUrl: "https://api.individual.githubcopilot.com/responses",
|
||||
release: vi.fn(async () => undefined),
|
||||
});
|
||||
|
||||
const { buildGuardedModelFetch } = await import("./provider-transport-fetch.js");
|
||||
const response = await buildGuardedModelFetch(openaiModel)(
|
||||
"https://api.individual.githubcopilot.com/responses",
|
||||
{ method: "POST" },
|
||||
);
|
||||
|
||||
expect(response.status).toBe(429);
|
||||
expect(response.headers.get("x-should-retry")).toBe("false");
|
||||
await expect(response.text()).resolves.toContain("weekly rate limit");
|
||||
});
|
||||
|
||||
it("keeps short retry-after 429 responses retryable", async () => {
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
response: new Response(null, {
|
||||
status: 429,
|
||||
headers: { "retry-after": "30" },
|
||||
}),
|
||||
finalUrl: "https://api.anthropic.com/v1/messages",
|
||||
release: vi.fn(async () => undefined),
|
||||
});
|
||||
|
||||
const { buildGuardedModelFetch } = await import("./provider-transport-fetch.js");
|
||||
const response = await buildGuardedModelFetch(anthropicModel)(
|
||||
"https://api.anthropic.com/v1/messages",
|
||||
{ method: "POST" },
|
||||
);
|
||||
|
||||
expect(response.headers.get("x-should-retry")).toBeNull();
|
||||
});
|
||||
|
||||
it("can be disabled with OPENCLAW_SDK_RETRY_MAX_WAIT_SECONDS=0", async () => {
|
||||
process.env.OPENCLAW_SDK_RETRY_MAX_WAIT_SECONDS = "0";
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
@@ -252,7 +292,7 @@ describe("buildGuardedModelFetch", () => {
|
||||
expect(response.headers.get("x-should-retry")).toBeNull();
|
||||
});
|
||||
|
||||
it("ignores malformed retry-after values", async () => {
|
||||
it("treats malformed 429 retry-after values as terminal", async () => {
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
response: new Response(null, {
|
||||
status: 429,
|
||||
@@ -268,7 +308,7 @@ describe("buildGuardedModelFetch", () => {
|
||||
{ method: "POST" },
|
||||
);
|
||||
|
||||
expect(response.headers.get("x-should-retry")).toBeNull();
|
||||
expect(response.headers.get("x-should-retry")).toBe("false");
|
||||
});
|
||||
|
||||
it("ignores retry-after on non-retryable responses", async () => {
|
||||
|
||||
@@ -68,7 +68,11 @@ function shouldBypassLongSdkRetry(response: Response): boolean {
|
||||
}
|
||||
|
||||
const retryAfterSeconds = parseRetryAfterSeconds(response.headers);
|
||||
return retryAfterSeconds !== undefined && retryAfterSeconds > maxWaitSeconds;
|
||||
if (retryAfterSeconds !== undefined) {
|
||||
return retryAfterSeconds > maxWaitSeconds;
|
||||
}
|
||||
|
||||
return status === 429;
|
||||
}
|
||||
|
||||
function buildManagedResponse(response: Response, release: () => Promise<void>): Response {
|
||||
|
||||
Reference in New Issue
Block a user