From 74211128983a427543f674785a06631bdfa02218 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 09:55:26 +0100
Subject: [PATCH] fix(agents): pass OpenAI SDK request timeouts

---
 CHANGELOG.md                               |   1 +
 src/agents/openai-transport-stream.test.ts | 189 +++++++++++++++++++++
 src/agents/openai-transport-stream.ts      |  46 ++++-
 src/agents/provider-transport-fetch.ts     |   2 +-
 4 files changed, 231 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dfe9aada861..897e2f65d3a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
 - Agents/LM Studio: promote standalone bracketed local-model tool requests into registered tool calls and hide unsupported bracket blocks from visible replies, so MemPalace MCP lookups do not print raw `[tool]` JSON scaffolding in chat. Fixes #66178. Thanks @detroit357.
 - Local models: warn when an assistant reply looks like a tool call but the provider emitted plain text instead of a structured tool invocation, making fake/non-executed tool calls visible in logs. Fixes #51332. Thanks @emilclaw.
 - Local models: classify terminated, reset, closed, timeout, and aborted model-call failures and attach a process memory snapshot to the diagnostic event, making LM Studio/Ollama RAM-pressure failures easier to prove from stability bundles. Refs #65551. Thanks @BigWiLLi111.
+- Local models: pass configured provider request timeouts through OpenAI SDK transports so long-running local or custom OpenAI-compatible streams are not capped by the SDK's 10-minute default. Fixes #63663. Thanks @aidiffuser.
 - LM Studio: trust configured LM Studio loopback, LAN, and tailnet endpoints for guarded model requests by default, preserving explicit private-network opt-outs. Refs #60994. Thanks @tnowakow.
 - Docker/setup: route Docker onboarding defaults for host-side LM Studio and Ollama through `host.docker.internal` and add the Linux host-gateway mapping to the bundled Compose file, so containerized gateways can reach local providers without using container loopback. Fixes #68684; supersedes #68702. Thanks @safrano9999 and @skolez.
 - Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub.
diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts
index 95eb8dc6cdc..e961c131352 100644
--- a/src/agents/openai-transport-stream.test.ts
+++ b/src/agents/openai-transport-stream.test.ts
@@ -1,8 +1,10 @@
+import { createServer } from "node:http";
 import type { Model } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
 import {
   buildOpenAIResponsesParams,
   buildOpenAICompletionsParams,
+  createOpenAICompletionsTransportStreamFn,
   parseTransportChunkUsage,
   resolveAzureOpenAIApiVersion,
   sanitizeTransportPayloadText,
@@ -345,6 +347,193 @@ describe("openai transport stream", () => {
     );
   });
 
+  it("passes provider request timeouts to OpenAI SDK clients", () => {
+    const context = { systemPrompt: "system", messages: [], tools: [] } as never;
+    const requestTimeoutMs = 900_000;
+
+    const responsesModel = {
+      id: "gpt-5.4",
+      name: "GPT-5.4",
+      api: "openai-responses",
+      provider: "custom-openai",
+      baseUrl: "https://api.example.com/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 200000,
+      maxTokens: 8192,
+      requestTimeoutMs,
+    } satisfies Model<"openai-responses"> & { requestTimeoutMs: number };
+    const azureModel = {
+      ...responsesModel,
+      api: "azure-openai-responses",
+      provider: "azure-openai",
+      baseUrl: "https://example.openai.azure.com/openai/deployments/gpt-5.4",
+    } satisfies Model<"azure-openai-responses"> & { requestTimeoutMs: number };
+    const completionsModel = {
+      ...responsesModel,
+      api: "openai-completions",
+      reasoning: false,
+    } satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
+
+    expect(
+      (
+        __testing.createOpenAIResponsesClient(responsesModel, context, "test-key") as {
+          timeout: number;
+        }
+      ).timeout,
+    ).toBe(requestTimeoutMs);
+    expect(
+      (__testing.createAzureOpenAIClient(azureModel, context, "test-key") as { timeout: number })
+        .timeout,
+    ).toBe(requestTimeoutMs);
+    expect(
+      (
+        __testing.createOpenAICompletionsClient(completionsModel, context, "test-key") as {
+          timeout: number;
+        }
+      ).timeout,
+    ).toBe(requestTimeoutMs);
+  });
+
+  it("passes provider request timeouts to OpenAI SDK per-request options", () => {
+    const signal = new AbortController().signal;
+    const model = {
+      id: "glm-5",
+      name: "GLM-5",
+      api: "openai-completions",
+      provider: "vllm",
+      baseUrl: "http://localhost:8000/v1",
+      reasoning: false,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128000,
+      maxTokens: 4096,
+      requestTimeoutMs: 900_000.7,
+    } satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
+
+    expect(__testing.buildOpenAISdkRequestOptions(model, signal)).toEqual({
+      signal,
+      timeout: 900_000,
+    });
+    expect(
+      __testing.buildOpenAISdkRequestOptions(
+        { ...model, requestTimeoutMs: -1 } as Model<"openai-completions">,
+        undefined,
+      ),
+    ).toBeUndefined();
+  });
+
+  it("streams OpenAI-compatible loopback requests with the configured SDK timeout", async () => {
+    let captured: { path?: string; timeout?: string; roles?: string[] } = {};
+    const server = createServer((req, res) => {
+      let body = "";
+      req.setEncoding("utf8");
+      req.on("data", (chunk) => {
+        body += chunk;
+      });
+      req.on("end", () => {
+        const parsed = JSON.parse(body) as { messages?: Array<{ role?: string }> };
+        captured = {
+          path: req.url,
+          timeout: Array.isArray(req.headers["x-stainless-timeout"])
+            ? req.headers["x-stainless-timeout"][0]
+            : req.headers["x-stainless-timeout"],
+          roles: parsed.messages?.map((message) => message.role ?? ""),
+        };
+        res.writeHead(200, {
+          "content-type": "text/event-stream; charset=utf-8",
+          "cache-control": "no-cache",
+          connection: "keep-alive",
+        });
+        const created = Math.floor(Date.now() / 1000);
+        res.write(
+          `data: ${JSON.stringify({
+            id: "chatcmpl-timeout-proof",
+            object: "chat.completion.chunk",
+            created,
+            model: "slow-local",
+            choices: [
+              {
+                index: 0,
+                delta: { role: "assistant", content: "OK" },
+                finish_reason: null,
+              },
+            ],
+          })}\n\n`,
+        );
+        res.write(
+          `data: ${JSON.stringify({
+            id: "chatcmpl-timeout-proof",
+            object: "chat.completion.chunk",
+            created,
+            model: "slow-local",
+            choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
+            usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+          })}\n\n`,
+        );
+        res.write("data: [DONE]\n\n");
+        res.end();
+      });
+    });
+
+    await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve));
+    try {
+      const address = server.address();
+      if (!address || typeof address === "string") {
+        throw new Error("Missing loopback server address");
+      }
+      const baseModel = {
+        id: "slow-local",
+        name: "Slow Local",
+        api: "openai-completions",
+        provider: "custom-openai-compatible",
+        baseUrl: `http://127.0.0.1:${address.port}/v1`,
+        reasoning: false,
+        input: ["text"],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 4096,
+        maxTokens: 256,
+        requestTimeoutMs: 900_000,
+      } satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
+      const model = attachModelProviderRequestTransport(baseModel, { allowPrivateNetwork: true });
+      const stream = createOpenAICompletionsTransportStreamFn()(
+        model,
+        {
+          systemPrompt: "system",
+          messages: [{ role: "user", content: "Reply OK", timestamp: Date.now() }],
+          tools: [],
+        } as never,
+        { apiKey: "test-key" } as never,
+      );
+
+      let doneReason: string | undefined;
+      let text = "";
+      for await (const event of stream as AsyncIterable<{
+        type: string;
+        delta?: string;
+        reason?: string;
+      }>) {
+        if (event.type === "text_delta") {
+          text += event.delta ?? "";
+        }
+        if (event.type === "done") {
+          doneReason = event.reason;
+        }
+      }
+
+      expect(captured.path).toBe("/v1/chat/completions");
+      expect(captured.timeout).toBe("900");
+      expect(captured.roles).toEqual(["system", "user"]);
+      expect(doneReason).toBe("stop");
+      expect(text).toBe("OK");
+    } finally {
+      await new Promise((resolve, reject) => {
+        server.close((error) => (error ? reject(error) : resolve()));
+      });
+    }
+  });
+
   it("does not double-count reasoning tokens and clamps uncached prompt usage at zero", () => {
     const model = {
       id: "gpt-5",
diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts
index b02e270a9af..67045555539 100644
--- a/src/agents/openai-transport-stream.ts
+++ b/src/agents/openai-transport-stream.ts
@@ -43,7 +43,10 @@ import {
   resolveOpenAIStrictToolFlagForInventory,
   resolveOpenAIStrictToolSetting,
 } from "./openai-tool-schema.js";
-import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
+import {
+  buildGuardedModelFetch,
+  resolveModelRequestTimeoutMs,
+} from "./provider-transport-fetch.js";
 import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
 import { transformTransportMessages } from "./transport-message-transform.js";
 import { mergeTransportMetadata, sanitizeTransportPayloadText } from "./transport-stream-shared.js";
@@ -665,6 +668,29 @@ function resolveProviderTransportTurnState(
   });
 }
 
+function resolveOpenAISdkTimeoutMs(model: Model): number | undefined {
+  return resolveModelRequestTimeoutMs(model, undefined);
+}
+
+function buildOpenAISdkClientOptions(model: Model): { timeout?: number } {
+  const timeout = resolveOpenAISdkTimeoutMs(model);
+  return timeout === undefined ? {} : { timeout };
+}
+
+function buildOpenAISdkRequestOptions(
+  model: Model,
+  signal?: AbortSignal,
+): { signal?: AbortSignal; timeout?: number } | undefined {
+  const timeout = resolveOpenAISdkTimeoutMs(model);
+  if (timeout === undefined && !signal) {
+    return undefined;
+  }
+  return {
+    ...(signal ? { signal } : {}),
+    ...(timeout !== undefined ? { timeout } : {}),
+  };
+}
+
 function createOpenAIResponsesClient(
   model: Model,
   context: Context,
@@ -678,6 +704,7 @@ function createOpenAIResponsesClient(
     dangerouslyAllowBrowser: true,
     defaultHeaders: buildOpenAIClientHeaders(model, context, optionHeaders, turnHeaders),
     fetch: buildGuardedModelFetch(model),
+    ...buildOpenAISdkClientOptions(model),
   });
 }
 
@@ -731,7 +758,7 @@ export function createOpenAIResponsesTransportStreamFn(): StreamFn {
       params = mergeTransportMetadata(params, turnState?.metadata);
       const responseStream = (await client.responses.create(
         params as never,
-        options?.signal ? { signal: options.signal } : undefined,
+        buildOpenAISdkRequestOptions(model, options?.signal),
       )) as unknown as AsyncIterable;
       stream.push({ type: "start", partial: output as never });
       await processResponsesStream(responseStream, output, stream, model, {
@@ -975,7 +1002,7 @@ export function createAzureOpenAIResponsesTransportStreamFn(): StreamFn {
       params = mergeTransportMetadata(params, turnState?.metadata);
       const responseStream = (await client.responses.create(
         params as never,
-        options?.signal ? { signal: options.signal } : undefined,
+        buildOpenAISdkRequestOptions(model, options?.signal),
       )) as unknown as AsyncIterable;
       stream.push({ type: "start", partial: output as never });
       await processResponsesStream(responseStream, output, stream, model);
@@ -1029,6 +1056,7 @@ function createAzureOpenAIClient(
     defaultHeaders: buildOpenAIClientHeaders(model, context, optionHeaders, turnHeaders),
     baseURL: normalizeAzureBaseUrl(model.baseUrl),
     fetch: buildGuardedModelFetch(model),
+    ...buildOpenAISdkClientOptions(model),
   });
 }
 
@@ -1067,6 +1095,7 @@ function createOpenAICompletionsClient(
     defaultHeaders: clientConfig.defaultHeaders,
     defaultQuery: clientConfig.defaultQuery,
     fetch: buildGuardedModelFetch(model),
+    ...buildOpenAISdkClientOptions(model),
   });
 }
 
@@ -1160,9 +1189,10 @@ export function createOpenAICompletionsTransportStreamFn(): StreamFn {
       if (nextParams !== undefined) {
         params = nextParams as typeof params;
       }
-      const responseStream = (await client.chat.completions.create(params as never, {
-        signal: options?.signal,
-      })) as unknown as AsyncIterable;
+      const responseStream = (await client.chat.completions.create(
+        params as never,
+        buildOpenAISdkRequestOptions(model, options?.signal),
+      )) as unknown as AsyncIterable;
       stream.push({ type: "start", partial: output as never });
       await processOpenAICompletionsStream(responseStream, output, model, stream);
       if (options?.signal?.aborted) {
@@ -1849,6 +1879,10 @@ function mapStopReason(reason: string | null) {
 }
 
 export const __testing = {
+  buildOpenAISdkRequestOptions,
+  createAzureOpenAIClient,
+  createOpenAICompletionsClient,
+  createOpenAIResponsesClient,
   buildOpenAICompletionsClientConfig,
   processOpenAICompletionsStream,
 };
diff --git a/src/agents/provider-transport-fetch.ts b/src/agents/provider-transport-fetch.ts
index 434c6411b6e..8e31f8718d0 100644
--- a/src/agents/provider-transport-fetch.ts
+++ b/src/agents/provider-transport-fetch.ts
@@ -154,7 +154,7 @@ function resolveModelRequestPolicy(model: Model) {
   });
 }
 
-function resolveModelRequestTimeoutMs(
+export function resolveModelRequestTimeoutMs(
   model: Model,
   timeoutMs: number | undefined,
 ): number | undefined {
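
Reviewer note, not part of the applied patch: the change relies on the OpenAI SDK accepting a `timeout` both at client construction and per request. Below is a minimal standalone sketch of that contract using the official `openai` npm package. `LocalModel` and `sdkTimeoutMs` are hypothetical stand-ins for this repo's `Model` type and `resolveModelRequestTimeoutMs()`; `sdkTimeoutMs` only mirrors the rounding and validation the tests above assert (900_000.7 -> 900_000, non-positive -> undefined).

import OpenAI from "openai";

// Hypothetical minimal model shape; only the fields used here are modeled.
interface LocalModel {
  baseUrl: string;
  requestTimeoutMs?: number;
}

// Stand-in for resolveModelRequestTimeoutMs(): floor the configured value
// and reject non-finite or non-positive timeouts, as the tests above expect.
function sdkTimeoutMs(model: LocalModel): number | undefined {
  const t = model.requestTimeoutMs;
  if (t === undefined || !Number.isFinite(t) || t <= 0) return undefined;
  return Math.floor(t);
}

async function streamOnce(model: LocalModel, signal?: AbortSignal): Promise<string> {
  const timeout = sdkTimeoutMs(model);
  // Client-level timeout (milliseconds) replaces the SDK's 10-minute default;
  // the loopback test above observes it on the wire as x-stainless-timeout: 900.
  const client = new OpenAI({
    apiKey: "test-key",
    baseURL: model.baseUrl,
    ...(timeout !== undefined ? { timeout } : {}),
  });
  // Per-request options carry the same timeout next to the abort signal,
  // matching the shape buildOpenAISdkRequestOptions() produces in the patch.
  const stream = await client.chat.completions.create(
    { model: "slow-local", stream: true, messages: [{ role: "user", content: "Reply OK" }] },
    { ...(signal ? { signal } : {}), ...(timeout !== undefined ? { timeout } : {}) },
  );
  let text = "";
  for await (const chunk of stream) {
    text += chunk.choices[0]?.delta?.content ?? "";
  }
  return text;
}

Threading the same resolved value through both the client constructor and the per-request options keeps the advertised header and the actual enforcement consistent, which is what the patch does via buildOpenAISdkClientOptions() and buildOpenAISdkRequestOptions().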