fix(agents): pass OpenAI SDK request timeouts

Peter Steinberger
2026-04-27 09:55:26 +01:00
parent cb45f16330
commit 7421112898
4 changed files with 231 additions and 7 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
- Agents/LM Studio: promote standalone bracketed local-model tool requests into registered tool calls and hide unsupported bracket blocks from visible replies, so MemPalace MCP lookups do not print raw `[tool]` JSON scaffolding in chat. Fixes #66178. Thanks @detroit357.
- Local models: warn when an assistant reply looks like a tool call but the provider emitted plain text instead of a structured tool invocation, making fake/non-executed tool calls visible in logs. Fixes #51332. Thanks @emilclaw.
- Local models: classify terminated, reset, closed, timeout, and aborted model-call failures and attach a process memory snapshot to the diagnostic event, making LM Studio/Ollama RAM-pressure failures easier to prove from stability bundles. Refs #65551. Thanks @BigWiLLi111.
- Local models: pass configured provider request timeouts through OpenAI SDK transports so long-running local or custom OpenAI-compatible streams are not capped by the SDK's 10-minute default (a usage sketch follows this list). Fixes #63663. Thanks @aidiffuser.
- LM Studio: trust configured LM Studio loopback, LAN, and tailnet endpoints for guarded model requests by default, preserving explicit private-network opt-outs. Refs #60994. Thanks @tnowakow.
- Docker/setup: route Docker onboarding defaults for host-side LM Studio and Ollama through `host.docker.internal` and add the Linux host-gateway mapping to the bundled Compose file, so containerized gateways can reach local providers without using container loopback. Fixes #68684; supersedes #68702. Thanks @safrano9999 and @skolez.
- Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub.
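
The timeout entry above leans on options the official `openai` Node SDK already exposes: a `timeout` field (in milliseconds) on the client constructor and the same field in per-request options, with the SDK defaulting to 10 minutes when neither is set. A minimal, repository-independent sketch; the base URL, model id, API key fallback, and the 900_000 ms value are illustrative:

import OpenAI from "openai";

// Illustrative values; 900_000 ms (15 minutes) mirrors the timeout exercised in the tests below.
const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY ?? "test-key",
  baseURL: "http://localhost:8000/v1", // any OpenAI-compatible endpoint
  timeout: 900_000, // client-wide cap in milliseconds; the SDK default is 600_000 (10 minutes)
});

// The same knob exists per request, alongside an abort signal.
const stream = await client.chat.completions.create(
  { model: "slow-local", stream: true, messages: [{ role: "user", content: "Reply OK" }] },
  { timeout: 900_000, signal: new AbortController().signal },
);
for await (const chunk of stream) {
  process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
}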

View File

@@ -1,8 +1,10 @@
import { createServer } from "node:http";
import type { Model } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import {
buildOpenAIResponsesParams,
buildOpenAICompletionsParams,
createOpenAICompletionsTransportStreamFn,
parseTransportChunkUsage,
resolveAzureOpenAIApiVersion,
sanitizeTransportPayloadText,
@@ -345,6 +347,193 @@ describe("openai transport stream", () => {
);
});
it("passes provider request timeouts to OpenAI SDK clients", () => {
const context = { systemPrompt: "system", messages: [], tools: [] } as never;
const requestTimeoutMs = 900_000;
const responsesModel = {
id: "gpt-5.4",
name: "GPT-5.4",
api: "openai-responses",
provider: "custom-openai",
baseUrl: "https://api.example.com/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 8192,
requestTimeoutMs,
} satisfies Model<"openai-responses"> & { requestTimeoutMs: number };
const azureModel = {
...responsesModel,
api: "azure-openai-responses",
provider: "azure-openai",
baseUrl: "https://example.openai.azure.com/openai/deployments/gpt-5.4",
} satisfies Model<"azure-openai-responses"> & { requestTimeoutMs: number };
const completionsModel = {
...responsesModel,
api: "openai-completions",
reasoning: false,
} satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
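// All three client factories (OpenAI Responses, Azure OpenAI Responses, and OpenAI Completions) should expose the configured timeout on the SDK client they construct.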
expect(
(
__testing.createOpenAIResponsesClient(responsesModel, context, "test-key") as {
timeout: number;
}
).timeout,
).toBe(requestTimeoutMs);
expect(
(__testing.createAzureOpenAIClient(azureModel, context, "test-key") as { timeout: number })
.timeout,
).toBe(requestTimeoutMs);
expect(
(
__testing.createOpenAICompletionsClient(completionsModel, context, "test-key") as {
timeout: number;
}
).timeout,
).toBe(requestTimeoutMs);
});
it("passes provider request timeouts to OpenAI SDK per-request options", () => {
const signal = new AbortController().signal;
const model = {
id: "glm-5",
name: "GLM-5",
api: "openai-completions",
provider: "vllm",
baseUrl: "http://localhost:8000/v1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 4096,
requestTimeoutMs: 900_000.7,
} satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
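// Fractional millisecond values are floored before being handed to the SDK.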
expect(__testing.buildOpenAISdkRequestOptions(model, signal)).toEqual({
signal,
timeout: 900_000,
});
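// A non-positive timeout with no abort signal yields no per-request options at all.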
expect(
__testing.buildOpenAISdkRequestOptions(
{ ...model, requestTimeoutMs: -1 } as Model<"openai-completions">,
undefined,
),
).toBeUndefined();
});
it("streams OpenAI-compatible loopback requests with the configured SDK timeout", async () => {
let captured: { path?: string; timeout?: string; roles?: string[] } = {};
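// Minimal OpenAI-compatible loopback server: it records the request path, the SDK's
// x-stainless-timeout header, and the message roles, then streams two chat.completion.chunk
// events followed by [DONE].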
const server = createServer((req, res) => {
let body = "";
req.setEncoding("utf8");
req.on("data", (chunk) => {
body += chunk;
});
req.on("end", () => {
const parsed = JSON.parse(body) as { messages?: Array<{ role?: string }> };
captured = {
path: req.url,
timeout: Array.isArray(req.headers["x-stainless-timeout"])
? req.headers["x-stainless-timeout"][0]
: req.headers["x-stainless-timeout"],
roles: parsed.messages?.map((message) => message.role ?? ""),
};
res.writeHead(200, {
"content-type": "text/event-stream; charset=utf-8",
"cache-control": "no-cache",
connection: "keep-alive",
});
const created = Math.floor(Date.now() / 1000);
res.write(
`data: ${JSON.stringify({
id: "chatcmpl-timeout-proof",
object: "chat.completion.chunk",
created,
model: "slow-local",
choices: [
{
index: 0,
delta: { role: "assistant", content: "OK" },
finish_reason: null,
},
],
})}\n\n`,
);
res.write(
`data: ${JSON.stringify({
id: "chatcmpl-timeout-proof",
object: "chat.completion.chunk",
created,
model: "slow-local",
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
})}\n\n`,
);
res.write("data: [DONE]\n\n");
res.end();
});
});
await new Promise<void>((resolve) => server.listen(0, "127.0.0.1", resolve));
try {
const address = server.address();
if (!address || typeof address === "string") {
throw new Error("Missing loopback server address");
}
const baseModel = {
id: "slow-local",
name: "Slow Local",
api: "openai-completions",
provider: "custom-openai-compatible",
baseUrl: `http://127.0.0.1:${address.port}/v1`,
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 4096,
maxTokens: 256,
requestTimeoutMs: 900_000,
} satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
const model = attachModelProviderRequestTransport(baseModel, { allowPrivateNetwork: true });
const stream = createOpenAICompletionsTransportStreamFn()(
model,
{
systemPrompt: "system",
messages: [{ role: "user", content: "Reply OK", timestamp: Date.now() }],
tools: [],
} as never,
{ apiKey: "test-key" } as never,
);
let doneReason: string | undefined;
let text = "";
for await (const event of stream as AsyncIterable<{
type: string;
delta?: string;
reason?: string;
}>) {
if (event.type === "text_delta") {
text += event.delta ?? "";
}
if (event.type === "done") {
doneReason = event.reason;
}
}
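// The SDK forwards the per-request timeout as the x-stainless-timeout header in whole seconds,
// so the 900_000 ms configured above is expected to arrive as "900".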
expect(captured.path).toBe("/v1/chat/completions");
expect(captured.timeout).toBe("900");
expect(captured.roles).toEqual(["system", "user"]);
expect(doneReason).toBe("stop");
expect(text).toBe("OK");
} finally {
await new Promise<void>((resolve, reject) => {
server.close((error) => (error ? reject(error) : resolve()));
});
}
});
it("does not double-count reasoning tokens and clamps uncached prompt usage at zero", () => {
const model = {
id: "gpt-5",

View File

@@ -43,7 +43,10 @@ import {
resolveOpenAIStrictToolFlagForInventory,
resolveOpenAIStrictToolSetting,
} from "./openai-tool-schema.js";
-import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
+import {
+buildGuardedModelFetch,
+resolveModelRequestTimeoutMs,
+} from "./provider-transport-fetch.js";
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
import { transformTransportMessages } from "./transport-message-transform.js";
import { mergeTransportMetadata, sanitizeTransportPayloadText } from "./transport-stream-shared.js";
@@ -665,6 +668,29 @@ function resolveProviderTransportTurnState(
});
}
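// The helpers below centralize timeout handling: resolve the model's configured request
// timeout once, then spread it into the SDK client constructor options and into the
// per-request options handed to responses.create / chat.completions.create.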
function resolveOpenAISdkTimeoutMs(model: Model<Api>): number | undefined {
return resolveModelRequestTimeoutMs(model, undefined);
}
function buildOpenAISdkClientOptions(model: Model<Api>): { timeout?: number } {
const timeout = resolveOpenAISdkTimeoutMs(model);
return timeout === undefined ? {} : { timeout };
}
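// Per-request options bundle the abort signal with the timeout; returning undefined when
// neither is present keeps the previous call-site behavior for models without a timeout.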
function buildOpenAISdkRequestOptions(
model: Model<Api>,
signal?: AbortSignal,
): { signal?: AbortSignal; timeout?: number } | undefined {
const timeout = resolveOpenAISdkTimeoutMs(model);
if (timeout === undefined && !signal) {
return undefined;
}
return {
...(signal ? { signal } : {}),
...(timeout !== undefined ? { timeout } : {}),
};
}
function createOpenAIResponsesClient(
model: Model<Api>,
context: Context,
@@ -678,6 +704,7 @@ function createOpenAIResponsesClient(
dangerouslyAllowBrowser: true,
defaultHeaders: buildOpenAIClientHeaders(model, context, optionHeaders, turnHeaders),
fetch: buildGuardedModelFetch(model),
...buildOpenAISdkClientOptions(model),
});
}
@@ -731,7 +758,7 @@ export function createOpenAIResponsesTransportStreamFn(): StreamFn {
params = mergeTransportMetadata(params, turnState?.metadata);
const responseStream = (await client.responses.create(
params as never,
-options?.signal ? { signal: options.signal } : undefined,
+buildOpenAISdkRequestOptions(model, options?.signal),
)) as unknown as AsyncIterable<unknown>;
stream.push({ type: "start", partial: output as never });
await processResponsesStream(responseStream, output, stream, model, {
@@ -975,7 +1002,7 @@ export function createAzureOpenAIResponsesTransportStreamFn(): StreamFn {
params = mergeTransportMetadata(params, turnState?.metadata);
const responseStream = (await client.responses.create(
params as never,
-options?.signal ? { signal: options.signal } : undefined,
+buildOpenAISdkRequestOptions(model, options?.signal),
)) as unknown as AsyncIterable<unknown>;
stream.push({ type: "start", partial: output as never });
await processResponsesStream(responseStream, output, stream, model);
@@ -1029,6 +1056,7 @@ function createAzureOpenAIClient(
defaultHeaders: buildOpenAIClientHeaders(model, context, optionHeaders, turnHeaders),
baseURL: normalizeAzureBaseUrl(model.baseUrl),
fetch: buildGuardedModelFetch(model),
...buildOpenAISdkClientOptions(model),
});
}
@@ -1067,6 +1095,7 @@ function createOpenAICompletionsClient(
defaultHeaders: clientConfig.defaultHeaders,
defaultQuery: clientConfig.defaultQuery,
fetch: buildGuardedModelFetch(model),
...buildOpenAISdkClientOptions(model),
});
}
@@ -1160,9 +1189,10 @@ export function createOpenAICompletionsTransportStreamFn(): StreamFn {
if (nextParams !== undefined) {
params = nextParams as typeof params;
}
-const responseStream = (await client.chat.completions.create(params as never, {
-signal: options?.signal,
-})) as unknown as AsyncIterable<ChatCompletionChunk>;
+const responseStream = (await client.chat.completions.create(
+params as never,
+buildOpenAISdkRequestOptions(model, options?.signal),
+)) as unknown as AsyncIterable<ChatCompletionChunk>;
stream.push({ type: "start", partial: output as never });
await processOpenAICompletionsStream(responseStream, output, model, stream);
if (options?.signal?.aborted) {
@@ -1849,6 +1879,10 @@ function mapStopReason(reason: string | null) {
}
export const __testing = {
buildOpenAISdkRequestOptions,
createAzureOpenAIClient,
createOpenAICompletionsClient,
createOpenAIResponsesClient,
buildOpenAICompletionsClientConfig,
processOpenAICompletionsStream,
};

View File

@@ -154,7 +154,7 @@ function resolveModelRequestPolicy(model: Model<Api>) {
});
}
-function resolveModelRequestTimeoutMs(
+export function resolveModelRequestTimeoutMs(
model: Model<Api>,
timeoutMs: number | undefined,
): number | undefined {
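
The resolver's body is not part of this diff, but the tests above pin down its observable behavior: fractional milliseconds are floored and non-positive values disable the timeout. A hypothetical sketch consistent with those assertions (the name, the handling of the explicit timeoutMs argument, and the exact validation are assumptions, not the shipped implementation):

// Hypothetical sketch only; the real resolveModelRequestTimeoutMs lives in
// provider-transport-fetch.ts and its body is not shown in this diff.
function resolveModelRequestTimeoutMsSketch(
  model: { requestTimeoutMs?: number },
  timeoutMs: number | undefined,
): number | undefined {
  // Assumed precedence: an explicit per-call value wins over the model's configured value.
  const candidate = timeoutMs ?? model.requestTimeoutMs;
  if (candidate === undefined || !Number.isFinite(candidate) || candidate <= 0) {
    return undefined; // no usable timeout configured
  }
  return Math.floor(candidate); // 900_000.7 -> 900_000, matching the test above
}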