mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 14:40:43 +00:00
fix(agents): pass OpenAI SDK request timeouts
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/LM Studio: promote standalone bracketed local-model tool requests into registered tool calls and hide unsupported bracket blocks from visible replies, so MemPalace MCP lookups do not print raw `[tool]` JSON scaffolding in chat. Fixes #66178. Thanks @detroit357.
|
||||
- Local models: warn when an assistant reply looks like a tool call but the provider emitted plain text instead of a structured tool invocation, making fake/non-executed tool calls visible in logs. Fixes #51332. Thanks @emilclaw.
|
||||
- Local models: classify terminated, reset, closed, timeout, and aborted model-call failures and attach a process memory snapshot to the diagnostic event, making LM Studio/Ollama RAM-pressure failures easier to prove from stability bundles. Refs #65551. Thanks @BigWiLLi111.
|
||||
- Local models: pass configured provider request timeouts through OpenAI SDK transports so long-running local or custom OpenAI-compatible streams are not capped by the SDK's 10-minute default. Fixes #63663. Thanks @aidiffuser.
|
||||
- LM Studio: trust configured LM Studio loopback, LAN, and tailnet endpoints for guarded model requests by default, preserving explicit private-network opt-outs. Refs #60994. Thanks @tnowakow.
|
||||
- Docker/setup: route Docker onboarding defaults for host-side LM Studio and Ollama through `host.docker.internal` and add the Linux host-gateway mapping to the bundled Compose file, so containerized gateways can reach local providers without using container loopback. Fixes #68684; supersedes #68702. Thanks @safrano9999 and @skolez.
|
||||
- Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub.
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import { createServer } from "node:http";
|
||||
import type { Model } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildOpenAIResponsesParams,
|
||||
buildOpenAICompletionsParams,
|
||||
createOpenAICompletionsTransportStreamFn,
|
||||
parseTransportChunkUsage,
|
||||
resolveAzureOpenAIApiVersion,
|
||||
sanitizeTransportPayloadText,
|
||||
@@ -345,6 +347,193 @@ describe("openai transport stream", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("passes provider request timeouts to OpenAI SDK clients", () => {
|
||||
const context = { systemPrompt: "system", messages: [], tools: [] } as never;
|
||||
const requestTimeoutMs = 900_000;
|
||||
|
||||
const responsesModel = {
|
||||
id: "gpt-5.4",
|
||||
name: "GPT-5.4",
|
||||
api: "openai-responses",
|
||||
provider: "custom-openai",
|
||||
baseUrl: "https://api.example.com/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
requestTimeoutMs,
|
||||
} satisfies Model<"openai-responses"> & { requestTimeoutMs: number };
|
||||
const azureModel = {
|
||||
...responsesModel,
|
||||
api: "azure-openai-responses",
|
||||
provider: "azure-openai",
|
||||
baseUrl: "https://example.openai.azure.com/openai/deployments/gpt-5.4",
|
||||
} satisfies Model<"azure-openai-responses"> & { requestTimeoutMs: number };
|
||||
const completionsModel = {
|
||||
...responsesModel,
|
||||
api: "openai-completions",
|
||||
reasoning: false,
|
||||
} satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
|
||||
|
||||
expect(
|
||||
(
|
||||
__testing.createOpenAIResponsesClient(responsesModel, context, "test-key") as {
|
||||
timeout: number;
|
||||
}
|
||||
).timeout,
|
||||
).toBe(requestTimeoutMs);
|
||||
expect(
|
||||
(__testing.createAzureOpenAIClient(azureModel, context, "test-key") as { timeout: number })
|
||||
.timeout,
|
||||
).toBe(requestTimeoutMs);
|
||||
expect(
|
||||
(
|
||||
__testing.createOpenAICompletionsClient(completionsModel, context, "test-key") as {
|
||||
timeout: number;
|
||||
}
|
||||
).timeout,
|
||||
).toBe(requestTimeoutMs);
|
||||
});
|
||||
|
||||
it("passes provider request timeouts to OpenAI SDK per-request options", () => {
|
||||
const signal = new AbortController().signal;
|
||||
const model = {
|
||||
id: "glm-5",
|
||||
name: "GLM-5",
|
||||
api: "openai-completions",
|
||||
provider: "vllm",
|
||||
baseUrl: "http://localhost:8000/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
requestTimeoutMs: 900_000.7,
|
||||
} satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
|
||||
|
||||
expect(__testing.buildOpenAISdkRequestOptions(model, signal)).toEqual({
|
||||
signal,
|
||||
timeout: 900_000,
|
||||
});
|
||||
expect(
|
||||
__testing.buildOpenAISdkRequestOptions(
|
||||
{ ...model, requestTimeoutMs: -1 } as Model<"openai-completions">,
|
||||
undefined,
|
||||
),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it("streams OpenAI-compatible loopback requests with the configured SDK timeout", async () => {
|
||||
let captured: { path?: string; timeout?: string; roles?: string[] } = {};
|
||||
const server = createServer((req, res) => {
|
||||
let body = "";
|
||||
req.setEncoding("utf8");
|
||||
req.on("data", (chunk) => {
|
||||
body += chunk;
|
||||
});
|
||||
req.on("end", () => {
|
||||
const parsed = JSON.parse(body) as { messages?: Array<{ role?: string }> };
|
||||
captured = {
|
||||
path: req.url,
|
||||
timeout: Array.isArray(req.headers["x-stainless-timeout"])
|
||||
? req.headers["x-stainless-timeout"][0]
|
||||
: req.headers["x-stainless-timeout"],
|
||||
roles: parsed.messages?.map((message) => message.role ?? ""),
|
||||
};
|
||||
res.writeHead(200, {
|
||||
"content-type": "text/event-stream; charset=utf-8",
|
||||
"cache-control": "no-cache",
|
||||
connection: "keep-alive",
|
||||
});
|
||||
const created = Math.floor(Date.now() / 1000);
|
||||
res.write(
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl-timeout-proof",
|
||||
object: "chat.completion.chunk",
|
||||
created,
|
||||
model: "slow-local",
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: { role: "assistant", content: "OK" },
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
})}\n\n`,
|
||||
);
|
||||
res.write(
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl-timeout-proof",
|
||||
object: "chat.completion.chunk",
|
||||
created,
|
||||
model: "slow-local",
|
||||
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
||||
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
|
||||
})}\n\n`,
|
||||
);
|
||||
res.write("data: [DONE]\n\n");
|
||||
res.end();
|
||||
});
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => server.listen(0, "127.0.0.1", resolve));
|
||||
try {
|
||||
const address = server.address();
|
||||
if (!address || typeof address === "string") {
|
||||
throw new Error("Missing loopback server address");
|
||||
}
|
||||
const baseModel = {
|
||||
id: "slow-local",
|
||||
name: "Slow Local",
|
||||
api: "openai-completions",
|
||||
provider: "custom-openai-compatible",
|
||||
baseUrl: `http://127.0.0.1:${address.port}/v1`,
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 4096,
|
||||
maxTokens: 256,
|
||||
requestTimeoutMs: 900_000,
|
||||
} satisfies Model<"openai-completions"> & { requestTimeoutMs: number };
|
||||
const model = attachModelProviderRequestTransport(baseModel, { allowPrivateNetwork: true });
|
||||
const stream = createOpenAICompletionsTransportStreamFn()(
|
||||
model,
|
||||
{
|
||||
systemPrompt: "system",
|
||||
messages: [{ role: "user", content: "Reply OK", timestamp: Date.now() }],
|
||||
tools: [],
|
||||
} as never,
|
||||
{ apiKey: "test-key" } as never,
|
||||
);
|
||||
|
||||
let doneReason: string | undefined;
|
||||
let text = "";
|
||||
for await (const event of stream as AsyncIterable<{
|
||||
type: string;
|
||||
delta?: string;
|
||||
reason?: string;
|
||||
}>) {
|
||||
if (event.type === "text_delta") {
|
||||
text += event.delta ?? "";
|
||||
}
|
||||
if (event.type === "done") {
|
||||
doneReason = event.reason;
|
||||
}
|
||||
}
|
||||
|
||||
expect(captured.path).toBe("/v1/chat/completions");
|
||||
expect(captured.timeout).toBe("900");
|
||||
expect(captured.roles).toEqual(["system", "user"]);
|
||||
expect(doneReason).toBe("stop");
|
||||
expect(text).toBe("OK");
|
||||
} finally {
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
server.close((error) => (error ? reject(error) : resolve()));
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
it("does not double-count reasoning tokens and clamps uncached prompt usage at zero", () => {
|
||||
const model = {
|
||||
id: "gpt-5",
|
||||
|
||||
@@ -43,7 +43,10 @@ import {
|
||||
resolveOpenAIStrictToolFlagForInventory,
|
||||
resolveOpenAIStrictToolSetting,
|
||||
} from "./openai-tool-schema.js";
|
||||
import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
|
||||
import {
|
||||
buildGuardedModelFetch,
|
||||
resolveModelRequestTimeoutMs,
|
||||
} from "./provider-transport-fetch.js";
|
||||
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
|
||||
import { transformTransportMessages } from "./transport-message-transform.js";
|
||||
import { mergeTransportMetadata, sanitizeTransportPayloadText } from "./transport-stream-shared.js";
|
||||
@@ -665,6 +668,29 @@ function resolveProviderTransportTurnState(
|
||||
});
|
||||
}
|
||||
|
||||
// Resolves the request timeout (in milliseconds) to apply to OpenAI SDK
// clients for this model, or undefined when none is configured. Delegates to
// the shared provider-transport resolver with no fallback override.
function resolveOpenAISdkTimeoutMs(model: Model<Api>): number | undefined {
  return resolveModelRequestTimeoutMs(model, undefined);
}
|
||||
|
||||
function buildOpenAISdkClientOptions(model: Model<Api>): { timeout?: number } {
|
||||
const timeout = resolveOpenAISdkTimeoutMs(model);
|
||||
return timeout === undefined ? {} : { timeout };
|
||||
}
|
||||
|
||||
function buildOpenAISdkRequestOptions(
|
||||
model: Model<Api>,
|
||||
signal?: AbortSignal,
|
||||
): { signal?: AbortSignal; timeout?: number } | undefined {
|
||||
const timeout = resolveOpenAISdkTimeoutMs(model);
|
||||
if (timeout === undefined && !signal) {
|
||||
return undefined;
|
||||
}
|
||||
return {
|
||||
...(signal ? { signal } : {}),
|
||||
...(timeout !== undefined ? { timeout } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
function createOpenAIResponsesClient(
|
||||
model: Model<Api>,
|
||||
context: Context,
|
||||
@@ -678,6 +704,7 @@ function createOpenAIResponsesClient(
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: buildOpenAIClientHeaders(model, context, optionHeaders, turnHeaders),
|
||||
fetch: buildGuardedModelFetch(model),
|
||||
...buildOpenAISdkClientOptions(model),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -731,7 +758,7 @@ export function createOpenAIResponsesTransportStreamFn(): StreamFn {
|
||||
params = mergeTransportMetadata(params, turnState?.metadata);
|
||||
const responseStream = (await client.responses.create(
|
||||
params as never,
|
||||
options?.signal ? { signal: options.signal } : undefined,
|
||||
buildOpenAISdkRequestOptions(model, options?.signal),
|
||||
)) as unknown as AsyncIterable<unknown>;
|
||||
stream.push({ type: "start", partial: output as never });
|
||||
await processResponsesStream(responseStream, output, stream, model, {
|
||||
@@ -975,7 +1002,7 @@ export function createAzureOpenAIResponsesTransportStreamFn(): StreamFn {
|
||||
params = mergeTransportMetadata(params, turnState?.metadata);
|
||||
const responseStream = (await client.responses.create(
|
||||
params as never,
|
||||
options?.signal ? { signal: options.signal } : undefined,
|
||||
buildOpenAISdkRequestOptions(model, options?.signal),
|
||||
)) as unknown as AsyncIterable<unknown>;
|
||||
stream.push({ type: "start", partial: output as never });
|
||||
await processResponsesStream(responseStream, output, stream, model);
|
||||
@@ -1029,6 +1056,7 @@ function createAzureOpenAIClient(
|
||||
defaultHeaders: buildOpenAIClientHeaders(model, context, optionHeaders, turnHeaders),
|
||||
baseURL: normalizeAzureBaseUrl(model.baseUrl),
|
||||
fetch: buildGuardedModelFetch(model),
|
||||
...buildOpenAISdkClientOptions(model),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1067,6 +1095,7 @@ function createOpenAICompletionsClient(
|
||||
defaultHeaders: clientConfig.defaultHeaders,
|
||||
defaultQuery: clientConfig.defaultQuery,
|
||||
fetch: buildGuardedModelFetch(model),
|
||||
...buildOpenAISdkClientOptions(model),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1160,9 +1189,10 @@ export function createOpenAICompletionsTransportStreamFn(): StreamFn {
|
||||
if (nextParams !== undefined) {
|
||||
params = nextParams as typeof params;
|
||||
}
|
||||
const responseStream = (await client.chat.completions.create(params as never, {
|
||||
signal: options?.signal,
|
||||
})) as unknown as AsyncIterable<ChatCompletionChunk>;
|
||||
const responseStream = (await client.chat.completions.create(
|
||||
params as never,
|
||||
buildOpenAISdkRequestOptions(model, options?.signal),
|
||||
)) as unknown as AsyncIterable<ChatCompletionChunk>;
|
||||
stream.push({ type: "start", partial: output as never });
|
||||
await processOpenAICompletionsStream(responseStream, output, model, stream);
|
||||
if (options?.signal?.aborted) {
|
||||
@@ -1849,6 +1879,10 @@ function mapStopReason(reason: string | null) {
|
||||
}
|
||||
|
||||
// Internal-only surface exported for unit tests: exposes the private client
// factories and option builders above so specs can assert SDK timeout wiring
// without going through a live request. Not part of the public module API.
export const __testing = {
  buildOpenAISdkRequestOptions,
  createAzureOpenAIClient,
  createOpenAICompletionsClient,
  createOpenAIResponsesClient,
  buildOpenAICompletionsClientConfig,
  processOpenAICompletionsStream,
};
|
||||
|
||||
@@ -154,7 +154,7 @@ function resolveModelRequestPolicy(model: Model<Api>) {
|
||||
});
|
||||
}
|
||||
|
||||
function resolveModelRequestTimeoutMs(
|
||||
export function resolveModelRequestTimeoutMs(
|
||||
model: Model<Api>,
|
||||
timeoutMs: number | undefined,
|
||||
): number | undefined {
|
||||
|
||||
Reference in New Issue
Block a user