From ee2eaddeb3d8cbe1fceadff2ff0c0894f7a090e5 Mon Sep 17 00:00:00 2001 From: Sid Date: Sat, 28 Feb 2026 05:51:58 +0800 Subject: [PATCH] fix(onboard): increase verification timeout and reduce max_tokens for custom provider probes (#27380) * fix(onboard): increase verification timeout and reduce max_tokens for custom provider probes The onboard wizard sends a chat-completion request to verify custom providers. With max_tokens: 1024 and a 10 s timeout, large local models (e.g. Qwen3.5-27B on llama.cpp) routinely time out because the server needs to load the model and generate up to 1024 tokens before responding. Changes: - Raise VERIFY_TIMEOUT_MS from 10 s to 30 s - Lower max_tokens from 1024 to 1 (verification only needs a single token to confirm the API is reachable and the model ID is valid) - Add explicit stream: false to both OpenAI and Anthropic probes Closes #27346 Made-with: Cursor * Changelog: note custom-provider onboarding verification fix --------- Co-authored-by: Philipp Spiess --- CHANGELOG.md | 1 + src/commands/onboard-custom.test.ts | 6 +++--- src/commands/onboard-custom.ts | 8 +++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e216e5ad76..5866beb699c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai - Gateway/WS: close repeated post-handshake `unauthorized role:*` request floods per connection and sample duplicate rejection logs, preventing a single misbehaving client from degrading gateway responsiveness. (#20168) Thanks @acy103, @vibecodooor, and @vincentkoc. - Gateway/Auth: improve device-auth v2 migration diagnostics so operators get clearer guidance when legacy clients connect. (#28305) Thanks @vincentkoc. - CLI/Install: add an npm-link fallback to fix CLI startup `Permission denied` failures (`exit 127`) on affected installs. (#17151) Thanks @sskyu and @vincentkoc. +- Onboarding/Custom providers: improve verification reliability for slower local endpoints (for example Ollama) during setup. (#27380) Thanks @Sid-Qin. - Agents/Ollama: demote empty-discovery logging from `warn` to `debug` to reduce noisy warnings in normal edge-case discovery flows. (#26379) Thanks @byungsker. - Install/npm: fix npm global install deprecation warnings. (#28318) Thanks @vincentkoc. - Slack/Native commands: register Slack native status as `/agentstatus` (Slack-reserved `/status`) so manifest slash command registration stays valid while text `/status` still works. Landed from contributor PR #29032 by @maloqab. Thanks @maloqab. diff --git a/src/commands/onboard-custom.test.ts b/src/commands/onboard-custom.test.ts index 55be1b89dc3..abdb99bd09d 100644 --- a/src/commands/onboard-custom.test.ts +++ b/src/commands/onboard-custom.test.ts @@ -128,7 +128,7 @@ describe("promptCustomApiConfig", () => { const firstCall = fetchMock.mock.calls[0]?.[1] as { body?: string } | undefined; expect(firstCall?.body).toBeDefined(); - expect(JSON.parse(firstCall?.body ?? "{}")).toMatchObject({ max_tokens: 1024 }); + expect(JSON.parse(firstCall?.body ?? "{}")).toMatchObject({ max_tokens: 1 }); }); it("uses expanded max_tokens for anthropic verification probes", async () => { @@ -143,7 +143,7 @@ describe("promptCustomApiConfig", () => { expect(fetchMock).toHaveBeenCalledTimes(2); const secondCall = fetchMock.mock.calls[1]?.[1] as { body?: string } | undefined; expect(secondCall?.body).toBeDefined(); - expect(JSON.parse(secondCall?.body ?? "{}")).toMatchObject({ max_tokens: 1024 }); + expect(JSON.parse(secondCall?.body ?? "{}")).toMatchObject({ max_tokens: 1 }); }); it("re-prompts base url when unknown detection fails", async () => { @@ -220,7 +220,7 @@ describe("promptCustomApiConfig", () => { const promise = runPromptCustomApi(prompter); - await vi.advanceTimersByTimeAsync(10000); + await vi.advanceTimersByTimeAsync(30_000); await promise; expect(prompter.text).toHaveBeenCalledTimes(6); diff --git a/src/commands/onboard-custom.ts b/src/commands/onboard-custom.ts index 11b7fcc75da..c8bae0c8cca 100644 --- a/src/commands/onboard-custom.ts +++ b/src/commands/onboard-custom.ts @@ -18,7 +18,7 @@ import type { SecretInputMode } from "./onboard-types.js"; const DEFAULT_OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1"; const DEFAULT_CONTEXT_WINDOW = 4096; const DEFAULT_MAX_TOKENS = 4096; -const VERIFY_TIMEOUT_MS = 10000; +const VERIFY_TIMEOUT_MS = 30_000; /** * Detects if a URL is from Azure AI Foundry or Azure OpenAI. @@ -317,7 +317,8 @@ async function requestOpenAiVerification(params: { body: { model: params.modelId, messages: [{ role: "user", content: "Hi" }], - max_tokens: 1024, + max_tokens: 1, + stream: false, }, }); } @@ -343,8 +344,9 @@ async function requestAnthropicVerification(params: { headers: buildAnthropicHeaders(params.apiKey), body: { model: params.modelId, - max_tokens: 1024, + max_tokens: 1, messages: [{ role: "user", content: "Hi" }], + stream: false, }, }); }