From da8621df0d2e31369eb82f54079b6f3cd845a1f9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 03:32:29 +0100 Subject: [PATCH] fix(openai-completions): enable local streaming usage compat (#68711) (thanks @gaineyllc) --- CHANGELOG.md | 1 + docs/providers/sglang.md | 4 ++ docs/providers/vllm.md | 4 ++ src/agents/openai-completions-compat.test.ts | 50 +++++++++++++++++++- src/agents/openai-completions-compat.ts | 28 ++++++++++- src/agents/openai-transport-stream.test.ts | 4 +- 6 files changed, 87 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d2551238c2..29898c57b24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai - Providers/Amazon Bedrock Mantle: add Claude Opus 4.7 through Mantle's Anthropic Messages route with provider-owned bearer-auth streaming, so the model is actually callable without treating AWS bearer tokens like Anthropic API keys. Thanks @wirjo. - Providers/OpenAI Codex: remove the Codex CLI auth import path from onboarding and provider discovery so OpenClaw no longer copies `~/.codex` OAuth material into agent auth stores; use browser login or device pairing instead. (#70390) Thanks @pashpashpash. +- Providers/OpenAI-compatible: mark known local backends such as vLLM, SGLang, llama.cpp, LM Studio, LocalAI, Jan, TabbyAPI, and text-generation-webui as streaming-usage compatible, so their token accounting no longer degrades to unknown/stale totals. (#68711) Thanks @gaineyllc. - OpenAI/Responses: use OpenAI's native `web_search` tool automatically for direct OpenAI Responses models when web search is enabled and no managed search provider is pinned; explicit providers such as Brave keep the managed `web_search` tool. - ACPX: add an explicit `openClawToolsMcpBridge` option that injects a core OpenClaw MCP server for selected built-in tools, starting with `cron`. 
- Agents/sessions: add mailbox-style `sessions_list` filters for label, agent, and search plus visibility-scoped derived title and last-message previews. (#69839) Thanks @dangoZhang. diff --git a/docs/providers/sglang.md b/docs/providers/sglang.md index 25c88ee74d0..6aa57d8c93b 100644 --- a/docs/providers/sglang.md +++ b/docs/providers/sglang.md @@ -15,6 +15,10 @@ OpenClaw can also **auto-discover** available models from SGLang when you opt in with `SGLANG_API_KEY` (any value works if your server does not enforce auth) and you do not define an explicit `models.providers.sglang` entry. +OpenClaw treats `sglang` as a local OpenAI-compatible provider that supports +streamed usage accounting, so status/context token counts can update from +`stream_options.include_usage` responses. + ## Getting started diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md index d31b53f8c28..3c2358ca8c5 100644 --- a/docs/providers/vllm.md +++ b/docs/providers/vllm.md @@ -12,6 +12,10 @@ vLLM can serve open-source (and some custom) models via an **OpenAI-compatible** OpenClaw can also **auto-discover** available models from vLLM when you opt in with `VLLM_API_KEY` (any value works if your server does not enforce auth) and you do not define an explicit `models.providers.vllm` entry. +OpenClaw treats `vllm` as a local OpenAI-compatible provider that supports +streamed usage accounting, so status/context token counts can update from +`stream_options.include_usage` responses. 
+ | Property | Value | | ---------------- | ---------------------------------------- | | Provider ID | `vllm` | diff --git a/src/agents/openai-completions-compat.test.ts b/src/agents/openai-completions-compat.test.ts index 009b9f9ab59..3d343c1fade 100644 --- a/src/agents/openai-completions-compat.test.ts +++ b/src/agents/openai-completions-compat.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it } from "vitest"; -import { resolveOpenAICompletionsCompatDefaults } from "./openai-completions-compat.js"; +import { + detectOpenAICompletionsCompat, + resolveOpenAICompletionsCompatDefaults, +} from "./openai-completions-compat.js"; describe("resolveOpenAICompletionsCompatDefaults", () => { it("keeps streaming usage enabled for provider-declared compatible endpoints", () => { @@ -33,4 +36,49 @@ describe("resolveOpenAICompletionsCompatDefaults", () => { }).supportsUsageInStreaming, ).toBe(false); }); + + it.each([ + "vllm", + "localai", + "sglang", + "llama-cpp", + "llama.cpp", + "llamacpp", + "jan", + "lmstudio", + "lm-studio", + "text-generation-webui", + "tabby", + "tabbyapi", + ])("enables streaming usage compat for known local provider %s", (provider) => { + expect( + resolveOpenAICompletionsCompatDefaults({ + provider, + endpointClass: "custom", + knownProviderFamily: provider, + }).supportsUsageInStreaming, + ).toBe(true); + }); + + it("matches known local providers case-insensitively", () => { + expect( + resolveOpenAICompletionsCompatDefaults({ + provider: "vLLM", + endpointClass: "local", + knownProviderFamily: "vllm", + }).supportsUsageInStreaming, + ).toBe(true); + }); +}); + +describe("detectOpenAICompletionsCompat", () => { + it("enables streaming usage compat for vLLM on a local OpenAI-compatible endpoint", () => { + const detected = detectOpenAICompletionsCompat({ + provider: "vllm", + baseUrl: "http://127.0.0.1:8000/v1", + id: "Qwen/Qwen3-Coder-Next-FP8", + }); + + expect(detected.defaults.supportsUsageInStreaming).toBe(true); + }); }); diff --git 
a/src/agents/openai-completions-compat.ts b/src/agents/openai-completions-compat.ts index ebc9825d27e..af1826c18c9 100644 --- a/src/agents/openai-completions-compat.ts +++ b/src/agents/openai-completions-compat.ts @@ -30,6 +30,27 @@ function isDefaultRouteProvider(provider: string | undefined, ...ids: string[]) return provider !== undefined && ids.includes(provider); } +const KNOWN_LOCAL_STREAMING_USAGE_PROVIDERS = new Set([ + "jan", + "llama-cpp", + "llama.cpp", + "llamacpp", + "lm-studio", + "lmstudio", + "localai", + "sglang", + "tabby", + "tabbyapi", + "text-generation-webui", + "vllm", +]); + +function isKnownLocalStreamingUsageProvider(...ids: Array<string | undefined>): boolean { + return ids.some( + (id) => id !== undefined && KNOWN_LOCAL_STREAMING_USAGE_PROVIDERS.has(id.toLowerCase()), + ); +} + export function resolveOpenAICompletionsCompatDefaults( input: OpenAICompletionsCompatDefaultsInput, ): OpenAICompletionsCompatDefaults { @@ -67,6 +88,10 @@ export function resolveOpenAICompletionsCompatDefaults( endpointClass === "mistral-public" || knownProviderFamily === "mistral" || (isDefaultRoute && isDefaultRouteProvider(provider, "chutes")); + const supportsKnownLocalStreamingUsage = isKnownLocalStreamingUsageProvider( + provider, + knownProviderFamily, + ); return { supportsStore: !isNonStandard && knownProviderFamily !== "mistral" && !usesExplicitProxyLikeEndpoint, @@ -77,7 +102,8 @@ export function resolveOpenAICompletionsCompatDefaults( endpointClass !== "xai-native" && !usesExplicitProxyLikeEndpoint, supportsUsageInStreaming: - !isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat), + supportsKnownLocalStreamingUsage || + (!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat)), maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens", thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai", visibleReasoningDetailTypes: isOpenRouterLike ?
["response.output_text", "response.text"] : [], diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 4971112bc55..1a21e776954 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1353,13 +1353,13 @@ describe("openai transport stream", () => { expect(params.stream_options).toMatchObject({ include_usage: true }); }); - it("always includes stream_options.include_usage for non-standard backends like llama-cpp", () => { + it("always includes stream_options.include_usage for known local backends like llama-cpp", () => { const params = buildOpenAICompletionsParams( { id: "llama-3", name: "Llama 3", api: "openai-completions", - provider: "custom-cpa", + provider: "llama-cpp", baseUrl: "http://localhost:8080/v1", reasoning: false, input: ["text"],