mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 17:31:06 +00:00
fix(openai-completions): enable local streaming usage compat (#68711) (thanks @gaineyllc)
This commit is contained in:
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
- Providers/Amazon Bedrock Mantle: add Claude Opus 4.7 through Mantle's Anthropic Messages route with provider-owned bearer-auth streaming, so the model is actually callable without treating AWS bearer tokens like Anthropic API keys. Thanks @wirjo.
|
- Providers/Amazon Bedrock Mantle: add Claude Opus 4.7 through Mantle's Anthropic Messages route with provider-owned bearer-auth streaming, so the model is actually callable without treating AWS bearer tokens like Anthropic API keys. Thanks @wirjo.
|
||||||
- Providers/OpenAI Codex: remove the Codex CLI auth import path from onboarding and provider discovery so OpenClaw no longer copies `~/.codex` OAuth material into agent auth stores; use browser login or device pairing instead. (#70390) Thanks @pashpashpash.
|
- Providers/OpenAI Codex: remove the Codex CLI auth import path from onboarding and provider discovery so OpenClaw no longer copies `~/.codex` OAuth material into agent auth stores; use browser login or device pairing instead. (#70390) Thanks @pashpashpash.
|
||||||
|
- Providers/OpenAI-compatible: mark known local backends such as vLLM, SGLang, llama.cpp, LM Studio, LocalAI, Jan, TabbyAPI, and text-generation-webui as streaming-usage compatible, so their token accounting no longer degrades to unknown/stale totals. (#68711) Thanks @gaineyllc.
|
||||||
- OpenAI/Responses: use OpenAI's native `web_search` tool automatically for direct OpenAI Responses models when web search is enabled and no managed search provider is pinned; explicit providers such as Brave keep the managed `web_search` tool.
|
- OpenAI/Responses: use OpenAI's native `web_search` tool automatically for direct OpenAI Responses models when web search is enabled and no managed search provider is pinned; explicit providers such as Brave keep the managed `web_search` tool.
|
||||||
- ACPX: add an explicit `openClawToolsMcpBridge` option that injects a core OpenClaw MCP server for selected built-in tools, starting with `cron`.
|
- ACPX: add an explicit `openClawToolsMcpBridge` option that injects a core OpenClaw MCP server for selected built-in tools, starting with `cron`.
|
||||||
- Agents/sessions: add mailbox-style `sessions_list` filters for label, agent, and search plus visibility-scoped derived title and last-message previews. (#69839) Thanks @dangoZhang.
|
- Agents/sessions: add mailbox-style `sessions_list` filters for label, agent, and search plus visibility-scoped derived title and last-message previews. (#69839) Thanks @dangoZhang.
|
||||||
|
|||||||
@@ -15,6 +15,10 @@ OpenClaw can also **auto-discover** available models from SGLang when you opt
|
|||||||
in with `SGLANG_API_KEY` (any value works if your server does not enforce auth)
|
in with `SGLANG_API_KEY` (any value works if your server does not enforce auth)
|
||||||
and you do not define an explicit `models.providers.sglang` entry.
|
and you do not define an explicit `models.providers.sglang` entry.
|
||||||
|
|
||||||
|
OpenClaw treats `sglang` as a local OpenAI-compatible provider that supports
|
||||||
|
streamed usage accounting, so status/context token counts can update from
|
||||||
|
`stream_options.include_usage` responses.
|
||||||
|
|
||||||
## Getting started
|
## Getting started
|
||||||
|
|
||||||
<Steps>
|
<Steps>
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ vLLM can serve open-source (and some custom) models via an **OpenAI-compatible**
|
|||||||
|
|
||||||
OpenClaw can also **auto-discover** available models from vLLM when you opt in with `VLLM_API_KEY` (any value works if your server does not enforce auth) and you do not define an explicit `models.providers.vllm` entry.
|
OpenClaw can also **auto-discover** available models from vLLM when you opt in with `VLLM_API_KEY` (any value works if your server does not enforce auth) and you do not define an explicit `models.providers.vllm` entry.
|
||||||
|
|
||||||
|
OpenClaw treats `vllm` as a local OpenAI-compatible provider that supports
|
||||||
|
streamed usage accounting, so status/context token counts can update from
|
||||||
|
`stream_options.include_usage` responses.
|
||||||
|
|
||||||
| Property | Value |
|
| Property | Value |
|
||||||
| ---------------- | ---------------------------------------- |
|
| ---------------- | ---------------------------------------- |
|
||||||
| Provider ID | `vllm` |
|
| Provider ID | `vllm` |
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { resolveOpenAICompletionsCompatDefaults } from "./openai-completions-compat.js";
|
import {
|
||||||
|
detectOpenAICompletionsCompat,
|
||||||
|
resolveOpenAICompletionsCompatDefaults,
|
||||||
|
} from "./openai-completions-compat.js";
|
||||||
|
|
||||||
describe("resolveOpenAICompletionsCompatDefaults", () => {
|
describe("resolveOpenAICompletionsCompatDefaults", () => {
|
||||||
it("keeps streaming usage enabled for provider-declared compatible endpoints", () => {
|
it("keeps streaming usage enabled for provider-declared compatible endpoints", () => {
|
||||||
@@ -33,4 +36,49 @@ describe("resolveOpenAICompletionsCompatDefaults", () => {
|
|||||||
}).supportsUsageInStreaming,
|
}).supportsUsageInStreaming,
|
||||||
).toBe(false);
|
).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it.each([
|
||||||
|
"vllm",
|
||||||
|
"localai",
|
||||||
|
"sglang",
|
||||||
|
"llama-cpp",
|
||||||
|
"llama.cpp",
|
||||||
|
"llamacpp",
|
||||||
|
"jan",
|
||||||
|
"lmstudio",
|
||||||
|
"lm-studio",
|
||||||
|
"text-generation-webui",
|
||||||
|
"tabby",
|
||||||
|
"tabbyapi",
|
||||||
|
])("enables streaming usage compat for known local provider %s", (provider) => {
|
||||||
|
expect(
|
||||||
|
resolveOpenAICompletionsCompatDefaults({
|
||||||
|
provider,
|
||||||
|
endpointClass: "custom",
|
||||||
|
knownProviderFamily: provider,
|
||||||
|
}).supportsUsageInStreaming,
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("matches known local providers case-insensitively", () => {
|
||||||
|
expect(
|
||||||
|
resolveOpenAICompletionsCompatDefaults({
|
||||||
|
provider: "vLLM",
|
||||||
|
endpointClass: "local",
|
||||||
|
knownProviderFamily: "vllm",
|
||||||
|
}).supportsUsageInStreaming,
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("detectOpenAICompletionsCompat", () => {
|
||||||
|
it("enables streaming usage compat for vLLM on a local OpenAI-compatible endpoint", () => {
|
||||||
|
const detected = detectOpenAICompletionsCompat({
|
||||||
|
provider: "vllm",
|
||||||
|
baseUrl: "http://127.0.0.1:8000/v1",
|
||||||
|
id: "Qwen/Qwen3-Coder-Next-FP8",
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(detected.defaults.supportsUsageInStreaming).toBe(true);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -30,6 +30,27 @@ function isDefaultRouteProvider(provider: string | undefined, ...ids: string[])
|
|||||||
return provider !== undefined && ids.includes(provider);
|
return provider !== undefined && ids.includes(provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const KNOWN_LOCAL_STREAMING_USAGE_PROVIDERS = new Set([
|
||||||
|
"jan",
|
||||||
|
"llama-cpp",
|
||||||
|
"llama.cpp",
|
||||||
|
"llamacpp",
|
||||||
|
"lm-studio",
|
||||||
|
"lmstudio",
|
||||||
|
"localai",
|
||||||
|
"sglang",
|
||||||
|
"tabby",
|
||||||
|
"tabbyapi",
|
||||||
|
"text-generation-webui",
|
||||||
|
"vllm",
|
||||||
|
]);
|
||||||
|
|
||||||
|
function isKnownLocalStreamingUsageProvider(...ids: Array<string | undefined>): boolean {
|
||||||
|
return ids.some(
|
||||||
|
(id) => id !== undefined && KNOWN_LOCAL_STREAMING_USAGE_PROVIDERS.has(id.toLowerCase()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
export function resolveOpenAICompletionsCompatDefaults(
|
export function resolveOpenAICompletionsCompatDefaults(
|
||||||
input: OpenAICompletionsCompatDefaultsInput,
|
input: OpenAICompletionsCompatDefaultsInput,
|
||||||
): OpenAICompletionsCompatDefaults {
|
): OpenAICompletionsCompatDefaults {
|
||||||
@@ -67,6 +88,10 @@ export function resolveOpenAICompletionsCompatDefaults(
|
|||||||
endpointClass === "mistral-public" ||
|
endpointClass === "mistral-public" ||
|
||||||
knownProviderFamily === "mistral" ||
|
knownProviderFamily === "mistral" ||
|
||||||
(isDefaultRoute && isDefaultRouteProvider(provider, "chutes"));
|
(isDefaultRoute && isDefaultRouteProvider(provider, "chutes"));
|
||||||
|
const supportsKnownLocalStreamingUsage = isKnownLocalStreamingUsageProvider(
|
||||||
|
provider,
|
||||||
|
knownProviderFamily,
|
||||||
|
);
|
||||||
return {
|
return {
|
||||||
supportsStore:
|
supportsStore:
|
||||||
!isNonStandard && knownProviderFamily !== "mistral" && !usesExplicitProxyLikeEndpoint,
|
!isNonStandard && knownProviderFamily !== "mistral" && !usesExplicitProxyLikeEndpoint,
|
||||||
@@ -77,7 +102,8 @@ export function resolveOpenAICompletionsCompatDefaults(
|
|||||||
endpointClass !== "xai-native" &&
|
endpointClass !== "xai-native" &&
|
||||||
!usesExplicitProxyLikeEndpoint,
|
!usesExplicitProxyLikeEndpoint,
|
||||||
supportsUsageInStreaming:
|
supportsUsageInStreaming:
|
||||||
!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat),
|
supportsKnownLocalStreamingUsage ||
|
||||||
|
(!isNonStandard && (!usesConfiguredNonOpenAIEndpoint || supportsNativeStreamingUsageCompat)),
|
||||||
maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens",
|
maxTokensField: usesMaxTokens ? "max_tokens" : "max_completion_tokens",
|
||||||
thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai",
|
thinkingFormat: isZai ? "zai" : isOpenRouterLike ? "openrouter" : "openai",
|
||||||
visibleReasoningDetailTypes: isOpenRouterLike ? ["response.output_text", "response.text"] : [],
|
visibleReasoningDetailTypes: isOpenRouterLike ? ["response.output_text", "response.text"] : [],
|
||||||
|
|||||||
@@ -1353,13 +1353,13 @@ describe("openai transport stream", () => {
|
|||||||
expect(params.stream_options).toMatchObject({ include_usage: true });
|
expect(params.stream_options).toMatchObject({ include_usage: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
it("always includes stream_options.include_usage for non-standard backends like llama-cpp", () => {
|
it("always includes stream_options.include_usage for known local backends like llama-cpp", () => {
|
||||||
const params = buildOpenAICompletionsParams(
|
const params = buildOpenAICompletionsParams(
|
||||||
{
|
{
|
||||||
id: "llama-3",
|
id: "llama-3",
|
||||||
name: "Llama 3",
|
name: "Llama 3",
|
||||||
api: "openai-completions",
|
api: "openai-completions",
|
||||||
provider: "custom-cpa",
|
provider: "llama-cpp",
|
||||||
baseUrl: "http://localhost:8080/v1",
|
baseUrl: "http://localhost:8080/v1",
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
|
|||||||
Reference in New Issue
Block a user