diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88617bc80ad..df5a997b8a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,6 +72,7 @@ Docs: https://docs.openclaw.ai
 - Dashboard/Windows: open Control UI and OAuth URLs through the system URL handler without `cmd.exe` parsing or PATH-based `rundll32` lookup, and reject non-HTTP browser-open inputs. Fixes #71098. Thanks @Sanjays2402.
 - Config/doctor: reject legacy `secretref-env:` marker strings on SecretRef credential paths and migrate valid markers to structured env SecretRefs with `openclaw doctor --fix`. Fixes #51794. Thanks @halointellicore.
 - Providers/OpenAI: separate API-key and Codex sign-in onboarding groups, and avoid replaying stale OpenAI Responses reasoning blocks after a model route switch.
+- Providers/OpenAI-compatible: forward `prompt_cache_key` on Completions requests only for providers that opt in with `compat.supportsPromptCacheKey`, keeping default proxy payloads unchanged. Fixes #69272.
 - Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot.
 - Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens.
 - Skills: honor legacy `metadata.clawdbot` requirements and installer hints when `metadata.openclaw` is absent, so older skills no longer appear ready when required binaries are missing. Fixes #71323. Thanks @chen-zhang-cs-code.
diff --git a/docs/reference/prompt-caching.md b/docs/reference/prompt-caching.md
index 819d3352aab..7b4e5ab2c33 100644
--- a/docs/reference/prompt-caching.md
+++ b/docs/reference/prompt-caching.md
@@ -105,6 +105,7 @@ Per-agent heartbeat is supported at `agents.list[].heartbeat`.
 - Prompt caching is automatic on supported recent models. OpenClaw does not need to inject block-level cache markers.
 - OpenClaw uses `prompt_cache_key` to keep cache routing stable across turns and uses `prompt_cache_retention: "24h"` only when `cacheRetention: "long"` is selected on direct OpenAI hosts.
+- OpenAI-compatible Completions providers receive `prompt_cache_key` only when their model config explicitly sets `compat.supportsPromptCacheKey: true`; `cacheRetention: "none"` still suppresses it.
 - OpenAI responses expose cached prompt tokens via `usage.prompt_tokens_details.cached_tokens` (or `input_tokens_details.cached_tokens` on Responses API events). OpenClaw maps that to `cacheRead`.
 - OpenAI does not expose a separate cache-write token counter, so `cacheWrite` stays `0` on OpenAI paths even when the provider is warming a cache.
 - OpenAI returns useful tracing and rate-limit headers such as `x-request-id`, `openai-processing-ms`, and `x-ratelimit-*`, but cache-hit accounting should come from the usage payload, not from headers.
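For a concrete picture of the opt-in, here is a minimal sketch of a model definition that would start receiving `prompt_cache_key`. It mirrors the fixture constructed in the new tests below; the variable name and the standalone-literal framing are illustrative assumptions, not OpenClaw's actual provider-config surface.

```ts
// Sketch of an opted-in OpenAI-compatible model, assuming the same
// Model<"openai-completions"> shape the tests below construct.
const customProxyModel = {
  id: "custom-model",
  name: "Custom Model",
  api: "openai-completions",
  provider: "custom-cpa",
  baseUrl: "https://proxy.example.com/v1",
  // The opt-in flag: without it, prompt_cache_key is never forwarded,
  // so default proxy payloads stay unchanged.
  compat: { supportsPromptCacheKey: true },
  reasoning: false,
  input: ["text"],
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
  contextWindow: 32768,
  maxTokens: 8192,
};
```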
diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts
index 6614b447bcc..a60d47bc1d0 100644
--- a/src/agents/openai-transport-stream.test.ts
+++ b/src/agents/openai-transport-stream.test.ts
@@ -37,7 +37,7 @@ describe("openai transport stream", () => {
         cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
         contextWindow: 128000,
         maxTokens: 4096,
-      } satisfies Model<"openai-completions">,
+      } as unknown as Model<"openai-completions">,
       { systemPrompt: "", messages: [] } as never,
     );
 
@@ -1374,7 +1374,7 @@
         cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
         contextWindow: 200000,
         maxTokens: 8192,
-      } satisfies Model<"openai-completions">,
+      } as unknown as Model<"openai-completions">,
       {
         systemPrompt: "system",
         messages: [],
@@ -1560,7 +1560,7 @@
         cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
         contextWindow: 32768,
         maxTokens: 8192,
-      } satisfies Model<"openai-completions">,
+      } as unknown as Model<"openai-completions">,
       {
         systemPrompt: "system",
         messages: [],
@@ -1601,6 +1601,67 @@
     expect(params.stream_options).toEqual({ include_usage: true });
   });
 
+  it("forwards prompt_cache_key for opted-in OpenAI-compatible completions providers", () => {
+    const params = buildOpenAICompletionsParams(
+      {
+        id: "custom-model",
+        name: "Custom Model",
+        api: "openai-completions",
+        provider: "custom-cpa",
+        baseUrl: "https://proxy.example.com/v1",
+        compat: { supportsPromptCacheKey: true },
+        reasoning: false,
+        input: ["text"],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 32768,
+        maxTokens: 8192,
+      } as unknown as Model<"openai-completions">,
+      {
+        systemPrompt: "system",
+        messages: [],
+        tools: [],
+      } as never,
+      { sessionId: "session-123" },
+    ) as { prompt_cache_key?: string };
+
+    expect(params.prompt_cache_key).toBe("session-123");
+  });
+
+  it("omits prompt_cache_key for completions when caching is disabled or not opted in", () => {
+    const baseModel = {
+      id: "custom-model",
+      name: "Custom Model",
+      api: "openai-completions",
+      provider: "custom-cpa",
+      baseUrl: "https://proxy.example.com/v1",
+      reasoning: false,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 32768,
+      maxTokens: 8192,
+    } satisfies Model<"openai-completions">;
+    const context = {
+      systemPrompt: "system",
+      messages: [],
+      tools: [],
+    } as never;
+
+    const disabled = buildOpenAICompletionsParams(
+      {
+        ...baseModel,
+        compat: { supportsPromptCacheKey: true },
+      } as unknown as Model<"openai-completions">,
+      context,
+      { sessionId: "session-123", cacheRetention: "none" },
+    ) as { prompt_cache_key?: string };
+    const notOptedIn = buildOpenAICompletionsParams(baseModel, context, {
+      sessionId: "session-123",
+    }) as { prompt_cache_key?: string };
+
+    expect(disabled.prompt_cache_key).toBeUndefined();
+    expect(notOptedIn.prompt_cache_key).toBeUndefined();
+  });
+
   it("disables developer-role-only compat defaults for configured custom proxy completions providers", () => {
     const params = buildOpenAICompletionsParams(
       {
diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts
index d249ca71216..44daf7a9c40 100644
--- a/src/agents/openai-transport-stream.ts
+++ b/src/agents/openai-transport-stream.ts
@@ -1522,6 +1522,7 @@ function getCompat(model: OpenAIModeModel): {
   openRouterRouting: Record<string, unknown>;
   vercelGatewayRouting: Record<string, unknown>;
   supportsStrictMode: boolean;
+  supportsPromptCacheKey: boolean;
   requiresStringContent: boolean;
   visibleReasoningDetailTypes: string[];
 } {
@@ -1550,6 +1551,7 @@
       (compat.vercelGatewayRouting as Record<string, unknown> | undefined) ??
       detected.vercelGatewayRouting,
     supportsStrictMode: compat.supportsStrictMode ?? detected.supportsStrictMode,
+    supportsPromptCacheKey: compat.supportsPromptCacheKey === true,
     requiresStringContent: compat.requiresStringContent ?? false,
     visibleReasoningDetailTypes:
       compat.visibleReasoningDetailTypes ?? detected.visibleReasoningDetailTypes,
@@ -1716,6 +1718,7 @@ export function buildOpenAICompletionsParams(
     : context;
   const messages = convertMessages(model as never, completionsContext, compat as never);
   injectToolCallThoughtSignatures(messages as unknown[], context, model);
+  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
   const params: Record<string, unknown> = {
     model: model.id,
     messages: compat.requiresStringContent
@@ -1727,6 +1730,9 @@
   if (compat.supportsStore) {
     params.store = false;
   }
+  if (compat.supportsPromptCacheKey && cacheRetention !== "none" && options?.sessionId) {
+    params.prompt_cache_key = options.sessionId;
+  }
   if (options?.maxTokens) {
     if (compat.maxTokensField === "max_tokens") {
       params.max_tokens = options.maxTokens;
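Putting the pieces together, the gate added in `buildOpenAICompletionsParams` forwards the key only when three conditions hold at once. A minimal standalone sketch, assuming a plain-string retention value and a hypothetical `promptCacheKeyFor` helper; in the real code these inputs come from `getCompat()` and `resolveCacheRetention()`:

```ts
// Hypothetical standalone version of the forwarding gate above; the
// names CacheKeyInputs and promptCacheKeyFor are invented for this sketch.
interface CacheKeyInputs {
  supportsPromptCacheKey: boolean; // provider opt-in via compat config
  cacheRetention: string; // "none" disables caching entirely
  sessionId?: string; // forwarded verbatim as prompt_cache_key
}

function promptCacheKeyFor(inputs: CacheKeyInputs): string | undefined {
  const { supportsPromptCacheKey, cacheRetention, sessionId } = inputs;
  // All three checks must pass: explicit opt-in, caching not disabled,
  // and a session id available to use as the cache key.
  if (supportsPromptCacheKey && cacheRetention !== "none" && sessionId) {
    return sessionId;
  }
  return undefined;
}

// Mirrors the cases the new tests cover:
// "session-123": opted in, caching enabled, session id present
promptCacheKeyFor({ supportsPromptCacheKey: true, cacheRetention: "long", sessionId: "session-123" });
// undefined: cacheRetention "none" suppresses the key
promptCacheKeyFor({ supportsPromptCacheKey: true, cacheRetention: "none", sessionId: "session-123" });
// undefined: the provider never opted in
promptCacheKeyFor({ supportsPromptCacheKey: false, cacheRetention: "long", sessionId: "session-123" });
```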