fix: forward completions prompt cache keys

Peter Steinberger
2026-04-25 03:51:35 +01:00
parent 24fdfdba6e
commit 26f06afb90
4 changed files with 72 additions and 3 deletions


@@ -72,6 +72,7 @@ Docs: https://docs.openclaw.ai
- Dashboard/Windows: open Control UI and OAuth URLs through the system URL handler without `cmd.exe` parsing or PATH-based `rundll32` lookup, and reject non-HTTP browser-open inputs. Fixes #71098. Thanks @Sanjays2402.
- Config/doctor: reject legacy `secretref-env:<ENV_VAR>` marker strings on SecretRef credential paths and migrate valid markers to structured env SecretRefs with `openclaw doctor --fix`. Fixes #51794. Thanks @halointellicore.
- Providers/OpenAI: separate API-key and Codex sign-in onboarding groups, and avoid replaying stale OpenAI Responses reasoning blocks after a model route switch.
- Providers/OpenAI-compatible: forward `prompt_cache_key` on Completions requests only for providers that opt in with `compat.supportsPromptCacheKey`, keeping default proxy payloads unchanged (see the opt-in sketch after this list). Fixes #69272.
- Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot.
- Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens.
- Skills: honor legacy `metadata.clawdbot` requirements and installer hints when `metadata.openclaw` is absent, so older skills no longer appear ready when required binaries are missing. Fixes #71323. Thanks @chen-zhang-cs-code.
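
For the OpenAI-compatible cache-key entry above, here is a minimal sketch of what the opt-in looks like on a model definition. It mirrors the shape used in the transport tests further down; the provider id and base URL are placeholders, not a real configuration.

```ts
// Sketch only: a placeholder OpenAI-compatible Completions model definition.
// The one field that matters for this change is compat.supportsPromptCacheKey;
// without it (or with cacheRetention: "none"), the proxy payload stays unchanged.
const proxyModel = {
  id: "custom-model",
  name: "Custom Model",
  api: "openai-completions",
  provider: "custom-proxy",                 // placeholder provider id
  baseUrl: "https://proxy.example.com/v1",  // placeholder proxy endpoint
  compat: { supportsPromptCacheKey: true }, // opt in to prompt_cache_key forwarding
};
```

With this flag set, `buildOpenAICompletionsParams` attaches the session id as `prompt_cache_key`, as exercised by the tests in this commit.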


@@ -105,6 +105,7 @@ Per-agent heartbeat is supported at `agents.list[].heartbeat`.
- Prompt caching is automatic on supported recent models. OpenClaw does not need to inject block-level cache markers.
- OpenClaw uses `prompt_cache_key` to keep cache routing stable across turns and uses `prompt_cache_retention: "24h"` only when `cacheRetention: "long"` is selected on direct OpenAI hosts.
- OpenAI-compatible Completions providers receive `prompt_cache_key` only when their model config explicitly sets `compat.supportsPromptCacheKey: true`; `cacheRetention: "none"` still suppresses it.
- OpenAI responses expose cached prompt tokens via `usage.prompt_tokens_details.cached_tokens` (or `input_tokens_details.cached_tokens` on Responses API events). OpenClaw maps that to `cacheRead` (see the mapping sketch after this list).
- OpenAI does not expose a separate cache-write token counter, so `cacheWrite` stays `0` on OpenAI paths even when the provider is warming a cache.
- OpenAI returns useful tracing and rate-limit headers such as `x-request-id`, `openai-processing-ms`, and `x-ratelimit-*`, but cache-hit accounting should come from the usage payload, not from headers.
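
A minimal sketch of the cached-token accounting described above; the usage field names come from these bullets, while the helper itself and its return shape are illustrative rather than OpenClaw's actual code.

```ts
// Illustrative helper: derive cache counters from an OpenAI usage payload.
// Chat Completions reports prompt_tokens_details.cached_tokens; Responses API
// events report input_tokens_details.cached_tokens. Either one maps to cacheRead.
// OpenAI exposes no cache-write counter, so cacheWrite stays 0 even while a
// cache is being warmed.
interface OpenAIUsage {
  prompt_tokens_details?: { cached_tokens?: number };
  input_tokens_details?: { cached_tokens?: number };
}

function toCacheCounters(usage: OpenAIUsage): { cacheRead: number; cacheWrite: number } {
  const cacheRead =
    usage.prompt_tokens_details?.cached_tokens ??
    usage.input_tokens_details?.cached_tokens ??
    0;
  return { cacheRead, cacheWrite: 0 };
}
```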


@@ -37,7 +37,7 @@ describe("openai transport stream", () => {
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 128000,
        maxTokens: 4096,
      } satisfies Model<"openai-completions">,
      } as unknown as Model<"openai-completions">,
      { systemPrompt: "", messages: [] } as never,
    );
@@ -1374,7 +1374,7 @@ describe("openai transport stream", () => {
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 200000,
        maxTokens: 8192,
      } satisfies Model<"openai-completions">,
      } as unknown as Model<"openai-completions">,
      {
        systemPrompt: "system",
        messages: [],
@@ -1560,7 +1560,7 @@ describe("openai transport stream", () => {
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 32768,
        maxTokens: 8192,
      } satisfies Model<"openai-completions">,
      } as unknown as Model<"openai-completions">,
      {
        systemPrompt: "system",
        messages: [],
@@ -1601,6 +1601,67 @@ describe("openai transport stream", () => {
    expect(params.stream_options).toEqual({ include_usage: true });
  });
  it("forwards prompt_cache_key for opted-in OpenAI-compatible completions providers", () => {
    const params = buildOpenAICompletionsParams(
      {
        id: "custom-model",
        name: "Custom Model",
        api: "openai-completions",
        provider: "custom-cpa",
        baseUrl: "https://proxy.example.com/v1",
        compat: { supportsPromptCacheKey: true },
        reasoning: false,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 32768,
        maxTokens: 8192,
      } as unknown as Model<"openai-completions">,
      {
        systemPrompt: "system",
        messages: [],
        tools: [],
      } as never,
      { sessionId: "session-123" },
    ) as { prompt_cache_key?: string };
    expect(params.prompt_cache_key).toBe("session-123");
  });
  it("omits prompt_cache_key for completions when caching is disabled or not opted in", () => {
    const baseModel = {
      id: "custom-model",
      name: "Custom Model",
      api: "openai-completions",
      provider: "custom-cpa",
      baseUrl: "https://proxy.example.com/v1",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 32768,
      maxTokens: 8192,
    } satisfies Model<"openai-completions">;
    const context = {
      systemPrompt: "system",
      messages: [],
      tools: [],
    } as never;
    const disabled = buildOpenAICompletionsParams(
      {
        ...baseModel,
        compat: { supportsPromptCacheKey: true },
      } as unknown as Model<"openai-completions">,
      context,
      { sessionId: "session-123", cacheRetention: "none" },
    ) as { prompt_cache_key?: string };
    const notOptedIn = buildOpenAICompletionsParams(baseModel, context, {
      sessionId: "session-123",
    }) as { prompt_cache_key?: string };
    expect(disabled.prompt_cache_key).toBeUndefined();
    expect(notOptedIn.prompt_cache_key).toBeUndefined();
  });
  it("disables developer-role-only compat defaults for configured custom proxy completions providers", () => {
    const params = buildOpenAICompletionsParams(
      {


@@ -1522,6 +1522,7 @@ function getCompat(model: OpenAIModeModel): {
  openRouterRouting: Record<string, unknown>;
  vercelGatewayRouting: Record<string, unknown>;
  supportsStrictMode: boolean;
  supportsPromptCacheKey: boolean;
  requiresStringContent: boolean;
  visibleReasoningDetailTypes: string[];
} {
@@ -1550,6 +1551,7 @@ function getCompat(model: OpenAIModeModel): {
      (compat.vercelGatewayRouting as Record<string, unknown> | undefined) ??
      detected.vercelGatewayRouting,
    supportsStrictMode: compat.supportsStrictMode ?? detected.supportsStrictMode,
    supportsPromptCacheKey: compat.supportsPromptCacheKey === true,
    requiresStringContent: compat.requiresStringContent ?? false,
    visibleReasoningDetailTypes:
      compat.visibleReasoningDetailTypes ?? detected.visibleReasoningDetailTypes,
@@ -1716,6 +1718,7 @@ export function buildOpenAICompletionsParams(
    : context;
  const messages = convertMessages(model as never, completionsContext, compat as never);
  injectToolCallThoughtSignatures(messages as unknown[], context, model);
  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
  const params: Record<string, unknown> = {
    model: model.id,
    messages: compat.requiresStringContent
@@ -1727,6 +1730,9 @@
  if (compat.supportsStore) {
    params.store = false;
  }
  if (compat.supportsPromptCacheKey && cacheRetention !== "none" && options?.sessionId) {
    params.prompt_cache_key = options.sessionId;
  }
  if (options?.maxTokens) {
    if (compat.maxTokensField === "max_tokens") {
      params.max_tokens = options.maxTokens;