diff --git a/docs/reference/prompt-caching.md b/docs/reference/prompt-caching.md
index 794a3b8e9c7..eb15d11bcb4 100644
--- a/docs/reference/prompt-caching.md
+++ b/docs/reference/prompt-caching.md
@@ -136,6 +136,11 @@
 model refs, `contextPruning.mode: "cache-ttl"` is allowed because OpenRouter
 handles provider-side prompt caching automatically. OpenClaw does not inject
 Anthropic `cache_control` markers into those requests.
 
+DeepSeek cache construction is best-effort and can take a few seconds. An
+immediate follow-up may still show `cached_tokens: 0`; verify with a repeated
+same-prefix request after a short delay and use `usage.prompt_tokens_details.cached_tokens`
+as the cache-hit signal.
+
 If you repoint the model at an arbitrary OpenAI-compatible proxy URL, OpenClaw
 stops injecting those OpenRouter-specific Anthropic cache markers.
diff --git a/extensions/openrouter/openrouter.live.test.ts b/extensions/openrouter/openrouter.live.test.ts
index f3f92bdb1a3..1bafd08c37b 100644
--- a/extensions/openrouter/openrouter.live.test.ts
+++ b/extensions/openrouter/openrouter.live.test.ts
@@ -10,8 +10,12 @@ import plugin from "./index.js";
 const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY ?? "";
 const LIVE_MODEL_ID =
   process.env.OPENCLAW_LIVE_OPENROUTER_PLUGIN_MODEL?.trim() || "openai/gpt-5.4-nano";
+const LIVE_CACHE_MODEL_ID =
+  process.env.OPENCLAW_LIVE_OPENROUTER_CACHE_MODEL?.trim() || "deepseek/deepseek-v3.2";
 const liveEnabled = OPENROUTER_API_KEY.trim().length > 0 && process.env.OPENCLAW_LIVE_TEST === "1";
 const describeLive = liveEnabled ? describe : describe.skip;
+const describeCacheLive =
+  liveEnabled && process.env.OPENCLAW_LIVE_CACHE_TEST === "1" ? describe : describe.skip;
 const ModelRegistryCtor = ModelRegistry as unknown as {
   new (authStorage: AuthStorage, modelsJsonPath?: string): ModelRegistry;
 };
@@ -23,6 +27,28 @@ const registerOpenRouterPlugin = async () =>
     name: "OpenRouter Provider",
   });
 
+function buildStableCachePrefix(): string {
+  return Array.from(
+    { length: 700 },
+    (_, index) =>
+      `Stable OpenRouter cache probe sentence ${
+        index % 20
+      }: this prefix must stay byte-identical across repeated requests.`,
+  ).join("\n");
+}
+
+async function completeOpenRouterChat(params: {
+  client: OpenAI;
+  messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
+  model: string;
+}) {
+  return params.client.chat.completions.create({
+    model: params.model,
+    messages: params.messages,
+    max_tokens: 8,
+  });
+}
+
 describeLive("openrouter plugin live", () => {
   it("registers an OpenRouter provider that can complete a live request", async () => {
     const { providers } = await registerOpenRouterPlugin();
@@ -57,3 +83,38 @@
     expect(response.choices[0]?.message?.content?.trim()).toMatch(/^OK[.!]?$/);
   }, 30_000);
 });
+
+describeCacheLive("openrouter plugin live cache", () => {
+  it("observes automatic cache reads for DeepSeek model refs after cache construction", async () => {
+    const { providers } = await registerOpenRouterPlugin();
+    const provider = requireRegisteredProvider(providers, "openrouter");
+    const resolved = provider.resolveDynamicModel?.({
+      provider: "openrouter",
+      modelId: LIVE_CACHE_MODEL_ID,
+      modelRegistry: new ModelRegistryCtor(AuthStorage.inMemory()),
+    });
+    if (!resolved) {
+      throw new Error(`openrouter provider did not resolve ${LIVE_CACHE_MODEL_ID}`);
+    }
+
+    const client = new OpenAI({
+      apiKey: OPENROUTER_API_KEY,
+      baseURL: resolved.baseUrl,
+    });
+    const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
+      {
+        role: "system",
+        content: `You are testing prompt caching.\n${buildStableCachePrefix()}`,
+      },
+      { role: "user", content: "Reply with exactly OK." },
+    ];
+
+    await completeOpenRouterChat({ client, model: resolved.id, messages });
+    await new Promise((resolve) => setTimeout(resolve, 2_000));
+    const cached = await completeOpenRouterChat({ client, model: resolved.id, messages });
+
+    const cachedTokens = cached.usage?.prompt_tokens_details?.cached_tokens ?? 0;
+    expect(cached.choices[0]?.message?.content?.trim()).toMatch(/^OK[.!]?$/);
+    expect(cachedTokens).toBeGreaterThan(1024);
+  }, 60_000);
+});