test(openrouter): cover DeepSeek live cache hits

Peter Steinberger
2026-04-25 06:47:34 +01:00
parent 2cd2732ab6
commit cfb551c709
2 changed files with 66 additions and 0 deletions

View File

@@ -136,6 +136,11 @@ model refs, `contextPruning.mode: "cache-ttl"` is allowed because OpenRouter
handles provider-side prompt caching automatically. OpenClaw does not inject
Anthropic `cache_control` markers into those requests.
DeepSeek cache construction is best-effort and can take a few seconds. An
immediate follow-up may still report `cached_tokens: 0`; verify by repeating the
same-prefix request after a short delay and treat `usage.prompt_tokens_details.cached_tokens`
as the cache-hit signal.
If you repoint the model at an arbitrary OpenAI-compatible proxy URL, OpenClaw
stops injecting those OpenRouter-specific Anthropic cache markers.
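For reference, a minimal sketch of that verification flow, assuming the official `openai` SDK pointed at OpenRouter's public base URL (`https://openrouter.ai/api/v1`) and the same `deepseek/deepseek-v3.2` slug the live test below defaults to; the prefix text here is illustrative rather than the project's:

```ts
import OpenAI from "openai";

// Sketch only: a long prefix that stays byte-identical across both requests.
const stablePrefix = Array.from(
  { length: 700 },
  (_, index) => `Stable cache probe sentence ${index % 20}.`,
).join("\n");

const client = new OpenAI({
  apiKey: process.env.OPENROUTER_API_KEY,
  baseURL: "https://openrouter.ai/api/v1",
});

const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
  { role: "system", content: `You are testing prompt caching.\n${stablePrefix}` },
  { role: "user", content: "Reply with exactly OK." },
];

// First request warms the provider-side cache; construction is best-effort and may lag.
await client.chat.completions.create({
  model: "deepseek/deepseek-v3.2",
  messages,
  max_tokens: 8,
});
await new Promise((resolve) => setTimeout(resolve, 2_000));

// Repeat the same-prefix request and read the cache-hit signal from usage details.
const second = await client.chat.completions.create({
  model: "deepseek/deepseek-v3.2",
  messages,
  max_tokens: 8,
});
console.log(second.usage?.prompt_tokens_details?.cached_tokens ?? 0);
```

A non-zero `cached_tokens` on the second call is the hit signal the doc describes; zero usually just means the cache was still being constructed.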

View File

@@ -10,8 +10,12 @@ import plugin from "./index.js";
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY ?? "";
const LIVE_MODEL_ID =
process.env.OPENCLAW_LIVE_OPENROUTER_PLUGIN_MODEL?.trim() || "openai/gpt-5.4-nano";
const LIVE_CACHE_MODEL_ID =
process.env.OPENCLAW_LIVE_OPENROUTER_CACHE_MODEL?.trim() || "deepseek/deepseek-v3.2";
const liveEnabled = OPENROUTER_API_KEY.trim().length > 0 && process.env.OPENCLAW_LIVE_TEST === "1";
const describeLive = liveEnabled ? describe : describe.skip;
const describeCacheLive =
liveEnabled && process.env.OPENCLAW_LIVE_CACHE_TEST === "1" ? describe : describe.skip;
const ModelRegistryCtor = ModelRegistry as unknown as {
new (authStorage: AuthStorage, modelsJsonPath?: string): ModelRegistry;
};
@@ -23,6 +27,28 @@ const registerOpenRouterPlugin = async () =>
name: "OpenRouter Provider",
});
function buildStableCachePrefix(): string {
return Array.from(
{ length: 700 },
(_, index) =>
`Stable OpenRouter cache probe sentence ${
index % 20
}: this prefix must stay byte-identical across repeated requests.`,
).join("\n");
}
async function completeOpenRouterChat(params: {
client: OpenAI;
messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
model: string;
}) {
return params.client.chat.completions.create({
model: params.model,
messages: params.messages,
max_tokens: 8,
});
}
describeLive("openrouter plugin live", () => {
it("registers an OpenRouter provider that can complete a live request", async () => {
const { providers } = await registerOpenRouterPlugin();
@@ -57,3 +83,38 @@ describeLive("openrouter plugin live", () => {
expect(response.choices[0]?.message?.content?.trim()).toMatch(/^OK[.!]?$/);
}, 30_000);
});
describeCacheLive("openrouter plugin live cache", () => {
it("observes automatic cache reads for DeepSeek model refs after cache construction", async () => {
const { providers } = await registerOpenRouterPlugin();
const provider = requireRegisteredProvider(providers, "openrouter");
const resolved = provider.resolveDynamicModel?.({
provider: "openrouter",
modelId: LIVE_CACHE_MODEL_ID,
modelRegistry: new ModelRegistryCtor(AuthStorage.inMemory()),
});
if (!resolved) {
throw new Error(`openrouter provider did not resolve ${LIVE_CACHE_MODEL_ID}`);
}
const client = new OpenAI({
apiKey: OPENROUTER_API_KEY,
baseURL: resolved.baseUrl,
});
const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
{
role: "system",
content: `You are testing prompt caching.\n${buildStableCachePrefix()}`,
},
{ role: "user", content: "Reply with exactly OK." },
];
await completeOpenRouterChat({ client, model: resolved.id, messages });
await new Promise((resolve) => setTimeout(resolve, 2_000));
const cached = await completeOpenRouterChat({ client, model: resolved.id, messages });
const cachedTokens = cached.usage?.prompt_tokens_details?.cached_tokens ?? 0;
expect(cached.choices[0]?.message?.content?.trim()).toMatch(/^OK[.!]?$/);
expect(cachedTokens).toBeGreaterThan(1024);
}, 60_000);
});
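As wired above, the cache suite only runs when `OPENROUTER_API_KEY` is set and both `OPENCLAW_LIVE_TEST=1` and `OPENCLAW_LIVE_CACHE_TEST=1` are exported; otherwise `describeCacheLive` resolves to `describe.skip`. The `toBeGreaterThan(1024)` assertion appears to lean on the roughly 700-line prefix tokenizing to well over a thousand tokens, so even a partial cache read should clear it.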