From d79b9e0af4c0cbbfdc7b4f8d79939c06d13bc138 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 06:38:11 +0100 Subject: [PATCH] fix(openrouter): allow DeepSeek cache-ttl eligibility --- CHANGELOG.md | 1 + docs/concepts/model-providers.md | 2 +- docs/reference/prompt-caching.md | 7 ++++++- extensions/openrouter/index.ts | 1 + src/agents/pi-embedded-runner/cache-ttl.test.ts | 3 ++- .../pi-embedded-runner/proxy-stream-wrappers.test.ts | 8 ++++++++ 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c70e6e312d..470055e0b8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Dashboard/security: avoid writing tokenized Control UI URLs or SSH hints to runtime logs, keeping gateway bearer fragments out of console-captured logs readable through `logs.tail`. (#70029) Thanks @Ziy1-Tan. +- Providers/OpenRouter: treat DeepSeek refs as cache-TTL eligible without injecting Anthropic cache-control markers, aligning context pruning with OpenRouter-managed prompt caching. (#51983) Thanks @QuinnH496. - Discord/cron: deliver text-only isolated cron and heartbeat announce output from the canonical final assistant text once, avoiding duplicate Discord posts when streamed block payloads and the final answer contain the same content. Fixes #71406. Thanks @alexgross21. - macOS Gateway: wait for launchd to reload the exited Gateway LaunchAgent before bootstrapping repair fallback, preventing config-triggered restarts from leaving the service not loaded. Fixes #45178. Thanks @vincentkoc. - TTS/hooks: preserve audio-only TTS transcripts for `message_sending` and `message_sent` hooks without rendering the transcript as a media caption. Thanks @zqchris. diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index bf0bebb55d9..bf233135cf9 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -269,7 +269,7 @@ See [/providers/kilocode](/providers/kilocode) for setup details. Quirks worth knowing: -- **OpenRouter** applies its app-attribution headers and Anthropic `cache_control` markers only on verified `openrouter.ai` routes. As a proxy-style OpenAI-compatible path, it skips native-OpenAI-only shaping (`serviceTier`, Responses `store`, prompt-cache hints, OpenAI reasoning-compat). Gemini-backed refs keep proxy-Gemini thought-signature sanitation only. +- **OpenRouter** applies its app-attribution headers and Anthropic `cache_control` markers only on verified `openrouter.ai` routes. DeepSeek, Moonshot, and ZAI refs are cache-TTL eligible for OpenRouter-managed prompt caching but do not receive Anthropic cache markers. As a proxy-style OpenAI-compatible path, it skips native-OpenAI-only shaping (`serviceTier`, Responses `store`, prompt-cache hints, OpenAI reasoning-compat). Gemini-backed refs keep proxy-Gemini thought-signature sanitation only. - **Kilo Gateway** Gemini-backed refs follow the same proxy-Gemini sanitation path; `kilocode/kilo/auto` and other proxy-reasoning-unsupported refs skip proxy reasoning injection. - **MiniMax** API-key onboarding writes explicit text-only M2.7 chat model definitions; image understanding stays on the plugin-owned `MiniMax-VL-01` media provider. - **xAI** uses the xAI Responses path. `/fast` or `params.fastMode: true` rewrites `grok-3`, `grok-3-mini`, `grok-4`, and `grok-4-0709` to their `*-fast` variants. `tool_stream` defaults on; disable via `agents.defaults.models["xai/"].params.tool_stream=false`. diff --git a/docs/reference/prompt-caching.md b/docs/reference/prompt-caching.md index 3c5fa801106..794a3b8e9c7 100644 --- a/docs/reference/prompt-caching.md +++ b/docs/reference/prompt-caching.md @@ -123,7 +123,7 @@ Per-agent heartbeat is supported at `agents.list[].heartbeat`. - Anthropic Claude model refs (`amazon-bedrock/*anthropic.claude*`) support explicit `cacheRetention` pass-through. - Non-Anthropic Bedrock models are forced to `cacheRetention: "none"` at runtime. -### OpenRouter Anthropic models +### OpenRouter models For `openrouter/anthropic/*` model refs, OpenClaw injects Anthropic `cache_control` on system/developer prompt blocks to improve prompt-cache @@ -131,6 +131,11 @@ reuse only when the request is still targeting a verified OpenRouter route (`openrouter` on its default endpoint, or any provider/base URL that resolves to `openrouter.ai`). +For `openrouter/deepseek/*`, `openrouter/moonshot*/*`, and `openrouter/zai/*` +model refs, `contextPruning.mode: "cache-ttl"` is allowed because OpenRouter +handles provider-side prompt caching automatically. OpenClaw does not inject +Anthropic `cache_control` markers into those requests. + If you repoint the model at an arbitrary OpenAI-compatible proxy URL, OpenClaw stops injecting those OpenRouter-specific Anthropic cache markers. diff --git a/extensions/openrouter/index.ts b/extensions/openrouter/index.ts index e133576e983..02cf1da3b80 100644 --- a/extensions/openrouter/index.ts +++ b/extensions/openrouter/index.ts @@ -27,6 +27,7 @@ const PROVIDER_ID = "openrouter"; const OPENROUTER_DEFAULT_MAX_TOKENS = 8192; const OPENROUTER_CACHE_TTL_MODEL_PREFIXES = [ "anthropic/", + "deepseek/", "moonshot/", "moonshotai/", "zai/", diff --git a/src/agents/pi-embedded-runner/cache-ttl.test.ts b/src/agents/pi-embedded-runner/cache-ttl.test.ts index 7c15193a99b..262967a863f 100644 --- a/src/agents/pi-embedded-runner/cache-ttl.test.ts +++ b/src/agents/pi-embedded-runner/cache-ttl.test.ts @@ -16,7 +16,7 @@ vi.mock("../../plugins/provider-runtime.js", async () => { return true; } if (params.context.provider === "openrouter") { - return ["anthropic/", "moonshot/", "moonshotai/", "zai/"].some((prefix) => + return ["anthropic/", "deepseek/", "moonshot/", "moonshotai/", "zai/"].some((prefix) => params.context.modelId.startsWith(prefix), ); } @@ -44,6 +44,7 @@ describe("isCacheTtlEligibleProvider", () => { it("allows openrouter cache-ttl models", () => { expect(isCacheTtlEligibleProvider("openrouter", "anthropic/claude-sonnet-4")).toBe(true); + expect(isCacheTtlEligibleProvider("openrouter", "deepseek/deepseek-v3.2")).toBe(true); expect(isCacheTtlEligibleProvider("openrouter", "moonshotai/kimi-k2.5")).toBe(true); expect(isCacheTtlEligibleProvider("openrouter", "moonshot/kimi-k2.5")).toBe(true); expect(isCacheTtlEligibleProvider("openrouter", "zai/glm-5")).toBe(true); diff --git a/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts b/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts index 7b3757d0cf8..9db00c48096 100644 --- a/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts +++ b/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts @@ -79,6 +79,14 @@ describe("proxy stream wrappers", () => { expect(payload.messages[0]?.content).toBe("system prompt"); }); + it("does not inject Anthropic cache_control markers for automatic OpenRouter DeepSeek cache models", () => { + const payload = runSystemCacheWrapper({ + id: "deepseek/deepseek-v3.2", + }); + + expect(payload.messages[0]?.content).toBe("system prompt"); + }); + it("injects cache_control markers for native OpenRouter hosts behind custom provider ids", () => { const payload = runSystemCacheWrapper({ provider: "custom-openrouter",