fix(openrouter): gate prompt cache markers by endpoint (#60761)

* fix(openrouter): gate prompt cache markers by endpoint

* test(openrouter): use claude sonnet 4.6 cache model
This commit is contained in:
Vincent Koc
2026-04-04 19:32:13 +09:00
committed by GitHub
parent ee742cec40
commit 0a3211df2d
5 changed files with 102 additions and 7 deletions

View File

@@ -172,6 +172,7 @@ Docs: https://docs.openclaw.ai
- Exec approvals/channels: decouple initiating-surface approval availability from native delivery enablement so Telegram, Slack, and Discord still expose approvals when approvers exist and native target routing is configured separately. (#59776) Thanks @joelnishanth.
- Agents/logging: keep orphaned-user transcript repair warnings focused on interactive runs, and downgrade background-trigger repairs (`heartbeat`, `cron`, `memory`, `overflow`) to debug logs to reduce false-alarm gateway noise.
- Gateway/node pairing: require `operator.pairing` for node approvals end-to-end, while still requiring `operator.write` or `operator.admin` when the pending node commands need those higher scopes. (#60461) Thanks @eleqtrizit.
- Providers/OpenRouter: gate Anthropic prompt-cache `cache_control` markers to native/default OpenRouter routes and preserve them for native OpenRouter hosts behind custom provider ids. Thanks @vincentkoc.
## 2026.4.1

View File

@@ -23,7 +23,6 @@ export default definePluginEntry({
const {
buildPassthroughGeminiSanitizingReplayPolicy,
composeProviderStreamWrappers,
createOpenRouterSystemCacheWrapper,
createOpenRouterWrapper,
createProviderApiKeyAuthMethod,
DEFAULT_CONTEXT_TOKENS,
@@ -146,7 +145,6 @@ export default definePluginEntry({
? (streamFn) => injectOpenRouterRouting(streamFn, providerRouting)
: undefined,
(streamFn) => createOpenRouterWrapper(streamFn, openRouterThinkingLevel),
(streamFn) => createOpenRouterSystemCacheWrapper(streamFn),
);
},
isCacheTtlEligible: (ctx) => isOpenRouterCacheTtlModel(ctx.modelId),

View File

@@ -17,6 +17,7 @@ import {
shouldApplySiliconFlowThinkingOffCompat,
} from "./moonshot-stream-wrappers.js";
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
const defaultProviderRuntimeDeps = {
@@ -328,6 +329,8 @@ function applyPrePluginStreamWrappers(ctx: ApplyExtraParamsContext): void {
function applyPostPluginStreamWrappers(
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
): void {
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
if (!ctx.providerWrapperHandled) {
// Guard Google-family payloads against invalid negative thinking budgets
// emitted by upstream model-ID heuristics for Gemini 3.1 variants.

View File

@@ -2,7 +2,10 @@ import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { Context, Model } from "@mariozechner/pi-ai";
import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { createOpenRouterWrapper } from "./proxy-stream-wrappers.js";
import {
createOpenRouterSystemCacheWrapper,
createOpenRouterWrapper,
} from "./proxy-stream-wrappers.js";
describe("proxy stream wrappers", () => {
it("adds OpenRouter attribution headers to stream options", () => {
@@ -35,4 +38,79 @@ describe("proxy stream wrappers", () => {
},
]);
});
// Happy path: provider id "openrouter" with no custom baseUrl resolves to the
// default OpenRouter route, so the wrapper must rewrite the system message.
it("injects cache_control markers for declared OpenRouter Anthropic models on the default route", () => {
// Mutable payload captured by reference; the wrapper mutates it through onPayload.
const payload = {
messages: [{ role: "system", content: "system prompt" }],
};
// Stub stream fn: immediately hands the payload to onPayload so the
// wrapper's patch is observable, then returns an empty event stream.
const baseStreamFn: StreamFn = (model, _context, options) => {
options?.onPayload?.(payload, model);
return createAssistantMessageEventStream();
};
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
// Fire-and-forget: we only care about the synchronous payload mutation.
void wrapped(
{
api: "openai-completions",
provider: "openrouter",
id: "anthropic/claude-sonnet-4.6",
} as Model<"openai-completions">,
{ messages: [] },
{},
);
// String content is expanded into a text block carrying the ephemeral
// cache_control marker (Anthropic prompt-caching format).
expect(payload.messages[0]?.content).toEqual([
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
]);
});
// Negative path: an "openrouter" provider id pointed at a non-OpenRouter
// baseUrl (arbitrary OpenAI-compatible proxy) must NOT receive cache markers.
it("does not inject cache_control markers for declared OpenRouter providers on custom proxy URLs", () => {
// Payload shared by reference so any wrapper mutation would be visible.
const payload = {
messages: [{ role: "system", content: "system prompt" }],
};
const baseStreamFn: StreamFn = (model, _context, options) => {
options?.onPayload?.(payload, model);
return createAssistantMessageEventStream();
};
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
void wrapped(
{
api: "openai-completions",
provider: "openrouter",
id: "anthropic/claude-sonnet-4.6",
// Custom proxy host — endpoint classification should not be "openrouter".
baseUrl: "https://proxy.example.com/v1",
} as Model<"openai-completions">,
{ messages: [] },
{},
);
// toBe (identity) asserts the string content was left completely untouched,
// not merely deep-equal after a rewrite.
expect(payload.messages[0]?.content).toBe("system prompt");
});
// Edge case: a custom provider id ("custom-openrouter") whose baseUrl is the
// genuine openrouter.ai host — endpoint classification, not the provider name,
// should decide that cache markers are still applied.
it("injects cache_control markers for native OpenRouter hosts behind custom provider ids", () => {
const payload = {
messages: [{ role: "system", content: "system prompt" }],
};
const baseStreamFn: StreamFn = (model, _context, options) => {
options?.onPayload?.(payload, model);
return createAssistantMessageEventStream();
};
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
void wrapped(
{
api: "openai-completions",
// Provider id is NOT "openrouter" — only the host identifies the route.
provider: "custom-openrouter",
id: "anthropic/claude-sonnet-4.6",
baseUrl: "https://openrouter.ai/api/v1",
} as Model<"openai-completions">,
{ messages: [] },
{},
);
// Same ephemeral cache marker expansion as the default-route case.
expect(payload.messages[0]?.content).toEqual([
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
]);
});
});

View File

@@ -1,10 +1,11 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import { streamSimple } from "@mariozechner/pi-ai";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { resolveProviderRequestPolicy } from "../provider-attribution.js";
import { isProxyReasoningUnsupportedModelHint } from "../../plugin-sdk/provider-model-shared.js";
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";
import { isOpenRouterAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
import { isAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
const KILOCODE_FEATURE_DEFAULT = "openclaw";
@@ -58,10 +59,24 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL
export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
const underlying = baseStreamFn ?? streamSimple;
return (model, context, options) => {
const provider = typeof model.provider === "string" ? model.provider : undefined;
const modelId = typeof model.id === "string" ? model.id : undefined;
// Keep OpenRouter-specific cache markers on verified OpenRouter routes
// (or the provider's default route), but not on arbitrary OpenAI proxies.
const endpointClass = resolveProviderRequestPolicy({
provider,
api: typeof model.api === "string" ? model.api : undefined,
baseUrl: typeof model.baseUrl === "string" ? model.baseUrl : undefined,
capability: "llm",
transport: "stream",
}).endpointClass;
if (
typeof model.provider !== "string" ||
typeof model.id !== "string" ||
!isOpenRouterAnthropicModelRef(model.provider, model.id)
!modelId ||
!isAnthropicModelRef(modelId) ||
!(
endpointClass === "openrouter" ||
(endpointClass === "default" && provider?.trim().toLowerCase() === "openrouter")
)
) {
return underlying(model, context, options);
}