Mirror of https://github.com/openclaw/openclaw.git (synced 2026-05-06 07:00:43 +00:00)
feat(openrouter): add opt-in response caching
Adds opt-in OpenRouter response caching params, preserves alias precedence across config scopes, and documents the behavior.

Verification:
- pnpm test:serial src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts src/agents/pi-embedded-runner-extraparams-openrouter.test.ts -- --reporter=verbose
- pnpm exec oxfmt --check --threads=1 src/agents/pi-embedded-runner/proxy-stream-wrappers.ts src/plugin-sdk/provider-stream.ts src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts src/agents/pi-embedded-runner-extraparams-openrouter.test.ts docs/providers/openrouter.md CHANGELOG.md
- git diff --check
- Testbox tbx_01kqr4dakpsk9rswz9pem49nz0: pnpm check:changed (https://github.com/openclaw/openclaw/actions/runs/25294515012)
@@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/onboarding: let Manual setup install optional official plugins, including ClawHub-backed diagnostics with npm fallback, and expose the external Codex plugin as a selectable provider setup choice. Thanks @vincentkoc.
 - Plugins/CLI: include package dependency install state in `openclaw plugins list --json` so scripts can spot missing plugin dependencies without runtime-loading plugins.
 - Discord/status: add degraded Discord transport and gateway event-loop starvation signals to `openclaw channels status`, `openclaw status --deep`, and fetch-timeout logs so intermittent socket resets do not look like a healthy running channel. (#76327) Thanks @joshavant.
+- Providers/OpenRouter: add opt-in response caching params that send OpenRouter's `X-OpenRouter-Cache`, `X-OpenRouter-Cache-TTL`, and cache-clear headers only on verified OpenRouter routes. Thanks @vincentkoc.
 - Plugins/update: on the beta OpenClaw update channel, default-line npm and ClawHub plugin updates try `@beta` first and fall back to default/latest when no plugin beta release exists.
 - Channels/WhatsApp: support explicit WhatsApp Channel/Newsletter `@newsletter` outbound message targets with channel session metadata instead of DM routing. Fixes #13417; carries forward the narrow outbound target idea from #13424. Thanks @vincentkoc and @agentz-manfred.
 - Exec approvals: add a tree-sitter-backed shell command explainer for future approval and command-review surfaces. (#75004) Thanks @jesse-merhi.

@@ -153,6 +153,39 @@ does **not** inject those OpenRouter-specific headers or Anthropic cache markers
 ## Advanced configuration

 <AccordionGroup>
+<Accordion title="Response caching">
+OpenRouter response caching is opt-in. Enable it per OpenRouter model with
+model params:
+
+```json5
+{
+  agents: {
+    defaults: {
+      models: {
+        "openrouter/auto": {
+          params: {
+            responseCache: true,
+            responseCacheTtlSeconds: 300,
+          },
+        },
+      },
+    },
+  },
+}
+```
+
+OpenClaw sends `X-OpenRouter-Cache: true` and, when configured,
+`X-OpenRouter-Cache-TTL`. `responseCacheClear: true` forces a refresh for
+the current request and stores the replacement response. Snake_case aliases
+(`response_cache`, `response_cache_ttl_seconds`, and
+`response_cache_clear`) are also accepted.
+
+This is separate from provider prompt caching and from OpenRouter's
+Anthropic `cache_control` markers. It is only applied on verified
+`openrouter.ai` routes, not custom proxy base URLs.
+
+</Accordion>
+
 <Accordion title="Anthropic cache markers">
 On verified OpenRouter routes, Anthropic model refs keep the
 OpenRouter-specific Anthropic `cache_control` markers that OpenClaw uses for

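For reference, a minimal TypeScript sketch of the param-to-header mapping the docs describe (illustrative only; `sketchResponseCacheHeaders` is a hypothetical name, not an OpenClaw export — the real logic is `resolveOpenRouterResponseCacheHeaders` in the source diff below):

```ts
// Hypothetical helper mirroring the documented behavior; not an OpenClaw API.
function sketchResponseCacheHeaders(params: {
  responseCache?: boolean;
  responseCacheTtlSeconds?: number;
  responseCacheClear?: boolean;
}): Record<string, string> | undefined {
  // responseCacheClear alone implies caching for the current request.
  const enabled = params.responseCache ?? (params.responseCacheClear ? true : undefined);
  if (enabled === undefined) {
    return undefined; // opt-in: no cache params means no cache headers
  }
  const headers: Record<string, string> = {
    "X-OpenRouter-Cache": enabled ? "true" : "false",
  };
  if (!enabled) {
    return headers; // an explicit opt-out sends only the disable header
  }
  if (params.responseCacheTtlSeconds !== undefined) {
    headers["X-OpenRouter-Cache-TTL"] = String(params.responseCacheTtlSeconds);
  }
  if (params.responseCacheClear) {
    headers["X-OpenRouter-Cache-Clear"] = "true";
  }
  return headers;
}

// The JSON5 example above would yield:
// sketchResponseCacheHeaders({ responseCache: true, responseCacheTtlSeconds: 300 })
//   -> { "X-OpenRouter-Cache": "true", "X-OpenRouter-Cache-TTL": "300" }
```
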
@@ -1,7 +1,10 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
 import { runExtraParamsPayloadCase } from "./pi-embedded-runner-extraparams.test-support.js";
-import { __testing as extraParamsTesting } from "./pi-embedded-runner/extra-params.js";
+import {
+  applyExtraParamsToAgent,
+  __testing as extraParamsTesting,
+} from "./pi-embedded-runner/extra-params.js";
 import {
   createOpenRouterSystemCacheWrapper,
   createOpenRouterWrapper,
@@ -39,7 +42,9 @@ beforeEach(() => {
       const skipReasoningInjection =
         params.context.modelId === "auto" || isProxyReasoningUnsupported(params.context.modelId);
       const thinkingLevel = skipReasoningInjection ? undefined : params.context.thinkingLevel;
-      return createOpenRouterSystemCacheWrapper(createOpenRouterWrapper(streamFn, thinkingLevel));
+      return createOpenRouterSystemCacheWrapper(
+        createOpenRouterWrapper(streamFn, thinkingLevel, params.context.extraParams),
+      );
     },
   });
 });
@@ -61,6 +66,101 @@ describe("applyExtraParamsToAgent OpenRouter reasoning", () => {
     expect(payload).not.toHaveProperty("reasoning_effort");
   });

+  it("forwards opt-in response cache params as OpenRouter headers", () => {
+    const calls: Array<{ headers?: Record<string, string> }> = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      calls.push({ headers: options?.headers });
+      return {} as ReturnType<StreamFn>;
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(
+      agent,
+      {
+        agents: {
+          defaults: {
+            models: {
+              "openrouter/auto": {
+                params: {
+                  responseCache: true,
+                  responseCacheTtlSeconds: 600,
+                },
+              },
+            },
+          },
+        },
+      },
+      "openrouter",
+      "auto",
+    );
+
+    void agent.streamFn?.(
+      {
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "auto",
+      } as never,
+      { messages: [] } as never,
+      {},
+    );
+
+    expect(calls[0]?.headers).toMatchObject({
+      "X-OpenRouter-Cache": "true",
+      "X-OpenRouter-Cache-TTL": "600",
+    });
+  });
+
+  it("honors narrower camelCase response cache params over wider snake_case aliases", () => {
+    const calls: Array<{ headers?: Record<string, string> }> = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      calls.push({ headers: options?.headers });
+      return {} as ReturnType<StreamFn>;
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(
+      agent,
+      {
+        agents: {
+          defaults: {
+            params: {
+              response_cache: false,
+              response_cache_ttl_seconds: 60,
+              response_cache_clear: false,
+            },
+            models: {
+              "openrouter/auto": {
+                params: {
+                  responseCache: true,
+                  responseCacheTtlSeconds: 600,
+                  responseCacheClear: true,
+                },
+              },
+            },
+          },
+        },
+      },
+      "openrouter",
+      "auto",
+    );
+
+    void agent.streamFn?.(
+      {
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "auto",
+      } as never,
+      { messages: [] } as never,
+      {},
+    );
+
+    expect(calls[0]?.headers).toMatchObject({
+      "X-OpenRouter-Cache": "true",
+      "X-OpenRouter-Cache-Clear": "true",
+      "X-OpenRouter-Cache-TTL": "600",
+    });
+  });
+
   it("injects reasoning.effort when thinkingLevel is non-off for OpenRouter", () => {
     const payload = runExtraParamsPayloadCase({
       provider: "openrouter",

@@ -116,6 +116,9 @@ export function resolveExtraParams(params: {
     merged.cachedContent = resolvedCachedContent;
     delete merged.cached_content;
   }
+  if (params.provider === "openrouter") {
+    canonicalizeOpenRouterResponseCacheParams(merged, [defaultParams, globalParams, agentParams]);
+  }

   applyDefaultOpenAIGptRuntimeParams(params, merged);

@@ -233,6 +236,9 @@ export function resolvePreparedExtraParams(params: {
     merged.cachedContent = resolvedCachedContent;
     delete merged.cached_content;
   }
+  if (params.provider === "openrouter") {
+    canonicalizeOpenRouterResponseCacheParams(merged, [resolvedExtraParams, override]);
+  }
   const cfg = params.cfg;
   const cacheKey = cfg ? resolvePreparedExtraParamsCacheKey(params) : undefined;
   if (cacheKey) {
@@ -432,6 +438,13 @@ function resolveAliasedParamValue(
   sources: Array<Record<string, unknown> | undefined>,
   snakeCaseKey: string,
   camelCaseKey: string,
 ): unknown {
+  return resolveAliasedParamValueFromKeys(sources, [snakeCaseKey, camelCaseKey]);
+}
+
+function resolveAliasedParamValueFromKeys(
+  sources: Array<Record<string, unknown> | undefined>,
+  keys: readonly string[],
+): unknown {
   let resolved: unknown = undefined;
   let seen = false;
@@ -439,17 +452,63 @@ function resolveAliasedParamValue(
     if (!source) {
       continue;
     }
-    const hasSnakeCaseKey = Object.hasOwn(source, snakeCaseKey);
-    const hasCamelCaseKey = Object.hasOwn(source, camelCaseKey);
-    if (!hasSnakeCaseKey && !hasCamelCaseKey) {
-      continue;
+    for (const key of keys) {
+      if (!Object.hasOwn(source, key)) {
+        continue;
+      }
+      resolved = source[key];
+      seen = true;
+      break;
     }
-    resolved = hasSnakeCaseKey ? source[snakeCaseKey] : source[camelCaseKey];
-    seen = true;
   }
   return seen ? resolved : undefined;
 }

+function applyCanonicalAliasedParamValue(params: {
+  merged: Record<string, unknown>;
+  sources: Array<Record<string, unknown> | undefined>;
+  keys: readonly string[];
+  canonicalKey: string;
+}): void {
+  const resolved = resolveAliasedParamValueFromKeys(params.sources, params.keys);
+  if (resolved === undefined) {
+    return;
+  }
+  for (const key of params.keys) {
+    delete params.merged[key];
+  }
+  params.merged[params.canonicalKey] = resolved;
+}
+
+function canonicalizeOpenRouterResponseCacheParams(
+  merged: Record<string, unknown>,
+  sources: Array<Record<string, unknown> | undefined>,
+): void {
+  applyCanonicalAliasedParamValue({
+    merged,
+    sources,
+    keys: ["responseCache", "response_cache"],
+    canonicalKey: "responseCache",
+  });
+  applyCanonicalAliasedParamValue({
+    merged,
+    sources,
+    keys: [
+      "responseCacheTtlSeconds",
+      "response_cache_ttl_seconds",
+      "responseCacheTtl",
+      "response_cache_ttl",
+    ],
+    canonicalKey: "responseCacheTtlSeconds",
+  });
+  applyCanonicalAliasedParamValue({
+    merged,
+    sources,
+    keys: ["responseCacheClear", "response_cache_clear"],
+    canonicalKey: "responseCacheClear",
+  });
+}
+
 function createParallelToolCallsWrapper(
   baseStreamFn: StreamFn | undefined,
   enabled: boolean,

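To make the precedence concrete, here is a standalone copy of the resolver above with a worked example (copied for illustration only; in the codebase it is module-internal):

```ts
// Standalone copy of resolveAliasedParamValueFromKeys, for a worked example.
function resolveAliasedParamValueFromKeys(
  sources: Array<Record<string, unknown> | undefined>,
  keys: readonly string[],
): unknown {
  let resolved: unknown = undefined;
  let seen = false;
  for (const source of sources) {
    if (!source) {
      continue;
    }
    for (const key of keys) {
      if (!Object.hasOwn(source, key)) {
        continue;
      }
      resolved = source[key];
      seen = true;
      break;
    }
  }
  return seen ? resolved : undefined;
}

// Later (narrower) sources win because the outer loop keeps overwriting
// `resolved`; within one source, the earlier key in `keys` (camelCase first)
// wins. This is the behavior the "narrower camelCase over wider snake_case"
// test above asserts end to end.
const defaults = { response_cache: false }; // wider scope
const model = { responseCache: true }; // narrower scope
console.log(
  resolveAliasedParamValueFromKeys([defaults, model], ["responseCache", "response_cache"]),
); // -> true
```
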
@@ -63,6 +63,118 @@ describe("proxy stream wrappers", () => {
     ]);
   });

+  it("adds opt-in OpenRouter response caching headers", () => {
+    const calls: Array<{ headers?: Record<string, string> }> = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      calls.push({ headers: options?.headers });
+      return createAssistantMessageEventStream();
+    };
+
+    const wrapped = createOpenRouterWrapper(baseStreamFn, undefined, {
+      responseCache: true,
+      responseCacheTtlSeconds: 900,
+    });
+
+    void wrapped(
+      {
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "openrouter/auto",
+        baseUrl: "https://openrouter.ai/api/v1",
+      } as Model<"openai-completions">,
+      { messages: [] },
+      {},
+    );
+
+    expect(calls[0]?.headers).toMatchObject({
+      "HTTP-Referer": "https://openclaw.ai",
+      "X-OpenRouter-Cache": "true",
+      "X-OpenRouter-Cache-TTL": "900",
+    });
+  });
+
+  it("sends OpenRouter response cache disables for preset opt-outs", () => {
+    const calls: Array<{ headers?: Record<string, string> }> = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      calls.push({ headers: options?.headers });
+      return createAssistantMessageEventStream();
+    };
+
+    const wrapped = createOpenRouterWrapper(baseStreamFn, undefined, {
+      response_cache: false,
+      response_cache_ttl_seconds: 600,
+    });
+
+    void wrapped(
+      {
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "openrouter/@preset/cached-tests",
+      } as Model<"openai-completions">,
+      { messages: [] },
+      {},
+    );
+
+    expect(calls[0]?.headers).toMatchObject({
+      "X-OpenRouter-Cache": "false",
+    });
+    expect(calls[0]?.headers).not.toHaveProperty("X-OpenRouter-Cache-TTL");
+  });
+
+  it("supports OpenRouter response cache refresh and TTL clamping", () => {
+    const calls: Array<{ headers?: Record<string, string> }> = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      calls.push({ headers: options?.headers });
+      return createAssistantMessageEventStream();
+    };
+
+    const wrapped = createOpenRouterWrapper(baseStreamFn, undefined, {
+      response_cache_clear: "true",
+      response_cache_ttl: 999999,
+    });
+
+    void wrapped(
+      {
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "openrouter/auto",
+      } as Model<"openai-completions">,
+      { messages: [] },
+      {},
+    );
+
+    expect(calls[0]?.headers).toMatchObject({
+      "X-OpenRouter-Cache": "true",
+      "X-OpenRouter-Cache-Clear": "true",
+      "X-OpenRouter-Cache-TTL": "86400",
+    });
+  });
+
+  it("does not add OpenRouter response caching headers to custom proxy routes", () => {
+    const calls: Array<{ headers?: Record<string, string> }> = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      calls.push({ headers: options?.headers });
+      return createAssistantMessageEventStream();
+    };
+
+    const wrapped = createOpenRouterWrapper(baseStreamFn, undefined, {
+      responseCache: true,
+    });
+
+    void wrapped(
+      {
+        api: "openai-completions",
+        provider: "openrouter",
+        id: "openrouter/auto",
+        baseUrl: "https://proxy.example.com/v1",
+      } as Model<"openai-completions">,
+      { messages: [] },
+      {},
+    );
+
+    expect(calls[0]?.headers).toBeUndefined();
+  });
+
   it("injects cache_control markers for declared OpenRouter Anthropic models on the default route", () => {
     const payload = runSystemCacheWrapper({});

@@ -17,6 +17,111 @@ function resolveKilocodeAppHeaders(): Record<string, string> {
   return { [KILOCODE_FEATURE_HEADER]: feature };
 }

+function readExtraParam(
+  extraParams: Record<string, unknown> | undefined,
+  keys: readonly string[],
+): unknown {
+  if (!extraParams) {
+    return undefined;
+  }
+  for (const key of keys) {
+    if (Object.hasOwn(extraParams, key)) {
+      return extraParams[key];
+    }
+  }
+  return undefined;
+}
+
+function resolveBooleanParam(value: unknown): boolean | undefined {
+  if (typeof value === "boolean") {
+    return value;
+  }
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const normalized = normalizeOptionalLowercaseString(value);
+  if (!normalized) {
+    return undefined;
+  }
+  if (["1", "true", "yes", "on", "enable", "enabled"].includes(normalized)) {
+    return true;
+  }
+  if (["0", "false", "no", "off", "disable", "disabled"].includes(normalized)) {
+    return false;
+  }
+  return undefined;
+}
+
+function resolveOpenRouterResponseCacheTtlSeconds(value: unknown): string | undefined {
+  const parsed =
+    typeof value === "number"
+      ? value
+      : typeof value === "string"
+        ? Number.parseFloat(value.trim())
+        : Number.NaN;
+  if (!Number.isFinite(parsed)) {
+    return undefined;
+  }
+  return String(Math.max(1, Math.min(86400, Math.trunc(parsed))));
+}
+
+function shouldApplyOpenRouterResponseCacheHeaders(model: Parameters<StreamFn>[0]): boolean {
+  const provider = readStringValue(model.provider);
+  const endpointClass = resolveProviderRequestPolicy({
+    provider,
+    api: readStringValue(model.api),
+    baseUrl: readStringValue(model.baseUrl),
+    capability: "llm",
+    transport: "stream",
+  }).endpointClass;
+  return (
+    endpointClass === "openrouter" ||
+    (endpointClass === "default" && normalizeOptionalLowercaseString(provider) === "openrouter")
+  );
+}
+
+function resolveOpenRouterResponseCacheHeaders(
+  model: Parameters<StreamFn>[0],
+  extraParams: Record<string, unknown> | undefined,
+): Record<string, string> | undefined {
+  if (!shouldApplyOpenRouterResponseCacheHeaders(model)) {
+    return undefined;
+  }
+  const configuredCache = resolveBooleanParam(
+    readExtraParam(extraParams, ["responseCache", "response_cache"]),
+  );
+  const clearCache = resolveBooleanParam(
+    readExtraParam(extraParams, ["responseCacheClear", "response_cache_clear"]),
+  );
+  const cacheEnabled = configuredCache ?? (clearCache ? true : undefined);
+  if (cacheEnabled === undefined) {
+    return undefined;
+  }
+
+  const headers: Record<string, string> = {
+    "X-OpenRouter-Cache": cacheEnabled ? "true" : "false",
+  };
+  if (!cacheEnabled) {
+    return headers;
+  }
+
+  const ttl = resolveOpenRouterResponseCacheTtlSeconds(
+    readExtraParam(extraParams, [
+      "responseCacheTtlSeconds",
+      "response_cache_ttl_seconds",
+      "responseCacheTtl",
+      "response_cache_ttl",
+    ]),
+  );
+  if (ttl) {
+    headers["X-OpenRouter-Cache-TTL"] = ttl;
+  }
+  if (clearCache) {
+    headers["X-OpenRouter-Cache-Clear"] = "true";
+  }
+  return headers;
+}
+
 function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkLevel): void {
   if (!payload || typeof payload !== "object") {
     return;

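As a quick sanity check on the clamping rules above, a standalone copy of the TTL helper with worked inputs (illustrative only; the function is module-internal):

```ts
// Standalone copy of resolveOpenRouterResponseCacheTtlSeconds, with examples.
function resolveOpenRouterResponseCacheTtlSeconds(value: unknown): string | undefined {
  const parsed =
    typeof value === "number"
      ? value
      : typeof value === "string"
        ? Number.parseFloat(value.trim())
        : Number.NaN;
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  // Truncate fractions, then clamp to [1, 86400] seconds (one day).
  return String(Math.max(1, Math.min(86400, Math.trunc(parsed))));
}

console.log(resolveOpenRouterResponseCacheTtlSeconds(900)); // "900"
console.log(resolveOpenRouterResponseCacheTtlSeconds(999999)); // "86400" (clamped, as the test asserts)
console.log(resolveOpenRouterResponseCacheTtlSeconds("0.25")); // "1" (truncated to 0, raised to the floor)
console.log(resolveOpenRouterResponseCacheTtlSeconds("soon")); // undefined (non-numeric values are dropped)
```
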
@@ -79,9 +184,11 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde
 export function createOpenRouterWrapper(
   baseStreamFn: StreamFn | undefined,
   thinkingLevel?: ThinkLevel,
+  extraParams?: Record<string, unknown>,
 ): StreamFn {
   const underlying = baseStreamFn ?? streamSimple;
   return (model, context, options) => {
+    const providerHeaders = resolveOpenRouterResponseCacheHeaders(model, extraParams);
     const headers = resolveProviderRequestPolicyConfig({
       provider: readStringValue(model.provider) ?? "openrouter",
       api: readStringValue(model.api),
@@ -89,6 +196,7 @@ export function createOpenRouterWrapper(
       capability: "llm",
       transport: "stream",
       callerHeaders: options?.headers,
+      providerHeaders,
       precedence: "caller-wins",
     }).headers;
     return streamWithPayloadPatch(

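The `precedence: "caller-wins"` line means caller-supplied headers take priority over the provider-derived cache headers. A minimal sketch of that merge semantics, assuming simple last-write-wins object spread (the full `resolveProviderRequestPolicyConfig` is not shown in this diff, so this is an assumption, not its implementation):

```ts
// Assumed "caller-wins" semantics: provider-derived headers act as defaults;
// anything the caller already set overrides them.
function mergeCallerWins(
  providerHeaders: Record<string, string> | undefined,
  callerHeaders: Record<string, string> | undefined,
): Record<string, string> {
  return { ...providerHeaders, ...callerHeaders };
}

// Example: a caller that already set X-OpenRouter-Cache keeps its value.
mergeCallerWins(
  { "X-OpenRouter-Cache": "true", "X-OpenRouter-Cache-TTL": "300" },
  { "X-OpenRouter-Cache": "false" },
); // -> { "X-OpenRouter-Cache": "false", "X-OpenRouter-Cache-TTL": "300" }
```
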
@@ -139,7 +139,7 @@ export function buildProviderStreamFamilyHooks(
           ctx.modelId === "auto" || isProxyReasoningUnsupported(ctx.modelId)
             ? undefined
             : ctx.thinkingLevel;
-        return createOpenRouterWrapper(ctx.streamFn, thinkingLevel);
+        return createOpenRouterWrapper(ctx.streamFn, thinkingLevel, ctx.extraParams);
       },
     };
     case "tool-stream-default-on":