fix(openrouter): gate prompt cache markers by endpoint (#60761)

* fix(openrouter): gate prompt cache markers by endpoint

* test(openrouter): use claude sonnet 4.6 cache model
This commit is contained in:
Vincent Koc
2026-04-04 19:32:13 +09:00
committed by GitHub
parent ee742cec40
commit 0a3211df2d
5 changed files with 102 additions and 7 deletions

View File

@@ -172,6 +172,7 @@ Docs: https://docs.openclaw.ai
- Exec approvals/channels: decouple initiating-surface approval availability from native delivery enablement so Telegram, Slack, and Discord still expose approvals when approvers exist and native target routing is configured separately. (#59776) Thanks @joelnishanth.
- Agents/logging: keep orphaned-user transcript repair warnings focused on interactive runs, and downgrade background-trigger repairs (`heartbeat`, `cron`, `memory`, `overflow`) to debug logs to reduce false-alarm gateway noise.
- Gateway/node pairing: require `operator.pairing` for node approvals end-to-end, while still requiring `operator.write` or `operator.admin` when the pending node commands need those higher scopes. (#60461) Thanks @eleqtrizit.
- Providers/OpenRouter: gate Anthropic prompt-cache `cache_control` markers to native/default OpenRouter routes and preserve them for native OpenRouter hosts behind custom provider ids. Thanks @vincentkoc.
## 2026.4.1

View File

@@ -23,7 +23,6 @@ export default definePluginEntry({
const {
buildPassthroughGeminiSanitizingReplayPolicy,
composeProviderStreamWrappers,
createOpenRouterSystemCacheWrapper,
createOpenRouterWrapper,
createProviderApiKeyAuthMethod,
DEFAULT_CONTEXT_TOKENS,
@@ -146,7 +145,6 @@ export default definePluginEntry({
? (streamFn) => injectOpenRouterRouting(streamFn, providerRouting)
: undefined,
(streamFn) => createOpenRouterWrapper(streamFn, openRouterThinkingLevel),
(streamFn) => createOpenRouterSystemCacheWrapper(streamFn),
);
},
isCacheTtlEligible: (ctx) => isOpenRouterCacheTtlModel(ctx.modelId),

View File

@@ -17,6 +17,7 @@ import {
shouldApplySiliconFlowThinkingOffCompat,
} from "./moonshot-stream-wrappers.js";
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
const defaultProviderRuntimeDeps = {
@@ -328,6 +329,8 @@ function applyPrePluginStreamWrappers(ctx: ApplyExtraParamsContext): void {
function applyPostPluginStreamWrappers(
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
): void {
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
if (!ctx.providerWrapperHandled) {
// Guard Google-family payloads against invalid negative thinking budgets
// emitted by upstream model-ID heuristics for Gemini 3.1 variants.

View File

@@ -2,7 +2,10 @@ import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { Context, Model } from "@mariozechner/pi-ai";
import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { createOpenRouterWrapper } from "./proxy-stream-wrappers.js";
import {
createOpenRouterSystemCacheWrapper,
createOpenRouterWrapper,
} from "./proxy-stream-wrappers.js";
describe("proxy stream wrappers", () => {
it("adds OpenRouter attribution headers to stream options", () => {
@@ -35,4 +38,79 @@ describe("proxy stream wrappers", () => {
},
]);
});
// Happy path: provider id "openrouter" with no custom baseUrl resolves to the
// default OpenRouter route, so the wrapper must rewrite the system message.
it("injects cache_control markers for declared OpenRouter Anthropic models on the default route", () => {
// Mutable payload captured by reference; the wrapper mutates it through onPayload.
const payload = {
messages: [{ role: "system", content: "system prompt" }],
};
// Stub stream fn: immediately hands the payload to onPayload so the
// wrapper's patch is observable, then returns an empty event stream.
const baseStreamFn: StreamFn = (model, _context, options) => {
options?.onPayload?.(payload, model);
return createAssistantMessageEventStream();
};
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
// Fire-and-forget: we only care about the synchronous payload mutation.
void wrapped(
{
api: "openai-completions",
provider: "openrouter",
id: "anthropic/claude-sonnet-4.6",
} as Model<"openai-completions">,
{ messages: [] },
{},
);
// String content is expanded into a text block carrying the ephemeral
// cache_control marker (Anthropic prompt-caching format).
expect(payload.messages[0]?.content).toEqual([
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
]);
});
// Negative path: an "openrouter" provider id pointed at a non-OpenRouter
// baseUrl (arbitrary OpenAI-compatible proxy) must NOT receive cache markers.
it("does not inject cache_control markers for declared OpenRouter providers on custom proxy URLs", () => {
// Payload shared by reference so any wrapper mutation would be visible.
const payload = {
messages: [{ role: "system", content: "system prompt" }],
};
const baseStreamFn: StreamFn = (model, _context, options) => {
options?.onPayload?.(payload, model);
return createAssistantMessageEventStream();
};
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
void wrapped(
{
api: "openai-completions",
provider: "openrouter",
id: "anthropic/claude-sonnet-4.6",
// Custom proxy host — endpoint classification should not be "openrouter".
baseUrl: "https://proxy.example.com/v1",
} as Model<"openai-completions">,
{ messages: [] },
{},
);
// toBe (identity) asserts the string content was left completely untouched,
// not merely deep-equal after a rewrite.
expect(payload.messages[0]?.content).toBe("system prompt");
});
// Edge case: a custom provider id ("custom-openrouter") whose baseUrl is the
// genuine openrouter.ai host — endpoint classification, not the provider name,
// should decide that cache markers are still applied.
it("injects cache_control markers for native OpenRouter hosts behind custom provider ids", () => {
const payload = {
messages: [{ role: "system", content: "system prompt" }],
};
const baseStreamFn: StreamFn = (model, _context, options) => {
options?.onPayload?.(payload, model);
return createAssistantMessageEventStream();
};
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
void wrapped(
{
api: "openai-completions",
// Provider id is NOT "openrouter" — only the host identifies the route.
provider: "custom-openrouter",
id: "anthropic/claude-sonnet-4.6",
baseUrl: "https://openrouter.ai/api/v1",
} as Model<"openai-completions">,
{ messages: [] },
{},
);
// Same ephemeral cache marker expansion as the default-route case.
expect(payload.messages[0]?.content).toEqual([
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
]);
});
});

View File

@@ -1,10 +1,11 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import { streamSimple } from "@mariozechner/pi-ai";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { resolveProviderRequestPolicy } from "../provider-attribution.js";
import { isProxyReasoningUnsupportedModelHint } from "../../plugin-sdk/provider-model-shared.js";
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";
import { isOpenRouterAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
import { isAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
const KILOCODE_FEATURE_DEFAULT = "openclaw";
@@ -58,10 +59,24 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL
export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
const underlying = baseStreamFn ?? streamSimple;
return (model, context, options) => {
const provider = typeof model.provider === "string" ? model.provider : undefined;
const modelId = typeof model.id === "string" ? model.id : undefined;
// Keep OpenRouter-specific cache markers on verified OpenRouter routes
// (or the provider's default route), but not on arbitrary OpenAI proxies.
const endpointClass = resolveProviderRequestPolicy({
provider,
api: typeof model.api === "string" ? model.api : undefined,
baseUrl: typeof model.baseUrl === "string" ? model.baseUrl : undefined,
capability: "llm",
transport: "stream",
}).endpointClass;
if (
typeof model.provider !== "string" ||
typeof model.id !== "string" ||
!isOpenRouterAnthropicModelRef(model.provider, model.id)
!modelId ||
!isAnthropicModelRef(modelId) ||
!(
endpointClass === "openrouter" ||
(endpointClass === "default" && provider?.trim().toLowerCase() === "openrouter")
)
) {
return underlying(model, context, options);
}