mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-22 14:41:34 +00:00
fix(openrouter): gate prompt cache markers by endpoint (#60761)
* fix(openrouter): gate prompt cache markers by endpoint
* test(openrouter): use claude sonnet 4.6 cache model
This commit is contained in:
@@ -172,6 +172,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Exec approvals/channels: decouple initiating-surface approval availability from native delivery enablement so Telegram, Slack, and Discord still expose approvals when approvers exist and native target routing is configured separately. (#59776) Thanks @joelnishanth.
|
||||
- Agents/logging: keep orphaned-user transcript repair warnings focused on interactive runs, and downgrade background-trigger repairs (`heartbeat`, `cron`, `memory`, `overflow`) to debug logs to reduce false-alarm gateway noise.
|
||||
- Gateway/node pairing: require `operator.pairing` for node approvals end-to-end, while still requiring `operator.write` or `operator.admin` when the pending node commands need those higher scopes. (#60461) Thanks @eleqtrizit.
|
||||
- Providers/OpenRouter: only inject Anthropic prompt-cache `cache_control` markers on native or default OpenRouter routes (not on custom proxy URLs), and keep injecting them for native OpenRouter hosts configured behind custom provider ids. (#60761) Thanks @vincentkoc.
|
||||
|
||||
## 2026.4.1
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ export default definePluginEntry({
|
||||
const {
|
||||
buildPassthroughGeminiSanitizingReplayPolicy,
|
||||
composeProviderStreamWrappers,
|
||||
createOpenRouterSystemCacheWrapper,
|
||||
createOpenRouterWrapper,
|
||||
createProviderApiKeyAuthMethod,
|
||||
DEFAULT_CONTEXT_TOKENS,
|
||||
@@ -146,7 +145,6 @@ export default definePluginEntry({
|
||||
? (streamFn) => injectOpenRouterRouting(streamFn, providerRouting)
|
||||
: undefined,
|
||||
(streamFn) => createOpenRouterWrapper(streamFn, openRouterThinkingLevel),
|
||||
(streamFn) => createOpenRouterSystemCacheWrapper(streamFn),
|
||||
);
|
||||
},
|
||||
isCacheTtlEligible: (ctx) => isOpenRouterCacheTtlModel(ctx.modelId),
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
shouldApplySiliconFlowThinkingOffCompat,
|
||||
} from "./moonshot-stream-wrappers.js";
|
||||
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
|
||||
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
|
||||
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
|
||||
|
||||
const defaultProviderRuntimeDeps = {
|
||||
@@ -328,6 +329,8 @@ function applyPrePluginStreamWrappers(ctx: ApplyExtraParamsContext): void {
|
||||
function applyPostPluginStreamWrappers(
|
||||
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
|
||||
): void {
|
||||
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
|
||||
|
||||
if (!ctx.providerWrapperHandled) {
|
||||
// Guard Google-family payloads against invalid negative thinking budgets
|
||||
// emitted by upstream model-ID heuristics for Gemini 3.1 variants.
|
||||
|
||||
@@ -2,7 +2,10 @@ import type { StreamFn } from "@mariozechner/pi-agent-core";
|
||||
import type { Context, Model } from "@mariozechner/pi-ai";
|
||||
import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createOpenRouterWrapper } from "./proxy-stream-wrappers.js";
|
||||
import {
|
||||
createOpenRouterSystemCacheWrapper,
|
||||
createOpenRouterWrapper,
|
||||
} from "./proxy-stream-wrappers.js";
|
||||
|
||||
describe("proxy stream wrappers", () => {
|
||||
it("adds OpenRouter attribution headers to stream options", () => {
|
||||
@@ -35,4 +38,79 @@ describe("proxy stream wrappers", () => {
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("injects cache_control markers for declared OpenRouter Anthropic models on the default route", () => {
|
||||
const payload = {
|
||||
messages: [{ role: "system", content: "system prompt" }],
|
||||
};
|
||||
const baseStreamFn: StreamFn = (model, _context, options) => {
|
||||
options?.onPayload?.(payload, model);
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
|
||||
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
|
||||
void wrapped(
|
||||
{
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
id: "anthropic/claude-sonnet-4.6",
|
||||
} as Model<"openai-completions">,
|
||||
{ messages: [] },
|
||||
{},
|
||||
);
|
||||
|
||||
expect(payload.messages[0]?.content).toEqual([
|
||||
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
|
||||
]);
|
||||
});
|
||||
|
||||
it("does not inject cache_control markers for declared OpenRouter providers on custom proxy URLs", () => {
|
||||
const payload = {
|
||||
messages: [{ role: "system", content: "system prompt" }],
|
||||
};
|
||||
const baseStreamFn: StreamFn = (model, _context, options) => {
|
||||
options?.onPayload?.(payload, model);
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
|
||||
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
|
||||
void wrapped(
|
||||
{
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
id: "anthropic/claude-sonnet-4.6",
|
||||
baseUrl: "https://proxy.example.com/v1",
|
||||
} as Model<"openai-completions">,
|
||||
{ messages: [] },
|
||||
{},
|
||||
);
|
||||
|
||||
expect(payload.messages[0]?.content).toBe("system prompt");
|
||||
});
|
||||
|
||||
it("injects cache_control markers for native OpenRouter hosts behind custom provider ids", () => {
|
||||
const payload = {
|
||||
messages: [{ role: "system", content: "system prompt" }],
|
||||
};
|
||||
const baseStreamFn: StreamFn = (model, _context, options) => {
|
||||
options?.onPayload?.(payload, model);
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
|
||||
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
|
||||
void wrapped(
|
||||
{
|
||||
api: "openai-completions",
|
||||
provider: "custom-openrouter",
|
||||
id: "anthropic/claude-sonnet-4.6",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
} as Model<"openai-completions">,
|
||||
{ messages: [] },
|
||||
{},
|
||||
);
|
||||
|
||||
expect(payload.messages[0]?.content).toEqual([
|
||||
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import type { StreamFn } from "@mariozechner/pi-agent-core";
|
||||
import { streamSimple } from "@mariozechner/pi-ai";
|
||||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import { resolveProviderRequestPolicy } from "../provider-attribution.js";
|
||||
import { isProxyReasoningUnsupportedModelHint } from "../../plugin-sdk/provider-model-shared.js";
|
||||
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
|
||||
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";
|
||||
import { isOpenRouterAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
|
||||
import { isAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
|
||||
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
|
||||
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
|
||||
const KILOCODE_FEATURE_DEFAULT = "openclaw";
|
||||
@@ -58,10 +59,24 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL
|
||||
export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
return (model, context, options) => {
|
||||
const provider = typeof model.provider === "string" ? model.provider : undefined;
|
||||
const modelId = typeof model.id === "string" ? model.id : undefined;
|
||||
// Keep OpenRouter-specific cache markers on verified OpenRouter routes
|
||||
// (or the provider's default route), but not on arbitrary OpenAI proxies.
|
||||
const endpointClass = resolveProviderRequestPolicy({
|
||||
provider,
|
||||
api: typeof model.api === "string" ? model.api : undefined,
|
||||
baseUrl: typeof model.baseUrl === "string" ? model.baseUrl : undefined,
|
||||
capability: "llm",
|
||||
transport: "stream",
|
||||
}).endpointClass;
|
||||
if (
|
||||
typeof model.provider !== "string" ||
|
||||
typeof model.id !== "string" ||
|
||||
!isOpenRouterAnthropicModelRef(model.provider, model.id)
|
||||
!modelId ||
|
||||
!isAnthropicModelRef(modelId) ||
|
||||
!(
|
||||
endpointClass === "openrouter" ||
|
||||
(endpointClass === "default" && provider?.trim().toLowerCase() === "openrouter")
|
||||
)
|
||||
) {
|
||||
return underlying(model, context, options);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user