From bcc725ffe2c3783f4d8fdbf6b7727c357cdd643a Mon Sep 17 00:00:00 2001 From: Shaun Tsai Date: Thu, 19 Mar 2026 15:12:29 +0800 Subject: [PATCH] fix(agents): strip prompt cache for non-OpenAI responses endpoints (#49877) thanks @ShaunTsai Fixes #48155 Co-authored-by: Shaun Tsai <13811075+ShaunTsai@users.noreply.github.com> Co-authored-by: frankekn <4488090+frankekn@users.noreply.github.com> --- CHANGELOG.md | 1 + .../pi-embedded-runner-extraparams.test.ts | 79 +++++++++++++++++++ .../openai-stream-wrappers.ts | 21 ++++- 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa7100d461..c5a376f35bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai - Plugins/imports: fix stale googlechat runtime-api import paths and signal SDK circular re-exports broken by recent plugin-sdk refactors. Thanks @BunsDev. - Google auth/Node 25: patch `gaxios` to use native fetch without injecting `globalThis.window`, while translating proxy and mTLS transport settings so Google Vertex and Google Chat auth keep working on Node 25. (#47914) Thanks @pdd-cli. - Gateway/startup: load bundled channel plugins from compiled `dist/extensions` entries in built installs, so gateway boot no longer recompiles bundled extension TypeScript on every startup and WhatsApp-class cold starts drop back to seconds instead of tens of seconds or worse. (#47560) Thanks @ngutman. +- Agents/openai-responses: strip `prompt_cache_key` and `prompt_cache_retention` for non-OpenAI-compatible Responses endpoints while keeping them on direct OpenAI and Azure OpenAI paths, so third-party OpenAI-compatible providers no longer reject those requests with HTTP 400. (#49877) Thanks @ShaunTsai. - Plugins/context engines: enforce owner-aware context-engine registration on both loader and public SDK paths so plugins cannot spoof privileged ownership, claim the core `legacy` engine id, or overwrite an existing engine id through direct SDK imports. 
(#47595) Thanks @vincentkoc. - Browser/remote CDP: honor strict browser SSRF policy during remote CDP reachability and `/json/version` discovery checks, redact sensitive `cdpUrl` tokens from status output, and warn when remote CDP targets private/internal hosts. - Gateway/plugins: pin runtime webhook routes to the gateway startup registry so channel webhooks keep working across plugin-registry churn, and make plugin auth + dispatch resolve routes from the same live HTTP-route registry. (#47902) Fixes #46924 and #47041. Thanks @steipete. diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 685976bf63d..b176de6fab5 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -2291,4 +2291,83 @@ describe("applyExtraParamsToAgent", () => { expect(run().store).toBe(false); }, ); + + it("strips prompt cache fields for non-OpenAI openai-responses endpoints", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "custom-proxy", + applyModelId: "some-model", + model: { + api: "openai-responses", + provider: "custom-proxy", + id: "some-model", + baseUrl: "https://my-proxy.example.com/v1", + } as unknown as Model<"openai-responses">, + payload: { + store: false, + prompt_cache_key: "session-xyz", + prompt_cache_retention: "24h", + }, + }); + expect(payload).not.toHaveProperty("prompt_cache_key"); + expect(payload).not.toHaveProperty("prompt_cache_retention"); + }); + + it("keeps prompt cache fields for direct OpenAI openai-responses endpoints", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5", + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5", + baseUrl: "https://api.openai.com/v1", + } as unknown as Model<"openai-responses">, + payload: { + store: false, + prompt_cache_key: "session-123", + prompt_cache_retention: "24h", + }, + }); + 
expect(payload.prompt_cache_key).toBe("session-123"); + expect(payload.prompt_cache_retention).toBe("24h"); + }); + + it("keeps prompt cache fields for direct Azure OpenAI openai-responses endpoints", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "azure-openai-responses", + applyModelId: "gpt-4o", + model: { + api: "openai-responses", + provider: "azure-openai-responses", + id: "gpt-4o", + baseUrl: "https://example.openai.azure.com/openai/v1", + } as unknown as Model<"openai-responses">, + payload: { + store: false, + prompt_cache_key: "session-azure", + prompt_cache_retention: "24h", + }, + }); + expect(payload.prompt_cache_key).toBe("session-azure"); + expect(payload.prompt_cache_retention).toBe("24h"); + }); + + it("keeps prompt cache fields when openai-responses baseUrl is omitted", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5", + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5", + } as unknown as Model<"openai-responses">, + payload: { + store: false, + prompt_cache_key: "session-default", + prompt_cache_retention: "24h", + }, + }); + expect(payload.prompt_cache_key).toBe("session-default"); + expect(payload.prompt_cache_retention).toBe("24h"); + }); }); diff --git a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts index 4131a33f08d..a4433f65b10 100644 --- a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts @@ -154,10 +154,23 @@ function shouldStripResponsesStore( return OPENAI_RESPONSES_APIS.has(model.api) && model.compat?.supportsStore === false; } +function shouldStripResponsesPromptCache(model: { api?: unknown; baseUrl?: unknown }): boolean { + if (typeof model.api !== "string" || !OPENAI_RESPONSES_APIS.has(model.api)) { + return false; + } + // Missing baseUrl means pi-ai will use the default OpenAI 
endpoint, so keep + // prompt cache fields for that direct path. + if (typeof model.baseUrl !== "string" || !model.baseUrl.trim()) { + return false; + } + return !isDirectOpenAIBaseUrl(model.baseUrl); +} + function applyOpenAIResponsesPayloadOverrides(params: { payloadObj: Record<string, unknown>; forceStore: boolean; stripStore: boolean; + stripPromptCache: boolean; useServerCompaction: boolean; compactThreshold: number; }): void { @@ -167,6 +180,10 @@ function applyOpenAIResponsesPayloadOverrides(params: { if (params.stripStore) { delete params.payloadObj.store; } + if (params.stripPromptCache) { + delete params.payloadObj.prompt_cache_key; + delete params.payloadObj.prompt_cache_retention; + } if (params.useServerCompaction && params.payloadObj.context_management === undefined) { params.payloadObj.context_management = [ { @@ -297,7 +314,8 @@ export function createOpenAIResponsesContextManagementWrapper( const forceStore = shouldForceResponsesStore(model); const useServerCompaction = shouldEnableOpenAIResponsesServerCompaction(model, extraParams); const stripStore = shouldStripResponsesStore(model, forceStore); - if (!forceStore && !useServerCompaction && !stripStore) { + const stripPromptCache = shouldStripResponsesPromptCache(model); + if (!forceStore && !useServerCompaction && !stripStore && !stripPromptCache) { return underlying(model, context, options); } @@ -313,6 +331,7 @@ export function createOpenAIResponsesContextManagementWrapper( payloadObj: payload as Record<string, unknown>, forceStore, stripStore, + stripPromptCache, useServerCompaction, compactThreshold, });