fix(agents): strip prompt cache for non-OpenAI responses endpoints (#49877) thanks @ShaunTsai

Fixes #48155

Co-authored-by: Shaun Tsai <13811075+ShaunTsai@users.noreply.github.com>
Co-authored-by: frankekn <4488090+frankekn@users.noreply.github.com>
This commit is contained in:
Shaun Tsai
2026-03-19 15:12:29 +08:00
committed by GitHub
parent b965ef3802
commit bcc725ffe2
3 changed files with 100 additions and 1 deletions

View File

@@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai
- Plugins/imports: fix stale googlechat runtime-api import paths and signal SDK circular re-exports broken by recent plugin-sdk refactors. Thanks @BunsDev.
- Google auth/Node 25: patch `gaxios` to use native fetch without injecting `globalThis.window`, while translating proxy and mTLS transport settings so Google Vertex and Google Chat auth keep working on Node 25. (#47914) Thanks @pdd-cli.
- Gateway/startup: load bundled channel plugins from compiled `dist/extensions` entries in built installs, so gateway boot no longer recompiles bundled extension TypeScript on every startup and WhatsApp-class cold starts drop back to seconds instead of tens of seconds or worse. (#47560) Thanks @ngutman.
- Agents/openai-responses: strip `prompt_cache_key` and `prompt_cache_retention` for non-OpenAI-compatible Responses endpoints while keeping them on direct OpenAI and Azure OpenAI paths, so third-party OpenAI-compatible providers no longer reject those requests with HTTP 400. (#49877) Thanks @ShaunTsai.
- Plugins/context engines: enforce owner-aware context-engine registration on both loader and public SDK paths so plugins cannot spoof privileged ownership, claim the core `legacy` engine id, or overwrite an existing engine id through direct SDK imports. (#47595) Thanks @vincentkoc.
- Browser/remote CDP: honor strict browser SSRF policy during remote CDP reachability and `/json/version` discovery checks, redact sensitive `cdpUrl` tokens from status output, and warn when remote CDP targets private/internal hosts.
- Gateway/plugins: pin runtime webhook routes to the gateway startup registry so channel webhooks keep working across plugin-registry churn, and make plugin auth + dispatch resolve routes from the same live HTTP-route registry. (#47902) Fixes #46924 and #47041. Thanks @steipete.

View File

@@ -2291,4 +2291,83 @@ describe("applyExtraParamsToAgent", () => {
expect(run().store).toBe(false);
},
);
it("strips prompt cache fields for non-OpenAI openai-responses endpoints", () => {
  // A custom third-party base URL must trigger removal of both cache fields,
  // since such providers can reject them with HTTP 400.
  const mutated = runResponsesPayloadMutationCase({
    applyProvider: "custom-proxy",
    applyModelId: "some-model",
    model: {
      api: "openai-responses",
      provider: "custom-proxy",
      id: "some-model",
      baseUrl: "https://my-proxy.example.com/v1",
    } as unknown as Model<"openai-responses">,
    payload: {
      store: false,
      prompt_cache_key: "session-xyz",
      prompt_cache_retention: "24h",
    },
  });
  for (const field of ["prompt_cache_key", "prompt_cache_retention"]) {
    expect(mutated).not.toHaveProperty(field);
  }
});
it("keeps prompt cache fields for direct OpenAI openai-responses endpoints", () => {
  // The official OpenAI base URL is a direct path: cache fields must survive.
  const directOpenAIModel = {
    api: "openai-responses",
    provider: "openai",
    id: "gpt-5",
    baseUrl: "https://api.openai.com/v1",
  } as unknown as Model<"openai-responses">;
  const mutated = runResponsesPayloadMutationCase({
    applyProvider: "openai",
    applyModelId: "gpt-5",
    model: directOpenAIModel,
    payload: {
      store: false,
      prompt_cache_key: "session-123",
      prompt_cache_retention: "24h",
    },
  });
  expect(mutated.prompt_cache_key).toBe("session-123");
  expect(mutated.prompt_cache_retention).toBe("24h");
});
it("keeps prompt cache fields for direct Azure OpenAI openai-responses endpoints", () => {
  // Azure OpenAI endpoints support prompt caching, so nothing is stripped.
  const azureModel = {
    api: "openai-responses",
    provider: "azure-openai-responses",
    id: "gpt-4o",
    baseUrl: "https://example.openai.azure.com/openai/v1",
  } as unknown as Model<"openai-responses">;
  const mutated = runResponsesPayloadMutationCase({
    applyProvider: "azure-openai-responses",
    applyModelId: "gpt-4o",
    model: azureModel,
    payload: {
      store: false,
      prompt_cache_key: "session-azure",
      prompt_cache_retention: "24h",
    },
  });
  expect(mutated.prompt_cache_key).toBe("session-azure");
  expect(mutated.prompt_cache_retention).toBe("24h");
});
it("keeps prompt cache fields when openai-responses baseUrl is omitted", () => {
  // No baseUrl means the default OpenAI endpoint is used — a direct path,
  // so the cache fields must be preserved.
  const mutated = runResponsesPayloadMutationCase({
    applyProvider: "openai",
    applyModelId: "gpt-5",
    model: {
      api: "openai-responses",
      provider: "openai",
      id: "gpt-5",
    } as unknown as Model<"openai-responses">,
    payload: {
      store: false,
      prompt_cache_key: "session-default",
      prompt_cache_retention: "24h",
    },
  });
  expect(mutated.prompt_cache_key).toBe("session-default");
  expect(mutated.prompt_cache_retention).toBe("24h");
});
});

View File

@@ -154,10 +154,23 @@ function shouldStripResponsesStore(
return OPENAI_RESPONSES_APIS.has(model.api) && model.compat?.supportsStore === false;
}
/**
 * Decides whether `prompt_cache_key` / `prompt_cache_retention` should be
 * removed from an openai-responses payload.
 *
 * Stripping happens only when the model targets a custom (non-direct-OpenAI)
 * base URL; direct OpenAI / Azure OpenAI paths keep the fields.
 */
function shouldStripResponsesPromptCache(model: { api?: unknown; baseUrl?: unknown }): boolean {
  const { api, baseUrl } = model;
  if (typeof api !== "string" || !OPENAI_RESPONSES_APIS.has(api)) {
    return false;
  }
  // An absent or blank baseUrl means pi-ai targets the default OpenAI
  // endpoint — a direct path, so the prompt cache fields stay.
  if (typeof baseUrl !== "string" || baseUrl.trim() === "") {
    return false;
  }
  return !isDirectOpenAIBaseUrl(baseUrl);
}
function applyOpenAIResponsesPayloadOverrides(params: {
payloadObj: Record<string, unknown>;
forceStore: boolean;
stripStore: boolean;
stripPromptCache: boolean;
useServerCompaction: boolean;
compactThreshold: number;
}): void {
@@ -167,6 +180,10 @@ function applyOpenAIResponsesPayloadOverrides(params: {
if (params.stripStore) {
delete params.payloadObj.store;
}
if (params.stripPromptCache) {
delete params.payloadObj.prompt_cache_key;
delete params.payloadObj.prompt_cache_retention;
}
if (params.useServerCompaction && params.payloadObj.context_management === undefined) {
params.payloadObj.context_management = [
{
@@ -297,7 +314,8 @@ export function createOpenAIResponsesContextManagementWrapper(
const forceStore = shouldForceResponsesStore(model);
const useServerCompaction = shouldEnableOpenAIResponsesServerCompaction(model, extraParams);
const stripStore = shouldStripResponsesStore(model, forceStore);
if (!forceStore && !useServerCompaction && !stripStore) {
const stripPromptCache = shouldStripResponsesPromptCache(model);
if (!forceStore && !useServerCompaction && !stripStore && !stripPromptCache) {
return underlying(model, context, options);
}
@@ -313,6 +331,7 @@ export function createOpenAIResponsesContextManagementWrapper(
payloadObj: payload as Record<string, unknown>,
forceStore,
stripStore,
stripPromptCache,
useServerCompaction,
compactThreshold,
});