From 27e467ad230ffa5ad4dad6f15b68f2b5bd53c0b1 Mon Sep 17 00:00:00 2001
From: mkdev11
Date: Sun, 3 May 2026 19:05:23 +0200
Subject: [PATCH] fix(openai-codex): avoid stale Responses replay state

---
 CHANGELOG.md                                |   1 +
 docs/reference/transcript-hygiene.md        |   1 +
 extensions/openai/transport-policy.test.ts  |  26 +++
 extensions/openai/transport-policy.ts       |  11 ++
 src/agents/openai-transport-stream.test.ts  | 203 ++++++++++++++++++++-
 src/agents/openai-transport-stream.ts       |  45 ++++-
 6 files changed, 273 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48a479053bd..0ad07570479 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -403,6 +403,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/config: deduplicate identical manifest compatibility diagnostics when an explicitly configured plugin overrides another discovered candidate, so external channel plugins do not print the same missing `channelConfigs` warning repeatedly during install and enable. Thanks @vincentkoc.
 - Discord/status: honor explicit `messages.statusReactions.enabled: true` in tool-only guild channels so queued ack reactions can progress through thinking/done lifecycle reactions instead of stopping at the initial emoji. Thanks @Marvinthebored.
 - Discord/native commands: compare Discord-normalized slash-command descriptions and localized descriptions during reconcile so CJK or multiline command text no longer triggers redundant startup PATCH bursts and rate-limit 429s. Fixes #76587. Thanks @zhengsx.
+- Agents/OpenAI Codex: scope ChatGPT Codex Responses request identity to each turn, strip the unsupported native Codex `prompt_cache_key`, and avoid replaying prior Responses reasoning/message/function item IDs so tool-call turns do not feed stale state into later Telegram replies. Refs #76413.
 - Agents/OpenAI: omit Chat Completions `reasoning_effort` for `gpt-5.4-mini` only when function tools are present while preserving tool-free Chat and Responses reasoning support, preventing Telegram-routed fallback runs from hanging after OpenAI rejects tool payloads. Fixes #76176. Thanks @ThisIsAdilah and @chinar-amrutkar.
 - Telegram: reuse the successful startup `getMe` probe for grammY polling startup and continue into `getUpdates` after recoverable `deleteWebhook` cleanup failures, reducing high-latency Bot API control-plane calls before long polling starts. Refs #76388. Thanks @jackiedepp.
 - Gateway/diagnostics: merge session id/key aliases in diagnostic session state and activity tracking so completed runs no longer leave stale queued work behind that keeps liveness samples at warning level.
diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md
index cfd4e2d4497..c710ef35c7a 100644
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -117,6 +117,7 @@ inter-session user turns that only have provenance metadata.
 - Image sanitization only.
 - Drop orphaned reasoning signatures (standalone reasoning items without a following content block) for OpenAI Responses/Codex transcripts, and drop replayable OpenAI reasoning after a model route switch.
 - Preserve replayable OpenAI Responses reasoning item payloads, including encrypted empty-summary items, so manual/WebSocket replay keeps required `rs_*` state paired with assistant output items.
+- Native ChatGPT Codex Responses is the exception: to avoid stale backend replay across turns, OpenClaw neither replays prior Responses reasoning/message/function item IDs nor sends a session-scoped `prompt_cache_key`.
- No tool call id sanitization. - Tool result pairing repair may move real matched outputs and synthesize Codex-style `aborted` outputs for missing tool calls. - No turn validation or reordering. diff --git a/extensions/openai/transport-policy.test.ts b/extensions/openai/transport-policy.test.ts index 65c05c9b371..2cddd1f70f7 100644 --- a/extensions/openai/transport-policy.test.ts +++ b/extensions/openai/transport-policy.test.ts @@ -67,6 +67,32 @@ describe("openai transport policy", () => { ).toBeUndefined(); }); + it("uses turn-scoped request identity for ChatGPT Codex stream turns", () => { + expect( + resolveOpenAITransportTurnState({ + provider: "openai-codex", + modelId: "gpt-5.4", + model: { + ...nativeModel, + provider: "openai-codex", + api: "openai-codex-responses", + baseUrl: "https://chatgpt.com/backend-api", + }, + sessionId: "session-123", + turnId: "turn-123", + attempt: 2, + transport: "stream", + }), + ).toMatchObject({ + headers: { + "x-client-request-id": "turn-123", + "x-openclaw-session-id": "session-123", + "x-openclaw-turn-id": "turn-123", + "x-openclaw-turn-attempt": "2", + }, + }); + }); + it("returns websocket session headers and cooldown for native routes", () => { expect( resolveOpenAIWebSocketSessionPolicy({ diff --git a/extensions/openai/transport-policy.ts b/extensions/openai/transport-policy.ts index cd69858ee77..50f9a710943 100644 --- a/extensions/openai/transport-policy.ts +++ b/extensions/openai/transport-policy.ts @@ -46,6 +46,13 @@ function usesKnownNativeOpenAIRoute(provider: string, baseUrl?: string): boolean return false; } +function usesNativeOpenAICodexRoute(provider: string, baseUrl?: string): boolean { + const normalizedProvider = normalizeProviderId(provider); + return ( + normalizedProvider === OPENAI_CODEX_PROVIDER_ID && (!baseUrl || isOpenAICodexBaseUrl(baseUrl)) + ); +} + function resolveSessionHeaders(params: { provider: string; baseUrl?: string; @@ -78,10 +85,14 @@ export function resolveOpenAITransportTurnState( const turnId = normalizeIdentityValue(ctx.turnId); const attempt = String(Math.max(1, ctx.attempt)); + const requestId = usesNativeOpenAICodexRoute(ctx.provider, ctx.model?.baseUrl) + ? turnId || `${sessionHeaders["x-openclaw-session-id"] ?? 
"session"}:${attempt}` + : sessionHeaders["x-client-request-id"]; return { headers: { ...sessionHeaders, + "x-client-request-id": requestId, "x-openclaw-turn-id": turnId, "x-openclaw-turn-attempt": attempt, }, diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index d7393794127..cde72f1e6be 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1059,7 +1059,7 @@ describe("openai transport stream", () => { expect(params.input?.some((item) => item.role === "system" || item.role === "developer")).toBe( false, ); - expect(params.prompt_cache_key).toBe("session-123"); + expect(params).not.toHaveProperty("prompt_cache_key"); expect(params.store).toBe(false); expect(params).not.toHaveProperty("metadata"); expect(params).not.toHaveProperty("max_output_tokens"); @@ -1097,7 +1097,7 @@ describe("openai transport stream", () => { payload, ); - expect(sanitized.prompt_cache_key).toBe("session-123"); + expect(sanitized).not.toHaveProperty("prompt_cache_key"); expect(sanitized).not.toHaveProperty("metadata"); expect(sanitized).not.toHaveProperty("max_output_tokens"); expect(sanitized).not.toHaveProperty("prompt_cache_retention"); @@ -1178,6 +1178,197 @@ describe("openai transport stream", () => { expect(sanitized).toEqual(payload); }); + it("omits prior Responses replay item ids for native Codex responses", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://chatgpt.com/backend-api", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-codex-responses">, + { + systemPrompt: "system", + messages: [ + { + role: "assistant", + api: "openai-codex-responses", + provider: "openai-codex", + model: "gpt-5.4", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "toolUse", + timestamp: 1, + content: [ + { + type: "thinking", + thinking: "Need a tool.", + thinkingSignature: JSON.stringify({ + type: "reasoning", + id: "rs_prior", + encrypted_content: "ciphertext", + }), + }, + { + type: "text", + text: "Checking the price.", + textSignature: JSON.stringify({ + v: 1, + id: "msg_prior", + phase: "commentary", + }), + }, + { + type: "toolCall", + id: "call_abc|fc_prior", + name: "price_lookup", + arguments: { symbol: "SOL" }, + }, + ], + }, + { + role: "toolResult", + toolCallId: "call_abc|fc_prior", + toolName: "price_lookup", + content: [{ type: "text", text: "$83.95" }], + isError: false, + timestamp: 2, + }, + { role: "user", content: "what is the capital of the philippines", timestamp: 3 }, + ], + tools: [], + } as never, + { sessionId: "session-123" }, + ) as { + input?: Array<{ + type?: string; + role?: string; + id?: string; + call_id?: string; + phase?: string; + }>; + }; + + expect(params.input?.some((item) => item.type === "reasoning")).toBe(false); + const assistantMessage = params.input?.find( + (item) => item.type === "message" && item.role === "assistant", + ); + expect(assistantMessage).toMatchObject({ + type: "message", + role: "assistant", + phase: "commentary", + }); + expect(assistantMessage?.id).toBeUndefined(); + const functionCall = params.input?.find((item) => item.type === "function_call"); + expect(functionCall).toMatchObject({ + 
type: "function_call", + call_id: "call_abc", + }); + expect(functionCall?.id).toBeUndefined(); + }); + + it("preserves prior Responses replay item ids for custom Codex-compatible responses", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://proxy.example.com/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-codex-responses">, + { + systemPrompt: "system", + messages: [ + { + role: "assistant", + api: "openai-codex-responses", + provider: "openai-codex", + model: "gpt-5.4", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "toolUse", + timestamp: 1, + content: [ + { + type: "thinking", + thinking: "Need a tool.", + thinkingSignature: JSON.stringify({ + type: "reasoning", + id: "rs_prior", + encrypted_content: "ciphertext", + }), + }, + { + type: "text", + text: "Checking the price.", + textSignature: JSON.stringify({ + v: 1, + id: "msg_prior", + phase: "commentary", + }), + }, + { + type: "toolCall", + id: "call_abc|fc_prior", + name: "price_lookup", + arguments: { symbol: "SOL" }, + }, + ], + }, + ], + tools: [], + } as never, + { sessionId: "session-123" }, + ) as { + input?: Array<{ + type?: string; + role?: string; + id?: string; + call_id?: string; + phase?: string; + }>; + }; + + expect(params.input?.some((item) => item.type === "reasoning")).toBe(true); + const assistantMessage = params.input?.find( + (item) => item.type === "message" && item.role === "assistant", + ); + expect(assistantMessage).toMatchObject({ + type: "message", + role: "assistant", + id: "msg_prior", + phase: "commentary", + }); + const functionCall = params.input?.find((item) => item.type === "function_call"); + expect(functionCall).toMatchObject({ + type: "function_call", + id: "fc_prior", + call_id: "call_abc", + }); + }); + it("adds minimal user input for Codex responses when only the system prompt is present", () => { const params = buildOpenAIResponsesParams( { @@ -1492,7 +1683,7 @@ describe("openai transport stream", () => { baseUrl: "https://proxy.example.com/v1", }, }, - ])("replays assistant phase metadata for $label responses payloads", ({ model }) => { + ])("replays assistant phase metadata for $label responses payloads", ({ label, model }) => { const params = buildOpenAIResponsesParams( { ...model, @@ -1548,9 +1739,13 @@ describe("openai transport stream", () => { const assistantItem = params.input?.find((item) => item.role === "assistant"); expect(assistantItem).toMatchObject({ role: "assistant", - id: "msg_commentary", phase: "commentary", }); + if (label === "openai-codex") { + expect(assistantItem?.id).toBeUndefined(); + } else { + expect(assistantItem?.id).toBe("msg_commentary"); + } }); it("strips the internal cache boundary from OpenAI system prompts", () => { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 2b16ca25706..7c8acb12cb4 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -17,7 +17,9 @@ import type { ResponseCreateParamsStreaming, ResponseFunctionCallOutputItemList, ResponseInput, + ResponseInputItem, ResponseInputMessageContentList, + ResponseOutputMessage, } from "openai/resources/responses/responses.js"; import type 
{ ModelCompatConfig } from "../config/types.models.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
@@ -57,6 +59,8 @@ const DEFAULT_AZURE_OPENAI_API_VERSION = "2024-12-01-preview";
 const OPENAI_CODEX_RESPONSES_EMPTY_INPUT_TEXT = " ";
 const log = createSubsystemLogger("openai-transport");
 
+type ReplayableResponseOutputMessage = Omit<ResponseOutputMessage, "id"> & { id?: string };
+
 type BaseStreamOptions = {
   temperature?: number;
   maxTokens?: number;
@@ -211,9 +215,16 @@ function convertResponsesMessages(
   model: Model,
   context: Context,
   allowedToolCallProviders: Set<string>,
-  options?: { includeSystemPrompt?: boolean; supportsDeveloperRole?: boolean },
+  options?: {
+    includeSystemPrompt?: boolean;
+    supportsDeveloperRole?: boolean;
+    replayReasoningItems?: boolean;
+    replayResponsesItemIds?: boolean;
+  },
 ): ResponseInput {
   const messages: ResponseInput = [];
+  const shouldReplayReasoningItems = options?.replayReasoningItems ?? true;
+  const shouldReplayResponsesItemIds = options?.replayResponsesItemIds ?? true;
   const normalizeIdPart = (part: string) => {
     const sanitized = part.replace(/[^a-zA-Z0-9_-]/g, "_");
     const normalized = sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
@@ -287,15 +298,18 @@
       msg.model !== model.id && msg.provider === model.provider && msg.api === model.api;
     for (const block of msg.content) {
       if (block.type === "thinking") {
-        if (block.thinkingSignature) {
+        if (shouldReplayReasoningItems && block.thinkingSignature) {
           output.push(JSON.parse(block.thinkingSignature));
         }
       } else if (block.type === "text") {
-        let msgId = parseTextSignature(block.textSignature)?.id ?? `msg_${msgIndex}`;
-        if (msgId.length > 64) {
+        const textSignature = parseTextSignature(block.textSignature);
+        let msgId = shouldReplayResponsesItemIds
+          ? (textSignature?.id ?? `msg_${msgIndex}`)
+          : undefined;
+        if (msgId && msgId.length > 64) {
           msgId = `msg_${shortHash(msgId)}`;
         }
-        output.push({
+        const messageItem: ReplayableResponseOutputMessage = {
           type: "message",
           role: "assistant",
           content: [
@@ -306,12 +320,16 @@
             },
           ],
           status: "completed",
-          id: msgId,
-          phase: parseTextSignature(block.textSignature)?.phase,
-        });
+          ...(msgId ? { id: msgId } : {}),
+          phase: textSignature?.phase,
+        };
+        output.push(messageItem as ResponseInputItem);
       } else if (block.type === "toolCall") {
         const [callId, itemIdRaw] = block.id.split("|");
-        const itemId = isDifferentModel && itemIdRaw?.startsWith("fc_") ? undefined : itemIdRaw;
+        const itemId =
+          shouldReplayResponsesItemIds && !(isDifferentModel && itemIdRaw?.startsWith("fc_"))
+            ? itemIdRaw
+            : undefined;
         output.push({
           type: "function_call",
           id: itemId,
@@ -909,6 +927,7 @@ function usesNativeOpenAICodexResponsesBackend(model: Model): boolean {
 const OPENAI_CODEX_RESPONSES_UNSUPPORTED_PARAMS = [
   "max_output_tokens",
   "metadata",
+  "prompt_cache_key",
   "prompt_cache_retention",
   "service_tier",
   "temperature",
@@ -957,6 +976,7 @@ export function buildOpenAIResponsesParams(
   metadata?: Record<string, string>,
 ) {
   const isCodexResponses = isOpenAICodexResponsesModel(model);
+  const isNativeCodexResponses = usesNativeOpenAICodexResponsesBackend(model);
   const compat = getCompat(model as OpenAIModeModel);
   const supportsDeveloperRole =
     typeof compat.supportsDeveloperRole === "boolean" ?
compat.supportsDeveloperRole : undefined; @@ -964,7 +984,12 @@ export function buildOpenAIResponsesParams( model, context, new Set(["openai", "openai-codex", "opencode", "azure-openai-responses"]), - { includeSystemPrompt: !isCodexResponses, supportsDeveloperRole }, + { + includeSystemPrompt: !isCodexResponses, + supportsDeveloperRole, + replayReasoningItems: !isNativeCodexResponses, + replayResponsesItemIds: !isNativeCodexResponses, + }, ); if (isCodexResponses) { ensureOpenAICodexResponsesInput(messages, context);
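
Reviewer note (not part of the patch): a minimal sketch of the new turn-scoped identity on the native ChatGPT Codex route, mirroring the transport-policy.test.ts case above. It assumes the policy only reads the model's provider/api/baseUrl fields; the tests spread in the full `nativeModel` fixture, so treat the trimmed model literal here as illustrative.

    import { resolveOpenAITransportTurnState } from "./extensions/openai/transport-policy";

    // Native ChatGPT Codex route: the request identity follows the turn, not
    // the session, so a later turn never reuses a stale x-client-request-id.
    const { headers } = resolveOpenAITransportTurnState({
      provider: "openai-codex",
      modelId: "gpt-5.4",
      model: {
        provider: "openai-codex",
        api: "openai-codex-responses",
        baseUrl: "https://chatgpt.com/backend-api",
      },
      sessionId: "session-123",
      turnId: "turn-123",
      attempt: 1,
      transport: "stream",
    });

    // headers["x-client-request-id"] === "turn-123". When no turn id is
    // available the policy falls back to "<session id>:<attempt>"; on
    // non-native routes the session-scoped request id is kept as before.
    console.log(headers["x-client-request-id"]);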