From 27e467ad230ffa5ad4dad6f15b68f2b5bd53c0b1 Mon Sep 17 00:00:00 2001
From: mkdev11
Date: Sun, 3 May 2026 19:05:23 +0200
Subject: [PATCH] fix(openai-codex): avoid stale Responses replay state

---
 CHANGELOG.md                                |   1 +
 docs/reference/transcript-hygiene.md        |   1 +
 extensions/openai/transport-policy.test.ts  |  26 +++
 extensions/openai/transport-policy.ts       |  11 ++
 src/agents/openai-transport-stream.test.ts  | 203 ++++++++++++++++++++-
 src/agents/openai-transport-stream.ts       |  45 ++++-
 6 files changed, 273 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48a479053bd..0ad07570479 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -403,6 +403,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/config: deduplicate identical manifest compatibility diagnostics when an explicitly configured plugin overrides another discovered candidate, so external channel plugins do not print the same missing `channelConfigs` warning repeatedly during install and enable. Thanks @vincentkoc.
 - Discord/status: honor explicit `messages.statusReactions.enabled: true` in tool-only guild channels so queued ack reactions can progress through thinking/done lifecycle reactions instead of stopping at the initial emoji. Thanks @Marvinthebored.
 - Discord/native commands: compare Discord-normalized slash-command descriptions and localized descriptions during reconcile so CJK or multiline command text no longer triggers redundant startup PATCH bursts and rate-limit 429s. Fixes #76587. Thanks @zhengsx.
+- Agents/OpenAI Codex: scope ChatGPT Codex Responses request identity to each turn, strip the unsupported native Codex `prompt_cache_key`, and avoid replaying prior Responses reasoning/message/function item IDs so tool-call turns do not feed stale state into later Telegram replies. Refs #76413.
 - Agents/OpenAI: omit Chat Completions `reasoning_effort` for `gpt-5.4-mini` only when function tools are present while preserving tool-free Chat and Responses reasoning support, preventing Telegram-routed fallback runs from hanging after OpenAI rejects tool payloads. Fixes #76176. Thanks @ThisIsAdilah and @chinar-amrutkar.
 - Telegram: reuse the successful startup `getMe` probe for grammY polling startup and continue into `getUpdates` after recoverable `deleteWebhook` cleanup failures, reducing high-latency Bot API control-plane calls before long polling starts. Refs #76388. Thanks @jackiedepp.
 - Gateway/diagnostics: merge session id/key aliases in diagnostic session state and activity tracking so completed runs no longer leave stale queued work behind that keeps liveness samples at warning level.
diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md
index cfd4e2d4497..c710ef35c7a 100644
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -117,6 +117,7 @@ inter-session user turns that only have provenance metadata.
 - Image sanitization only.
 - Drop orphaned reasoning signatures (standalone reasoning items without a following content block) for OpenAI Responses/Codex transcripts, and drop replayable OpenAI reasoning after a model route switch.
 - Preserve replayable OpenAI Responses reasoning item payloads, including encrypted empty-summary items, so manual/WebSocket replay keeps required `rs_*` state paired with assistant output items.
+- Native ChatGPT Codex Responses is the exception: to avoid stale backend replay across turns, OpenClaw neither replays prior Responses reasoning/message/function item IDs nor sends a session-scoped `prompt_cache_key`.
- No tool call id sanitization. - Tool result pairing repair may move real matched outputs and synthesize Codex-style `aborted` outputs for missing tool calls. - No turn validation or reordering. diff --git a/extensions/openai/transport-policy.test.ts b/extensions/openai/transport-policy.test.ts index 65c05c9b371..2cddd1f70f7 100644 --- a/extensions/openai/transport-policy.test.ts +++ b/extensions/openai/transport-policy.test.ts @@ -67,6 +67,32 @@ describe("openai transport policy", () => { ).toBeUndefined(); }); + it("uses turn-scoped request identity for ChatGPT Codex stream turns", () => { + expect( + resolveOpenAITransportTurnState({ + provider: "openai-codex", + modelId: "gpt-5.4", + model: { + ...nativeModel, + provider: "openai-codex", + api: "openai-codex-responses", + baseUrl: "https://chatgpt.com/backend-api", + }, + sessionId: "session-123", + turnId: "turn-123", + attempt: 2, + transport: "stream", + }), + ).toMatchObject({ + headers: { + "x-client-request-id": "turn-123", + "x-openclaw-session-id": "session-123", + "x-openclaw-turn-id": "turn-123", + "x-openclaw-turn-attempt": "2", + }, + }); + }); + it("returns websocket session headers and cooldown for native routes", () => { expect( resolveOpenAIWebSocketSessionPolicy({ diff --git a/extensions/openai/transport-policy.ts b/extensions/openai/transport-policy.ts index cd69858ee77..50f9a710943 100644 --- a/extensions/openai/transport-policy.ts +++ b/extensions/openai/transport-policy.ts @@ -46,6 +46,13 @@ function usesKnownNativeOpenAIRoute(provider: string, baseUrl?: string): boolean return false; } +function usesNativeOpenAICodexRoute(provider: string, baseUrl?: string): boolean { + const normalizedProvider = normalizeProviderId(provider); + return ( + normalizedProvider === OPENAI_CODEX_PROVIDER_ID && (!baseUrl || isOpenAICodexBaseUrl(baseUrl)) + ); +} + function resolveSessionHeaders(params: { provider: string; baseUrl?: string; @@ -78,10 +85,14 @@ export function resolveOpenAITransportTurnState( const turnId = normalizeIdentityValue(ctx.turnId); const attempt = String(Math.max(1, ctx.attempt)); + const requestId = usesNativeOpenAICodexRoute(ctx.provider, ctx.model?.baseUrl) + ? turnId || `${sessionHeaders["x-openclaw-session-id"] ?? 
"session"}:${attempt}` + : sessionHeaders["x-client-request-id"]; return { headers: { ...sessionHeaders, + "x-client-request-id": requestId, "x-openclaw-turn-id": turnId, "x-openclaw-turn-attempt": attempt, }, diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index d7393794127..cde72f1e6be 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1059,7 +1059,7 @@ describe("openai transport stream", () => { expect(params.input?.some((item) => item.role === "system" || item.role === "developer")).toBe( false, ); - expect(params.prompt_cache_key).toBe("session-123"); + expect(params).not.toHaveProperty("prompt_cache_key"); expect(params.store).toBe(false); expect(params).not.toHaveProperty("metadata"); expect(params).not.toHaveProperty("max_output_tokens"); @@ -1097,7 +1097,7 @@ describe("openai transport stream", () => { payload, ); - expect(sanitized.prompt_cache_key).toBe("session-123"); + expect(sanitized).not.toHaveProperty("prompt_cache_key"); expect(sanitized).not.toHaveProperty("metadata"); expect(sanitized).not.toHaveProperty("max_output_tokens"); expect(sanitized).not.toHaveProperty("prompt_cache_retention"); @@ -1178,6 +1178,197 @@ describe("openai transport stream", () => { expect(sanitized).toEqual(payload); }); + it("omits prior Responses replay item ids for native Codex responses", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://chatgpt.com/backend-api", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-codex-responses">, + { + systemPrompt: "system", + messages: [ + { + role: "assistant", + api: "openai-codex-responses", + provider: "openai-codex", + model: "gpt-5.4", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "toolUse", + timestamp: 1, + content: [ + { + type: "thinking", + thinking: "Need a tool.", + thinkingSignature: JSON.stringify({ + type: "reasoning", + id: "rs_prior", + encrypted_content: "ciphertext", + }), + }, + { + type: "text", + text: "Checking the price.", + textSignature: JSON.stringify({ + v: 1, + id: "msg_prior", + phase: "commentary", + }), + }, + { + type: "toolCall", + id: "call_abc|fc_prior", + name: "price_lookup", + arguments: { symbol: "SOL" }, + }, + ], + }, + { + role: "toolResult", + toolCallId: "call_abc|fc_prior", + toolName: "price_lookup", + content: [{ type: "text", text: "$83.95" }], + isError: false, + timestamp: 2, + }, + { role: "user", content: "what is the capital of the philippines", timestamp: 3 }, + ], + tools: [], + } as never, + { sessionId: "session-123" }, + ) as { + input?: Array<{ + type?: string; + role?: string; + id?: string; + call_id?: string; + phase?: string; + }>; + }; + + expect(params.input?.some((item) => item.type === "reasoning")).toBe(false); + const assistantMessage = params.input?.find( + (item) => item.type === "message" && item.role === "assistant", + ); + expect(assistantMessage).toMatchObject({ + type: "message", + role: "assistant", + phase: "commentary", + }); + expect(assistantMessage?.id).toBeUndefined(); + const functionCall = params.input?.find((item) => item.type === "function_call"); + expect(functionCall).toMatchObject({ + 
type: "function_call", + call_id: "call_abc", + }); + expect(functionCall?.id).toBeUndefined(); + }); + + it("preserves prior Responses replay item ids for custom Codex-compatible responses", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://proxy.example.com/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-codex-responses">, + { + systemPrompt: "system", + messages: [ + { + role: "assistant", + api: "openai-codex-responses", + provider: "openai-codex", + model: "gpt-5.4", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "toolUse", + timestamp: 1, + content: [ + { + type: "thinking", + thinking: "Need a tool.", + thinkingSignature: JSON.stringify({ + type: "reasoning", + id: "rs_prior", + encrypted_content: "ciphertext", + }), + }, + { + type: "text", + text: "Checking the price.", + textSignature: JSON.stringify({ + v: 1, + id: "msg_prior", + phase: "commentary", + }), + }, + { + type: "toolCall", + id: "call_abc|fc_prior", + name: "price_lookup", + arguments: { symbol: "SOL" }, + }, + ], + }, + ], + tools: [], + } as never, + { sessionId: "session-123" }, + ) as { + input?: Array<{ + type?: string; + role?: string; + id?: string; + call_id?: string; + phase?: string; + }>; + }; + + expect(params.input?.some((item) => item.type === "reasoning")).toBe(true); + const assistantMessage = params.input?.find( + (item) => item.type === "message" && item.role === "assistant", + ); + expect(assistantMessage).toMatchObject({ + type: "message", + role: "assistant", + id: "msg_prior", + phase: "commentary", + }); + const functionCall = params.input?.find((item) => item.type === "function_call"); + expect(functionCall).toMatchObject({ + type: "function_call", + id: "fc_prior", + call_id: "call_abc", + }); + }); + it("adds minimal user input for Codex responses when only the system prompt is present", () => { const params = buildOpenAIResponsesParams( { @@ -1492,7 +1683,7 @@ describe("openai transport stream", () => { baseUrl: "https://proxy.example.com/v1", }, }, - ])("replays assistant phase metadata for $label responses payloads", ({ model }) => { + ])("replays assistant phase metadata for $label responses payloads", ({ label, model }) => { const params = buildOpenAIResponsesParams( { ...model, @@ -1548,9 +1739,13 @@ describe("openai transport stream", () => { const assistantItem = params.input?.find((item) => item.role === "assistant"); expect(assistantItem).toMatchObject({ role: "assistant", - id: "msg_commentary", phase: "commentary", }); + if (label === "openai-codex") { + expect(assistantItem?.id).toBeUndefined(); + } else { + expect(assistantItem?.id).toBe("msg_commentary"); + } }); it("strips the internal cache boundary from OpenAI system prompts", () => { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 2b16ca25706..7c8acb12cb4 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -17,7 +17,9 @@ import type { ResponseCreateParamsStreaming, ResponseFunctionCallOutputItemList, ResponseInput, + ResponseInputItem, ResponseInputMessageContentList, + ResponseOutputMessage, } from "openai/resources/responses/responses.js"; import type 
{ ModelCompatConfig } from "../config/types.models.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
@@ -57,6 +59,8 @@ const DEFAULT_AZURE_OPENAI_API_VERSION = "2024-12-01-preview";
 const OPENAI_CODEX_RESPONSES_EMPTY_INPUT_TEXT = " ";
 const log = createSubsystemLogger("openai-transport");
 
+type ReplayableResponseOutputMessage = Omit<ResponseOutputMessage, "id"> & { id?: string };
+
 type BaseStreamOptions = {
   temperature?: number;
   maxTokens?: number;
@@ -211,9 +215,16 @@ function convertResponsesMessages(
   model: Model,
   context: Context,
   allowedToolCallProviders: Set<string>,
-  options?: { includeSystemPrompt?: boolean; supportsDeveloperRole?: boolean },
+  options?: {
+    includeSystemPrompt?: boolean;
+    supportsDeveloperRole?: boolean;
+    replayReasoningItems?: boolean;
+    replayResponsesItemIds?: boolean;
+  },
 ): ResponseInput {
   const messages: ResponseInput = [];
+  const shouldReplayReasoningItems = options?.replayReasoningItems ?? true;
+  const shouldReplayResponsesItemIds = options?.replayResponsesItemIds ?? true;
   const normalizeIdPart = (part: string) => {
     const sanitized = part.replace(/[^a-zA-Z0-9_-]/g, "_");
     const normalized = sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
@@ -287,15 +298,18 @@
       msg.model !== model.id && msg.provider === model.provider && msg.api === model.api;
     for (const block of msg.content) {
       if (block.type === "thinking") {
-        if (block.thinkingSignature) {
+        if (shouldReplayReasoningItems && block.thinkingSignature) {
           output.push(JSON.parse(block.thinkingSignature));
         }
       } else if (block.type === "text") {
-        let msgId = parseTextSignature(block.textSignature)?.id ?? `msg_${msgIndex}`;
-        if (msgId.length > 64) {
+        const textSignature = parseTextSignature(block.textSignature);
+        let msgId = shouldReplayResponsesItemIds
+          ? (textSignature?.id ?? `msg_${msgIndex}`)
+          : undefined;
+        if (msgId && msgId.length > 64) {
           msgId = `msg_${shortHash(msgId)}`;
         }
-        output.push({
+        const messageItem: ReplayableResponseOutputMessage = {
           type: "message",
           role: "assistant",
           content: [
@@ -306,12 +320,16 @@
             },
           ],
           status: "completed",
-          id: msgId,
-          phase: parseTextSignature(block.textSignature)?.phase,
-        });
+          ...(msgId ? { id: msgId } : {}),
+          phase: textSignature?.phase,
+        };
+        output.push(messageItem as ResponseInputItem);
       } else if (block.type === "toolCall") {
         const [callId, itemIdRaw] = block.id.split("|");
-        const itemId = isDifferentModel && itemIdRaw?.startsWith("fc_") ? undefined : itemIdRaw;
+        const itemId =
+          shouldReplayResponsesItemIds && !(isDifferentModel && itemIdRaw?.startsWith("fc_"))
+            ? itemIdRaw
+            : undefined;
         output.push({
           type: "function_call",
           id: itemId,
@@ -909,6 +927,7 @@ function usesNativeOpenAICodexResponsesBackend(model: Model): boolean {
 const OPENAI_CODEX_RESPONSES_UNSUPPORTED_PARAMS = [
   "max_output_tokens",
   "metadata",
+  "prompt_cache_key",
   "prompt_cache_retention",
   "service_tier",
   "temperature",
@@ -957,6 +976,7 @@ export function buildOpenAIResponsesParams(
   metadata?: Record<string, string>,
 ) {
   const isCodexResponses = isOpenAICodexResponsesModel(model);
+  const isNativeCodexResponses = usesNativeOpenAICodexResponsesBackend(model);
   const compat = getCompat(model as OpenAIModeModel);
   const supportsDeveloperRole =
     typeof compat.supportsDeveloperRole === "boolean" ?
compat.supportsDeveloperRole : undefined; @@ -964,7 +984,12 @@ export function buildOpenAIResponsesParams( model, context, new Set(["openai", "openai-codex", "opencode", "azure-openai-responses"]), - { includeSystemPrompt: !isCodexResponses, supportsDeveloperRole }, + { + includeSystemPrompt: !isCodexResponses, + supportsDeveloperRole, + replayReasoningItems: !isNativeCodexResponses, + replayResponsesItemIds: !isNativeCodexResponses, + }, ); if (isCodexResponses) { ensureOpenAICodexResponsesInput(messages, context);
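
Reviewer note (not part of the patch): a minimal sketch of the new turn-scoped identity on the native ChatGPT Codex route, mirroring the transport-policy.test.ts case above. It assumes the policy only reads the model's provider/api/baseUrl fields; the tests spread in the full `nativeModel` fixture, so treat the trimmed model literal here as illustrative.

    import { resolveOpenAITransportTurnState } from "./extensions/openai/transport-policy";

    // Native ChatGPT Codex route: the request identity follows the turn, not
    // the session, so a later turn never reuses a stale x-client-request-id.
    const { headers } = resolveOpenAITransportTurnState({
      provider: "openai-codex",
      modelId: "gpt-5.4",
      model: {
        provider: "openai-codex",
        api: "openai-codex-responses",
        baseUrl: "https://chatgpt.com/backend-api",
      },
      sessionId: "session-123",
      turnId: "turn-123",
      attempt: 1,
      transport: "stream",
    });

    // headers["x-client-request-id"] === "turn-123". When no turn id is
    // available the policy falls back to "<session id>:<attempt>"; on
    // non-native routes the session-scoped request id is kept as before.
    console.log(headers["x-client-request-id"]);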