fix(agents): unblock gpt-5.3-codex API-key routing and replay (#31083)

* fix(agents): unblock gpt-5.3-codex API-key replay path

* fix(agents): scope OpenAI replay ID rewrites per turn

* test: fix nodes-tool mock typing and reformat telegram accounts
This commit is contained in:
Charles Dusek
2026-03-01 21:45:12 -06:00
committed by GitHub
parent e1bf9591c3
commit 92199ac129
9 changed files with 347 additions and 26 deletions

View File

@@ -174,7 +174,7 @@ async function expectSkippedUnavailableProvider(params: {
}
describe("runWithModelFallback", () => {
it("normalizes openai gpt-5.3 codex to openai-codex before running", async () => {
it("keeps openai gpt-5.3 codex on the openai provider before running", async () => {
const cfg = makeCfg();
const run = vi.fn().mockResolvedValueOnce("ok");
@@ -187,7 +187,7 @@ describe("runWithModelFallback", () => {
expect(result.result).toBe("ok");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenCalledWith("openai-codex", "gpt-5.3-codex");
expect(run).toHaveBeenCalledWith("openai", "gpt-5.3-codex");
});
it("falls back on unrecognized errors when candidates remain", async () => {

View File

@@ -70,17 +70,17 @@ describe("model-selection", () => {
});
});
it("normalizes openai gpt-5.3 codex refs to openai-codex provider", () => {
it("keeps openai gpt-5.3 codex refs on the openai provider", () => {
expect(parseModelRef("openai/gpt-5.3-codex", "anthropic")).toEqual({
provider: "openai-codex",
provider: "openai",
model: "gpt-5.3-codex",
});
expect(parseModelRef("gpt-5.3-codex", "openai")).toEqual({
provider: "openai-codex",
provider: "openai",
model: "gpt-5.3-codex",
});
expect(parseModelRef("openai/gpt-5.3-codex-codex", "anthropic")).toEqual({
provider: "openai-codex",
provider: "openai",
model: "gpt-5.3-codex-codex",
});
});

View File

@@ -27,7 +27,6 @@ const ANTHROPIC_MODEL_ALIASES: Record<string, string> = {
"sonnet-4.6": "claude-sonnet-4-6",
"sonnet-4.5": "claude-sonnet-4-5",
};
const OPENAI_CODEX_OAUTH_MODEL_PREFIXES = ["gpt-5.3-codex"] as const;
function normalizeAliasKey(value: string): string {
return value.trim().toLowerCase();
@@ -133,25 +132,9 @@ function normalizeProviderModelId(provider: string, model: string): string {
return model;
}
/**
 * Decide whether an "openai" model ref should be rerouted to the
 * "openai-codex" OAuth provider. Matches when the trimmed, lower-cased
 * model id equals a codex prefix or is a dash-suffixed variant of one
 * (e.g. "gpt-5.3-codex-mini").
 */
function shouldUseOpenAICodexProvider(provider: string, model: string): boolean {
  // Only the plain "openai" provider is ever a candidate for rerouting.
  if (provider !== "openai") {
    return false;
  }
  const candidate = model.trim().toLowerCase();
  if (candidate.length === 0) {
    return false;
  }
  for (const prefix of OPENAI_CODEX_OAUTH_MODEL_PREFIXES) {
    if (candidate === prefix || candidate.startsWith(`${prefix}-`)) {
      return true;
    }
  }
  return false;
}
export function normalizeModelRef(provider: string, model: string): ModelRef {
const normalizedProvider = normalizeProviderId(provider);
const normalizedModel = normalizeProviderModelId(normalizedProvider, model.trim());
if (shouldUseOpenAICodexProvider(normalizedProvider, normalizedModel)) {
return { provider: "openai-codex", model: normalizedModel };
}
return { provider: normalizedProvider, model: normalizedModel };
}

View File

@@ -1,5 +1,6 @@
import { describe, expect, it } from "vitest";
import {
downgradeOpenAIFunctionCallReasoningPairs,
downgradeOpenAIReasoningBlocks,
isMessagingToolDuplicate,
normalizeTextForComparison,
@@ -318,6 +319,125 @@ describe("downgradeOpenAIReasoningBlocks", () => {
});
});
// Unit tests for the replay downgrade: composite `call_id|fc_id` tool-call ids
// must be reduced to bare `call_id` whenever the assistant turn lacks
// replayable reasoning metadata.
describe("downgradeOpenAIFunctionCallReasoningPairs", () => {
// No thinking block with a reasoning signature -> both the tool call and its
// paired tool result lose the `|fc_*` suffix.
it("strips fc ids when reasoning cannot be replayed", () => {
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }],
},
{
role: "toolResult",
toolCallId: "call_123|fc_123",
toolName: "read",
content: [{ type: "text", text: "ok" }],
},
];
// oxlint-disable-next-line typescript/no-explicit-any
expect(downgradeOpenAIFunctionCallReasoningPairs(input as any)).toEqual([
{
role: "assistant",
content: [{ type: "toolCall", id: "call_123", name: "read", arguments: {} }],
},
{
role: "toolResult",
toolCallId: "call_123",
toolName: "read",
content: [{ type: "text", text: "ok" }],
},
]);
});
// A thinking block carrying a parseable reasoning signature makes the pairing
// replayable, so the input is returned unchanged.
it("keeps fc ids when replayable reasoning is present", () => {
const input = [
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal",
thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }),
},
{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} },
],
},
{
role: "toolResult",
toolCallId: "call_123|fc_123",
toolName: "read",
content: [{ type: "text", text: "ok" }],
},
];
// oxlint-disable-next-line typescript/no-explicit-any
expect(downgradeOpenAIFunctionCallReasoningPairs(input as any)).toEqual(input);
});
// Two turns reuse the same composite id; only the first (non-replayable) turn
// and its own tool result are rewritten, proving rewrites are scoped per turn.
it("only rewrites tool results paired to the downgraded assistant turn", () => {
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }],
},
{
role: "toolResult",
toolCallId: "call_123|fc_123",
toolName: "read",
content: [{ type: "text", text: "turn1" }],
},
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal",
thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }),
},
{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} },
],
},
{
role: "toolResult",
toolCallId: "call_123|fc_123",
toolName: "read",
content: [{ type: "text", text: "turn2" }],
},
];
// oxlint-disable-next-line typescript/no-explicit-any
expect(downgradeOpenAIFunctionCallReasoningPairs(input as any)).toEqual([
{
role: "assistant",
content: [{ type: "toolCall", id: "call_123", name: "read", arguments: {} }],
},
{
role: "toolResult",
toolCallId: "call_123",
toolName: "read",
content: [{ type: "text", text: "turn1" }],
},
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal",
thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }),
},
{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} },
],
},
{
role: "toolResult",
toolCallId: "call_123|fc_123",
toolName: "read",
content: [{ type: "text", text: "turn2" }],
},
]);
});
});
describe("normalizeTextForComparison", () => {
it.each([
{ input: "Hello World", expected: "hello world" },

View File

@@ -42,7 +42,10 @@ export {
} from "./pi-embedded-helpers/errors.js";
export { isGoogleModelApi, sanitizeGoogleTurnOrdering } from "./pi-embedded-helpers/google.js";
export { downgradeOpenAIReasoningBlocks } from "./pi-embedded-helpers/openai.js";
export {
downgradeOpenAIFunctionCallReasoningPairs,
downgradeOpenAIReasoningBlocks,
} from "./pi-embedded-helpers/openai.js";
export {
isEmptyAssistantMessageContent,
sanitizeSessionMessagesImages,

View File

@@ -6,6 +6,11 @@ type OpenAIThinkingBlock = {
thinkingSignature?: unknown;
};
// Minimal structural view of a tool-call content block in a transcript.
// Fields are `unknown` because transcript content is untrusted until narrowed
// (see isOpenAIToolCallType / the id typeof checks at the use sites).
type OpenAIToolCallBlock = {
type?: unknown;
id?: unknown;
};
type OpenAIReasoningSignature = {
id: string;
type: string;
@@ -59,6 +64,141 @@ function hasFollowingNonThinkingBlock(
return false;
}
/**
 * Split a composite tool-call id of the form "call_xxx|fc_yyy" into its
 * call id and item id. The "|" only counts as a pairing separator when it
 * has at least one character on each side; otherwise the whole string is
 * returned as the call id and `itemId` is omitted.
 */
function splitOpenAIFunctionCallPairing(id: string): {
  callId: string;
  itemId?: string;
} {
  const pipeAt = id.indexOf("|");
  const hasBothParts = pipeAt > 0 && pipeAt < id.length - 1;
  if (!hasBothParts) {
    return { callId: id };
  }
  const callId = id.slice(0, pipeAt);
  const itemId = id.slice(pipeAt + 1);
  return { callId, itemId };
}
/**
 * True when a content block's `type` field names a tool invocation.
 * Transcripts may use any of the three labels below for tool calls.
 */
function isOpenAIToolCallType(type: unknown): boolean {
  if (typeof type !== "string") {
    return false;
  }
  const toolCallTypes = ["toolCall", "toolUse", "functionCall"];
  return toolCallTypes.includes(type);
}
/**
 * OpenAI can reject replayed `function_call` items with an `fc_*` id if the
 * matching `reasoning` item is absent in the same assistant turn.
 *
 * When that pairing is missing, strip the `|fc_*` suffix from tool call ids so
 * pi-ai omits `function_call.id` on replay.
 *
 * Rewrites are scoped per assistant turn: rewritten ids are remembered only
 * until a message other than a matching tool result is seen, so tool results
 * belonging to a later (replayable) turn keep their composite ids. Returns
 * the original array untouched when nothing needed rewriting.
 */
export function downgradeOpenAIFunctionCallReasoningPairs(
messages: AgentMessage[],
): AgentMessage[] {
// Set once any message is rewritten; lets us return `messages` as-is otherwise.
let changed = false;
const rewrittenMessages: AgentMessage[] = [];
// Maps original "call_id|fc_id" -> bare "call_id" for the most recent
// assistant turn; reset on every assistant message and cleared on any
// message that is neither an assistant turn nor a matching tool result.
let pendingRewrittenIds: Map<string, string> | null = null;
for (const msg of messages) {
if (!msg || typeof msg !== "object") {
pendingRewrittenIds = null;
rewrittenMessages.push(msg);
continue;
}
const role = (msg as { role?: unknown }).role;
if (role === "assistant") {
const assistantMsg = msg as Extract<AgentMessage, { role: "assistant" }>;
if (!Array.isArray(assistantMsg.content)) {
pendingRewrittenIds = null;
rewrittenMessages.push(msg);
continue;
}
const localRewrittenIds = new Map<string, string>();
// True once a thinking block with a parseable reasoning signature appears
// in this turn; subsequent tool calls in the turn then keep their fc ids.
let seenReplayableReasoning = false;
let assistantChanged = false;
const nextContent = assistantMsg.content.map((block) => {
if (!block || typeof block !== "object") {
return block;
}
const thinkingBlock = block as OpenAIThinkingBlock;
if (
thinkingBlock.type === "thinking" &&
parseOpenAIReasoningSignature(thinkingBlock.thinkingSignature)
) {
seenReplayableReasoning = true;
return block;
}
const toolCallBlock = block as OpenAIToolCallBlock;
if (!isOpenAIToolCallType(toolCallBlock.type) || typeof toolCallBlock.id !== "string") {
return block;
}
const pairing = splitOpenAIFunctionCallPairing(toolCallBlock.id);
// Strip only when the suffix is an fc_* item id and no replayable
// reasoning preceded this call within the same turn.
if (seenReplayableReasoning || !pairing.itemId || !pairing.itemId.startsWith("fc_")) {
return block;
}
assistantChanged = true;
localRewrittenIds.set(toolCallBlock.id, pairing.callId);
return {
...(block as unknown as Record<string, unknown>),
id: pairing.callId,
} as typeof block;
});
// Remember this turn's rewrites (if any) so its tool results can be renamed.
pendingRewrittenIds = localRewrittenIds.size > 0 ? localRewrittenIds : null;
if (!assistantChanged) {
rewrittenMessages.push(msg);
continue;
}
changed = true;
rewrittenMessages.push({ ...assistantMsg, content: nextContent } as AgentMessage);
continue;
}
if (role === "toolResult" && pendingRewrittenIds && pendingRewrittenIds.size > 0) {
// Rename toolCallId/toolUseId to the downgraded id from the preceding
// assistant turn. `toolUseId` is read via a structural extension because
// some transcript shapes carry it alongside `toolCallId`.
const toolResult = msg as Extract<AgentMessage, { role: "toolResult" }> & {
toolUseId?: unknown;
};
let toolResultChanged = false;
const updates: Record<string, string> = {};
if (typeof toolResult.toolCallId === "string") {
const nextToolCallId = pendingRewrittenIds.get(toolResult.toolCallId);
if (nextToolCallId && nextToolCallId !== toolResult.toolCallId) {
updates.toolCallId = nextToolCallId;
toolResultChanged = true;
}
}
if (typeof toolResult.toolUseId === "string") {
const nextToolUseId = pendingRewrittenIds.get(toolResult.toolUseId);
if (nextToolUseId && nextToolUseId !== toolResult.toolUseId) {
updates.toolUseId = nextToolUseId;
toolResultChanged = true;
}
}
if (!toolResultChanged) {
rewrittenMessages.push(msg);
continue;
}
changed = true;
rewrittenMessages.push({
...toolResult,
...updates,
} as AgentMessage);
continue;
}
// Any other role ends the current turn's rewrite scope.
pendingRewrittenIds = null;
rewrittenMessages.push(msg);
}
return changed ? rewrittenMessages : messages;
}
/**
* OpenAI Responses API can reject transcripts that contain a standalone `reasoning` item id
* without the required following item.

View File

@@ -7,7 +7,7 @@ import {
import { sanitizeSessionHistory } from "./pi-embedded-runner/google.js";
describe("sanitizeSessionHistory openai tool id preservation", () => {
it("keeps canonical call_id|fc_id pairings for same-model openai replay", async () => {
it("strips fc ids when replayable reasoning metadata is missing", async () => {
const sessionEntries = [
makeModelSnapshotEntry({
provider: "openai",
@@ -40,6 +40,54 @@ describe("sanitizeSessionHistory openai tool id preservation", () => {
sessionId: "test-session",
});
const assistant = result[0] as { content?: Array<{ type?: string; id?: string }> };
const toolCall = assistant.content?.find((block) => block.type === "toolCall");
expect(toolCall?.id).toBe("call_123");
const toolResult = result[1] as { toolCallId?: string };
expect(toolResult.toolCallId).toBe("call_123");
});
it("keeps canonical call_id|fc_id pairings when replayable reasoning is present", async () => {
const sessionEntries = [
makeModelSnapshotEntry({
provider: "openai",
modelApi: "openai-responses",
modelId: "gpt-5.2-codex",
}),
];
const sessionManager = makeInMemorySessionManager(sessionEntries);
const messages: AgentMessage[] = [
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal reasoning",
thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }),
},
{ type: "toolCall", id: "call_123|fc_123", name: "noop", arguments: {} },
],
} as unknown as AgentMessage,
{
role: "toolResult",
toolCallId: "call_123|fc_123",
toolName: "noop",
content: [{ type: "text", text: "ok" }],
isError: false,
} as unknown as AgentMessage,
];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
modelId: "gpt-5.2-codex",
sessionManager,
sessionId: "test-session",
});
const assistant = result[0] as { content?: Array<{ type?: string; id?: string }> };
const toolCall = assistant.content?.find((block) => block.type === "toolCall");
expect(toolCall?.id).toBe("call_123|fc_123");

View File

@@ -10,6 +10,7 @@ import {
} from "../../sessions/input-provenance.js";
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
import {
downgradeOpenAIFunctionCallReasoningPairs,
downgradeOpenAIReasoningBlocks,
isCompactionFailureError,
isGoogleModelApi,
@@ -464,7 +465,9 @@ export async function sanitizeSessionHistory(params: {
})
: false;
const sanitizedOpenAI = isOpenAIResponsesApi
? downgradeOpenAIReasoningBlocks(sanitizedCompactionUsage)
? downgradeOpenAIFunctionCallReasoningPairs(
downgradeOpenAIReasoningBlocks(sanitizedCompactionUsage),
)
: sanitizedCompactionUsage;
if (hasSnapshot && (!priorSnapshot || modelChanged)) {

View File

@@ -45,6 +45,7 @@ import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-strea
import { createOpenAIWebSocketStreamFn, releaseWsSession } from "../../openai-ws-stream.js";
import { resolveOwnerDisplaySetting } from "../../owner-display.js";
import {
downgradeOpenAIFunctionCallReasoningPairs,
isCloudCodeAssistFormatError,
resolveBootstrapMaxChars,
resolveBootstrapTotalMaxChars,
@@ -1032,6 +1033,29 @@ export async function runEmbeddedAttempt(
};
}
if (
params.model.api === "openai-responses" ||
params.model.api === "openai-codex-responses"
) {
const inner = activeSession.agent.streamFn;
activeSession.agent.streamFn = (model, context, options) => {
const ctx = context as unknown as { messages?: unknown };
const messages = ctx?.messages;
if (!Array.isArray(messages)) {
return inner(model, context, options);
}
const sanitized = downgradeOpenAIFunctionCallReasoningPairs(messages as AgentMessage[]);
if (sanitized === messages) {
return inner(model, context, options);
}
const nextContext = {
...(context as unknown as Record<string, unknown>),
messages: sanitized,
} as unknown;
return inner(model, nextContext as typeof context, options);
};
}
// Some models emit tool names with surrounding whitespace (e.g. " read ").
// pi-agent-core dispatches tool calls with exact string matching, so normalize
// names on the live response stream before tool execution.