From 15258921ee1c55323f962becb40c9ae3f0b28cc5 Mon Sep 17 00:00:00 2001
From: zhulijin1991 <167979819+zhulijin1991@users.noreply.github.com>
Date: Sun, 12 Apr 2026 08:16:03 +0800
Subject: [PATCH] fix(codex): avoid re-exposing image tool on vision turns

---
 .../codex/src/app-server/run-attempt.ts | 30 +++++++++++++++----
 .../run-attempt.vision-tools.test.ts    | 20 +++++++++++++
 2 files changed, 45 insertions(+), 5 deletions(-)
 create mode 100644 extensions/codex/src/app-server/run-attempt.vision-tools.test.ts

diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts
index f8d7238523d..67eb23f294b 100644
--- a/extensions/codex/src/app-server/run-attempt.ts
+++ b/extensions/codex/src/app-server/run-attempt.ts
@@ -365,10 +365,14 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
       input.runAbortController.abort("sessions_yield");
     },
   });
+  const visionFilteredTools = filterToolsForVisionInputs(allTools, {
+    modelHasVision,
+    hasInboundImages: (params.images?.length ?? 0) > 0,
+  });
   const filteredTools =
     params.toolsAllow && params.toolsAllow.length > 0
-      ? allTools.filter((tool) => params.toolsAllow?.includes(tool.name))
-      : allTools;
+      ? visionFilteredTools.filter((tool) => params.toolsAllow?.includes(tool.name))
+      : visionFilteredTools;
   return normalizeProviderToolSchemas({
     tools: filteredTools,
     provider: params.provider,
@@ -381,6 +385,19 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
   });
 }
 
+function filterToolsForVisionInputs<T extends { name: string }>(
+  tools: T[],
+  params: {
+    modelHasVision: boolean;
+    hasInboundImages: boolean;
+  },
+): T[] {
+  if (!params.modelHasVision || !params.hasInboundImages) {
+    return tools;
+  }
+  return tools.filter((tool) => tool.name !== "image");
+}
+
 async function withCodexStartupTimeout(params: {
   timeoutMs: number;
   timeoutFloorMs?: number;
@@ -495,6 +512,9 @@ function handleApprovalRequest(params: {
   });
 }
 
-export const __testing = createCodexAppServerClientFactoryTestHooks((factory) => {
-  clientFactory = factory;
-});
+export const __testing = {
+  filterToolsForVisionInputs,
+  ...createCodexAppServerClientFactoryTestHooks((factory) => {
+    clientFactory = factory;
+  }),
+} as const;
diff --git a/extensions/codex/src/app-server/run-attempt.vision-tools.test.ts b/extensions/codex/src/app-server/run-attempt.vision-tools.test.ts
new file mode 100644
index 00000000000..8c3e9fc5eb9
--- /dev/null
+++ b/extensions/codex/src/app-server/run-attempt.vision-tools.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, it } from "vitest";
+import { __testing } from "./run-attempt.js";
+
+describe("Codex dynamic tool filtering", () => {
+  it("drops the image tool when the model already has inbound vision input", () => {
+    const toolNames = __testing
+      .filterToolsForVisionInputs(
+        [{ name: "image" }, { name: "read" }, { name: "write" }],
+        {
+          modelHasVision: true,
+          hasInboundImages: true,
+        },
+      )
+      .map((tool) => tool.name);
+
+    expect(toolNames).toContain("read");
+    expect(toolNames).toContain("write");
+    expect(toolNames).not.toContain("image");
+  });
+});