From 32a38f125e9d398285340a555d8999b27cd38f05 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 17:05:59 +0100 Subject: [PATCH] fix: keep codex cli images in workspace --- extensions/openai/cli-backend.ts | 1 + src/agents/cli-backends.test.ts | 2 + src/agents/cli-runner.helpers.test.ts | 4 + src/agents/cli-runner/helpers.ts | 25 ++-- .../gateway-cli-backend.live-probe-helpers.ts | 3 +- src/gateway/gateway-cli-backend.live.test.ts | 16 ++- src/gateway/live-image-probe.test.ts | 12 ++ src/gateway/live-image-probe.ts | 110 ++++++++++++------ 8 files changed, 120 insertions(+), 53 deletions(-) create mode 100644 src/gateway/live-image-probe.test.ts diff --git a/extensions/openai/cli-backend.ts b/extensions/openai/cli-backend.ts index d4771ecf75e..ed24cfe4c55 100644 --- a/extensions/openai/cli-backend.ts +++ b/extensions/openai/cli-backend.ts @@ -55,6 +55,7 @@ export function buildOpenAICodexCliBackend(): CliBackendPlugin { systemPromptWhen: "first", imageArg: "--image", imageMode: "repeat", + imagePathScope: "workspace", reliability: { watchdog: { fresh: { ...CLI_FRESH_WATCHDOG_DEFAULTS }, diff --git a/src/agents/cli-backends.test.ts b/src/agents/cli-backends.test.ts index fa992013c9e..d81d98217db 100644 --- a/src/agents/cli-backends.test.ts +++ b/src/agents/cli-backends.test.ts @@ -264,6 +264,7 @@ beforeEach(() => { systemPromptFileConfigArg: "-c", systemPromptFileConfigKey: "model_instructions_file", systemPromptWhen: "first", + imagePathScope: "workspace", reliability: { watchdog: { fresh: { @@ -779,6 +780,7 @@ describe("resolveCliBackendConfig google-gemini-cli defaults", () => { expect(resolved?.config.systemPromptFileConfigArg).toBe("-c"); expect(resolved?.config.systemPromptFileConfigKey).toBe("model_instructions_file"); expect(resolved?.config.systemPromptWhen).toBe("first"); + expect(resolved?.config.imagePathScope).toBe("workspace"); }); }); diff --git a/src/agents/cli-runner.helpers.test.ts b/src/agents/cli-runner.helpers.test.ts index 7f4be0140c5..c1ddbe92345 100644 --- a/src/agents/cli-runner.helpers.test.ts +++ b/src/agents/cli-runner.helpers.test.ts @@ -275,11 +275,15 @@ describe("writeCliImages", () => { baseArgs: ["exec", "--json"], modelId: "gpt-5.4", imagePaths: prepared.imagePaths, + promptArg: "describe the attached image", useResume: false, }); const imageArgIndex = argv.indexOf("--image"); + const promptIndex = argv.indexOf("describe the attached image"); expect(imageArgIndex).toBeGreaterThanOrEqual(0); + expect(promptIndex).toBeGreaterThanOrEqual(0); + expect(imageArgIndex).toBeGreaterThan(promptIndex); expect(argv[imageArgIndex + 1]).toContain("openclaw-cli-images"); expect(argv[imageArgIndex + 1]).not.toBe(sourceImage); diff --git a/src/agents/cli-runner/helpers.ts b/src/agents/cli-runner/helpers.ts index ca0c19ae264..a9eb07c9738 100644 --- a/src/agents/cli-runner/helpers.ts +++ b/src/agents/cli-runner/helpers.ts @@ -391,6 +391,18 @@ export function buildCliArgs(params: { args.push(params.backend.sessionArg, params.sessionId); } } + if (params.promptArg !== undefined) { + let replacedPromptPlaceholder = false; + for (let i = 0; i < args.length; i += 1) { + if (args[i] === "{prompt}") { + args[i] = params.promptArg; + replacedPromptPlaceholder = true; + } + } + if (!replacedPromptPlaceholder) { + args.push(params.promptArg); + } + } if (params.imagePaths && params.imagePaths.length > 0) { const mode = params.backend.imageMode ?? "repeat"; const imageArg = params.backend.imageArg; @@ -404,18 +416,5 @@ export function buildCliArgs(params: { } } } - if (params.promptArg !== undefined) { - let replacedPromptPlaceholder = false; - for (let i = 0; i < args.length; i += 1) { - if (args[i] === "{prompt}") { - args[i] = params.promptArg; - replacedPromptPlaceholder = true; - } - } - if (replacedPromptPlaceholder) { - return args; - } - args.push(params.promptArg); - } return args; } diff --git a/src/gateway/gateway-cli-backend.live-probe-helpers.ts b/src/gateway/gateway-cli-backend.live-probe-helpers.ts index 2ca33999364..e7ae8cdff88 100644 --- a/src/gateway/gateway-cli-backend.live-probe-helpers.ts +++ b/src/gateway/gateway-cli-backend.live-probe-helpers.ts @@ -267,8 +267,7 @@ export async function verifyCliBackendImageProbe(params: { // still receives a local file path, but now via the runner code we // actually want to validate instead of an ad hoc prompt-only shortcut. message: - "Read the large word printed at the bottom of the attached image. " + - "Reply with that word in lowercase and nothing else.", + "What animal is drawn in the attached image? Reply with only the lowercase animal name.", attachments: [ { mimeType: "image/png", diff --git a/src/gateway/gateway-cli-backend.live.test.ts b/src/gateway/gateway-cli-backend.live.test.ts index 86bf0e32ded..2c0bc00464e 100644 --- a/src/gateway/gateway-cli-backend.live.test.ts +++ b/src/gateway/gateway-cli-backend.live.test.ts @@ -205,7 +205,13 @@ describeLive("gateway live (cli backend)", () => { clearEnv: filteredCliClearEnv.length > 0 ? filteredCliClearEnv : undefined, env: Object.keys(preservedCliEnv).length > 0 ? preservedCliEnv : undefined, systemPromptWhen: providerDefaults?.systemPromptWhen ?? "never", - ...(cliImageArg ? { imageArg: cliImageArg, imageMode: cliImageMode } : {}), + ...(cliImageArg + ? { + imageArg: cliImageArg, + imageMode: cliImageMode, + imagePathScope: providerDefaults?.imagePathScope, + } + : {}), }, }, sandbox: { mode: "off" }, @@ -355,11 +361,15 @@ describeLive("gateway live (cli backend)", () => { } if (enableCliImageProbe) { - logCliBackendLiveStep("image-probe:start", { sessionKey }); + const imageSessionKey = + providerId === "codex-cli" + ? `agent:dev:live-cli-backend-image:${randomUUID()}` + : sessionKey; + logCliBackendLiveStep("image-probe:start", { sessionKey: imageSessionKey }); await verifyCliBackendImageProbe({ client, providerId, - sessionKey, + sessionKey: imageSessionKey, tempDir, bootstrapWorkspace, }); diff --git a/src/gateway/live-image-probe.test.ts b/src/gateway/live-image-probe.test.ts new file mode 100644 index 00000000000..b93984450ca --- /dev/null +++ b/src/gateway/live-image-probe.test.ts @@ -0,0 +1,12 @@ +import { describe, expect, it } from "vitest"; +import { renderCatFacePngBase64 } from "./live-image-probe.js"; + +describe("live image probe", () => { + it("leaves room for the unclipped bottom CAT label", () => { + const png = Buffer.from(renderCatFacePngBase64(), "base64"); + + expect(png.toString("ascii", 1, 4)).toBe("PNG"); + expect(png.readUInt32BE(16)).toBe(256); + expect(png.readUInt32BE(20)).toBeGreaterThanOrEqual(274); + }); +}); diff --git a/src/gateway/live-image-probe.ts b/src/gateway/live-image-probe.ts index 964f0ab1d71..5a323839885 100644 --- a/src/gateway/live-image-probe.ts +++ b/src/gateway/live-image-probe.ts @@ -200,6 +200,49 @@ function fillTriangle(params: { } } +function drawBlockCatLabel(params: { + buf: Buffer; + width: number; + height: number; + x: number; + y: number; + color: { r: number; g: number; b: number; a?: number }; +}) { + const t = 12; + const h = 78; + const w = 58; + const gap = 20; + const cX = params.x; + const aX = cX + w + gap; + const tX = aX + w + gap; + + fillRect({ ...params, x: cX, y: params.y, w, h: t, color: params.color }); + fillRect({ ...params, x: cX, y: params.y, w: t, h, color: params.color }); + fillRect({ ...params, x: cX, y: params.y + h - t, w, h: t, color: params.color }); + + fillRect({ ...params, x: aX, y: params.y, w, h: t, color: params.color }); + fillRect({ ...params, x: aX, y: params.y, w: t, h, color: params.color }); + fillRect({ ...params, x: aX + w - t, y: params.y, w: t, h, color: params.color }); + fillRect({ + ...params, + x: aX, + y: params.y + Math.floor((h - t) / 2), + w, + h: t, + color: params.color, + }); + + fillRect({ ...params, x: tX, y: params.y, w, h: t, color: params.color }); + fillRect({ + ...params, + x: tX + Math.floor((w - t) / 2), + y: params.y, + w: t, + h, + color: params.color, + }); +} + export function renderCatNoncePngBase64(nonce: string): string { const top = "CAT"; const bottom = nonce.toUpperCase(); @@ -242,7 +285,7 @@ export function renderCatNoncePngBase64(nonce: string): string { export function renderCatFacePngBase64(): string { const width = 256; - const height = 256; + const height = 288; const buf = Buffer.alloc(width * height * 4, 255); const outline = { r: 40, g: 40, b: 40 }; const innerEar = { r: 245, g: 182, b: 193 }; @@ -253,36 +296,36 @@ export function renderCatFacePngBase64(): string { buf, width, height, - a: { x: 62, y: 86 }, - b: { x: 106, y: 18 }, - c: { x: 136, y: 104 }, + a: { x: 62, y: 74 }, + b: { x: 106, y: 12 }, + c: { x: 134, y: 88 }, color: outline, }); fillTriangle({ buf, width, height, - a: { x: 194, y: 86 }, - b: { x: 150, y: 18 }, - c: { x: 120, y: 104 }, + a: { x: 194, y: 74 }, + b: { x: 150, y: 12 }, + c: { x: 122, y: 88 }, color: outline, }); fillTriangle({ buf, width, height, - a: { x: 78, y: 82 }, - b: { x: 106, y: 38 }, - c: { x: 122, y: 92 }, + a: { x: 80, y: 70 }, + b: { x: 106, y: 34 }, + c: { x: 122, y: 80 }, color: innerEar, }); fillTriangle({ buf, width, height, - a: { x: 178, y: 82 }, - b: { x: 150, y: 38 }, - c: { x: 134, y: 92 }, + a: { x: 176, y: 70 }, + b: { x: 150, y: 34 }, + c: { x: 134, y: 80 }, color: innerEar, }); fillEllipse({ @@ -290,9 +333,9 @@ export function renderCatFacePngBase64(): string { width, height, cx: 128, - cy: 142, - rx: 82, - ry: 78, + cy: 112, + rx: 78, + ry: 66, color: outline, }); fillEllipse({ @@ -300,7 +343,7 @@ export function renderCatFacePngBase64(): string { width, height, cx: 98, - cy: 126, + cy: 100, rx: 9, ry: 12, color: { r: 255, g: 255, b: 255 }, @@ -310,7 +353,7 @@ export function renderCatFacePngBase64(): string { width, height, cx: 158, - cy: 126, + cy: 100, rx: 9, ry: 12, color: { r: 255, g: 255, b: 255 }, @@ -320,34 +363,31 @@ export function renderCatFacePngBase64(): string { width, height, cx: 128, - cy: 158, + cy: 130, rx: 22, - ry: 18, + ry: 17, color: { r: 255, g: 255, b: 255 }, }); fillTriangle({ buf, width, height, - a: { x: 128, y: 150 }, - b: { x: 118, y: 164 }, - c: { x: 138, y: 164 }, + a: { x: 128, y: 122 }, + b: { x: 118, y: 136 }, + c: { x: 138, y: 136 }, color: nose, }); - fillRect({ buf, width, height, x: 127, y: 164, w: 2, h: 16, color: whisker }); - fillRect({ buf, width, height, x: 74, y: 161, w: 42, h: 2, color: whisker }); - fillRect({ buf, width, height, x: 140, y: 161, w: 42, h: 2, color: whisker }); - fillRect({ buf, width, height, x: 76, y: 173, w: 38, h: 2, color: whisker }); - fillRect({ buf, width, height, x: 142, y: 173, w: 38, h: 2, color: whisker }); - fillRect({ buf, width, height, x: 85, y: 185, w: 30, h: 2, color: whisker }); - fillRect({ buf, width, height, x: 141, y: 185, w: 30, h: 2, color: whisker }); - drawText({ + fillRect({ buf, width, height, x: 127, y: 136, w: 2, h: 15, color: whisker }); + fillRect({ buf, width, height, x: 74, y: 134, w: 42, h: 2, color: whisker }); + fillRect({ buf, width, height, x: 140, y: 134, w: 42, h: 2, color: whisker }); + fillRect({ buf, width, height, x: 80, y: 146, w: 34, h: 2, color: whisker }); + fillRect({ buf, width, height, x: 142, y: 146, w: 34, h: 2, color: whisker }); + drawBlockCatLabel({ buf, width, - x: Math.floor((width - measureTextWidthPx("CAT", 10)) / 2), - y: 212, - text: "CAT", - scale: 10, + height, + x: 21, + y: 190, color: outline, });