From 5c3eecfea7d4f658c50fedb62dbebe4393da5cfd Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 25 Apr 2026 12:45:33 -0700 Subject: [PATCH] fix(codex): require approvals for image-understanding turns (#71703) --- CHANGELOG.md | 3 ++ .../media-understanding-provider.test.ts | 46 +++++++++++++++++-- .../codex/media-understanding-provider.ts | 32 ++++++++++++- 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ee13f95b80..75eaac7e13e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -191,6 +191,9 @@ Docs: https://docs.openclaw.ai - Agents/CLI sessions: bind `google-gemini-cli` session auth-epoch to the Google account identity in `~/.gemini/oauth_creds.json`, so Gemini-backed agents resume their conversation after gateway restart instead of minting a fresh session, and stale bindings are invalidated when the authenticated Google account changes. Fixes #70973. (#71076) Thanks @openperf. - Slack: stop treating user mentions in assistant-authored message edit blocks as sender attribution, preventing edited bot messages from spoofing a mentioned DM user. (#71700) Thanks @vincentkoc. - Codex: consume unauthorized bound conversation inbound claims before they can fall through to other claim handlers or enqueue Codex turns. (#71702) Thanks @vincentkoc. +- Codex media understanding: require approval-checked app-server image turns while + explicitly declining tool, file, permission, and elicitation approval requests + for the bounded image worker. (#71703) Thanks @vincentkoc. ## 2026.4.24 diff --git a/extensions/codex/media-understanding-provider.test.ts b/extensions/codex/media-understanding-provider.test.ts index 42d25bb318a..091614fd90b 100644 --- a/extensions/codex/media-understanding-provider.test.ts +++ b/extensions/codex/media-understanding-provider.test.ts @@ -48,7 +48,7 @@ function threadStartResult() { serviceTier: null, cwd: "/tmp/openclaw-agent", instructionSources: [], - approvalPolicy: "never", + approvalPolicy: "on-request", approvalsReviewer: "user", sandbox: { type: "dangerFullAccess" }, permissionProfile: null, @@ -74,9 +74,12 @@ function createFakeClient(options?: { inputModalities?: string[]; completeWithItems?: boolean; notifyError?: string; + approvalRequestMethod?: string; }) { const notifications = new Set<(notification: CodexServerNotification) => void>(); + const requestHandlers = new Set<(request: { method: string }) => JsonValue | undefined>(); const requests: Array<{ method: string; params?: JsonValue }> = []; + const approvalResponses: JsonValue[] = []; const request = vi.fn(async (method: string, params?: JsonValue) => { requests.push({ method, params }); if (method === "model/list") { @@ -89,6 +92,14 @@ function createFakeClient(options?: { return threadStartResult(); } if (method === "turn/start") { + if (options?.approvalRequestMethod) { + for (const handler of requestHandlers) { + const response = handler({ method: options.approvalRequestMethod }); + if (response !== undefined) { + approvalResponses.push(response); + } + } + } if (options?.notifyError) { for (const notify of notifications) { notify({ @@ -150,9 +161,13 @@ function createFakeClient(options?: { notifications.add(handler); return () => notifications.delete(handler); }, + addRequestHandler(handler: (request: { method: string }) => JsonValue | undefined) { + requestHandlers.add(handler); + return () => requestHandlers.delete(handler); + }, } as unknown as CodexAppServerClient; - return { client, requests }; + return { client, requests, approvalResponses }; } describe("codex media understanding provider", () => { @@ -183,7 +198,7 @@ describe("codex media understanding provider", () => { expect(requests[1]?.params).toMatchObject({ model: "gpt-5.4", modelProvider: "openai", - approvalPolicy: "never", + approvalPolicy: "on-request", sandbox: "read-only", dynamicTools: [], ephemeral: true, @@ -191,7 +206,7 @@ describe("codex media understanding provider", () => { }); expect(requests[2]?.params).toMatchObject({ threadId: "thread-1", - approvalPolicy: "never", + approvalPolicy: "on-request", model: "gpt-5.4", input: [ { type: "text", text: "Describe briefly.", text_elements: [] }, @@ -200,6 +215,29 @@ describe("codex media understanding provider", () => { }); }); + it("declines approval requests during image understanding", async () => { + const { client, approvalResponses } = createFakeClient({ + approvalRequestMethod: "item/permissions/requestApproval", + }); + const provider = buildCodexMediaUnderstandingProvider({ + clientFactory: async () => client, + }); + + await provider.describeImage?.({ + buffer: Buffer.from("image-bytes"), + fileName: "image.png", + mime: "image/png", + provider: "codex", + model: "gpt-5.4", + prompt: "Describe briefly.", + timeoutMs: 30_000, + cfg: {}, + agentDir: "/tmp/openclaw-agent", + }); + + expect(approvalResponses).toEqual([{ permissions: {}, scope: "turn" }]); + }); + it("extracts text from terminal turn items", async () => { const { client } = createFakeClient({ completeWithItems: true }); const provider = buildCodexMediaUnderstandingProvider({ diff --git a/extensions/codex/media-understanding-provider.ts b/extensions/codex/media-understanding-provider.ts index 0c7a90369fb..4d9e6c237ca 100644 --- a/extensions/codex/media-understanding-provider.ts +++ b/extensions/codex/media-understanding-provider.ts @@ -22,6 +22,7 @@ import { type CodexTurn, type CodexTurnStartParams, type JsonObject, + type JsonValue, } from "./src/app-server/protocol.js"; const DEFAULT_CODEX_IMAGE_MODEL = @@ -108,7 +109,7 @@ async function describeCodexImages( model, modelProvider: "openai", cwd: req.agentDir || process.cwd(), - approvalPolicy: "never", + approvalPolicy: "on-request", sandbox: "read-only", serviceName: "OpenClaw", developerInstructions: @@ -123,6 +124,7 @@ async function describeCodexImages( ); const collector = createCodexImageTurnCollector(thread.thread.id); const cleanup = client.addNotificationHandler(collector.handleNotification); + const requestCleanup = client.addRequestHandler(denyCodexImageApprovalRequest); try { const turn = assertCodexTurnStartResponse( await client.request( @@ -137,7 +139,7 @@ async function describeCodexImages( })), ], cwd: req.agentDir || process.cwd(), - approvalPolicy: "never", + approvalPolicy: "on-request", model, effort: "low", } satisfies CodexTurnStartParams, @@ -150,6 +152,7 @@ async function describeCodexImages( }); return { text, model }; } finally { + requestCleanup(); cleanup(); } } finally { @@ -160,6 +163,31 @@ async function describeCodexImages( } } +function denyCodexImageApprovalRequest(request: { method: string }): JsonValue | undefined { + if ( + request.method === "item/commandExecution/requestApproval" || + request.method === "item/fileChange/requestApproval" + ) { + return { + decision: "decline", + reason: "OpenClaw Codex image understanding does not grant tool or file approvals.", + }; + } + if (request.method === "item/permissions/requestApproval") { + return { permissions: {}, scope: "turn" }; + } + if (request.method.includes("requestApproval")) { + return { + decision: "decline", + reason: "OpenClaw Codex image understanding does not grant native approvals.", + }; + } + if (request.method === "mcpServer/elicitation/request") { + return { action: "decline" }; + } + return undefined; +} + async function assertCodexModelSupportsImage(params: { client: CodexAppServerClient; model: string;