From 34fb96622eb69d11d95fb0ea90f755a73939842a Mon Sep 17 00:00:00 2001 From: pashpashpash Date: Sat, 25 Apr 2026 13:35:47 -0700 Subject: [PATCH] Support MCP hooks in the Codex harness (#71707) * codex harness mcp hook parity * tighten codex hook parity floor * prove security-style mcp hook blocking * bound native hook relay key handling * clarify permission relay defers to provider * harden native hook relay approvals * fix(agents): bound native hook relay JSON work budget --------- Co-authored-by: Peter Steinberger --- CHANGELOG.md | 1 + docs/plugins/codex-harness.md | 64 +-- docs/plugins/sdk-agent-harness.md | 6 +- .../media-understanding-provider.test.ts | 2 +- .../auth-profile-runtime-contract.test.ts | 2 +- .../codex/src/app-server/client.test.ts | 66 ++- extensions/codex/src/app-server/client.ts | 37 +- .../codex/src/app-server/models.test.ts | 6 +- .../src/app-server/native-hook-relay.test.ts | 10 + .../run-attempt.context-engine.test.ts | 2 +- .../codex/src/app-server/run-attempt.test.ts | 2 +- ...ema-normalization-runtime-contract.test.ts | 2 +- .../src/app-server/shared-client.test.ts | 12 +- .../app-server/transport-websocket.test.ts | 2 +- src/agents/cli-backends.test.ts | 4 +- src/agents/harness/native-hook-relay.test.ts | 389 ++++++++++++++++++ src/agents/harness/native-hook-relay.ts | 192 ++++++++- ...gateway-codex-harness.live-helpers.test.ts | 2 +- src/plugins/install-ledger-store.test.ts | 2 +- 19 files changed, 717 insertions(+), 86 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 318278d6f7b..9ca3799d4d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ Docs: https://docs.openclaw.ai - Providers/Local CLI TTS: add a bundled local command speech provider with file/stdout input, voice-note Opus conversion, and telephony PCM output. (#56239) Thanks @solar2ain. - Android/Talk Mode: expose Talk Mode in the Voice tab with runtime-owned voice capture modes and microphone foreground-service escalation. Thanks @alex-latitude. - Providers/LiteLLM: register `litellm` as an image-generation provider so `image_generate model=litellm/...` calls and `agents.defaults.imageGenerationModel.fallbacks` entries resolve through the LiteLLM proxy. Thanks @zqchris. +- Codex harness: require Codex app-server `0.125.0` or newer and cover native MCP `PreToolUse`, `PostToolUse`, and `PermissionRequest` payloads through the OpenClaw hook relay. ### Fixes diff --git a/docs/plugins/codex-harness.md b/docs/plugins/codex-harness.md index 37dd240ae2b..8798d91f772 100644 --- a/docs/plugins/codex-harness.md +++ b/docs/plugins/codex-harness.md @@ -103,7 +103,8 @@ Codex after changing config. ## Requirements - OpenClaw with the bundled `codex` plugin available. -- Codex app-server `0.118.0` or newer. +- Codex app-server `0.125.0` or newer. Native MCP hook payloads landed in Codex + `0.124.0`; OpenClaw uses `0.125.0` as the tested support floor. - Codex auth available to the app-server process. The plugin blocks older or unversioned app-server handshakes. That keeps @@ -551,7 +552,7 @@ normal turns. On the next message, OpenClaw resumes that Codex thread, passes th currently selected OpenClaw model into app-server, and keeps extended history enabled. -The command surface requires Codex app-server `0.118.0` or newer. Individual +The command surface requires Codex app-server `0.125.0` or newer. Individual control methods are reported as `unsupported by this Codex app-server` if a future or custom app-server does not expose that JSON-RPC method. @@ -597,31 +598,30 @@ around that boundary. Supported in Codex runtime v1: -| Surface | Support | Why | -| --------------------------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | -| OpenAI model loop through Codex | Supported | Codex app-server owns the OpenAI turn, native thread resume, and native tool continuation. | -| OpenClaw channel routing and delivery | Supported | Telegram, Discord, Slack, WhatsApp, iMessage, and other channels stay outside the model runtime. | -| OpenClaw dynamic tools | Supported | Codex asks OpenClaw to execute these tools, so OpenClaw stays in the execution path. | -| Prompt and context plugins | Supported | OpenClaw builds prompt overlays and projects context into the Codex turn before starting or resuming the thread. | -| Context engine lifecycle | Supported | Assemble, ingest or after-turn maintenance, and context-engine compaction coordination run for Codex turns. | -| Dynamic tool hooks | Supported | `before_tool_call`, `after_tool_call`, and tool-result middleware run around OpenClaw-owned dynamic tools. | -| Lifecycle hooks | Supported as adapter observations | `llm_input`, `llm_output`, `agent_end`, `before_compaction`, and `after_compaction` fire with honest Codex-mode payloads. | -| Native shell and patch block or observe | Supported through the native hook relay | Codex `PreToolUse` and `PostToolUse` are relayed for the committed native tool surfaces. Blocking is supported; argument rewriting is not. | -| Native permission policy | Supported through the native hook relay | Codex `PermissionRequest` can be routed through OpenClaw policy where the runtime exposes it. | -| App-server trajectory capture | Supported | OpenClaw records the request it sent to app-server and the app-server notifications it receives. | +| Surface | Support | Why | +| --------------------------------------------- | --------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OpenAI model loop through Codex | Supported | Codex app-server owns the OpenAI turn, native thread resume, and native tool continuation. | +| OpenClaw channel routing and delivery | Supported | Telegram, Discord, Slack, WhatsApp, iMessage, and other channels stay outside the model runtime. | +| OpenClaw dynamic tools | Supported | Codex asks OpenClaw to execute these tools, so OpenClaw stays in the execution path. | +| Prompt and context plugins | Supported | OpenClaw builds prompt overlays and projects context into the Codex turn before starting or resuming the thread. | +| Context engine lifecycle | Supported | Assemble, ingest or after-turn maintenance, and context-engine compaction coordination run for Codex turns. | +| Dynamic tool hooks | Supported | `before_tool_call`, `after_tool_call`, and tool-result middleware run around OpenClaw-owned dynamic tools. | +| Lifecycle hooks | Supported as adapter observations | `llm_input`, `llm_output`, `agent_end`, `before_compaction`, and `after_compaction` fire with honest Codex-mode payloads. | +| Native shell, patch, and MCP block or observe | Supported through the native hook relay | Codex `PreToolUse` and `PostToolUse` are relayed for committed native tool surfaces, including MCP payloads on Codex app-server `0.125.0` or newer. Blocking is supported; argument rewriting is not. | +| Native permission policy | Supported through the native hook relay | Codex `PermissionRequest` can be routed through OpenClaw policy where the runtime exposes it. If OpenClaw returns no decision, Codex continues through its normal guardian or user approval path. | +| App-server trajectory capture | Supported | OpenClaw records the request it sent to app-server and the app-server notifications it receives. | Not supported in Codex runtime v1: -| Surface | V1 boundary | Future path | -| --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | -| Native tool argument mutation | Codex native pre-tool hooks can block, but OpenClaw does not rewrite Codex-native tool arguments. | Requires Codex hook/schema support for replacement tool input. | -| Editable Codex-native transcript history | Codex owns canonical native thread history. OpenClaw owns a mirror and can project future context, but should not mutate unsupported internals. | Add explicit Codex app-server APIs if native thread surgery is needed. | -| `tool_result_persist` for Codex-native tool records | That hook transforms OpenClaw-owned transcript writes, not Codex-native tool records. | Could mirror transformed records, but canonical rewrite needs Codex support. | -| Rich native compaction metadata | OpenClaw observes compaction start and completion, but does not receive a stable kept/dropped list, token delta, or summary payload. | Needs richer Codex compaction events. | -| Compaction intervention | Current OpenClaw compaction hooks are notification-level in Codex mode. | Add Codex pre/post compaction hooks if plugins need to veto or rewrite native compaction. | -| Stop or final-answer gating | Codex has native stop hooks, but OpenClaw does not expose final-answer gating as a v1 plugin contract. | Future opt-in primitive with loop and timeout safeguards. | -| Native MCP hook parity as a committed v1 surface | The relay is generic, but OpenClaw has not version-gated and tested native MCP pre/post hook behavior end to end. | Add OpenClaw MCP relay tests and docs once the supported app-server protocol floor covers those payloads. | -| Byte-for-byte model API request capture | OpenClaw can capture app-server requests and notifications, but Codex core builds the final OpenAI API request internally. | Needs a Codex model-request tracing event or debug API. | +| Surface | V1 boundary | Future path | +| --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| Native tool argument mutation | Codex native pre-tool hooks can block, but OpenClaw does not rewrite Codex-native tool arguments. | Requires Codex hook/schema support for replacement tool input. | +| Editable Codex-native transcript history | Codex owns canonical native thread history. OpenClaw owns a mirror and can project future context, but should not mutate unsupported internals. | Add explicit Codex app-server APIs if native thread surgery is needed. | +| `tool_result_persist` for Codex-native tool records | That hook transforms OpenClaw-owned transcript writes, not Codex-native tool records. | Could mirror transformed records, but canonical rewrite needs Codex support. | +| Rich native compaction metadata | OpenClaw observes compaction start and completion, but does not receive a stable kept/dropped list, token delta, or summary payload. | Needs richer Codex compaction events. | +| Compaction intervention | Current OpenClaw compaction hooks are notification-level in Codex mode. | Add Codex pre/post compaction hooks if plugins need to veto or rewrite native compaction. | +| Stop or final-answer gating | Codex has native stop hooks, but OpenClaw does not expose final-answer gating as a v1 plugin contract. | Future opt-in primitive with loop and timeout safeguards. | +| Byte-for-byte model API request capture | OpenClaw can capture app-server requests and notifications, but Codex core builds the final OpenAI API request internally. | Needs a Codex model-request tracing event or debug API. | ## Tools, media, and compaction @@ -632,9 +632,15 @@ harness. Text, images, video, music, TTS, approvals, and messaging-tool output continue through the normal OpenClaw delivery path. The native hook relay is intentionally generic, but the v1 support contract is -limited to the Codex-native tool and permission paths that OpenClaw tests. Do not -assume every future Codex hook event is an OpenClaw plugin surface until the -runtime contract names it. +limited to the Codex-native tool and permission paths that OpenClaw tests. In +the Codex runtime, that includes shell, patch, and MCP `PreToolUse`, +`PostToolUse`, and `PermissionRequest` payloads. Do not assume every future +Codex hook event is an OpenClaw plugin surface until the runtime contract names +it. + +For `PermissionRequest`, OpenClaw only returns explicit allow or deny decisions +when policy decides. A no-decision result is not an allow. Codex treats it as no +hook decision and falls through to its own guardian or user approval path. Codex MCP tool approval elicitations are routed through OpenClaw's plugin approval flow when Codex marks `_meta.codex_approval_kind` as @@ -677,7 +683,9 @@ explicitly set `embeddedHarness.fallback: "pi"`. Once Codex app-server is selected, its failures surface directly without extra fallback config. **The app-server is rejected:** upgrade Codex so the app-server handshake -reports version `0.118.0` or newer. +reports version `0.125.0` or newer. Same-version prereleases or build-suffixed +versions such as `0.125.0-alpha.2` or `0.125.0+custom` are rejected because the +stable `0.125.0` protocol floor is what OpenClaw tests. **Model discovery is slow:** lower `plugins.entries.codex.config.discovery.timeoutMs` or disable discovery. diff --git a/docs/plugins/sdk-agent-harness.md b/docs/plugins/sdk-agent-harness.md index a488c411e95..c19f9027111 100644 --- a/docs/plugins/sdk-agent-harness.md +++ b/docs/plugins/sdk-agent-harness.md @@ -142,9 +142,11 @@ Codex provider and harness for compatibility. For operator setup, model prefix examples, and Codex-only configs, see [Codex Harness](/plugins/codex-harness). -OpenClaw requires Codex app-server `0.118.0` or newer. The Codex plugin checks +OpenClaw requires Codex app-server `0.125.0` or newer. The Codex plugin checks the app-server initialize handshake and blocks older or unversioned servers so -OpenClaw only runs against the protocol surface it has been tested with. +OpenClaw only runs against the protocol surface it has been tested with. The +`0.125.0` floor includes the native MCP hook payload support that landed in +Codex `0.124.0`, while pinning OpenClaw to the newer tested stable line. ### Tool-result middleware diff --git a/extensions/codex/media-understanding-provider.test.ts b/extensions/codex/media-understanding-provider.test.ts index 091614fd90b..8939d9f3e71 100644 --- a/extensions/codex/media-understanding-provider.test.ts +++ b/extensions/codex/media-understanding-provider.test.ts @@ -35,7 +35,7 @@ function threadStartResult() { status: { type: "idle" }, path: null, cwd: "/tmp/openclaw-agent", - cliVersion: "0.118.0", + cliVersion: "0.125.0", source: "unknown", agentNickname: null, agentRole: null, diff --git a/extensions/codex/src/app-server/auth-profile-runtime-contract.test.ts b/extensions/codex/src/app-server/auth-profile-runtime-contract.test.ts index df8fa307ca4..2a6cb65ad5b 100644 --- a/extensions/codex/src/app-server/auth-profile-runtime-contract.test.ts +++ b/extensions/codex/src/app-server/auth-profile-runtime-contract.test.ts @@ -43,7 +43,7 @@ function threadStartResult(threadId = "thread-auth-contract") { status: { type: "idle" }, path: null, cwd: "", - cliVersion: "0.118.0", + cliVersion: "0.125.0", source: "unknown", agentNickname: null, agentRole: null, diff --git a/extensions/codex/src/app-server/client.test.ts b/extensions/codex/src/app-server/client.test.ts index 07ed75950ad..ad4c22f7565 100644 --- a/extensions/codex/src/app-server/client.test.ts +++ b/extensions/codex/src/app-server/client.test.ts @@ -118,7 +118,7 @@ describe("CodexAppServerClient", () => { const { harness, initializing, outbound } = startInitialize(); harness.send({ id: outbound.id, - result: { userAgent: "openclaw/0.118.0 (macOS; test)" }, + result: { userAgent: "openclaw/0.125.0 (macOS; test)" }, }); await expect(initializing).resolves.toBeUndefined(); @@ -140,15 +140,63 @@ describe("CodexAppServerClient", () => { const { harness, initializing, outbound } = startInitialize(); harness.send({ id: outbound.id, - result: { userAgent: "openclaw/0.117.9 (macOS; test)" }, + result: { userAgent: "openclaw/0.124.9 (macOS; test)" }, }); await expect(initializing).rejects.toThrow( - `Codex app-server ${MIN_CODEX_APP_SERVER_VERSION} or newer is required, but detected 0.117.9`, + `Codex app-server ${MIN_CODEX_APP_SERVER_VERSION} or newer is required, but detected 0.124.9`, ); expect(harness.writes).toHaveLength(1); }); + it("blocks same-version Codex app-server prereleases below the stable floor", async () => { + const { harness, initializing, outbound } = startInitialize(); + harness.send({ + id: outbound.id, + result: { userAgent: "openclaw/0.125.0-alpha.2 (macOS; test)" }, + }); + + await expect(initializing).rejects.toThrow( + `Codex app-server ${MIN_CODEX_APP_SERVER_VERSION} or newer is required, but detected 0.125.0-alpha.2`, + ); + expect(harness.writes).toHaveLength(1); + }); + + it("blocks same-version Codex app-server build metadata below the stable floor", async () => { + const { harness, initializing, outbound } = startInitialize(); + harness.send({ + id: outbound.id, + result: { userAgent: "openclaw/0.125.0+alpha.2 (macOS; test)" }, + }); + + await expect(initializing).rejects.toThrow( + `Codex app-server ${MIN_CODEX_APP_SERVER_VERSION} or newer is required, but detected 0.125.0+alpha.2`, + ); + expect(harness.writes).toHaveLength(1); + }); + + it("accepts newer Codex app-server prereleases", async () => { + const { harness, initializing, outbound } = startInitialize(); + harness.send({ + id: outbound.id, + result: { userAgent: "openclaw/0.126.0-alpha.1 (macOS; test)" }, + }); + + await expect(initializing).resolves.toBeUndefined(); + expect(JSON.parse(harness.writes[1] ?? "{}")).toEqual({ method: "initialized" }); + }); + + it("accepts newer Codex app-server builds", async () => { + const { harness, initializing, outbound } = startInitialize(); + harness.send({ + id: outbound.id, + result: { userAgent: "openclaw/0.126.0+custom (macOS; test)" }, + }); + + await expect(initializing).resolves.toBeUndefined(); + expect(JSON.parse(harness.writes[1] ?? "{}")).toEqual({ method: "initialized" }); + }); + it("blocks app-server initialize responses without a version", async () => { const { harness, initializing, outbound } = startInitialize(); harness.send({ id: outbound.id, result: {} }); @@ -217,14 +265,14 @@ describe("CodexAppServerClient", () => { }); it("reads the Codex version from the app-server user agent", () => { - expect(readCodexVersionFromUserAgent("Codex Desktop/0.118.0")).toBe("0.118.0"); - expect(readCodexVersionFromUserAgent("openclaw/0.118.0 (macOS; test)")).toBe("0.118.0"); - expect(readCodexVersionFromUserAgent("codex_cli_rs/0.118.1-dev (linux; test)")).toBe( - "0.118.1-dev", + expect(readCodexVersionFromUserAgent("Codex Desktop/0.125.0")).toBe("0.125.0"); + expect(readCodexVersionFromUserAgent("openclaw/0.125.0 (macOS; test)")).toBe("0.125.0"); + expect(readCodexVersionFromUserAgent("codex_cli_rs/0.125.0-dev (linux; test)")).toBe( + "0.125.0-dev", ); expect(readCodexVersionFromUserAgent("Codex Desktop/not-a-version")).toBeUndefined(); - expect(readCodexVersionFromUserAgent("Codex Desktop/0.118")).toBeUndefined(); - expect(readCodexVersionFromUserAgent("openclaw/0.118.0abc")).toBeUndefined(); + expect(readCodexVersionFromUserAgent("Codex Desktop/0.124")).toBeUndefined(); + expect(readCodexVersionFromUserAgent("openclaw/0.125.0abc")).toBeUndefined(); expect(readCodexVersionFromUserAgent("missing-version")).toBeUndefined(); }); diff --git a/extensions/codex/src/app-server/client.ts b/extensions/codex/src/app-server/client.ts index 0a1850a55ea..0e6191956d7 100644 --- a/extensions/codex/src/app-server/client.ts +++ b/extensions/codex/src/app-server/client.ts @@ -18,7 +18,7 @@ import { createStdioTransport } from "./transport-stdio.js"; import { createWebSocketTransport } from "./transport-websocket.js"; import { closeCodexAppServerTransport, type CodexAppServerTransport } from "./transport.js"; -export const MIN_CODEX_APP_SERVER_VERSION = "0.118.0"; +export const MIN_CODEX_APP_SERVER_VERSION = "0.125.0"; const CODEX_APP_SERVER_PARSE_LOG_MAX = 500; type PendingRequest = { @@ -413,8 +413,10 @@ export function readCodexVersionFromUserAgent(userAgent: string | undefined): st } function compareVersions(left: string, right: string): number { - const leftParts = numericVersionParts(left); - const rightParts = numericVersionParts(right); + const leftVersion = parseVersionForComparison(left); + const rightVersion = parseVersionForComparison(right); + const leftParts = leftVersion.parts; + const rightParts = rightVersion.parts; for (let index = 0; index < Math.max(leftParts.length, rightParts.length); index += 1) { const leftPart = leftParts[index] ?? 0; const rightPart = rightParts[index] ?? 0; @@ -422,17 +424,30 @@ function compareVersions(left: string, right: string): number { return leftPart < rightPart ? -1 : 1; } } + if (leftVersion.unstableSuffix && !rightVersion.unstableSuffix) { + return -1; + } + if (!leftVersion.unstableSuffix && rightVersion.unstableSuffix) { + return 1; + } return 0; } -function numericVersionParts(version: string): number[] { - // Pre-release/build tags do not affect our minimum gate; 0.118.0-dev should - // satisfy the same protocol floor as 0.118.0. - return version - .split(/[+-]/, 1)[0] - .split(".") - .map((part) => Number.parseInt(part, 10)) - .map((part) => (Number.isFinite(part) ? part : 0)); +function parseVersionForComparison(version: string): { parts: number[]; unstableSuffix: boolean } { + // Same-version prerelease or build-suffixed versions do not satisfy a stable + // protocol floor because important app-server contract changes can land + // between alpha cuts and custom builds. + const hasBuildMetadata = version.includes("+"); + const [withoutBuild = version] = version.split("+", 1); + const prereleaseIndex = withoutBuild.indexOf("-"); + const numeric = prereleaseIndex >= 0 ? withoutBuild.slice(0, prereleaseIndex) : withoutBuild; + return { + parts: numeric + .split(".") + .map((part) => Number.parseInt(part, 10)) + .map((part) => (Number.isFinite(part) ? part : 0)), + unstableSuffix: prereleaseIndex >= 0 || hasBuildMetadata, + }; } function redactCodexAppServerLinePreview(value: string): string { diff --git a/extensions/codex/src/app-server/models.test.ts b/extensions/codex/src/app-server/models.test.ts index a1144f7508c..2778bc19f5b 100644 --- a/extensions/codex/src/app-server/models.test.ts +++ b/extensions/codex/src/app-server/models.test.ts @@ -50,7 +50,7 @@ describe("listCodexAppServerModels", () => { const initialize = JSON.parse(harness.writes[0] ?? "{}") as { id?: number }; harness.send({ id: initialize.id, - result: { userAgent: "openclaw/0.118.0 (macOS; test)" }, + result: { userAgent: "openclaw/0.125.0 (macOS; test)" }, }); await vi.waitFor(() => expect(harness.writes.length).toBeGreaterThanOrEqual(3)); const list = JSON.parse(harness.writes[2] ?? "{}") as { id?: number; method?: string }; @@ -112,7 +112,7 @@ describe("listCodexAppServerModels", () => { const initialize = JSON.parse(harness.writes[0] ?? "{}") as { id?: number }; harness.send({ id: initialize.id, - result: { userAgent: "openclaw/0.118.0 (macOS; test)" }, + result: { userAgent: "openclaw/0.125.0 (macOS; test)" }, }); await vi.waitFor(() => expect(harness.writes.length).toBeGreaterThanOrEqual(3)); const firstList = JSON.parse(harness.writes[2] ?? "{}") as { @@ -193,7 +193,7 @@ describe("listCodexAppServerModels", () => { const initialize = JSON.parse(harness.writes[0] ?? "{}") as { id?: number }; harness.send({ id: initialize.id, - result: { userAgent: "openclaw/0.118.0 (macOS; test)" }, + result: { userAgent: "openclaw/0.125.0 (macOS; test)" }, }); await vi.waitFor(() => expect(harness.writes.length).toBeGreaterThanOrEqual(3)); const firstList = JSON.parse(harness.writes[2] ?? "{}") as { id?: number }; diff --git a/extensions/codex/src/app-server/native-hook-relay.test.ts b/extensions/codex/src/app-server/native-hook-relay.test.ts index bb5c24663e4..df98cf9e633 100644 --- a/extensions/codex/src/app-server/native-hook-relay.test.ts +++ b/extensions/codex/src/app-server/native-hook-relay.test.ts @@ -92,6 +92,16 @@ describe("Codex native hook relay config", () => { }); }); + it("leaves matchers open so Codex MCP tool names reach the relay", () => { + const config = buildCodexNativeHookRelayConfig({ + relay: createRelay(), + events: ["pre_tool_use", "post_tool_use"], + }); + + expect(config["hooks.PreToolUse"]).toEqual([expect.objectContaining({ matcher: null })]); + expect(config["hooks.PostToolUse"]).toEqual([expect.objectContaining({ matcher: null })]); + }); + it("builds deterministic clearing config when the relay is disabled", () => { expect(buildCodexNativeHookRelayDisabledConfig()).toEqual({ "features.codex_hooks": false, diff --git a/extensions/codex/src/app-server/run-attempt.context-engine.test.ts b/extensions/codex/src/app-server/run-attempt.context-engine.test.ts index 7a7f3f7b324..d8402b4d105 100644 --- a/extensions/codex/src/app-server/run-attempt.context-engine.test.ts +++ b/extensions/codex/src/app-server/run-attempt.context-engine.test.ts @@ -73,7 +73,7 @@ function threadStartResult(threadId = "thread-1") { status: { type: "idle" }, path: null, cwd: tempDir || "/tmp/openclaw-codex-test", - cliVersion: "0.118.0", + cliVersion: "0.125.0", source: "unknown", agentNickname: null, agentRole: null, diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts index 6ae3d95f80b..2af5d81b913 100644 --- a/extensions/codex/src/app-server/run-attempt.test.ts +++ b/extensions/codex/src/app-server/run-attempt.test.ts @@ -65,7 +65,7 @@ function threadStartResult(threadId = "thread-1") { status: { type: "idle" }, path: null, cwd: tempDir || "/tmp/openclaw-codex-test", - cliVersion: "0.118.0", + cliVersion: "0.125.0", source: "unknown", agentNickname: null, agentRole: null, diff --git a/extensions/codex/src/app-server/schema-normalization-runtime-contract.test.ts b/extensions/codex/src/app-server/schema-normalization-runtime-contract.test.ts index 87084efdc60..678842439e5 100644 --- a/extensions/codex/src/app-server/schema-normalization-runtime-contract.test.ts +++ b/extensions/codex/src/app-server/schema-normalization-runtime-contract.test.ts @@ -60,7 +60,7 @@ function threadStartResult(threadId = "thread-1") { status: { type: "idle" }, path: null, cwd: tempDir, - cliVersion: "0.118.0", + cliVersion: "0.125.0", source: "unknown", agentNickname: null, agentRole: null, diff --git a/extensions/codex/src/app-server/shared-client.test.ts b/extensions/codex/src/app-server/shared-client.test.ts index 95b6cd9fc74..80e3f1dcf38 100644 --- a/extensions/codex/src/app-server/shared-client.test.ts +++ b/extensions/codex/src/app-server/shared-client.test.ts @@ -87,7 +87,7 @@ describe("shared Codex app-server client", () => { expect(first.process.kill).toHaveBeenCalledTimes(1); const secondList = listCodexAppServerModels({ timeoutMs: 1000 }); - await sendInitializeResult(second, "openclaw/0.118.0 (macOS; test)"); + await sendInitializeResult(second, "openclaw/0.125.0 (macOS; test)"); await sendEmptyModelList(second); await expect(secondList).resolves.toEqual({ models: [] }); @@ -112,7 +112,7 @@ describe("shared Codex app-server client", () => { timeoutMs: 1000, authProfileId: "openai-codex:work", }); - await sendInitializeResult(harness, "openclaw/0.118.0 (macOS; test)"); + await sendInitializeResult(harness, "openclaw/0.125.0 (macOS; test)"); await sendEmptyModelList(harness); await expect(listPromise).resolves.toEqual({ models: [] }); @@ -147,7 +147,7 @@ describe("shared Codex app-server client", () => { headers: {}, }, }); - await sendInitializeResult(first, "openclaw/0.118.0 (macOS; test)"); + await sendInitializeResult(first, "openclaw/0.125.0 (macOS; test)"); await sendEmptyModelList(first); await expect(firstList).resolves.toEqual({ models: [] }); @@ -162,7 +162,7 @@ describe("shared Codex app-server client", () => { headers: {}, }, }); - await sendInitializeResult(second, "openclaw/0.118.0 (macOS; test)"); + await sendInitializeResult(second, "openclaw/0.125.0 (macOS; test)"); await sendEmptyModelList(second); await expect(secondList).resolves.toEqual({ models: [] }); @@ -206,7 +206,7 @@ describe("shared Codex app-server client", () => { await expect(firstFailure).resolves.toBeInstanceOf(Error); - await sendInitializeResult(second, "openclaw/0.118.0 (macOS; test)"); + await sendInitializeResult(second, "openclaw/0.125.0 (macOS; test)"); await sendEmptyModelList(second); await expect(secondList).resolves.toEqual({ models: [] }); @@ -222,7 +222,7 @@ describe("shared Codex app-server client", () => { const message = JSON.parse(rawDataToText(data)) as { id?: number; method?: string }; if (message.method === "initialize") { socket.send( - JSON.stringify({ id: message.id, result: { userAgent: "openclaw/0.118.0" } }), + JSON.stringify({ id: message.id, result: { userAgent: "openclaw/0.125.0" } }), ); return; } diff --git a/extensions/codex/src/app-server/transport-websocket.test.ts b/extensions/codex/src/app-server/transport-websocket.test.ts index 14c805f8ad4..616b33d3b9f 100644 --- a/extensions/codex/src/app-server/transport-websocket.test.ts +++ b/extensions/codex/src/app-server/transport-websocket.test.ts @@ -33,7 +33,7 @@ describe("Codex app-server websocket transport", () => { const message = JSON.parse(rawDataToText(data)) as { id?: number; method?: string }; if (message.method === "initialize") { socket.send( - JSON.stringify({ id: message.id, result: { userAgent: "openclaw/0.118.0" } }), + JSON.stringify({ id: message.id, result: { userAgent: "openclaw/0.125.0" } }), ); return; } diff --git a/src/agents/cli-backends.test.ts b/src/agents/cli-backends.test.ts index d8fff20e037..0f4f74f1497 100644 --- a/src/agents/cli-backends.test.ts +++ b/src/agents/cli-backends.test.ts @@ -64,7 +64,7 @@ function createBackendEntry(params: { params.id === "claude-cli" ? "@anthropic-ai/claude-code" : params.id === "codex-cli" - ? "@openai/codex@0.124.0" + ? "@openai/codex@0.125.0" : params.id === "google-gemini-cli" ? "@google/gemini-cli" : undefined, @@ -448,7 +448,7 @@ describe("resolveCliBackendLiveTest", () => { defaultModelRef: "codex-cli/gpt-5.5", defaultImageProbe: true, defaultMcpProbe: true, - dockerNpmPackage: "@openai/codex@0.124.0", + dockerNpmPackage: "@openai/codex@0.125.0", dockerBinaryName: "codex", }); }); diff --git a/src/agents/harness/native-hook-relay.test.ts b/src/agents/harness/native-hook-relay.test.ts index 5f50aa80f69..9c0a57a3032 100644 --- a/src/agents/harness/native-hook-relay.test.ts +++ b/src/agents/harness/native-hook-relay.test.ts @@ -90,6 +90,34 @@ describe("native hook relay registry", () => { ]); }); + it("retains bounded payload snapshots in invocation history", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + allowedEvents: ["post_tool_use"], + }); + + await invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "post_tool_use", + rawPayload: { + hook_event_name: "PostToolUse", + tool_name: "mcp__filesystem__read_file", + tool_use_id: "large-payload-call", + tool_input: { path: "/repo/large.txt" }, + tool_response: "x".repeat(50_000), + }, + }); + + const [recorded] = __testing.getNativeHookRelayInvocationsForTests(); + expect(JSON.stringify(recorded?.rawPayload).length).toBeLessThan(25_000); + expect(recorded?.rawPayload).toMatchObject({ + tool_response: expect.stringContaining("[truncated]"), + }); + }); + it("removes retained invocations when a relay is unregistered", async () => { const relay = registerNativeHookRelay({ provider: "codex", @@ -204,6 +232,99 @@ describe("native hook relay registry", () => { ).rejects.toThrow("JSON-compatible"); }); + it("rejects broad object payloads before reading children beyond the JSON node budget", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + allowedEvents: ["post_tool_use"], + }); + const rawPayload: Record = {}; + for (let index = 0; index < 19_999; index += 1) { + rawPayload[`k${index}`] = index; + } + let overBudgetValueRead = false; + Object.defineProperty(rawPayload, "overBudget", { + enumerable: true, + get() { + overBudgetValueRead = true; + return "should not be read"; + }, + }); + + await expect( + invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "post_tool_use", + rawPayload, + }), + ).rejects.toThrow("JSON-compatible"); + expect(overBudgetValueRead).toBe(false); + }); + + it("rejects payloads beyond the relay string budget", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + allowedEvents: ["post_tool_use"], + }); + + await expect( + invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "post_tool_use", + rawPayload: { + tool_response: "x".repeat(1_000_001), + }, + }), + ).rejects.toThrow("JSON-compatible"); + }); + + it("rejects payloads beyond the relay aggregate string budget", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + allowedEvents: ["post_tool_use"], + }); + + await expect( + invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "post_tool_use", + rawPayload: Array.from({ length: 5 }, () => "x".repeat(900_000)), + }), + ).rejects.toThrow("JSON-compatible"); + }); + + it("rejects payloads beyond the relay object key budget", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + allowedEvents: ["permission_request"], + }); + + await expect( + invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "permission_request", + rawPayload: { + hook_event_name: "PermissionRequest", + tool_name: "mcp__shell__run_command", + tool_input: { + ["x".repeat(1_000_001)]: "value", + }, + }, + }), + ).rejects.toThrow("JSON-compatible"); + }); + it("rejects expired relay ids", async () => { vi.useFakeTimers(); vi.setSystemTime(new Date("2026-04-24T12:00:00Z")); @@ -377,6 +498,217 @@ describe("native hook relay registry", () => { ); }); + it("maps Codex MCP PreToolUse to OpenClaw before_tool_call and can block", async () => { + const beforeToolCall = vi.fn(async () => ({ + block: true, + blockReason: "MCP writes require review", + })); + initializeGlobalHookRunner( + createMockPluginRegistry([{ hookName: "before_tool_call", handler: beforeToolCall }]), + ); + const relay = registerNativeHookRelay({ + provider: "codex", + agentId: "agent-1", + sessionId: "session-1", + sessionKey: "agent:main:session-1", + runId: "run-1", + }); + + const response = await invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "pre_tool_use", + rawPayload: { + hook_event_name: "PreToolUse", + cwd: "/repo", + model: "gpt-5.4", + tool_name: "mcp__memory__create_entities", + tool_use_id: "mcp-call-1", + tool_input: { + entities: [{ name: "OpenClaw", entityType: "project", observations: ["test"] }], + }, + }, + }); + + expect(JSON.parse(response.stdout)).toEqual({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "MCP writes require review", + }, + }); + expect(beforeToolCall).toHaveBeenCalledWith( + expect.objectContaining({ + toolName: "mcp__memory__create_entities", + params: { + entities: [{ name: "OpenClaw", entityType: "project", observations: ["test"] }], + }, + runId: "run-1", + toolCallId: "mcp-call-1", + }), + expect.objectContaining({ + toolName: "mcp__memory__create_entities", + toolCallId: "mcp-call-1", + }), + ); + }); + + it("lets security-style plugins block native MCP calls by scanning tool params", async () => { + const beforeToolCall = vi.fn(async (event: unknown) => { + const hookEvent = event as { params?: unknown; toolName?: string }; + const serializedParams = JSON.stringify(hookEvent.params ?? {}); + if (hookEvent.toolName?.startsWith("mcp__") && serializedParams.includes("rm -rf")) { + return { + block: true, + blockReason: "Blocked by security policy: destructive MCP command detected", + }; + } + return undefined; + }); + initializeGlobalHookRunner( + createMockPluginRegistry([{ hookName: "before_tool_call", handler: beforeToolCall }]), + ); + const relay = registerNativeHookRelay({ + provider: "codex", + agentId: "agent-1", + sessionId: "session-1", + sessionKey: "agent:main:session-1", + runId: "run-1", + }); + + const response = await invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "pre_tool_use", + rawPayload: { + hook_event_name: "PreToolUse", + tool_name: "mcp__shell__run_command", + tool_use_id: "mcp-call-security", + tool_input: { + command: "rm -rf /tmp/openclaw-important-state", + }, + }, + }); + + expect(JSON.parse(response.stdout)).toEqual({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked by security policy: destructive MCP command detected", + }, + }); + expect(beforeToolCall).toHaveBeenCalledWith( + expect.objectContaining({ + toolName: "mcp__shell__run_command", + params: { + command: "rm -rf /tmp/openclaw-important-state", + }, + toolCallId: "mcp-call-security", + }), + expect.objectContaining({ + toolName: "mcp__shell__run_command", + toolCallId: "mcp-call-security", + }), + ); + }); + + it("maps Codex MCP PostToolUse to OpenClaw after_tool_call observation", async () => { + const afterToolCall = vi.fn(); + initializeGlobalHookRunner( + createMockPluginRegistry([{ hookName: "after_tool_call", handler: afterToolCall }]), + ); + const relay = registerNativeHookRelay({ + provider: "codex", + agentId: "agent-1", + sessionId: "session-1", + sessionKey: "agent:main:session-1", + runId: "run-1", + }); + + const response = await invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "post_tool_use", + rawPayload: { + hook_event_name: "PostToolUse", + tool_name: "mcp__filesystem__read_file", + tool_use_id: "mcp-call-2", + tool_input: { path: "/repo/package.json" }, + tool_response: { + content: [{ type: "text", text: '{ "name": "openclaw" }' }], + structuredContent: { bytes: 22 }, + }, + }, + }); + + expect(response).toEqual({ stdout: "", stderr: "", exitCode: 0 }); + expect(afterToolCall).toHaveBeenCalledWith( + expect.objectContaining({ + toolName: "mcp__filesystem__read_file", + params: { path: "/repo/package.json" }, + runId: "run-1", + toolCallId: "mcp-call-2", + result: { + content: [{ type: "text", text: '{ "name": "openclaw" }' }], + structuredContent: { bytes: 22 }, + }, + }), + expect.objectContaining({ + toolName: "mcp__filesystem__read_file", + toolCallId: "mcp-call-2", + }), + ); + }); + + it("routes Codex MCP PermissionRequest payloads through OpenClaw approval policy", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + agentId: "agent-1", + sessionId: "session-1", + sessionKey: "agent:main:session-1", + runId: "run-1", + }); + const approvalRequester = vi.fn(async () => "allow" as const); + __testing.setNativeHookRelayPermissionApprovalRequesterForTests(approvalRequester); + + const response = await invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "permission_request", + rawPayload: { + hook_event_name: "PermissionRequest", + cwd: "/repo", + model: "gpt-5.4", + tool_name: "mcp__github__create_issue", + tool_use_id: "mcp-call-3", + tool_input: { + owner: "openclaw", + repo: "openclaw", + title: "Test issue", + }, + }, + }); + + expect(JSON.parse(response.stdout)).toEqual({ + hookSpecificOutput: { + hookEventName: "PermissionRequest", + decision: { behavior: "allow" }, + }, + }); + expect(approvalRequester).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "codex", + toolName: "mcp__github__create_issue", + toolCallId: "mcp-call-3", + toolInput: { + owner: "openclaw", + repo: "openclaw", + title: "Test issue", + }, + }), + ); + }); + it("maps PermissionRequest approval allow and deny decisions to Codex hook output", async () => { const relay = registerNativeHookRelay({ provider: "codex", @@ -518,6 +850,63 @@ describe("native hook relay registry", () => { ]); }); + it("does not reuse pending PermissionRequest approvals when a tool call id is reused with different input", async () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + }); + let resolveDecision: ((decision: "allow") => void) | undefined; + const pendingDecision = new Promise<"allow">((resolve) => { + resolveDecision = resolve; + }); + const approvalRequester = vi.fn(async (request: { toolInput?: Record }) => { + return request.toolInput?.command === "git status" ? pendingDecision : "deny"; + }); + __testing.setNativeHookRelayPermissionApprovalRequesterForTests(approvalRequester); + + const first = invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "permission_request", + rawPayload: { + hook_event_name: "PermissionRequest", + tool_name: "Bash", + tool_use_id: "reused-call-id", + tool_input: { command: "git status" }, + }, + }); + const second = invokeNativeHookRelay({ + provider: "codex", + relayId: relay.relayId, + event: "permission_request", + rawPayload: { + hook_event_name: "PermissionRequest", + tool_name: "Bash", + tool_use_id: "reused-call-id", + tool_input: { command: "rm -rf /tmp/openclaw-important-state" }, + }, + }); + + await Promise.resolve(); + expect(approvalRequester).toHaveBeenCalledTimes(2); + const secondResponse = await second; + expect(JSON.parse(secondResponse.stdout)).toEqual({ + hookSpecificOutput: { + hookEventName: "PermissionRequest", + decision: { behavior: "deny", message: "Denied by user" }, + }, + }); + resolveDecision?.("allow"); + const firstResponse = await first; + expect(JSON.parse(firstResponse.stdout)).toEqual({ + hookSpecificOutput: { + hookEventName: "PermissionRequest", + decision: { behavior: "allow" }, + }, + }); + }); + it("defers PermissionRequest approvals after the per-relay approval budget is exhausted", async () => { const relay = registerNativeHookRelay({ provider: "codex", diff --git a/src/agents/harness/native-hook-relay.ts b/src/agents/harness/native-hook-relay.ts index d9e22ec609b..0dd7f6161f0 100644 --- a/src/agents/harness/native-hook-relay.ts +++ b/src/agents/harness/native-hook-relay.ts @@ -1,4 +1,4 @@ -import { randomUUID } from "node:crypto"; +import { createHash, randomUUID } from "node:crypto"; import { existsSync } from "node:fs"; import path from "node:path"; import { createSubsystemLogger } from "../../logging/subsystem.js"; @@ -98,7 +98,7 @@ type NativeHookRelayInvocationMetadata = Partial< type NativeHookRelayProviderAdapter = { normalizeMetadata: (rawPayload: JsonValue) => NativeHookRelayInvocationMetadata; - readToolInput: (rawPayload: JsonValue) => Record; + readToolInput: (rawPayload: JsonValue) => Record; readToolResponse: (rawPayload: JsonValue) => unknown; renderNoopResponse: (event: NativeHookRelayEvent) => NativeHookRelayProcessResponse; renderPreToolUseBlockResponse: (reason: string) => NativeHookRelayProcessResponse; @@ -114,6 +114,14 @@ const DEFAULT_PERMISSION_TIMEOUT_MS = 120_000; const MAX_NATIVE_HOOK_RELAY_INVOCATIONS = 200; const MAX_NATIVE_HOOK_RELAY_JSON_DEPTH = 64; const MAX_NATIVE_HOOK_RELAY_JSON_NODES = 20_000; +const MAX_NATIVE_HOOK_RELAY_STRING_LENGTH = 1_000_000; +const MAX_NATIVE_HOOK_RELAY_TOTAL_STRING_LENGTH = 4_000_000; +const MAX_NATIVE_HOOK_RELAY_HISTORY_STRING_LENGTH = 4_000; +const MAX_NATIVE_HOOK_RELAY_HISTORY_TOTAL_STRING_LENGTH = 20_000; +const MAX_NATIVE_HOOK_RELAY_HISTORY_ARRAY_ITEMS = 50; +const MAX_NATIVE_HOOK_RELAY_HISTORY_OBJECT_KEYS = 50; +const MAX_PERMISSION_FALLBACK_KEYS = 200; +const MAX_PERMISSION_FALLBACK_KEY_CHARS = 240; const MAX_APPROVAL_TITLE_LENGTH = 80; const MAX_APPROVAL_DESCRIPTION_LENGTH = 700; const MAX_PERMISSION_APPROVALS_PER_WINDOW = 12; @@ -142,7 +150,7 @@ type NativeHookRelayPermissionApprovalRequest = { toolCallId?: string; cwd?: string; model?: string; - toolInput: Record; + toolInput: Record; signal?: AbortSignal; }; @@ -321,7 +329,10 @@ export function renderNativeHookRelayUnavailableResponse(params: { } function recordNativeHookRelayInvocation(invocation: NativeHookRelayInvocation): void { - invocations.push(invocation); + invocations.push({ + ...invocation, + rawPayload: snapshotNativeHookRelayPayload(invocation.rawPayload), + }); if (invocations.length > MAX_NATIVE_HOOK_RELAY_INVOCATIONS) { invocations.splice(0, invocations.length - MAX_NATIVE_HOOK_RELAY_INVOCATIONS); } @@ -443,8 +454,12 @@ async function runNativeHookRelayPermissionRequest(params: { return params.adapter.renderPermissionDecisionResponse("deny", "Denied by user"); } } catch (error) { - log.warn(`native hook permission approval failed; deferring: ${String(error)}`); + log.warn( + `native hook permission approval failed; deferring to provider approval path: ${String(error)}`, + ); } + // A PermissionRequest no-op is not an allow decision. Codex interprets it as + // "no hook decision" and falls through to its normal guardian/user approval path. return params.adapter.renderNoopResponse(params.invocation.event); } @@ -455,7 +470,7 @@ async function requestNativeHookRelayPermissionApprovalWithBudget(params: { }): Promise { if (!consumeNativeHookRelayPermissionBudget(params.registration.relayId)) { log.warn( - `native hook permission approval rate limit exceeded; deferring: relay=${params.registration.relayId} run=${params.registration.runId}`, + `native hook permission approval rate limit exceeded; deferring to provider approval path: relay=${params.registration.relayId} run=${params.registration.runId}`, ); return "defer"; } @@ -473,7 +488,10 @@ function nativeHookRelayPermissionApprovalKey(params: { return [ params.registration.relayId, params.registration.runId, - params.request.toolCallId ?? permissionRequestFallbackKey(params.request), + params.request.toolCallId + ? `call:${params.request.toolCallId}` + : permissionRequestFallbackKey(params.request), + permissionRequestContentFingerprint(params.request), ].join(":"); } @@ -482,8 +500,72 @@ function permissionRequestFallbackKey(request: NativeHookRelayPermissionApproval if (command) { return `${request.toolName}:command:${truncateText(command, 240)}`; } - const keys = Object.keys(request.toolInput).toSorted().join(","); - return `${request.toolName}:keys:${truncateText(keys, 240)}`; + return `${request.toolName}:keys:${permissionRequestToolInputKeyFingerprint(request.toolInput)}`; +} + +function permissionRequestToolInputKeyFingerprint(toolInput: Record): string { + let fingerprint = ""; + let processed = 0; + for (const key of Object.keys(toolInput).toSorted()) { + if (processed >= MAX_PERMISSION_FALLBACK_KEYS) { + break; + } + const separator = fingerprint ? "," : ""; + const remaining = MAX_PERMISSION_FALLBACK_KEY_CHARS - fingerprint.length - separator.length; + if (remaining <= 0) { + break; + } + fingerprint += `${separator}${key.slice(0, remaining)}`; + processed += 1; + } + return fingerprint || "none"; +} + +function permissionRequestContentFingerprint( + request: NativeHookRelayPermissionApprovalRequest, +): string { + const hash = createHash("sha256"); + hash.update(request.toolName); + hash.update("\0"); + updateJsonHash(hash, request.toolInput); + return hash.digest("hex"); +} + +function updateJsonHash(hash: ReturnType, value: JsonValue): void { + if (value === null) { + hash.update("null"); + return; + } + if (typeof value === "string") { + hash.update("string:"); + hash.update(JSON.stringify(value)); + return; + } + if (typeof value === "number") { + hash.update(`number:${String(value)}`); + return; + } + if (typeof value === "boolean") { + hash.update(`boolean:${String(value)}`); + return; + } + if (Array.isArray(value)) { + hash.update("["); + for (const item of value) { + updateJsonHash(hash, item); + hash.update(","); + } + hash.update("]"); + return; + } + hash.update("{"); + for (const key of Object.keys(value).toSorted()) { + hash.update(JSON.stringify(key)); + hash.update(":"); + updateJsonHash(hash, value[key]); + hash.update(","); + } + hash.update("}"); } function consumeNativeHookRelayPermissionBudget(relayId: string, now = Date.now()): boolean { @@ -509,6 +591,55 @@ function removeNativeHookRelayPermissionState(relayId: string): void { } } +function snapshotNativeHookRelayPayload(payload: JsonValue): JsonValue { + return snapshotJsonValue(payload, { + remainingStringLength: MAX_NATIVE_HOOK_RELAY_HISTORY_TOTAL_STRING_LENGTH, + }); +} + +function snapshotJsonValue(value: JsonValue, state: { remainingStringLength: number }): JsonValue { + if (value === null || typeof value === "number" || typeof value === "boolean") { + return value; + } + if (typeof value === "string") { + return snapshotString(value, state); + } + if (Array.isArray(value)) { + const items = value + .slice(0, MAX_NATIVE_HOOK_RELAY_HISTORY_ARRAY_ITEMS) + .map((item) => snapshotJsonValue(item, state)); + if (value.length > MAX_NATIVE_HOOK_RELAY_HISTORY_ARRAY_ITEMS) { + items.push("[truncated]"); + } + return items; + } + const snapshot: Record = {}; + const keys = Object.keys(value); + for (const key of keys.slice(0, MAX_NATIVE_HOOK_RELAY_HISTORY_OBJECT_KEYS)) { + snapshot[snapshotString(key, state)] = snapshotJsonValue(value[key], state); + } + if (keys.length > MAX_NATIVE_HOOK_RELAY_HISTORY_OBJECT_KEYS) { + snapshot["[truncated]"] = keys.length - MAX_NATIVE_HOOK_RELAY_HISTORY_OBJECT_KEYS; + } + return snapshot; +} + +function snapshotString(value: string, state: { remainingStringLength: number }): string { + if (state.remainingStringLength <= 0) { + return "[truncated]"; + } + const limit = Math.min( + value.length, + MAX_NATIVE_HOOK_RELAY_HISTORY_STRING_LENGTH, + state.remainingStringLength, + ); + state.remainingStringLength -= limit; + if (limit >= value.length) { + return value; + } + return `${value.slice(0, limit)}...[truncated]`; +} + function normalizeNativeHookInvocation(params: { registration: NativeHookRelayRegistration; event: NativeHookRelayEvent; @@ -563,16 +694,16 @@ function normalizeCodexHookMetadata(rawPayload: JsonValue): NativeHookRelayInvoc return metadata; } -function readCodexToolInput(rawPayload: JsonValue): Record { +function readCodexToolInput(rawPayload: JsonValue): Record { const payload = isJsonObject(rawPayload) ? rawPayload : {}; const toolInput = payload.tool_input; if (isJsonObject(toolInput)) { - return toolInput; + return toolInput as Record; } if (toolInput === undefined) { return {}; } - return { value: toolInput }; + return { value: toolInput as JsonValue }; } function readCodexToolResponse(rawPayload: JsonValue): unknown { @@ -802,6 +933,7 @@ function readOptionalString(value: unknown): string | undefined { function isJsonValue(value: unknown): value is JsonValue { const stack: Array<{ value: unknown; depth: number }> = [{ value, depth: 0 }]; let nodes = 0; + let totalStringLength = 0; while (stack.length) { const current = stack.pop()!; nodes += 1; @@ -811,7 +943,17 @@ function isJsonValue(value: unknown): value is JsonValue { if (current.depth > MAX_NATIVE_HOOK_RELAY_JSON_DEPTH) { return false; } - if (current.value === null || typeof current.value === "string") { + if (current.value === null) { + continue; + } + if (typeof current.value === "string") { + if (current.value.length > MAX_NATIVE_HOOK_RELAY_STRING_LENGTH) { + return false; + } + totalStringLength += current.value.length; + if (totalStringLength > MAX_NATIVE_HOOK_RELAY_TOTAL_STRING_LENGTH) { + return false; + } continue; } if (typeof current.value === "number") { @@ -824,8 +966,11 @@ function isJsonValue(value: unknown): value is JsonValue { continue; } if (Array.isArray(current.value)) { - for (const item of current.value) { - stack.push({ value: item, depth: current.depth + 1 }); + for (let index = 0; index < current.value.length; index += 1) { + if (nodes + stack.length + 1 > MAX_NATIVE_HOOK_RELAY_JSON_NODES) { + return false; + } + stack.push({ value: current.value[index], depth: current.depth + 1 }); } continue; } @@ -833,8 +978,21 @@ function isJsonValue(value: unknown): value is JsonValue { return false; } try { - for (const item of Object.values(current.value)) { - stack.push({ value: item, depth: current.depth + 1 }); + for (const key in current.value) { + if (!Object.prototype.hasOwnProperty.call(current.value, key)) { + continue; + } + if (key.length > MAX_NATIVE_HOOK_RELAY_STRING_LENGTH) { + return false; + } + totalStringLength += key.length; + if (totalStringLength > MAX_NATIVE_HOOK_RELAY_TOTAL_STRING_LENGTH) { + return false; + } + if (nodes + stack.length + 1 > MAX_NATIVE_HOOK_RELAY_JSON_NODES) { + return false; + } + stack.push({ value: current.value[key], depth: current.depth + 1 }); } } catch { return false; diff --git a/src/gateway/gateway-codex-harness.live-helpers.test.ts b/src/gateway/gateway-codex-harness.live-helpers.test.ts index 0ce37c66270..fd2de9372f2 100644 --- a/src/gateway/gateway-codex-harness.live-helpers.test.ts +++ b/src/gateway/gateway-codex-harness.live-helpers.test.ts @@ -171,7 +171,7 @@ describe("gateway codex harness live helpers", () => { "`codex models` didn’t return a plain list in this environment; it dropped into the interactive TUI instead.", "", "What I could confirm from that session is:", - "- Codex CLI version: `v0.118.0`", + "- Codex CLI version: `v0.125.0`", "- Current selected model: `local-default-model`", "- The UI indicates `/model` is the command to change models", ].join("\n"); diff --git a/src/plugins/install-ledger-store.test.ts b/src/plugins/install-ledger-store.test.ts index cefe0f63168..90ef1c97eb0 100644 --- a/src/plugins/install-ledger-store.test.ts +++ b/src/plugins/install-ledger-store.test.ts @@ -125,7 +125,7 @@ describe("plugin install ledger store", () => { it("updates and removes records without mutating caller state", async () => { const records: Record = { keep: { - source: "npm", + source: "npm" as const, spec: "keep@1.0.0", }, } satisfies Record;