From 0ddbae171db562df745843c5da285969f3092fdd Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sun, 26 Apr 2026 03:45:00 +0100
Subject: [PATCH] test: cover codex app-server subagents

---
 CHANGELOG.md                                  |  68 ++++--
 docs/help/testing.md                          |   9 +
 scripts/test-live-codex-harness-docker.sh     |   4 +
 ...subagents.sessions-spawn.lifecycle.test.ts |  26 +++
 ...s.subagents.sessions-spawn.test-harness.ts |   2 +-
 src/agents/sessions-spawn-hooks.test.ts       |  35 +++-
 src/agents/subagent-spawn.attachments.test.ts |  31 ++-
 .../subagent-spawn.depth-limits.test.ts       |  42 ++--
 .../subagent-spawn.model-session.test.ts      |  11 +-
 src/agents/subagent-spawn.test-helpers.ts     |   2 +-
 src/agents/subagent-spawn.test.ts             |  21 +-
 src/agents/subagent-spawn.ts                  |  83 +++++++-
 .../gateway-codex-harness.live-helpers.ts     |   4 +
 .../gateway-codex-harness.live.test.ts        | 193 ++++++++++++++++++
 14 files changed, 448 insertions(+), 83 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 348a366e29e..e53d5f14b49 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -85,21 +85,59 @@ Docs: https://docs.openclaw.ai
   and show daemon state separately when available, so `gateway.tailscale.mode:
 "off"` no longer reads like the Tailscale daemon is stopped. Fixes #71790.
   Thanks @pesvobodak.
-- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables itself for the current Gateway process after repeated failed restarts while the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, @FiredMosquito831, and @spikefcz.
-- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime bind and port so CLI-driven non-loopback starts do not crash before config exists. Fixes #71823.
-- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY` before the first embedded agent attempt runs. (#71833) Thanks @mjamiv.
-- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant. Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402.
-- Matrix/cron: preserve the live Matrix delivery target when creating implicit announce reminder jobs so mixed-case room IDs are not reconstructed from lowercased session keys. Fixes #71798.
-- Feishu: accept Schema 2.0 card action callbacks that report `context.open_chat_id` instead of legacy `context.chat_id`, so button callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.
-- Feishu: keep synthetic card-action and bot-menu ids out of platform reply targets, using the real card callback message id when Feishu provides one and plain-sending otherwise. Fixes #71673. Thanks @eddy1068.
-- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and `qqbot_remind` tool registration noise. Fixes #63102.
-- Browser automation: keep stable tab ids and labels attached when Chromium replaces the raw target after form submissions or other action-triggered navigations, and return the replacement `targetId` from `/act` when the match is provable. Fixes #46137.
-- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs directly for owner-authorized senders instead of returning `cronParams` and relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937) Thanks @GaosCode.
-- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled `acpx`. Thanks @vincentkoc.
-- Media delivery: avoid sending generated image attachments twice when the assistant reply already includes explicit `MEDIA:` lines for the same turn, and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
-- Codex harness: ignore retryable app-server error notifications after Codex recovers, and preserve the real nested error message for terminal app-server failures instead of replacing it with a generic failure. Thanks @pashpashpash.
-- Agents/subagents: keep queued subagent announces session-only when the requester has no external channel target, avoiding ambiguous multi-channel delivery failures. Fixes #59201. Thanks @larrylhollan.
-- Image understanding: preserve configured provider-prefixed vision model metadata when callers request the model without the provider prefix, so custom image models keep their `input: ["text", "image"]` capability. Fixes #33185. Thanks @Kobe9312 and @vincentkoc.
+- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when
+  the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables
+  itself for the current Gateway process after repeated failed restarts while
+  the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux,
+  @FiredMosquito831, and @spikefcz.
+- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime
+  bind and port so CLI-driven non-loopback starts do not crash before config
+  exists. Fixes #71823.
+- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup
+  so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY`
+  before the first embedded agent attempt runs. (#71833) Thanks @mjamiv.
+- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider
+  matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant.
+  Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402.
+- Matrix/cron: preserve the live Matrix delivery target when creating implicit
+  announce reminder jobs so mixed-case room IDs are not reconstructed from
+  lowercased session keys. Fixes #71798.
+- Feishu: accept Schema 2.0 card action callbacks that report
+  `context.open_chat_id` instead of legacy `context.chat_id`, so button
+  callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.
+- Feishu: keep synthetic card-action and bot-menu ids out of platform reply
+  targets, using the real card callback message id when Feishu provides one and
+  plain-sending otherwise. Fixes #71673. Thanks @eddy1068.
+- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces
+  the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and
+  `qqbot_remind` tool registration noise. Fixes #63102.
+- Browser automation: keep stable tab ids and labels attached when Chromium
+  replaces the raw target after form submissions or other action-triggered
+  navigations, and return the replacement `targetId` from `/act` when the match
+  is provable. Fixes #46137.
+- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs
+  directly for owner-authorized senders instead of returning `cronParams` and
+  relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937)
+  Thanks @GaosCode.
+- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is
+  loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled
+  `acpx`. Thanks @vincentkoc.
+- Media delivery: avoid sending generated image attachments twice when the
+  assistant reply already includes explicit `MEDIA:` lines for the same turn,
+  and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
+- Codex harness: ignore retryable app-server error notifications after Codex
+  recovers, and preserve the real nested error message for terminal app-server
+  failures instead of replacing it with a generic failure. Thanks @pashpashpash.
+- Agents/Codex: prepare native Codex sub-agent session metadata without a
+  nested Gateway session patch and add a focused Docker smoke for the app-server
+  sub-agent path. Thanks @vincentkoc.
+- Agents/subagents: keep queued subagent announces session-only when the
+  requester has no external channel target, avoiding ambiguous multi-channel
+  delivery failures. Fixes #59201. Thanks @larrylhollan.
+- Image understanding: preserve configured provider-prefixed vision model
+  metadata when callers request the model without the provider prefix, so custom
+  image models keep their `input: ["text", "image"]` capability. Fixes #33185.
+  Thanks @Kobe9312 and @vincentkoc.
 - Plugins/install: restore the previous plugin index records if a concurrent config write conflict interrupts install, update, or uninstall metadata commits. Thanks @shakkernerd.
 - Plugins/update: restore previous plugin index records if core update or channel setup hits a concurrent config write conflict after plugin metadata changes. Thanks @shakkernerd.
 - Plugins/onboarding: defer channel/provider plugin install records until the owning config write commits, keeping setup failures from advancing the plugin index ahead of `openclaw.json`. Thanks @shakkernerd.
diff --git a/docs/help/testing.md b/docs/help/testing.md
index 78a1d253bb8..f4ac3729e6c 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -55,6 +55,15 @@ When debugging real providers/models (requires real creds):
     Slack DM with `/codex bind`, exercises `/codex fast` and
     `/codex permissions`, then verifies a plain reply and an image attachment
     route through the native plugin binding instead of ACP.
+- Codex app-server harness smoke: `pnpm test:docker:live-codex-harness`
+  - Runs gateway agent turns through the plugin-owned Codex app-server harness,
+    verifies `/codex status` and `/codex models`, and by default exercises image,
+    cron MCP, sub-agent, and Guardian probes. Disable the sub-agent probe with
+    `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=0` when isolating other Codex
+    app-server failures. For a focused sub-agent check, disable the other probes:
+    `OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=1 pnpm test:docker:live-codex-harness`.
+    That focused run exits right after the sub-agent probe unless
+    `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY=0` is set.
 - Crestodian rescue command smoke: `pnpm test:live:crestodian-rescue-channel`
   - Opt-in belt-and-suspenders check for the message-channel rescue command
     surface. It exercises `/crestodian status`, queues a persistent model
diff --git a/scripts/test-live-codex-harness-docker.sh b/scripts/test-live-codex-harness-docker.sh
index 1136ca61835..21ab35ffb23 100644
--- a/scripts/test-live-codex-harness-docker.sh
+++ b/scripts/test-live-codex-harness-docker.sh
@@ -203,6 +203,8 @@ echo "==> Run Codex harness live test in Docker"
 echo "==> Model: ${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}"
 echo "==> Image probe: ${OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE:-1}"
 echo "==> MCP probe: ${OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE:-1}"
+echo "==> Subagent probe: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}"
+echo "==> Subagent-only fast path: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-auto}"
 echo "==> Guardian probe: ${OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE:-1}"
 echo "==> Auth mode: $CODEX_HARNESS_AUTH_MODE"
 echo "==> Profile file: $PROFILE_STATUS"
@@ -230,6 +232,8 @@ DOCKER_RUN_ARGS=(docker run --rm -t \
   -e OPENCLAW_LIVE_CODEX_HARNESS_MODEL="${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}" \
   -e OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS:-1}" \
   -e OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS:-}" \
+  -e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-}" \
+  -e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}" \
   -e OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG="${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}" \
   -e OPENCLAW_LIVE_CODEX_BIND="${OPENCLAW_LIVE_CODEX_BIND:-}" \
   -e OPENCLAW_LIVE_CODEX_BIND_MODEL="${OPENCLAW_LIVE_CODEX_BIND_MODEL:-}" \
diff --git a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts
index 9da67a2d69b..b9ecf8d0502 100644
--- a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts
+++ b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts
@@ -269,6 +269,32 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
     expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
   });
 
+  it("gives native child agent startup enough gateway request time", async () => {
+    const ctx = setupSessionsSpawnGatewayMock({
+      includeChatHistory: true,
+      agentWaitResult: { status: "ok", startedAt: 1000, endedAt: 2000 },
+    });
+    const tool = await getSessionsSpawnTool({
+      agentSessionKey: "main",
+      agentChannel: "whatsapp",
+    });
+
+    const result = await tool.execute("call-start-timeout", {
+      task: "do thing",
+      runTimeoutSeconds: 120,
+    });
+
+    expect(result.details).toMatchObject({
+      status: "accepted",
+      runId: expect.any(String),
+    });
+    const childAgentCall = ctx.calls.find((call) => {
+      const params = call.params as { lane?: string } | undefined;
+      return call.method === "agent" && params?.lane === "subagent";
+    });
+    expect(childAgentCall?.timeoutMs).toBe(125_000);
+  });
+
   it("sessions_spawn retires bundle MCP runtime when run-mode cleanup completes", async () => {
     let resumeAnnounceFlow: ((value: boolean) => void) | undefined;
     let announceFlowStarted: (() => void) | undefined;
diff --git a/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts b/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts
index e0dcda5163a..5aead7bb7bf 100644
--- a/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts
+++ b/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts
@@ -12,7 +12,7 @@ type CreateSessionsSpawnTool =
 type SubagentRegistryTesting = (typeof import("./subagent-registry.js"))["__testing"];
 type SubagentSpawnTesting = (typeof import("./subagent-spawn.js"))["__testing"];
 export type CreateOpenClawToolsOpts = Parameters<CreateSessionsSpawnTool>[0];
-export type GatewayRequest = { method?: string; params?: unknown };
+export type GatewayRequest = { method?: string; params?: unknown; timeoutMs?: number };
 export type AgentWaitCall = { runId?: string; timeoutMs?: number };
 type SessionsSpawnGatewayMockOptions = {
   includeSessionsList?: boolean;
diff --git a/src/agents/sessions-spawn-hooks.test.ts b/src/agents/sessions-spawn-hooks.test.ts
index 91a0e835958..297c4b0b909 100644
--- a/src/agents/sessions-spawn-hooks.test.ts
+++ b/src/agents/sessions-spawn-hooks.test.ts
@@ -9,6 +9,7 @@ type GatewayRequest = { method?: string; params?: Record<string, unknown> };
 const hoisted = vi.hoisted(() => ({
   callGatewayMock: vi.fn(),
   configOverride: {} as Record<string, unknown>,
+  updateSessionStoreMock: vi.fn(),
 }));
 
 const hookRunnerMocks = vi.hoisted(() => ({
@@ -139,6 +140,7 @@ beforeAll(async () => {
   ({ resetSubagentRegistryForTests, spawnSubagentDirect } = await loadSubagentSpawnModuleForTest({
     callGatewayMock: hoisted.callGatewayMock,
     loadConfig: () => hoisted.configOverride,
+    updateSessionStoreMock: hoisted.updateSessionStoreMock,
     hookRunner: {
       hasHooks: (hookName: string) =>
         hookName === "subagent_spawning" ||
@@ -157,6 +159,7 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
   beforeEach(() => {
     resetSubagentRegistryForTests();
     hoisted.callGatewayMock.mockReset();
+    hoisted.updateSessionStoreMock.mockReset();
     hookRunnerMocks.hasSubagentEndedHook = true;
     hookRunnerMocks.runSubagentSpawning.mockClear();
     hookRunnerMocks.runSubagentSpawned.mockClear();
@@ -167,6 +170,16 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
         scope: "per-sender",
       },
     });
+    const store: Record<string, Record<string, unknown>> = {};
+    hoisted.updateSessionStoreMock.mockImplementation(
+      async (_storePath: unknown, mutator: unknown) => {
+        if (typeof mutator !== "function") {
+          throw new Error("missing session store mutator");
+        }
+        await mutator(store);
+        return store;
+      },
+    );
     hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => {
       const request = opts as { method?: string };
       if (request.method === "sessions.patch") {
@@ -398,11 +411,21 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
   });
 
   it("cleans up the provisional session when lineage patching fails after thread binding", async () => {
+    const store: Record<string, Record<string, unknown>> = {};
+    hoisted.updateSessionStoreMock.mockImplementation(
+      async (_storePath: unknown, mutator: unknown) => {
+        if (typeof mutator !== "function") {
+          throw new Error("missing session store mutator");
+        }
+        await mutator(store);
+        if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) {
+          throw new Error("lineage patch failed");
+        }
+        return store;
+      },
+    );
     hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => {
       const request = opts as { method?: string; params?: Record<string, unknown> };
-      if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") {
-        throw new Error("lineage patch failed");
-      }
       if (request.method === "sessions.delete") {
         return { ok: true };
       }
@@ -420,10 +443,8 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
       agentThreadId: "456",
     });
 
-    expect(result).toMatchObject({
-      status: "error",
-      error: "lineage patch failed",
-    });
+    expect(result.status).toBe("error");
+    expect(result.error).toContain("lineage patch failed");
     expect(hookRunnerMocks.runSubagentSpawned).not.toHaveBeenCalled();
     expect(hookRunnerMocks.runSubagentEnded).not.toHaveBeenCalled();
     const methods = getGatewayMethods();
diff --git a/src/agents/subagent-spawn.attachments.test.ts b/src/agents/subagent-spawn.attachments.test.ts
index b2c1e470607..c44003add46 100644
--- a/src/agents/subagent-spawn.attachments.test.ts
+++ b/src/agents/subagent-spawn.attachments.test.ts
@@ -9,6 +9,7 @@ import {
 } from "./subagent-spawn.test-helpers.js";
 
 const callGatewayMock = vi.fn();
+const updateSessionStoreMock = vi.fn();
 
 let configOverride: Record<string, unknown> = {
   ...createSubagentSpawnTestConfig(),
@@ -20,6 +21,7 @@ beforeAll(async () => {
   subagentSpawnModule = await loadSubagentSpawnModuleForTest({
     callGatewayMock,
     loadConfig: () => configOverride,
+    updateSessionStoreMock,
     workspaceDir: workspaceDirOverride || os.tmpdir(),
   });
 });
@@ -92,6 +94,15 @@ describe("spawnSubagentDirect filename validation", () => {
     configOverride = createSubagentSpawnTestConfig(workspaceDirOverride);
     subagentSpawnModule.resetSubagentRegistryForTests();
     callGatewayMock.mockClear();
+    updateSessionStoreMock.mockReset();
+    const store: Record<string, Record<string, unknown>> = {};
+    updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => {
+      if (typeof mutator !== "function") {
+        throw new Error("missing session store mutator");
+      }
+      await mutator(store);
+      return store;
+    });
     setupAcceptedSubagentGatewayMock(callGatewayMock);
   });
 
@@ -170,12 +181,20 @@ describe("spawnSubagentDirect filename validation", () => {
 
   it("removes materialized attachments when lineage patching fails", async () => {
     const calls: Array<{ method?: string; params?: Record<string, unknown> }> = [];
+    const store: Record<string, Record<string, unknown>> = {};
+    updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => {
+      if (typeof mutator !== "function") {
+        throw new Error("missing session store mutator");
+      }
+      await mutator(store);
+      if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) {
+        throw new Error("lineage patch failed");
+      }
+      return store;
+    });
     callGatewayMock.mockImplementation(async (opts: unknown) => {
       const request = opts as { method?: string; params?: Record<string, unknown> };
       calls.push(request);
-      if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") {
-        throw new Error("lineage patch failed");
-      }
       if (request.method === "sessions.delete") {
         return { ok: true };
       }
@@ -191,10 +210,8 @@ describe("spawnSubagentDirect filename validation", () => {
       ctx,
     );
 
-    expect(result).toMatchObject({
-      status: "error",
-      error: "lineage patch failed",
-    });
+    expect(result.status).toBe("error");
+    expect(result.error).toContain("lineage patch failed");
     const attachmentsRoot = path.join(workspaceDirOverride, ".openclaw", "attachments");
     const retainedDirs = fs.existsSync(attachmentsRoot)
       ? fs.readdirSync(attachmentsRoot).filter((entry) => !entry.startsWith("."))
diff --git a/src/agents/subagent-spawn.depth-limits.test.ts b/src/agents/subagent-spawn.depth-limits.test.ts
index cefc3011991..fd18b3c6c05 100644
--- a/src/agents/subagent-spawn.depth-limits.test.ts
+++ b/src/agents/subagent-spawn.depth-limits.test.ts
@@ -1,6 +1,7 @@
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import {
   createSubagentSpawnTestConfig,
+  installSessionStoreCaptureMock,
   loadSubagentSpawnModuleForTest,
   setupAcceptedSubagentGatewayMock,
 } from "./subagent-spawn.test-helpers.js";
@@ -10,10 +11,12 @@ const hoisted = vi.hoisted(() => ({
   callGatewayMock: vi.fn(),
   configOverride: {} as Record<string, unknown>,
   depthBySession: new Map<string, number>(),
+  updateSessionStoreMock: vi.fn(),
   registerSubagentRunMock: vi.fn(),
 }));
 
 let spawnSubagentDirect: typeof import("./subagent-spawn.js").spawnSubagentDirect;
+let persistedStore: Record<string, Record<string, unknown>> | undefined;
 
 function createDepthLimitConfig(subagents?: Record<string, unknown>) {
   return createSubagentSpawnTestConfig("/tmp/workspace-main", {
@@ -48,6 +51,7 @@ describe("subagent spawn depth + child limits", () => {
       callGatewayMock: hoisted.callGatewayMock,
       loadConfig: () => hoisted.configOverride,
       registerSubagentRunMock: hoisted.registerSubagentRunMock,
+      updateSessionStoreMock: hoisted.updateSessionStoreMock,
       getSubagentDepthFromSessionStore: (sessionKey) => hoisted.depthBySession.get(sessionKey) ?? 0,
       countActiveRunsForSession: (sessionKey) =>
         hoisted.activeChildrenBySession.get(sessionKey) ?? 0,
@@ -60,6 +64,13 @@ describe("subagent spawn depth + child limits", () => {
     hoisted.depthBySession.clear();
     hoisted.callGatewayMock.mockClear();
     hoisted.registerSubagentRunMock.mockClear();
+    hoisted.updateSessionStoreMock.mockReset();
+    persistedStore = undefined;
+    installSessionStoreCaptureMock(hoisted.updateSessionStoreMock, {
+      onStore: (store) => {
+        persistedStore = store;
+      },
+    });
     hoisted.configOverride = createDepthLimitConfig();
     setupAcceptedSubagentGatewayMock(hoisted.callGatewayMock);
   });
@@ -87,23 +98,14 @@ describe("subagent spawn depth + child limits", () => {
       runId: "run-1",
     });
 
-    const calls = hoisted.callGatewayMock.mock.calls.map(
-      (call) => call[0] as { method?: string; params?: Record<string, unknown> },
-    );
-    const spawnedByPatch = calls.find(
-      (entry) =>
-        entry.method === "sessions.patch" &&
-        entry.params?.spawnedBy === "agent:main:subagent:parent",
-    );
-    expect(spawnedByPatch?.params?.key).toMatch(/^agent:main:subagent:/);
-    expect(typeof spawnedByPatch?.params?.spawnedWorkspaceDir).toBe("string");
-
-    const spawnDepthPatch = calls.find(
-      (entry) => entry.method === "sessions.patch" && entry.params?.spawnDepth === 2,
-    );
-    expect(spawnDepthPatch?.params?.key).toMatch(/^agent:main:subagent:/);
-    expect(spawnDepthPatch?.params?.subagentRole).toBe("leaf");
-    expect(spawnDepthPatch?.params?.subagentControlScope).toBe("none");
+    const childSession = persistedStore?.[result.childSessionKey as string];
+    expect(childSession).toMatchObject({
+      spawnedBy: "agent:main:subagent:parent",
+      spawnDepth: 2,
+      subagentRole: "leaf",
+      subagentControlScope: "none",
+    });
+    expect(typeof childSession?.spawnedWorkspaceDir).toBe("string");
   });
 
   it("rejects callers when stored spawn depth is already at the configured max", async () => {
@@ -151,19 +153,17 @@ describe("subagent spawn depth + child limits", () => {
     });
   });
 
-  it("fails spawn when sessions.patch rejects the model", async () => {
+  it("fails spawn when the initial child session patch rejects the model", async () => {
     hoisted.configOverride = createDepthLimitConfig({ maxSpawnDepth: 2 });
     hoisted.callGatewayMock.mockImplementation(
       async (opts: { method?: string; params?: { model?: string } }) => {
-        if (opts.method === "sessions.patch" && opts.params?.model === "bad-model") {
-          throw new Error("invalid model: bad-model");
-        }
         if (opts.method === "agent") {
           return { runId: "run-depth" };
         }
         return {};
       },
     );
+    hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model"));
 
     const result = await spawnFrom("main", { model: "bad-model" });
 
diff --git a/src/agents/subagent-spawn.model-session.test.ts b/src/agents/subagent-spawn.model-session.test.ts
index ba6ce71b1e9..b977457fb58 100644
--- a/src/agents/subagent-spawn.model-session.test.ts
+++ b/src/agents/subagent-spawn.model-session.test.ts
@@ -83,18 +83,17 @@ describe("spawnSubagentDirect runtime model persistence", () => {
       status: "accepted",
       modelApplied: true,
     });
-    expect(updateSessionStoreMock).toHaveBeenCalledTimes(1);
+    expect(updateSessionStoreMock).toHaveBeenCalledTimes(3);
     expectPersistedRuntimeModel({
       persistedStore,
       sessionKey: /^agent:main:subagent:/,
       provider: "openai-codex",
       model: "gpt-5.4",
     });
-    expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1);
-    expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1);
-    expect(operations.indexOf("store:update")).toBeGreaterThan(
-      operations.indexOf("gateway:sessions.patch"),
+    expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3);
+    expect(operations.indexOf("store:update")).toBeGreaterThan(-1);
+    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(
+      operations.lastIndexOf("store:update"),
     );
-    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
   });
 });
diff --git a/src/agents/subagent-spawn.test-helpers.ts b/src/agents/subagent-spawn.test-helpers.ts
index eadd9933c8a..a10b1325b10 100644
--- a/src/agents/subagent-spawn.test-helpers.ts
+++ b/src/agents/subagent-spawn.test-helpers.ts
@@ -81,10 +81,10 @@ export function installSessionStoreCaptureMock(
     onStore?: (store: SessionStore) => void;
   },
 ) {
+  const store: SessionStore = {};
   updateSessionStoreMock.mockImplementation(
     async (_storePath: string, mutator: SessionStoreMutator) => {
       params?.operations?.push("store:update");
-      const store: SessionStore = {};
       await mutator(store);
       params?.onStore?.(store);
       return store;
diff --git a/src/agents/subagent-spawn.test.ts b/src/agents/subagent-spawn.test.ts
index 97f1d92ae90..52b2f061d3d 100644
--- a/src/agents/subagent-spawn.test.ts
+++ b/src/agents/subagent-spawn.test.ts
@@ -121,8 +121,8 @@ describe("spawnSubagentDirect seam flow", () => {
     expect(result.childSessionKey).toMatch(/^agent:main:subagent:/);
 
     const childSessionKey = result.childSessionKey as string;
-    expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1);
-    expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(1);
+    expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3);
+    expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(3);
     expect(hoisted.registerSubagentRunMock).toHaveBeenCalledWith(
       expect.objectContaining({
         runId: "run-1",
@@ -156,11 +156,10 @@ describe("spawnSubagentDirect seam flow", () => {
       provider: "openai-codex",
       model: "gpt-5.4",
     });
-    expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1);
-    expect(operations.indexOf("store:update")).toBeGreaterThan(
-      operations.indexOf("gateway:sessions.patch"),
+    expect(operations.indexOf("store:update")).toBeGreaterThan(-1);
+    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(
+      operations.lastIndexOf("store:update"),
     );
-    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
     expect(hoisted.callGatewayMock).toHaveBeenCalledWith(
       expect.objectContaining({
         method: "agent",
@@ -289,16 +288,9 @@ describe("spawnSubagentDirect seam flow", () => {
     });
   });
 
-  it("returns an error when the initial model patch is rejected", async () => {
+  it("returns an error when the initial child session patch is rejected", async () => {
     hoisted.callGatewayMock.mockImplementation(
       async (request: { method?: string; params?: unknown }) => {
-        if (request.method === "sessions.patch") {
-          const model = (request.params as { model?: unknown } | undefined)?.model;
-          if (model === "bad-model") {
-            throw new Error("invalid model: bad-model");
-          }
-          return { ok: true };
-        }
         if (request.method === "agent") {
           return { runId: "run-1", status: "accepted", acceptedAt: 1000 };
         }
@@ -308,6 +300,7 @@ describe("spawnSubagentDirect seam flow", () => {
         return {};
       },
     );
+    hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model"));
 
     const result = await spawnSubagentDirect(
       {
diff --git a/src/agents/subagent-spawn.ts b/src/agents/subagent-spawn.ts
index 95a87b0591e..7c6f1f52e6c 100644
--- a/src/agents/subagent-spawn.ts
+++ b/src/agents/subagent-spawn.ts
@@ -107,6 +107,9 @@ const defaultSubagentSpawnDeps: SubagentSpawnDeps = {
 };
 
 let subagentSpawnDeps: SubagentSpawnDeps = defaultSubagentSpawnDeps;
+const SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS = 60_000;
+const DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 60_000; // ms; fallback and lower bound
+const MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 300_000; // ms; upper bound
 
 export type SpawnSubagentParams = {
   task: string;
@@ -199,6 +202,53 @@ function readGatewayRunId(response: Awaited<ReturnType<typeof callGateway>>): st
   return typeof runId === "string" && runId ? runId : undefined;
 }
 
+/**
+ * Derives a gateway request timeout (in milliseconds) from a subagent run timeout given in seconds.
+ *
+ * @param runTimeoutSeconds - Run timeout in seconds. Non-finite, zero, or negative values are
+ *   treated as "no run timeout".
+ * @returns `DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS` when there is no usable run timeout;
+ *   otherwise the run timeout plus a 5 000 ms margin, clamped to the range
+ *   [`DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS`, `MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS`].
+ */
+function resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds: number): number {
+  // Invalid input collapses to 0 here so the single `<= 0` check below covers every "unset" case,
+  // including positive values below one millisecond that floor to 0.
+  const runTimeoutMs =
+    Number.isFinite(runTimeoutSeconds) && runTimeoutSeconds > 0
+      ? Math.floor(runTimeoutSeconds * 1000)
+      : 0;
+  if (runTimeoutMs <= 0) {
+    return DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS;
+  }
+  // Never go below the default, never above the cap; the 5s margin is added before clamping.
+  return Math.min(
+    MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS,
+    Math.max(DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS, runTimeoutMs + 5_000),
+  );
+}
+
+/**
+ * Converts a loosely typed child-session patch into a `Partial<SessionEntry>`.
+ *
+ * Only the fields handled below are copied, and only when they pass validation; every other key
+ * in `patch` (and every invalid value) is silently dropped.
+ *
+ * @param patch - Untyped patch. Fields read: `spawnDepth`, `subagentRole`, `subagentControlScope`,
+ *   `spawnedBy`, `spawnedWorkspaceDir`, `thinkingLevel`, `model`.
+ * @returns A partial entry holding just the validated fields; empty when nothing qualified.
+ */
+function buildDirectChildSessionPatch(patch: Record<string, unknown>): Partial<SessionEntry> {
+  const entry: Partial<SessionEntry> = {};
+  const spawnDepth = patch.spawnDepth;
+  // Accept only finite, non-negative numbers; fractional depths are floored to an integer.
+  if (typeof spawnDepth === "number" && Number.isFinite(spawnDepth) && spawnDepth >= 0) {
+    entry.spawnDepth = Math.floor(spawnDepth);
+  }
+  // Role and control scope are copied only when they match one of the two literals checked here.
+  if (patch.subagentRole === "orchestrator" || patch.subagentRole === "leaf") {
+    entry.subagentRole = patch.subagentRole;
+  }
+  if (patch.subagentControlScope === "children" || patch.subagentControlScope === "none") {
+    entry.subagentControlScope = patch.subagentControlScope;
+  }
+  // String fields: whitespace-only values are ignored, everything else is stored trimmed.
+  if (typeof patch.spawnedBy === "string" && patch.spawnedBy.trim()) {
+    entry.spawnedBy = patch.spawnedBy.trim();
+  }
+  if (typeof patch.spawnedWorkspaceDir === "string" && patch.spawnedWorkspaceDir.trim()) {
+    entry.spawnedWorkspaceDir = patch.spawnedWorkspaceDir.trim();
+  }
+  if (typeof patch.thinkingLevel === "string" && patch.thinkingLevel.trim()) {
+    entry.thinkingLevel = patch.thinkingLevel.trim();
+  }
+  if (typeof patch.model === "string" && patch.model.trim()) {
+    // `splitModelRef` yields `provider` and `model`; `modelProvider` is written only when a
+    // provider is present, and nothing is written at all when `model` comes back empty.
+    const { provider, model } = splitModelRef(patch.model.trim());
+    if (model) {
+      entry.model = model;
+      if (provider) {
+        entry.modelProvider = provider;
+      }
+    }
+  }
+  return entry;
+}
+
 function loadSubagentConfig() {
   return subagentSpawnDeps.loadConfig();
 }
@@ -430,7 +480,7 @@ async function cleanupProvisionalSession(
         emitLifecycleHooks: options?.emitLifecycleHooks === true,
         deleteTranscript: options?.deleteTranscript === true,
       },
-      timeoutMs: 10_000,
+      timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
     });
   } catch {
     // Best-effort cleanup only.
@@ -752,14 +802,25 @@ export async function spawnSubagentDirect(
   const { resolvedModel, thinkingOverride } = plan;
   const patchChildSession = async (patch: Record<string, unknown>): Promise<string | undefined> => {
     try {
-      await callSubagentGateway({
-        method: "sessions.patch",
-        params: { key: childSessionKey, ...patch },
-        timeoutMs: 10_000,
+      const target = resolveGatewaySessionStoreTarget({
+        cfg,
+        key: childSessionKey,
+      });
+      await updateSubagentSessionStore(target.storePath, (store) => {
+        pruneLegacyStoreKeys({
+          store,
+          canonicalKey: target.canonicalKey,
+          candidates: target.storeKeys,
+        });
+        store[target.canonicalKey] = mergeSessionEntry(
+          store[target.canonicalKey],
+          buildDirectChildSessionPatch(patch),
+        );
       });
       return undefined;
     } catch (err) {
-      return err instanceof Error ? err.message : typeof err === "string" ? err : "error";
+      const message = err instanceof Error ? err.message : typeof err === "string" ? err : "error";
+      return `child session patch failed: ${message}`;
     }
   };
 
@@ -808,7 +869,7 @@ export async function spawnSubagentDirect(
         await callSubagentGateway({
           method: "sessions.delete",
           params: { key: childSessionKey, emitLifecycleHooks: false },
-          timeoutMs: 10_000,
+          timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
         });
       } catch {
         // Best-effort cleanup only.
@@ -841,7 +902,7 @@ export async function spawnSubagentDirect(
         await callSubagentGateway({
           method: "sessions.delete",
           params: { key: childSessionKey, emitLifecycleHooks: false },
-          timeoutMs: 10_000,
+          timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
         });
       } catch {
         // Best-effort cleanup only.
@@ -1019,7 +1080,7 @@ export async function spawnSubagentDirect(
           : {}),
         ...publicSpawnedMetadata,
       },
-      timeoutMs: 10_000,
+      timeoutMs: resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds),
     });
     const runId = readGatewayRunId(response);
     if (runId) {
@@ -1074,7 +1135,7 @@ export async function spawnSubagentDirect(
           deleteTranscript: true,
           emitLifecycleHooks,
         },
-        timeoutMs: 10_000,
+        timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
       });
     } catch {
       // Best-effort only.
@@ -1125,7 +1186,7 @@ export async function spawnSubagentDirect(
           deleteTranscript: true,
           emitLifecycleHooks: threadBindingReady,
         },
-        timeoutMs: 10_000,
+        timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
       });
     } catch {
       // Best-effort cleanup only.
diff --git a/src/gateway/gateway-codex-harness.live-helpers.ts b/src/gateway/gateway-codex-harness.live-helpers.ts
index 2225ccf9d97..71016e6528d 100644
--- a/src/gateway/gateway-codex-harness.live-helpers.ts
+++ b/src/gateway/gateway-codex-harness.live-helpers.ts
@@ -34,6 +34,8 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [
   "Available model overrides:",
   "Available model overrides exposed in this session",
   "Available model overrides here:",
+  "Available model overrides listed in this session:",
+  "Available model overrides shown in this session:",
   "Available model overrides in this session:",
   "Available agent models:",
   "Visible options in this session:",
@@ -132,6 +134,8 @@ export function isExpectedCodexModelsCommandText(text: string): boolean {
   const mentionsVisibleOptions =
     normalized.includes("visible options in this session:") ||
     normalized.includes("visible options:") ||
+    normalized.includes("available model overrides listed in this session:") ||
+    normalized.includes("available model overrides shown in this session:") ||
     normalized.includes("available here:") ||
     normalized.includes("available agent ids in this session:");
   const mentionsCurrentActiveModel =
diff --git a/src/gateway/gateway-codex-harness.live.test.ts b/src/gateway/gateway-codex-harness.live.test.ts
index 44040ba1092..0728c772334 100644
--- a/src/gateway/gateway-codex-harness.live.test.ts
+++ b/src/gateway/gateway-codex-harness.live.test.ts
@@ -3,10 +3,13 @@ import fs from "node:fs/promises";
 import { createServer } from "node:net";
 import os from "node:os";
 import path from "node:path";
+import { setTimeout as delay } from "node:timers/promises";
 import { describe, expect, it } from "vitest";
 import { isLiveTestEnabled } from "../agents/live-test-helpers.js";
 import type { OpenClawConfig } from "../config/config.js";
+import type { ContextEngine } from "../context-engine/types.js";
 import { isTruthyEnvValue } from "../infra/env.js";
+import type { CallGatewayOptions } from "./call.js";
 import type { GatewayClient } from "./client.js";
 import {
   connectTestGatewayClient,
@@ -34,9 +37,18 @@ const CODEX_HARNESS_IMAGE_PROBE = isTruthyEnvValue(
   process.env.OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE,
 );
 const CODEX_HARNESS_MCP_PROBE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE);
+// Enables the subagent spawn probe when OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE is truthy.
+const CODEX_HARNESS_SUBAGENT_PROBE = isTruthyEnvValue(
+  process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE,
+);
 const CODEX_HARNESS_GUARDIAN_PROBE = isTruthyEnvValue(
   process.env.OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE,
 );
+// True only when the subagent probe is the sole probe enabled (image, MCP, and guardian probes
+// are all off) and OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY is not explicitly "0". When true,
+// the live test returns right after the subagent probe instead of running the remaining steps.
+const CODEX_HARNESS_SUBAGENT_ONLY =
+  CODEX_HARNESS_SUBAGENT_PROBE &&
+  !CODEX_HARNESS_IMAGE_PROBE &&
+  !CODEX_HARNESS_MCP_PROBE &&
+  !CODEX_HARNESS_GUARDIAN_PROBE &&
+  process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY !== "0";
 const CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS = isTruthyEnvValue(
   process.env.OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS,
 );
@@ -79,6 +91,10 @@ function isCodexAccountTokenError(error: unknown): boolean {
   return error instanceof Error && error.message.includes("Failed to extract accountId from token");
 }
 
+/**
+ * Narrows an unknown value to a string-keyed record so its properties can be read safely.
+ *
+ * @param value - Any value, typically an untyped gateway response.
+ * @returns `value` typed as a record when it is a non-null object (arrays also qualify, since
+ *   `typeof [] === "object"`); otherwise `undefined`.
+ */
+function asRecord(value: unknown): Record<string, unknown> | undefined {
+  return value && typeof value === "object" ? (value as Record<string, unknown>) : undefined;
+}
+
 async function subscribeCodexLiveDebugEvents(sessionKey: string): Promise<() => void> {
   if (!CODEX_HARNESS_DEBUG) {
     return () => undefined;
@@ -498,6 +514,172 @@ async function verifyCodexCronMcpProbe(params: {
   }
 }
 
+/**
+ * Looks up a spawned child session through the gateway's `sessions.list` method.
+ *
+ * @param params.childSessionKey - Session key the returned row must have in its `key` field.
+ * @param params.client - Gateway client used to issue the request (10s request timeout).
+ * @param params.parentSessionKey - Sent as the `spawnedBy` filter of the list request.
+ * @returns The matching row, or `undefined` when the response has no `sessions` array or no row
+ *   has the requested key.
+ */
+async function readSpawnedChildRow(params: {
+  childSessionKey: string;
+  client: GatewayClient;
+  parentSessionKey: string;
+}): Promise<Record<string, unknown> | undefined> {
+  // NOTE(review): `limit: 20` presumably caps the rows returned, so a parent with more children
+  // than that could hide the one we want — confirm against the gateway's list semantics.
+  const result = await params.client.request(
+    "sessions.list",
+    {
+      spawnedBy: params.parentSessionKey,
+      includeLastMessage: true,
+      limit: 20,
+    },
+    { timeoutMs: 10_000 },
+  );
+  const sessions = asRecord(result)?.sessions;
+  if (!Array.isArray(sessions)) {
+    return undefined;
+  }
+  // Non-object rows become `undefined` via `asRecord` and can never match the key comparison.
+  return sessions
+    .map((entry) => asRecord(entry))
+    .find((entry): entry is Record<string, unknown> => entry?.key === params.childSessionKey);
+}
+
+/**
+ * Polls until the spawned child session is both listed under its parent and has produced at least
+ * one captured `codex_app_server.lifecycle` event.
+ *
+ * @param params.childSessionKey - Session key of the child to wait for.
+ * @param params.client - Gateway client used for the `sessions.list` lookups.
+ * @param params.events - Captured agent events; re-checked on every poll iteration.
+ * @param params.parentSessionKey - Parent session key used as the `spawnedBy` filter.
+ * @returns The child's session row once both conditions hold.
+ * @throws Error describing the last row seen, the captured events, and the last lookup error when
+ *   the deadline passes first.
+ */
+async function waitForCodexSubagentStarted(params: {
+  childSessionKey: string;
+  client: GatewayClient;
+  events: CapturedAgentEvent[];
+  parentSessionKey: string;
+}): Promise<Record<string, unknown> | undefined> {
+  // Wait at most 30s, or less when the configured request timeout is shorter.
+  const deadline = Date.now() + Math.min(CODEX_HARNESS_REQUEST_TIMEOUT_MS, 30_000);
+  let lastRow: Record<string, unknown> | undefined;
+  let lastError: unknown;
+  while (Date.now() < deadline) {
+    try {
+      lastRow = await readSpawnedChildRow({
+        childSessionKey: params.childSessionKey,
+        client: params.client,
+        parentSessionKey: params.parentSessionKey,
+      });
+      // The row alone is not enough: a lifecycle event on the child session is also required.
+      if (
+        lastRow &&
+        params.events.some(
+          (event) =>
+            event.sessionKey === params.childSessionKey &&
+            event.stream === "codex_app_server.lifecycle",
+        )
+      ) {
+        return lastRow;
+      }
+    } catch (error) {
+      // Lookup failures are retried; only the most recent one is kept for the final report.
+      lastError = error;
+    }
+    await delay(2_000);
+  }
+  throw new Error(
+    [
+      `subagent ${params.childSessionKey} did not start through the Codex app-server harness`,
+      `lastRow=${JSON.stringify(lastRow)}`,
+      `events=${JSON.stringify(params.events)}`,
+      `lastError=${lastError instanceof Error ? lastError.message : String(lastError)}`,
+    ].join("\n"),
+  );
+}
+
+/**
+ * Live probe: spawns a subagent from `sessionKey` through `spawnSubagentDirect` and asserts that
+ * the child session is listed under the parent and emitted a `codex_app_server.lifecycle` event.
+ *
+ * For the duration of the probe the spawn module's deps are overridden so that gateway calls go
+ * through `params.client` and are recorded in a trace, which is attached to the error when the
+ * spawn is not accepted. The overrides and the event listener are always removed on exit.
+ *
+ * @param params.client - Connected gateway client used for every gateway request.
+ * @param params.sessionKey - Parent session key the subagent is spawned from.
+ * @throws When the spawn is not accepted, when no `:subagent:` child key is returned, or when the
+ *   child does not start before `waitForCodexSubagentStarted` gives up.
+ */
+async function verifyCodexSubagentProbe(params: {
+  client: GatewayClient;
+  sessionKey: string;
+}): Promise<void> {
+  const runId = randomUUID();
+  const expectedToken = `CODEX-SUBAGENT-${runId.slice(0, 6).toUpperCase()}`;
+  const events: CapturedAgentEvent[] = [];
+  // Resolve both modules before subscribing: a failed import can then neither leak the listener
+  // nor force the teardown below to repeat (and re-fail) the import.
+  const { onAgentEvent } = await import("../infra/agent-events.js");
+  const { __testing: subagentSpawnTesting, spawnSubagentDirect } =
+    await import("../agents/subagent-spawn.js");
+  const unsubscribe = onAgentEvent((event) => {
+    if (!event.stream.startsWith("codex_app_server.")) {
+      return;
+    }
+    events.push({
+      stream: event.stream,
+      sessionKey: event.sessionKey,
+      data: event.data,
+    });
+  });
+  try {
+    const noOpContextEngine: ContextEngine = {
+      info: { id: "codex-harness-subagent-smoke", name: "Codex harness subagent smoke" },
+      ingest: async () => ({ ingested: false }),
+      assemble: async () => ({ messages: [], estimatedTokens: 0 }),
+      compact: async () => ({ ok: true, compacted: false }),
+    };
+    const gatewayTrace: Array<{
+      durationMs: number;
+      error?: string;
+      method: string;
+      status: "error" | "ok";
+      timeoutMs?: number;
+    }> = [];
+    subagentSpawnTesting.setDepsForTest({
+      resolveContextEngine: async () => noOpContextEngine,
+      callGateway: async <T = Record<string, unknown>>(opts: CallGatewayOptions): Promise<T> => {
+        const startedAt = Date.now();
+        try {
+          // `agent` requests are sent with `expectFinal: false` so the call resolves without
+          // waiting for the child's final reply; other methods keep the caller's setting.
+          const result = await params.client.request(opts.method, opts.params, {
+            expectFinal: opts.method === "agent" ? false : opts.expectFinal,
+            timeoutMs: opts.timeoutMs,
+          });
+          gatewayTrace.push({
+            durationMs: Date.now() - startedAt,
+            method: opts.method,
+            status: "ok",
+            timeoutMs: opts.timeoutMs,
+          });
+          return result as T;
+        } catch (err) {
+          gatewayTrace.push({
+            durationMs: Date.now() - startedAt,
+            error: err instanceof Error ? err.message : String(err),
+            method: opts.method,
+            status: "error",
+            timeoutMs: opts.timeoutMs,
+          });
+          throw err;
+        }
+      },
+    });
+    const spawnResult = await spawnSubagentDirect(
+      {
+        task: `Reply exactly ${expectedToken} and nothing else.`,
+        agentId: "dev",
+        thinking: "low",
+        mode: "run",
+        cleanup: "keep",
+        context: "isolated",
+        expectsCompletionMessage: false,
+        runTimeoutSeconds: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS,
+      },
+      {
+        agentSessionKey: params.sessionKey,
+      },
+    );
+    if (spawnResult.status !== "accepted") {
+      throw new Error(
+        `Codex subagent spawn failed: ${JSON.stringify(spawnResult)} trace=${JSON.stringify(gatewayTrace)}`,
+      );
+    }
+    const childSessionKey = spawnResult.childSessionKey;
+    if (!childSessionKey?.includes(":subagent:")) {
+      throw new Error(
+        `subagent spawn did not return a child session key: ${JSON.stringify(spawnResult)}`,
+      );
+    }
+    const childRow = await waitForCodexSubagentStarted({
+      childSessionKey,
+      client: params.client,
+      events,
+      parentSessionKey: params.sessionKey,
+    });
+    expect(childRow?.key).toBe(childSessionKey);
+  } finally {
+    // Restore the default deps, and drop the listener even if that restore throws.
+    try {
+      subagentSpawnTesting.setDepsForTest();
+    } finally {
+      unsubscribe();
+    }
+  }
+}
+
 describeLive("gateway live (Codex harness)", () => {
   it(
     "runs gateway agent turns through the plugin-owned Codex app-server harness",
@@ -569,6 +751,16 @@ describeLive("gateway live (Codex harness)", () => {
       try {
         try {
           const sessionKey = "agent:dev:live-codex-harness";
+
+          if (CODEX_HARNESS_SUBAGENT_PROBE) {
+            logCodexLiveStep("subagent-probe:start", { sessionKey });
+            await verifyCodexSubagentProbe({ client, sessionKey });
+            logCodexLiveStep("subagent-probe:done");
+            if (CODEX_HARNESS_SUBAGENT_ONLY) {
+              return;
+            }
+          }
+
           const unsubscribeDebugEvents = await subscribeCodexLiveDebugEvents(sessionKey);
           const firstNonce = randomBytes(3).toString("hex").toUpperCase();
           try {
@@ -609,6 +801,7 @@ describeLive("gateway live (Codex harness)", () => {
               "model `codex/",
               "session `agent:dev:live-codex-harness`",
               "Model/status card shown above",
+              "Status shown above.",
             ],
           });
           logCodexLiveStep("codex-status-command", { statusText });