From 0ddbae171db562df745843c5da285969f3092fdd Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 03:45:00 +0100 Subject: [PATCH] test: cover codex app-server subagents --- CHANGELOG.md | 68 ++++-- docs/help/testing.md | 9 + scripts/test-live-codex-harness-docker.sh | 4 + ...subagents.sessions-spawn.lifecycle.test.ts | 26 +++ ...s.subagents.sessions-spawn.test-harness.ts | 2 +- src/agents/sessions-spawn-hooks.test.ts | 35 +++- src/agents/subagent-spawn.attachments.test.ts | 31 ++- .../subagent-spawn.depth-limits.test.ts | 42 ++-- .../subagent-spawn.model-session.test.ts | 11 +- src/agents/subagent-spawn.test-helpers.ts | 2 +- src/agents/subagent-spawn.test.ts | 21 +- src/agents/subagent-spawn.ts | 83 +++++++- .../gateway-codex-harness.live-helpers.ts | 4 + .../gateway-codex-harness.live.test.ts | 193 ++++++++++++++++++ 14 files changed, 448 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 348a366e29e..e53d5f14b49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,21 +85,59 @@ Docs: https://docs.openclaw.ai and show daemon state separately when available, so `gateway.tailscale.mode: "off"` no longer reads like the Tailscale daemon is stopped. Fixes #71790. Thanks @pesvobodak. -- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables itself for the current Gateway process after repeated failed restarts while the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, @FiredMosquito831, and @spikefcz. -- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime bind and port so CLI-driven non-loopback starts do not crash before config exists. Fixes #71823. -- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY` before the first embedded agent attempt runs. (#71833) Thanks @mjamiv. -- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant. Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402. -- Matrix/cron: preserve the live Matrix delivery target when creating implicit announce reminder jobs so mixed-case room IDs are not reconstructed from lowercased session keys. Fixes #71798. -- Feishu: accept Schema 2.0 card action callbacks that report `context.open_chat_id` instead of legacy `context.chat_id`, so button callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068. -- Feishu: keep synthetic card-action and bot-menu ids out of platform reply targets, using the real card callback message id when Feishu provides one and plain-sending otherwise. Fixes #71673. Thanks @eddy1068. -- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and `qqbot_remind` tool registration noise. Fixes #63102. -- Browser automation: keep stable tab ids and labels attached when Chromium replaces the raw target after form submissions or other action-triggered navigations, and return the replacement `targetId` from `/act` when the match is provable. Fixes #46137. -- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs directly for owner-authorized senders instead of returning `cronParams` and relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937) Thanks @GaosCode. -- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled `acpx`. Thanks @vincentkoc. -- Media delivery: avoid sending generated image attachments twice when the assistant reply already includes explicit `MEDIA:` lines for the same turn, and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash. -- Codex harness: ignore retryable app-server error notifications after Codex recovers, and preserve the real nested error message for terminal app-server failures instead of replacing it with a generic failure. Thanks @pashpashpash. -- Agents/subagents: keep queued subagent announces session-only when the requester has no external channel target, avoiding ambiguous multi-channel delivery failures. Fixes #59201. Thanks @larrylhollan. -- Image understanding: preserve configured provider-prefixed vision model metadata when callers request the model without the provider prefix, so custom image models keep their `input: ["text", "image"]` capability. Fixes #33185. Thanks @Kobe9312 and @vincentkoc. +- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when + the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables + itself for the current Gateway process after repeated failed restarts while + the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, + @FiredMosquito831, and @spikefcz. +- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime + bind and port so CLI-driven non-loopback starts do not crash before config + exists. Fixes #71823. +- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup + so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY` + before the first embedded agent attempt runs. (#71833) Thanks @mjamiv. +- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider + matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant. + Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402. +- Matrix/cron: preserve the live Matrix delivery target when creating implicit + announce reminder jobs so mixed-case room IDs are not reconstructed from + lowercased session keys. Fixes #71798. +- Feishu: accept Schema 2.0 card action callbacks that report + `context.open_chat_id` instead of legacy `context.chat_id`, so button + callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068. +- Feishu: keep synthetic card-action and bot-menu ids out of platform reply + targets, using the real card callback message id when Feishu provides one and + plain-sending otherwise. Fixes #71673. Thanks @eddy1068. +- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces + the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and + `qqbot_remind` tool registration noise. Fixes #63102. +- Browser automation: keep stable tab ids and labels attached when Chromium + replaces the raw target after form submissions or other action-triggered + navigations, and return the replacement `targetId` from `/act` when the match + is provable. Fixes #46137. +- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs + directly for owner-authorized senders instead of returning `cronParams` and + relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937) + Thanks @GaosCode. +- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is + loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled + `acpx`. Thanks @vincentkoc. +- Media delivery: avoid sending generated image attachments twice when the + assistant reply already includes explicit `MEDIA:` lines for the same turn, + and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash. +- Codex harness: ignore retryable app-server error notifications after Codex + recovers, and preserve the real nested error message for terminal app-server + failures instead of replacing it with a generic failure. Thanks @pashpashpash. +- Agents/Codex: prepare native Codex sub-agent session metadata without a + nested Gateway session patch and add a focused Docker smoke for the app-server + sub-agent path. Thanks @vincentkoc. +- Agents/subagents: keep queued subagent announces session-only when the + requester has no external channel target, avoiding ambiguous multi-channel + delivery failures. Fixes #59201. Thanks @larrylhollan. +- Image understanding: preserve configured provider-prefixed vision model + metadata when callers request the model without the provider prefix, so custom + image models keep their `input: ["text", "image"]` capability. Fixes #33185. + Thanks @Kobe9312 and @vincentkoc. - Plugins/install: restore the previous plugin index records if a concurrent config write conflict interrupts install, update, or uninstall metadata commits. Thanks @shakkernerd. - Plugins/update: restore previous plugin index records if core update or channel setup hits a concurrent config write conflict after plugin metadata changes. Thanks @shakkernerd. - Plugins/onboarding: defer channel/provider plugin install records until the owning config write commits, keeping setup failures from advancing the plugin index ahead of `openclaw.json`. Thanks @shakkernerd. diff --git a/docs/help/testing.md b/docs/help/testing.md index 78a1d253bb8..f4ac3729e6c 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -55,6 +55,15 @@ When debugging real providers/models (requires real creds): Slack DM with `/codex bind`, exercises `/codex fast` and `/codex permissions`, then verifies a plain reply and an image attachment route through the native plugin binding instead of ACP. +- Codex app-server harness smoke: `pnpm test:docker:live-codex-harness` + - Runs gateway agent turns through the plugin-owned Codex app-server harness, + verifies `/codex status` and `/codex models`, and by default exercises image, + cron MCP, sub-agent, and Guardian probes. Disable the sub-agent probe with + `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=0` when isolating other Codex + app-server failures. For a focused sub-agent check, disable the other probes: + `OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=1 pnpm test:docker:live-codex-harness`. + This exits after the sub-agent probe unless + `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY=0` is set. - Crestodian rescue command smoke: `pnpm test:live:crestodian-rescue-channel` - Opt-in belt-and-suspenders check for the message-channel rescue command surface. It exercises `/crestodian status`, queues a persistent model diff --git a/scripts/test-live-codex-harness-docker.sh b/scripts/test-live-codex-harness-docker.sh index 1136ca61835..21ab35ffb23 100644 --- a/scripts/test-live-codex-harness-docker.sh +++ b/scripts/test-live-codex-harness-docker.sh @@ -203,6 +203,8 @@ echo "==> Run Codex harness live test in Docker" echo "==> Model: ${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}" echo "==> Image probe: ${OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE:-1}" echo "==> MCP probe: ${OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE:-1}" +echo "==> Subagent probe: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}" +echo "==> Subagent-only fast path: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-auto}" echo "==> Guardian probe: ${OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE:-1}" echo "==> Auth mode: $CODEX_HARNESS_AUTH_MODE" echo "==> Profile file: $PROFILE_STATUS" @@ -230,6 +232,8 @@ DOCKER_RUN_ARGS=(docker run --rm -t \ -e OPENCLAW_LIVE_CODEX_HARNESS_MODEL="${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}" \ -e OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS:-1}" \ -e OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS:-}" \ + -e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-}" \ + -e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}" \ -e OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG="${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}" \ -e OPENCLAW_LIVE_CODEX_BIND="${OPENCLAW_LIVE_CODEX_BIND:-}" \ -e OPENCLAW_LIVE_CODEX_BIND_MODEL="${OPENCLAW_LIVE_CODEX_BIND_MODEL:-}" \ diff --git a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts index 9da67a2d69b..b9ecf8d0502 100644 --- a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts +++ b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts @@ -269,6 +269,32 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => { expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true); }); + it("gives native child agent startup enough gateway request time", async () => { + const ctx = setupSessionsSpawnGatewayMock({ + includeChatHistory: true, + agentWaitResult: { status: "ok", startedAt: 1000, endedAt: 2000 }, + }); + const tool = await getSessionsSpawnTool({ + agentSessionKey: "main", + agentChannel: "whatsapp", + }); + + const result = await tool.execute("call-start-timeout", { + task: "do thing", + runTimeoutSeconds: 120, + }); + + expect(result.details).toMatchObject({ + status: "accepted", + runId: expect.any(String), + }); + const childAgentCall = ctx.calls.find((call) => { + const params = call.params as { lane?: string } | undefined; + return call.method === "agent" && params?.lane === "subagent"; + }); + expect(childAgentCall?.timeoutMs).toBe(125_000); + }); + it("sessions_spawn retires bundle MCP runtime when run-mode cleanup completes", async () => { let resumeAnnounceFlow: ((value: boolean) => void) | undefined; let announceFlowStarted: (() => void) | undefined; diff --git a/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts b/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts index e0dcda5163a..5aead7bb7bf 100644 --- a/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts +++ b/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts @@ -12,7 +12,7 @@ type CreateSessionsSpawnTool = type SubagentRegistryTesting = (typeof import("./subagent-registry.js"))["__testing"]; type SubagentSpawnTesting = (typeof import("./subagent-spawn.js"))["__testing"]; export type CreateOpenClawToolsOpts = Parameters[0]; -export type GatewayRequest = { method?: string; params?: unknown }; +export type GatewayRequest = { method?: string; params?: unknown; timeoutMs?: number }; export type AgentWaitCall = { runId?: string; timeoutMs?: number }; type SessionsSpawnGatewayMockOptions = { includeSessionsList?: boolean; diff --git a/src/agents/sessions-spawn-hooks.test.ts b/src/agents/sessions-spawn-hooks.test.ts index 91a0e835958..297c4b0b909 100644 --- a/src/agents/sessions-spawn-hooks.test.ts +++ b/src/agents/sessions-spawn-hooks.test.ts @@ -9,6 +9,7 @@ type GatewayRequest = { method?: string; params?: Record }; const hoisted = vi.hoisted(() => ({ callGatewayMock: vi.fn(), configOverride: {} as Record, + updateSessionStoreMock: vi.fn(), })); const hookRunnerMocks = vi.hoisted(() => ({ @@ -139,6 +140,7 @@ beforeAll(async () => { ({ resetSubagentRegistryForTests, spawnSubagentDirect } = await loadSubagentSpawnModuleForTest({ callGatewayMock: hoisted.callGatewayMock, loadConfig: () => hoisted.configOverride, + updateSessionStoreMock: hoisted.updateSessionStoreMock, hookRunner: { hasHooks: (hookName: string) => hookName === "subagent_spawning" || @@ -157,6 +159,7 @@ describe("sessions_spawn subagent lifecycle hooks", () => { beforeEach(() => { resetSubagentRegistryForTests(); hoisted.callGatewayMock.mockReset(); + hoisted.updateSessionStoreMock.mockReset(); hookRunnerMocks.hasSubagentEndedHook = true; hookRunnerMocks.runSubagentSpawning.mockClear(); hookRunnerMocks.runSubagentSpawned.mockClear(); @@ -167,6 +170,16 @@ describe("sessions_spawn subagent lifecycle hooks", () => { scope: "per-sender", }, }); + const store: Record> = {}; + hoisted.updateSessionStoreMock.mockImplementation( + async (_storePath: unknown, mutator: unknown) => { + if (typeof mutator !== "function") { + throw new Error("missing session store mutator"); + } + await mutator(store); + return store; + }, + ); hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string }; if (request.method === "sessions.patch") { @@ -398,11 +411,21 @@ describe("sessions_spawn subagent lifecycle hooks", () => { }); it("cleans up the provisional session when lineage patching fails after thread binding", async () => { + const store: Record> = {}; + hoisted.updateSessionStoreMock.mockImplementation( + async (_storePath: unknown, mutator: unknown) => { + if (typeof mutator !== "function") { + throw new Error("missing session store mutator"); + } + await mutator(store); + if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) { + throw new Error("lineage patch failed"); + } + return store; + }, + ); hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: Record }; - if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") { - throw new Error("lineage patch failed"); - } if (request.method === "sessions.delete") { return { ok: true }; } @@ -420,10 +443,8 @@ describe("sessions_spawn subagent lifecycle hooks", () => { agentThreadId: "456", }); - expect(result).toMatchObject({ - status: "error", - error: "lineage patch failed", - }); + expect(result.status).toBe("error"); + expect(result.error).toContain("lineage patch failed"); expect(hookRunnerMocks.runSubagentSpawned).not.toHaveBeenCalled(); expect(hookRunnerMocks.runSubagentEnded).not.toHaveBeenCalled(); const methods = getGatewayMethods(); diff --git a/src/agents/subagent-spawn.attachments.test.ts b/src/agents/subagent-spawn.attachments.test.ts index b2c1e470607..c44003add46 100644 --- a/src/agents/subagent-spawn.attachments.test.ts +++ b/src/agents/subagent-spawn.attachments.test.ts @@ -9,6 +9,7 @@ import { } from "./subagent-spawn.test-helpers.js"; const callGatewayMock = vi.fn(); +const updateSessionStoreMock = vi.fn(); let configOverride: Record = { ...createSubagentSpawnTestConfig(), @@ -20,6 +21,7 @@ beforeAll(async () => { subagentSpawnModule = await loadSubagentSpawnModuleForTest({ callGatewayMock, loadConfig: () => configOverride, + updateSessionStoreMock, workspaceDir: workspaceDirOverride || os.tmpdir(), }); }); @@ -92,6 +94,15 @@ describe("spawnSubagentDirect filename validation", () => { configOverride = createSubagentSpawnTestConfig(workspaceDirOverride); subagentSpawnModule.resetSubagentRegistryForTests(); callGatewayMock.mockClear(); + updateSessionStoreMock.mockReset(); + const store: Record> = {}; + updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => { + if (typeof mutator !== "function") { + throw new Error("missing session store mutator"); + } + await mutator(store); + return store; + }); setupAcceptedSubagentGatewayMock(callGatewayMock); }); @@ -170,12 +181,20 @@ describe("spawnSubagentDirect filename validation", () => { it("removes materialized attachments when lineage patching fails", async () => { const calls: Array<{ method?: string; params?: Record }> = []; + const store: Record> = {}; + updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => { + if (typeof mutator !== "function") { + throw new Error("missing session store mutator"); + } + await mutator(store); + if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) { + throw new Error("lineage patch failed"); + } + return store; + }); callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: Record }; calls.push(request); - if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") { - throw new Error("lineage patch failed"); - } if (request.method === "sessions.delete") { return { ok: true }; } @@ -191,10 +210,8 @@ describe("spawnSubagentDirect filename validation", () => { ctx, ); - expect(result).toMatchObject({ - status: "error", - error: "lineage patch failed", - }); + expect(result.status).toBe("error"); + expect(result.error).toContain("lineage patch failed"); const attachmentsRoot = path.join(workspaceDirOverride, ".openclaw", "attachments"); const retainedDirs = fs.existsSync(attachmentsRoot) ? fs.readdirSync(attachmentsRoot).filter((entry) => !entry.startsWith(".")) diff --git a/src/agents/subagent-spawn.depth-limits.test.ts b/src/agents/subagent-spawn.depth-limits.test.ts index cefc3011991..fd18b3c6c05 100644 --- a/src/agents/subagent-spawn.depth-limits.test.ts +++ b/src/agents/subagent-spawn.depth-limits.test.ts @@ -1,6 +1,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import { createSubagentSpawnTestConfig, + installSessionStoreCaptureMock, loadSubagentSpawnModuleForTest, setupAcceptedSubagentGatewayMock, } from "./subagent-spawn.test-helpers.js"; @@ -10,10 +11,12 @@ const hoisted = vi.hoisted(() => ({ callGatewayMock: vi.fn(), configOverride: {} as Record, depthBySession: new Map(), + updateSessionStoreMock: vi.fn(), registerSubagentRunMock: vi.fn(), })); let spawnSubagentDirect: typeof import("./subagent-spawn.js").spawnSubagentDirect; +let persistedStore: Record> | undefined; function createDepthLimitConfig(subagents?: Record) { return createSubagentSpawnTestConfig("/tmp/workspace-main", { @@ -48,6 +51,7 @@ describe("subagent spawn depth + child limits", () => { callGatewayMock: hoisted.callGatewayMock, loadConfig: () => hoisted.configOverride, registerSubagentRunMock: hoisted.registerSubagentRunMock, + updateSessionStoreMock: hoisted.updateSessionStoreMock, getSubagentDepthFromSessionStore: (sessionKey) => hoisted.depthBySession.get(sessionKey) ?? 0, countActiveRunsForSession: (sessionKey) => hoisted.activeChildrenBySession.get(sessionKey) ?? 0, @@ -60,6 +64,13 @@ describe("subagent spawn depth + child limits", () => { hoisted.depthBySession.clear(); hoisted.callGatewayMock.mockClear(); hoisted.registerSubagentRunMock.mockClear(); + hoisted.updateSessionStoreMock.mockReset(); + persistedStore = undefined; + installSessionStoreCaptureMock(hoisted.updateSessionStoreMock, { + onStore: (store) => { + persistedStore = store; + }, + }); hoisted.configOverride = createDepthLimitConfig(); setupAcceptedSubagentGatewayMock(hoisted.callGatewayMock); }); @@ -87,23 +98,14 @@ describe("subagent spawn depth + child limits", () => { runId: "run-1", }); - const calls = hoisted.callGatewayMock.mock.calls.map( - (call) => call[0] as { method?: string; params?: Record }, - ); - const spawnedByPatch = calls.find( - (entry) => - entry.method === "sessions.patch" && - entry.params?.spawnedBy === "agent:main:subagent:parent", - ); - expect(spawnedByPatch?.params?.key).toMatch(/^agent:main:subagent:/); - expect(typeof spawnedByPatch?.params?.spawnedWorkspaceDir).toBe("string"); - - const spawnDepthPatch = calls.find( - (entry) => entry.method === "sessions.patch" && entry.params?.spawnDepth === 2, - ); - expect(spawnDepthPatch?.params?.key).toMatch(/^agent:main:subagent:/); - expect(spawnDepthPatch?.params?.subagentRole).toBe("leaf"); - expect(spawnDepthPatch?.params?.subagentControlScope).toBe("none"); + const childSession = persistedStore?.[result.childSessionKey as string]; + expect(childSession).toMatchObject({ + spawnedBy: "agent:main:subagent:parent", + spawnDepth: 2, + subagentRole: "leaf", + subagentControlScope: "none", + }); + expect(typeof childSession?.spawnedWorkspaceDir).toBe("string"); }); it("rejects callers when stored spawn depth is already at the configured max", async () => { @@ -151,19 +153,17 @@ describe("subagent spawn depth + child limits", () => { }); }); - it("fails spawn when sessions.patch rejects the model", async () => { + it("fails spawn when the initial child session patch rejects the model", async () => { hoisted.configOverride = createDepthLimitConfig({ maxSpawnDepth: 2 }); hoisted.callGatewayMock.mockImplementation( async (opts: { method?: string; params?: { model?: string } }) => { - if (opts.method === "sessions.patch" && opts.params?.model === "bad-model") { - throw new Error("invalid model: bad-model"); - } if (opts.method === "agent") { return { runId: "run-depth" }; } return {}; }, ); + hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model")); const result = await spawnFrom("main", { model: "bad-model" }); diff --git a/src/agents/subagent-spawn.model-session.test.ts b/src/agents/subagent-spawn.model-session.test.ts index ba6ce71b1e9..b977457fb58 100644 --- a/src/agents/subagent-spawn.model-session.test.ts +++ b/src/agents/subagent-spawn.model-session.test.ts @@ -83,18 +83,17 @@ describe("spawnSubagentDirect runtime model persistence", () => { status: "accepted", modelApplied: true, }); - expect(updateSessionStoreMock).toHaveBeenCalledTimes(1); + expect(updateSessionStoreMock).toHaveBeenCalledTimes(3); expectPersistedRuntimeModel({ persistedStore, sessionKey: /^agent:main:subagent:/, provider: "openai-codex", model: "gpt-5.4", }); - expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1); - expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1); - expect(operations.indexOf("store:update")).toBeGreaterThan( - operations.indexOf("gateway:sessions.patch"), + expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3); + expect(operations.indexOf("store:update")).toBeGreaterThan(-1); + expect(operations.indexOf("gateway:agent")).toBeGreaterThan( + operations.lastIndexOf("store:update"), ); - expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update")); }); }); diff --git a/src/agents/subagent-spawn.test-helpers.ts b/src/agents/subagent-spawn.test-helpers.ts index eadd9933c8a..a10b1325b10 100644 --- a/src/agents/subagent-spawn.test-helpers.ts +++ b/src/agents/subagent-spawn.test-helpers.ts @@ -81,10 +81,10 @@ export function installSessionStoreCaptureMock( onStore?: (store: SessionStore) => void; }, ) { + const store: SessionStore = {}; updateSessionStoreMock.mockImplementation( async (_storePath: string, mutator: SessionStoreMutator) => { params?.operations?.push("store:update"); - const store: SessionStore = {}; await mutator(store); params?.onStore?.(store); return store; diff --git a/src/agents/subagent-spawn.test.ts b/src/agents/subagent-spawn.test.ts index 97f1d92ae90..52b2f061d3d 100644 --- a/src/agents/subagent-spawn.test.ts +++ b/src/agents/subagent-spawn.test.ts @@ -121,8 +121,8 @@ describe("spawnSubagentDirect seam flow", () => { expect(result.childSessionKey).toMatch(/^agent:main:subagent:/); const childSessionKey = result.childSessionKey as string; - expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1); - expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(1); + expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3); + expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(3); expect(hoisted.registerSubagentRunMock).toHaveBeenCalledWith( expect.objectContaining({ runId: "run-1", @@ -156,11 +156,10 @@ describe("spawnSubagentDirect seam flow", () => { provider: "openai-codex", model: "gpt-5.4", }); - expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1); - expect(operations.indexOf("store:update")).toBeGreaterThan( - operations.indexOf("gateway:sessions.patch"), + expect(operations.indexOf("store:update")).toBeGreaterThan(-1); + expect(operations.indexOf("gateway:agent")).toBeGreaterThan( + operations.lastIndexOf("store:update"), ); - expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update")); expect(hoisted.callGatewayMock).toHaveBeenCalledWith( expect.objectContaining({ method: "agent", @@ -289,16 +288,9 @@ describe("spawnSubagentDirect seam flow", () => { }); }); - it("returns an error when the initial model patch is rejected", async () => { + it("returns an error when the initial child session patch is rejected", async () => { hoisted.callGatewayMock.mockImplementation( async (request: { method?: string; params?: unknown }) => { - if (request.method === "sessions.patch") { - const model = (request.params as { model?: unknown } | undefined)?.model; - if (model === "bad-model") { - throw new Error("invalid model: bad-model"); - } - return { ok: true }; - } if (request.method === "agent") { return { runId: "run-1", status: "accepted", acceptedAt: 1000 }; } @@ -308,6 +300,7 @@ describe("spawnSubagentDirect seam flow", () => { return {}; }, ); + hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model")); const result = await spawnSubagentDirect( { diff --git a/src/agents/subagent-spawn.ts b/src/agents/subagent-spawn.ts index 95a87b0591e..7c6f1f52e6c 100644 --- a/src/agents/subagent-spawn.ts +++ b/src/agents/subagent-spawn.ts @@ -107,6 +107,9 @@ const defaultSubagentSpawnDeps: SubagentSpawnDeps = { }; let subagentSpawnDeps: SubagentSpawnDeps = defaultSubagentSpawnDeps; +const SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS = 60_000; +const DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 60_000; +const MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 300_000; export type SpawnSubagentParams = { task: string; @@ -199,6 +202,53 @@ function readGatewayRunId(response: Awaited>): st return typeof runId === "string" && runId ? runId : undefined; } +function resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds: number): number { + const runTimeoutMs = + Number.isFinite(runTimeoutSeconds) && runTimeoutSeconds > 0 + ? Math.floor(runTimeoutSeconds * 1000) + : 0; + if (runTimeoutMs <= 0) { + return DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS; + } + return Math.min( + MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS, + Math.max(DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS, runTimeoutMs + 5_000), + ); +} + +function buildDirectChildSessionPatch(patch: Record): Partial { + const entry: Partial = {}; + const spawnDepth = patch.spawnDepth; + if (typeof spawnDepth === "number" && Number.isFinite(spawnDepth) && spawnDepth >= 0) { + entry.spawnDepth = Math.floor(spawnDepth); + } + if (patch.subagentRole === "orchestrator" || patch.subagentRole === "leaf") { + entry.subagentRole = patch.subagentRole; + } + if (patch.subagentControlScope === "children" || patch.subagentControlScope === "none") { + entry.subagentControlScope = patch.subagentControlScope; + } + if (typeof patch.spawnedBy === "string" && patch.spawnedBy.trim()) { + entry.spawnedBy = patch.spawnedBy.trim(); + } + if (typeof patch.spawnedWorkspaceDir === "string" && patch.spawnedWorkspaceDir.trim()) { + entry.spawnedWorkspaceDir = patch.spawnedWorkspaceDir.trim(); + } + if (typeof patch.thinkingLevel === "string" && patch.thinkingLevel.trim()) { + entry.thinkingLevel = patch.thinkingLevel.trim(); + } + if (typeof patch.model === "string" && patch.model.trim()) { + const { provider, model } = splitModelRef(patch.model.trim()); + if (model) { + entry.model = model; + if (provider) { + entry.modelProvider = provider; + } + } + } + return entry; +} + function loadSubagentConfig() { return subagentSpawnDeps.loadConfig(); } @@ -430,7 +480,7 @@ async function cleanupProvisionalSession( emitLifecycleHooks: options?.emitLifecycleHooks === true, deleteTranscript: options?.deleteTranscript === true, }, - timeoutMs: 10_000, + timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS, }); } catch { // Best-effort cleanup only. @@ -752,14 +802,25 @@ export async function spawnSubagentDirect( const { resolvedModel, thinkingOverride } = plan; const patchChildSession = async (patch: Record): Promise => { try { - await callSubagentGateway({ - method: "sessions.patch", - params: { key: childSessionKey, ...patch }, - timeoutMs: 10_000, + const target = resolveGatewaySessionStoreTarget({ + cfg, + key: childSessionKey, + }); + await updateSubagentSessionStore(target.storePath, (store) => { + pruneLegacyStoreKeys({ + store, + canonicalKey: target.canonicalKey, + candidates: target.storeKeys, + }); + store[target.canonicalKey] = mergeSessionEntry( + store[target.canonicalKey], + buildDirectChildSessionPatch(patch), + ); }); return undefined; } catch (err) { - return err instanceof Error ? err.message : typeof err === "string" ? err : "error"; + const message = err instanceof Error ? err.message : typeof err === "string" ? err : "error"; + return `child session patch failed: ${message}`; } }; @@ -808,7 +869,7 @@ export async function spawnSubagentDirect( await callSubagentGateway({ method: "sessions.delete", params: { key: childSessionKey, emitLifecycleHooks: false }, - timeoutMs: 10_000, + timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS, }); } catch { // Best-effort cleanup only. @@ -841,7 +902,7 @@ export async function spawnSubagentDirect( await callSubagentGateway({ method: "sessions.delete", params: { key: childSessionKey, emitLifecycleHooks: false }, - timeoutMs: 10_000, + timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS, }); } catch { // Best-effort cleanup only. @@ -1019,7 +1080,7 @@ export async function spawnSubagentDirect( : {}), ...publicSpawnedMetadata, }, - timeoutMs: 10_000, + timeoutMs: resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds), }); const runId = readGatewayRunId(response); if (runId) { @@ -1074,7 +1135,7 @@ export async function spawnSubagentDirect( deleteTranscript: true, emitLifecycleHooks, }, - timeoutMs: 10_000, + timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS, }); } catch { // Best-effort only. @@ -1125,7 +1186,7 @@ export async function spawnSubagentDirect( deleteTranscript: true, emitLifecycleHooks: threadBindingReady, }, - timeoutMs: 10_000, + timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS, }); } catch { // Best-effort cleanup only. diff --git a/src/gateway/gateway-codex-harness.live-helpers.ts b/src/gateway/gateway-codex-harness.live-helpers.ts index 2225ccf9d97..71016e6528d 100644 --- a/src/gateway/gateway-codex-harness.live-helpers.ts +++ b/src/gateway/gateway-codex-harness.live-helpers.ts @@ -34,6 +34,8 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [ "Available model overrides:", "Available model overrides exposed in this session", "Available model overrides here:", + "Available model overrides listed in this session:", + "Available model overrides shown in this session:", "Available model overrides in this session:", "Available agent models:", "Visible options in this session:", @@ -132,6 +134,8 @@ export function isExpectedCodexModelsCommandText(text: string): boolean { const mentionsVisibleOptions = normalized.includes("visible options in this session:") || normalized.includes("visible options:") || + normalized.includes("available model overrides listed in this session:") || + normalized.includes("available model overrides shown in this session:") || normalized.includes("available here:") || normalized.includes("available agent ids in this session:"); const mentionsCurrentActiveModel = diff --git a/src/gateway/gateway-codex-harness.live.test.ts b/src/gateway/gateway-codex-harness.live.test.ts index 44040ba1092..0728c772334 100644 --- a/src/gateway/gateway-codex-harness.live.test.ts +++ b/src/gateway/gateway-codex-harness.live.test.ts @@ -3,10 +3,13 @@ import fs from "node:fs/promises"; import { createServer } from "node:net"; import os from "node:os"; import path from "node:path"; +import { setTimeout as delay } from "node:timers/promises"; import { describe, expect, it } from "vitest"; import { isLiveTestEnabled } from "../agents/live-test-helpers.js"; import type { OpenClawConfig } from "../config/config.js"; +import type { ContextEngine } from "../context-engine/types.js"; import { isTruthyEnvValue } from "../infra/env.js"; +import type { CallGatewayOptions } from "./call.js"; import type { GatewayClient } from "./client.js"; import { connectTestGatewayClient, @@ -34,9 +37,18 @@ const CODEX_HARNESS_IMAGE_PROBE = isTruthyEnvValue( process.env.OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE, ); const CODEX_HARNESS_MCP_PROBE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE); +const CODEX_HARNESS_SUBAGENT_PROBE = isTruthyEnvValue( + process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE, +); const CODEX_HARNESS_GUARDIAN_PROBE = isTruthyEnvValue( process.env.OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE, ); +const CODEX_HARNESS_SUBAGENT_ONLY = + CODEX_HARNESS_SUBAGENT_PROBE && + !CODEX_HARNESS_IMAGE_PROBE && + !CODEX_HARNESS_MCP_PROBE && + !CODEX_HARNESS_GUARDIAN_PROBE && + process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY !== "0"; const CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS = isTruthyEnvValue( process.env.OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS, ); @@ -79,6 +91,10 @@ function isCodexAccountTokenError(error: unknown): boolean { return error instanceof Error && error.message.includes("Failed to extract accountId from token"); } +function asRecord(value: unknown): Record | undefined { + return value && typeof value === "object" ? (value as Record) : undefined; +} + async function subscribeCodexLiveDebugEvents(sessionKey: string): Promise<() => void> { if (!CODEX_HARNESS_DEBUG) { return () => undefined; @@ -498,6 +514,172 @@ async function verifyCodexCronMcpProbe(params: { } } +async function readSpawnedChildRow(params: { + childSessionKey: string; + client: GatewayClient; + parentSessionKey: string; +}): Promise | undefined> { + const result = await params.client.request( + "sessions.list", + { + spawnedBy: params.parentSessionKey, + includeLastMessage: true, + limit: 20, + }, + { timeoutMs: 10_000 }, + ); + const sessions = asRecord(result)?.sessions; + if (!Array.isArray(sessions)) { + return undefined; + } + return sessions + .map((entry) => asRecord(entry)) + .find((entry): entry is Record => entry?.key === params.childSessionKey); +} + +async function waitForCodexSubagentStarted(params: { + childSessionKey: string; + client: GatewayClient; + events: CapturedAgentEvent[]; + parentSessionKey: string; +}): Promise | undefined> { + const deadline = Date.now() + Math.min(CODEX_HARNESS_REQUEST_TIMEOUT_MS, 30_000); + let lastRow: Record | undefined; + let lastError: unknown; + while (Date.now() < deadline) { + try { + lastRow = await readSpawnedChildRow({ + childSessionKey: params.childSessionKey, + client: params.client, + parentSessionKey: params.parentSessionKey, + }); + if ( + lastRow && + params.events.some( + (event) => + event.sessionKey === params.childSessionKey && + event.stream === "codex_app_server.lifecycle", + ) + ) { + return lastRow; + } + } catch (error) { + lastError = error; + } + await delay(2_000); + } + throw new Error( + [ + `subagent ${params.childSessionKey} did not start through the Codex app-server harness`, + `lastRow=${JSON.stringify(lastRow)}`, + `events=${JSON.stringify(params.events)}`, + `lastError=${lastError instanceof Error ? lastError.message : String(lastError)}`, + ].join("\n"), + ); +} + +async function verifyCodexSubagentProbe(params: { + client: GatewayClient; + sessionKey: string; +}): Promise { + const runId = randomUUID(); + const expectedToken = `CODEX-SUBAGENT-${runId.slice(0, 6).toUpperCase()}`; + const events: CapturedAgentEvent[] = []; + const { onAgentEvent } = await import("../infra/agent-events.js"); + const unsubscribe = onAgentEvent((event) => { + if (!event.stream.startsWith("codex_app_server.")) { + return; + } + events.push({ + stream: event.stream, + sessionKey: event.sessionKey, + data: event.data, + }); + }); + try { + const { __testing: subagentSpawnTesting, spawnSubagentDirect } = + await import("../agents/subagent-spawn.js"); + const noOpContextEngine: ContextEngine = { + info: { id: "codex-harness-subagent-smoke", name: "Codex harness subagent smoke" }, + ingest: async () => ({ ingested: false }), + assemble: async () => ({ messages: [], estimatedTokens: 0 }), + compact: async () => ({ ok: true, compacted: false }), + }; + const gatewayTrace: Array<{ + durationMs: number; + error?: string; + method: string; + status: "error" | "ok"; + timeoutMs?: number; + }> = []; + subagentSpawnTesting.setDepsForTest({ + resolveContextEngine: async () => noOpContextEngine, + callGateway: async >(opts: CallGatewayOptions): Promise => { + const startedAt = Date.now(); + try { + const result = await params.client.request(opts.method, opts.params, { + expectFinal: opts.method === "agent" ? false : opts.expectFinal, + timeoutMs: opts.timeoutMs, + }); + gatewayTrace.push({ + durationMs: Date.now() - startedAt, + method: opts.method, + status: "ok", + timeoutMs: opts.timeoutMs, + }); + return result as T; + } catch (err) { + gatewayTrace.push({ + durationMs: Date.now() - startedAt, + error: err instanceof Error ? err.message : String(err), + method: opts.method, + status: "error", + timeoutMs: opts.timeoutMs, + }); + throw err; + } + }, + }); + const spawnResult = await spawnSubagentDirect( + { + task: `Reply exactly ${expectedToken} and nothing else.`, + agentId: "dev", + thinking: "low", + mode: "run", + cleanup: "keep", + context: "isolated", + expectsCompletionMessage: false, + runTimeoutSeconds: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS, + }, + { + agentSessionKey: params.sessionKey, + }, + ); + if (spawnResult.status !== "accepted") { + throw new Error( + `Codex subagent spawn failed: ${JSON.stringify(spawnResult)} trace=${JSON.stringify(gatewayTrace)}`, + ); + } + const childSessionKey = spawnResult.childSessionKey; + if (!childSessionKey?.includes(":subagent:")) { + throw new Error( + `subagent spawn did not return a child session key: ${JSON.stringify(spawnResult)}`, + ); + } + const childRow = await waitForCodexSubagentStarted({ + childSessionKey, + client: params.client, + events, + parentSessionKey: params.sessionKey, + }); + expect(childRow?.key).toBe(childSessionKey); + } finally { + const { __testing: subagentSpawnTesting } = await import("../agents/subagent-spawn.js"); + subagentSpawnTesting.setDepsForTest(); + unsubscribe(); + } +} + describeLive("gateway live (Codex harness)", () => { it( "runs gateway agent turns through the plugin-owned Codex app-server harness", @@ -569,6 +751,16 @@ describeLive("gateway live (Codex harness)", () => { try { try { const sessionKey = "agent:dev:live-codex-harness"; + + if (CODEX_HARNESS_SUBAGENT_PROBE) { + logCodexLiveStep("subagent-probe:start", { sessionKey }); + await verifyCodexSubagentProbe({ client, sessionKey }); + logCodexLiveStep("subagent-probe:done"); + if (CODEX_HARNESS_SUBAGENT_ONLY) { + return; + } + } + const unsubscribeDebugEvents = await subscribeCodexLiveDebugEvents(sessionKey); const firstNonce = randomBytes(3).toString("hex").toUpperCase(); try { @@ -609,6 +801,7 @@ describeLive("gateway live (Codex harness)", () => { "model `codex/", "session `agent:dev:live-codex-harness`", "Model/status card shown above", + "Status shown above.", ], }); logCodexLiveStep("codex-status-command", { statusText });