diff --git a/extensions/qa-lab/src/agentic-parity-report.test.ts b/extensions/qa-lab/src/agentic-parity-report.test.ts index 8fcee784939..b1a836ab86d 100644 --- a/extensions/qa-lab/src/agentic-parity-report.test.ts +++ b/extensions/qa-lab/src/agentic-parity-report.test.ts @@ -16,6 +16,7 @@ const FULL_PARITY_PASS_SCENARIOS: QaParityReportScenario[] = [ { name: "Image understanding from attachment", status: "pass" as const }, { name: "Subagent handoff", status: "pass" as const }, { name: "Subagent fanout synthesis", status: "pass" as const }, + { name: "Subagent stale child links", status: "pass" as const }, { name: "Memory recall after context switch", status: "pass" as const }, { name: "Thread memory isolation", status: "pass" as const }, { name: "Config restart capability flip", status: "pass" as const }, diff --git a/extensions/qa-lab/src/agentic-parity.ts b/extensions/qa-lab/src/agentic-parity.ts index d997778e85f..e8978cf353a 100644 --- a/extensions/qa-lab/src/agentic-parity.ts +++ b/extensions/qa-lab/src/agentic-parity.ts @@ -36,6 +36,11 @@ export const QA_AGENTIC_PARITY_SCENARIOS = [ title: "Subagent fanout synthesis", countsTowardValidToolCallRate: true, }, + { + id: "subagent-stale-child-links", + title: "Subagent stale child links", + countsTowardValidToolCallRate: false, + }, { id: "memory-recall", title: "Memory recall after context switch", diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index 98587a3ab98..a7f96c13b24 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -644,6 +644,7 @@ describe("qa cli runtime", () => { "compaction-retry-mutating-tool", "subagent-handoff", "subagent-fanout-synthesis", + "subagent-stale-child-links", "memory-recall", "thread-memory-isolation", "config-restart-capability-flip", @@ -1071,6 +1072,7 @@ describe("qa cli runtime", () => { "compaction-retry-mutating-tool", "subagent-handoff", "subagent-fanout-synthesis", + 
"subagent-stale-child-links", "memory-recall", "thread-memory-isolation", "config-restart-capability-flip", diff --git a/extensions/qa-lab/src/suite-runtime-types.ts b/extensions/qa-lab/src/suite-runtime-types.ts index c4ba16e7a3d..b16b64846a5 100644 --- a/extensions/qa-lab/src/suite-runtime-types.ts +++ b/extensions/qa-lab/src/suite-runtime-types.ts @@ -7,6 +7,14 @@ export type QaRuntimeGatewayClient = { tempRoot: string; workspaceDir: string; runtimeEnv: NodeJS.ProcessEnv; + restartAfterStateMutation?: ( + mutateState: (context: { + configPath: string; + runtimeEnv: NodeJS.ProcessEnv; + stateDir: string; + tempRoot: string; + }) => Promise<void>, + ) => Promise<void>; call: ( method: string, params?: unknown, diff --git a/qa/scenarios/agents/subagent-stale-child-links.md b/qa/scenarios/agents/subagent-stale-child-links.md new file mode 100644 index 00000000000..7f6a18b86dd --- /dev/null +++ b/qa/scenarios/agents/subagent-stale-child-links.md @@ -0,0 +1,175 @@ +# Subagent stale child links + +```yaml qa-scenario +id: subagent-stale-child-links +title: Subagent stale child links +surface: subagents +coverage: + primary: + - agents.subagents + secondary: + - gateway.sessions-list +objective: Verify restarted gateways hide stale persisted subagent child links without hiding live or fresh children. +successCriteria: + - Old ended subagent run records are not exposed as current children. + - Old store-only spawnedBy and parentSessionKey rows are not exposed as current children. + - Child-side ACP store rows from sibling agents are not exposed as current children. + - Live subagent runs and fresh dashboard children remain visible. 
+docsRefs: + - docs/tools/subagents.md + - docs/concepts/qa-e2e-automation.md + - docs/help/testing.md +codeRefs: + - src/gateway/session-utils.ts + - src/agents/subagent-run-liveness.ts + - extensions/qa-lab/src/gateway-child.ts +execution: + kind: flow + summary: Seed stale subagent session state on disk, restart the real gateway, then assert sessions.list filters only the stale child links. +``` + +```yaml qa-flow +steps: + - name: restarted gateway filters stale subagent child links + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - set: mainKey + value: "agent:qa:main" + - set: staleRunKey + value: "agent:qa:subagent:qa-stale-ended" + - set: staleOrphanKey + value: "agent:qa:subagent:qa-orphan" + - set: staleAcpKey + value: "agent:claude:acp:qa-stale-acp" + - set: freshDashboardKey + value: "agent:qa:dashboard:qa-fresh-child" + - set: liveRunKey + value: "agent:qa:subagent:qa-live-child" + - call: env.gateway.restartAfterStateMutation + args: + - lambda: + params: + - ctx + async: true + expr: |- + await (async () => { + const now = Date.now(); + const old = now - 2 * 60 * 60 * 1000; + const recent = now - 5000; + const qaSessionsDir = path.join(ctx.stateDir, "agents", "qa", "sessions"); + const claudeSessionsDir = path.join(ctx.stateDir, "agents", "claude", "sessions"); + const subagentDir = path.join(ctx.stateDir, "subagents"); + await fs.mkdir(qaSessionsDir, { recursive: true }); + await fs.mkdir(claudeSessionsDir, { recursive: true }); + await fs.mkdir(subagentDir, { recursive: true }); + await fs.writeFile(path.join(subagentDir, "runs.json"), `${JSON.stringify({ + version: 2, + runs: { + "run-stale-ended": { + runId: "run-stale-ended", + childSessionKey: staleRunKey, + controllerSessionKey: mainKey, + requesterSessionKey: mainKey, + requesterDisplayKey: "main", + task: "old ended ghost", + cleanup: "keep", + createdAt: old - 60000, + startedAt: old - 50000, + endedAt: old, + outcome: { status: "ok" }, + }, + "run-live-visible": 
{ + runId: "run-live-visible", + childSessionKey: liveRunKey, + controllerSessionKey: mainKey, + requesterSessionKey: mainKey, + requesterDisplayKey: "main", + task: "live child remains visible", + cleanup: "keep", + createdAt: recent, + startedAt: recent, + }, + }, + }, null, 2)}\n`, "utf8"); + await fs.writeFile(path.join(qaSessionsDir, "sessions.json"), `${JSON.stringify({ + [mainKey]: { + sessionId: "sess-main", + updatedAt: now, + }, + [staleRunKey]: { + sessionId: "sess-stale-run", + updatedAt: old, + spawnedBy: mainKey, + status: "done", + endedAt: old, + }, + [staleOrphanKey]: { + sessionId: "sess-orphan", + updatedAt: old, + parentSessionKey: mainKey, + }, + [freshDashboardKey]: { + sessionId: "sess-fresh-dashboard", + updatedAt: now, + parentSessionKey: mainKey, + }, + [liveRunKey]: { + sessionId: "sess-live-child", + updatedAt: recent, + spawnedBy: mainKey, + }, + }, null, 2)}\n`, "utf8"); + await fs.writeFile(path.join(claudeSessionsDir, "sessions.json"), `${JSON.stringify({ + [staleAcpKey]: { + sessionId: "sess-acp-stale", + updatedAt: old, + spawnedBy: mainKey, + status: "done", + endedAt: old, + }, + }, null, 2)}\n`, "utf8"); + })() + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: env.gateway.call + saveAs: listed + args: + - "sessions.list" + - {} + - timeoutMs: 60000 + - call: env.gateway.call + saveAs: filtered + args: + - "sessions.list" + - spawnedBy: + ref: mainKey + - timeoutMs: 60000 + - set: mainChildren + value: + expr: "(listed.sessions.find((session) => session.key === mainKey)?.childSessions ?? 
[])" + - set: filteredKeys + value: + expr: "filtered.sessions.map((session) => session.key)" + - assert: + expr: "mainChildren.includes(freshDashboardKey)" + message: + expr: "`fresh dashboard child missing from main children: ${JSON.stringify(mainChildren)}`" + - assert: + expr: "mainChildren.includes(liveRunKey)" + message: + expr: "`live subagent child missing from main children: ${JSON.stringify(mainChildren)}`" + - assert: + expr: "filteredKeys.includes(freshDashboardKey) && filteredKeys.includes(liveRunKey)" + message: + expr: "`spawnedBy filter dropped live/fresh children: ${JSON.stringify(filteredKeys)}`" + - assert: + expr: "![staleRunKey, staleOrphanKey, staleAcpKey].some((key) => mainChildren.includes(key) || filteredKeys.includes(key))" + message: + expr: "`stale child leaked through sessions.list (main=${JSON.stringify(mainChildren)} filtered=${JSON.stringify(filteredKeys)})`" + detailsExpr: "({ mainChildren, filteredKeys })" +``` diff --git a/qa/scenarios/index.md b/qa/scenarios/index.md index d1d1edd4ef2..c790310c31e 100644 --- a/qa/scenarios/index.md +++ b/qa/scenarios/index.md @@ -25,7 +25,7 @@ Coverage tracking: Theme directories: -- `agents/` - agent behavior, instructions, and subagent flows +- `agents/` - agent behavior, instructions, subagent flows, and persisted child-link regressions - `channels/` - DM, shared channel, thread, and message-action behavior - `character/` - persona and style eval scenarios - `config/` - config patch, apply, and restart behavior