test(qa): cover subagent completion fallback

2026-05-06 06:20:43 +00:00 · 2026-04-26 01:54:10 -07:00
parent a1b6567059
commit a911eb748b
3 changed files with 198 additions and 0 deletions
--- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts
@@ -68,6 +68,7 @@ function makeUserInput(text: string) {
 }

 const SESSIONS_SPAWN_TOOL = { type: "function", name: "sessions_spawn" } as const;
+const SESSIONS_YIELD_TOOL = { type: "function", name: "sessions_yield" } as const;
 const THREAD_SUBAGENT_CHILD_ERROR_TOKEN = "QA_SUBAGENT_CHILD_ERROR";
 const THREAD_SUBAGENT_TOOL_ERROR =
  "thread=true requested but thread delivery is unavailable in this test harness.";
@@ -707,6 +708,75 @@ describe("qa mock openai server", () => {
    });
  });

+  it("drives yielded-parent subagent fallback QA through sessions_spawn and sessions_yield", async () => {
+    const server = await startMockServer();
+    const prompt = // wording matches the mock's /subagent direct fallback qa check/i trigger
+      "Subagent direct fallback QA check: spawn one worker and yield until QA-SUBAGENT-DIRECT-FALLBACK-OK is delivered.";
+
+    await expectResponsesText(server, { // turn 1: no tool output yet and sessions_spawn is declared, so a spawn is planned
+      stream: true,
+      tools: [SESSIONS_SPAWN_TOOL, SESSIONS_YIELD_TOOL],
+      input: [makeUserInput(prompt)],
+    });
+
+    await expect( // check the plan the mock reports for the most recent request
+      (await fetch(`${server.baseUrl}/debug/last-request`)).json(),
+    ).resolves.toMatchObject({
+      plannedToolName: "sessions_spawn",
+      plannedToolArgs: {
+        label: "qa-direct-fallback-worker",
+        thread: false,
+        mode: "run",
+      },
+    });
+
+    const body = await expectResponsesText(server, { // turn 2: same prompt plus the spawn tool's result
+      stream: true,
+      tools: [SESSIONS_SPAWN_TOOL, SESSIONS_YIELD_TOOL],
+      input: [
+        makeUserInput(prompt),
+        {
+          type: "function_call_output",
+          call_id: "call_mock_sessions_spawn_1",
+          output: JSON.stringify({
+            status: "accepted", // output lacks the word "yielded", which is what lets the mock plan sessions_yield
+            childSessionKey: "agent:qa:subagent:child",
+            runId: "run-child-1",
+          }),
+        },
+      ],
+    });
+
+    expect(body).toContain('"name":"sessions_yield"'); // the response text carries the yield tool call
+    expect(body).toContain("QA-SUBAGENT-DIRECT-FALLBACK-OK"); // the yield call's message argument embeds the marker
+    await expect(
+      (await fetch(`${server.baseUrl}/debug/last-request`)).json(),
+    ).resolves.toMatchObject({
+      plannedToolName: "sessions_yield",
+    });
+  });
+
+  it("returns no visible announce output for the direct fallback QA marker", async () => {
+    const server = await startMockServer();
+
+    const body = await expectResponsesJson<{
+      output?: Array<{ content?: Array<{ text?: string }> }>;
+    }>(server, {
+      stream: false, // request the non-streaming response so the text field can be read directly
+      input: [
+        makeUserInput(
+          [
+            "[Internal task completion event]", // this phrase plus the marker below selects the mock's empty-reply branch
+            "Task: qa-direct-fallback-worker",
+            "Result: QA-SUBAGENT-DIRECT-FALLBACK-OK",
+          ].join("\n"),
+        ),
+      ],
+    });
+
+    expect(body.output?.[0]?.content?.[0]?.text).toBe(""); // the mock builds its assistant events from an empty string
+  });
+
  it("surfaces sessions_spawn tool errors instead of echoing child-task tokens", async () => {
    const server = await startMockServer();

--- a/extensions/qa-lab/src/providers/mock-openai/server.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.ts
@@ -147,6 +147,9 @@ const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa che
 const QA_EMPTY_RESPONSE_EXHAUSTION_PROMPT_RE = /empty response exhaustion qa check/i;
 const QA_QUIET_STREAMING_PROMPT_RE = /quiet streaming qa check/i;
 const QA_BLOCK_STREAMING_PROMPT_RE = /block streaming qa check/i;
+const QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE = /subagent direct fallback qa check/i; // parent trigger: tested against allInputText to plan sessions_spawn, then sessions_yield
+const QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE = /subagent direct fallback worker/i; // worker trigger: tested against the prompt in buildAssistantText to return the marker
+const QA_SUBAGENT_DIRECT_FALLBACK_MARKER = "QA-SUBAGENT-DIRECT-FALLBACK-OK"; // token embedded in the spawn task and yield message; with a completion event it yields an empty reply
 const QA_REASONING_ONLY_RETRY_NEEDLE =
  "recorded reasoning but did not produce a user-visible answer";
 const QA_EMPTY_RESPONSE_RETRY_NEEDLE =
@@ -784,6 +787,9 @@ function buildAssistantText(
  if (/fanout worker beta/i.test(prompt)) {
    return "BETA-OK";
  }
+  if (QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE.test(prompt)) {
+    return QA_SUBAGENT_DIRECT_FALLBACK_MARKER;
+  }
  if (/report the visible code/i.test(prompt) && /FORKED-CONTEXT-ALPHA/i.test(allInputText)) {
    return "FORKED-CONTEXT-ALPHA";
  }
@@ -1153,6 +1159,29 @@ async function buildResponsesPayload(
  const hasReasoningOnlyRetryInstruction = allInputText.includes(QA_REASONING_ONLY_RETRY_NEEDLE);
  const hasEmptyResponseRetryInstruction = allInputText.includes(QA_EMPTY_RESPONSE_RETRY_NEEDLE);
  const canCallSessionsSpawn = hasDeclaredTool(body, "sessions_spawn");
+  const canCallSessionsYield = hasDeclaredTool(body, "sessions_yield");
+  if (
+    allInputText.includes(QA_SUBAGENT_DIRECT_FALLBACK_MARKER) &&
+    /Internal task completion event/i.test(allInputText)
+  ) {
+    return buildAssistantEvents("");
+  }
+  if (QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE.test(allInputText)) {
+    if (!toolOutput && canCallSessionsSpawn) {
+      return buildToolCallEventsWithArgs("sessions_spawn", {
+        task: `Subagent direct fallback worker: finish with exactly ${QA_SUBAGENT_DIRECT_FALLBACK_MARKER}.`,
+        label: "qa-direct-fallback-worker",
+        thread: false,
+        mode: "run",
+        runTimeoutSeconds: 30,
+      });
+    }
+    if (toolOutput && canCallSessionsYield && !/\byielded\b/i.test(toolOutput)) {
+      return buildToolCallEventsWithArgs("sessions_yield", {
+        message: `Waiting for ${QA_SUBAGENT_DIRECT_FALLBACK_MARKER}.`,
+      });
+    }
+  }
  if (/remember this fact/i.test(prompt)) {
    return buildAssistantEvents(buildAssistantText(input, body, scenarioState));
  }
--- a/qa/scenarios/agents/subagent-completion-direct-fallback.md
+++ b/qa/scenarios/agents/subagent-completion-direct-fallback.md
@@ -0,0 +1,99 @@
+# Subagent completion direct fallback
+
+```yaml qa-scenario
+id: subagent-completion-direct-fallback
+title: Subagent completion direct fallback
+surface: subagents
+coverage:
+  primary:
+    - agents.subagents
+  secondary:
+    - runtime.delivery
+    - channels.qa-channel
+objective: Verify a yielded parent still receives a successful subagent result through direct fallback delivery when the dormant announce turn produces no visible reply.
+successCriteria:
+  - Parent launches a native subagent.
+  - Parent yields instead of waiting in-turn.
+  - Subagent completion result is delivered to the original QA DM without a thread id.
+  - Durable task delivery is marked delivered, not failed.
+docsRefs:
+  - docs/tools/subagents.md
+  - docs/help/testing.md
+  - docs/channels/qa-channel.md
+codeRefs:
+  - src/agents/subagent-announce-delivery.ts
+  - src/agents/subagent-registry-lifecycle.ts
+  - src/agents/tools/sessions-yield-tool.ts
+  - extensions/qa-lab/src/providers/mock-openai/server.ts
+execution:
+  kind: flow
+  summary: Reproduce yielded-parent subagent completion delivery and require frozen-result fallback to the QA DM.
+  config:
+    prompt: "Subagent direct fallback QA check: spawn one native subagent worker. The worker must finish with exactly QA-SUBAGENT-DIRECT-FALLBACK-OK. After spawning it, call sessions_yield and wait for the completion event. Do not use ACP."
+    expectedMarker: QA-SUBAGENT-DIRECT-FALLBACK-OK
+    expectedLabel: qa-direct-fallback-worker
+```
+
+```yaml qa-flow
+steps:
+  - name: yielded parent receives child completion through direct fallback
+    actions:
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 120000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 120000
+      - call: reset
+      - set: sessionKey
+        value:
+          expr: "`agent:qa:subagent-direct-fallback:${randomUUID().slice(0, 8)}`"
+      - call: runAgentPrompt
+        args:
+          - ref: env
+          - sessionKey:
+              ref: sessionKey
+            message:
+              expr: config.prompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 90000)
+      - call: waitForCondition
+        saveAs: outbound
+        args:
+          - lambda:
+              expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && String(message.text ?? '').includes(config.expectedMarker)).at(-1)"
+          - expr: liveTurnTimeoutMs(env, 60000)
+          - expr: "env.providerMode === 'mock-openai' ? 100 : 250"
+      - assert:
+          expr: "String(outbound.text ?? '').trim().includes(config.expectedMarker)"
+          message:
+            expr: "`fallback completion marker missing from outbound QA DM: ${recentOutboundSummary(state)}`"
+      - if:
+          expr: "Boolean(env.mock)"
+          then:
+            - set: fallbackDebugRequests
+              value:
+                expr: "[...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))]"
+            - assert:
+                expr: "fallbackDebugRequests.some((request) => !request.toolOutput && /subagent direct fallback qa check/i.test(String(request.allInputText ?? '')) && request.plannedToolName === 'sessions_spawn' && request.plannedToolArgs?.label === config.expectedLabel)"
+                message:
+                  expr: "`expected sessions_spawn for yielded fallback scenario, saw ${JSON.stringify(fallbackDebugRequests.map((request) => ({ plannedToolName: request.plannedToolName ?? null, plannedToolArgs: request.plannedToolArgs ?? null })))}`"
+            - assert:
+                expr: "fallbackDebugRequests.some((request) => /subagent direct fallback qa check/i.test(String(request.allInputText ?? '')) && request.plannedToolName === 'sessions_yield')"
+                message:
+                  expr: "`expected sessions_yield for yielded fallback scenario, saw ${JSON.stringify(fallbackDebugRequests.map((request) => request.plannedToolName ?? null))}`"
+            - call: waitForCondition
+              saveAs: deliveredTask
+              args:
+                - lambda:
+                    expr: "(async () => { const payload = await runQaCli(env, ['tasks', 'list', '--json', '--runtime', 'subagent'], { timeoutMs: liveTurnTimeoutMs(env, 60000), json: true }); return (payload.tasks ?? []).find((task) => task.label === config.expectedLabel && task.deliveryStatus === 'delivered' && task.status === 'succeeded') ?? null; })()"
+                - expr: liveTurnTimeoutMs(env, 30000)
+                - 250
+            - assert:
+                expr: "deliveredTask.deliveryStatus === 'delivered'"
+                message:
+                  expr: "`expected delivered task status for ${config.expectedLabel}, got ${JSON.stringify(deliveredTask)}`"
+    detailsExpr: "outbound.text"
+```