diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index e6f1d529c8c..50c5172ef6f 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -68,6 +68,7 @@ function makeUserInput(text: string) { } const SESSIONS_SPAWN_TOOL = { type: "function", name: "sessions_spawn" } as const; +const SESSIONS_YIELD_TOOL = { type: "function", name: "sessions_yield" } as const; const THREAD_SUBAGENT_CHILD_ERROR_TOKEN = "QA_SUBAGENT_CHILD_ERROR"; const THREAD_SUBAGENT_TOOL_ERROR = "thread=true requested but thread delivery is unavailable in this test harness."; @@ -707,6 +708,75 @@ describe("qa mock openai server", () => { }); }); + it("drives yielded-parent subagent fallback QA through sessions_spawn and sessions_yield", async () => { + const server = await startMockServer(); + const prompt = + "Subagent direct fallback QA check: spawn one worker and yield until QA-SUBAGENT-DIRECT-FALLBACK-OK is delivered."; + + await expectResponsesText(server, { + stream: true, + tools: [SESSIONS_SPAWN_TOOL, SESSIONS_YIELD_TOOL], + input: [makeUserInput(prompt)], + }); + + await expect( + (await fetch(`${server.baseUrl}/debug/last-request`)).json(), + ).resolves.toMatchObject({ + plannedToolName: "sessions_spawn", + plannedToolArgs: { + label: "qa-direct-fallback-worker", + thread: false, + mode: "run", + }, + }); + + const body = await expectResponsesText(server, { + stream: true, + tools: [SESSIONS_SPAWN_TOOL, SESSIONS_YIELD_TOOL], + input: [ + makeUserInput(prompt), + { + type: "function_call_output", + call_id: "call_mock_sessions_spawn_1", + output: JSON.stringify({ + status: "accepted", + childSessionKey: "agent:qa:subagent:child", + runId: "run-child-1", + }), + }, + ], + }); + + expect(body).toContain('"name":"sessions_yield"'); + expect(body).toContain("QA-SUBAGENT-DIRECT-FALLBACK-OK"); + await expect( + (await fetch(`${server.baseUrl}/debug/last-request`)).json(), + ).resolves.toMatchObject({ + plannedToolName: "sessions_yield", + }); + }); + + it("returns no visible announce output for the direct fallback QA marker", async () => { + const server = await startMockServer(); + + const body = await expectResponsesJson<{ + output?: Array<{ content?: Array<{ text?: string }> }>; + }>(server, { + stream: false, + input: [ + makeUserInput( + [ + "[Internal task completion event]", + "Task: qa-direct-fallback-worker", + "Result: QA-SUBAGENT-DIRECT-FALLBACK-OK", + ].join("\n"), + ), + ], + }); + + expect(body.output?.[0]?.content?.[0]?.text).toBe(""); + }); + it("surfaces sessions_spawn tool errors instead of echoing child-task tokens", async () => { const server = await startMockServer(); diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index 233f99dba2f..6ac729c839a 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -147,6 +147,9 @@ const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa che const QA_EMPTY_RESPONSE_EXHAUSTION_PROMPT_RE = /empty response exhaustion qa check/i; const QA_QUIET_STREAMING_PROMPT_RE = /quiet streaming qa check/i; const QA_BLOCK_STREAMING_PROMPT_RE = /block streaming qa check/i; +const QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE = /subagent direct fallback qa check/i; +const QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE = /subagent direct fallback worker/i; +const QA_SUBAGENT_DIRECT_FALLBACK_MARKER = "QA-SUBAGENT-DIRECT-FALLBACK-OK"; const QA_REASONING_ONLY_RETRY_NEEDLE = "recorded reasoning but did not produce a user-visible answer"; const QA_EMPTY_RESPONSE_RETRY_NEEDLE = @@ -784,6 +787,9 @@ function buildAssistantText( if (/fanout worker beta/i.test(prompt)) { return "BETA-OK"; } + if (QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE.test(prompt)) { + return QA_SUBAGENT_DIRECT_FALLBACK_MARKER; + } if (/report the visible code/i.test(prompt) && /FORKED-CONTEXT-ALPHA/i.test(allInputText)) { return "FORKED-CONTEXT-ALPHA"; } @@ -1153,6 +1159,29 @@ async function buildResponsesPayload( const hasReasoningOnlyRetryInstruction = allInputText.includes(QA_REASONING_ONLY_RETRY_NEEDLE); const hasEmptyResponseRetryInstruction = allInputText.includes(QA_EMPTY_RESPONSE_RETRY_NEEDLE); const canCallSessionsSpawn = hasDeclaredTool(body, "sessions_spawn"); + const canCallSessionsYield = hasDeclaredTool(body, "sessions_yield"); + if ( + allInputText.includes(QA_SUBAGENT_DIRECT_FALLBACK_MARKER) && + /Internal task completion event/i.test(allInputText) + ) { + return buildAssistantEvents(""); + } + if (QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE.test(allInputText)) { + if (!toolOutput && canCallSessionsSpawn) { + return buildToolCallEventsWithArgs("sessions_spawn", { + task: `Subagent direct fallback worker: finish with exactly ${QA_SUBAGENT_DIRECT_FALLBACK_MARKER}.`, + label: "qa-direct-fallback-worker", + thread: false, + mode: "run", + runTimeoutSeconds: 30, + }); + } + if (toolOutput && canCallSessionsYield && !/\byielded\b/i.test(toolOutput)) { + return buildToolCallEventsWithArgs("sessions_yield", { + message: `Waiting for ${QA_SUBAGENT_DIRECT_FALLBACK_MARKER}.`, + }); + } + } if (/remember this fact/i.test(prompt)) { return buildAssistantEvents(buildAssistantText(input, body, scenarioState)); } diff --git a/qa/scenarios/agents/subagent-completion-direct-fallback.md b/qa/scenarios/agents/subagent-completion-direct-fallback.md new file mode 100644 index 00000000000..4b62d365a18 --- /dev/null +++ b/qa/scenarios/agents/subagent-completion-direct-fallback.md @@ -0,0 +1,99 @@ +# Subagent completion direct fallback + +```yaml qa-scenario +id: subagent-completion-direct-fallback +title: Subagent completion direct fallback +surface: subagents +coverage: + primary: + - agents.subagents + secondary: + - runtime.delivery + - channels.qa-channel +objective: Verify a yielded parent still receives a successful subagent result through direct fallback delivery when the dormant announce turn produces no visible reply. +successCriteria: + - Parent launches a native subagent. + - Parent yields instead of waiting in-turn. + - Subagent completion result is delivered to the original QA DM without a thread id. + - Durable task delivery is marked delivered, not failed. +docsRefs: + - docs/tools/subagents.md + - docs/help/testing.md + - docs/channels/qa-channel.md +codeRefs: + - src/agents/subagent-announce-delivery.ts + - src/agents/subagent-registry-lifecycle.ts + - src/agents/tools/sessions-yield-tool.ts + - extensions/qa-lab/src/providers/mock-openai/server.ts +execution: + kind: flow + summary: Reproduce yielded-parent subagent completion delivery and require frozen-result fallback to the QA DM. + config: + prompt: "Subagent direct fallback QA check: spawn one native subagent worker. The worker must finish with exactly QA-SUBAGENT-DIRECT-FALLBACK-OK. After spawning it, call sessions_yield and wait for the completion event. Do not use ACP." + expectedMarker: QA-SUBAGENT-DIRECT-FALLBACK-OK + expectedLabel: qa-direct-fallback-worker +``` + +```yaml qa-flow +steps: + - name: yielded parent receives child completion through direct fallback + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 120000 + - call: waitForQaChannelReady + args: + - ref: env + - 120000 + - call: reset + - set: sessionKey + value: + expr: "`agent:qa:subagent-direct-fallback:${randomUUID().slice(0, 8)}`" + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + ref: sessionKey + message: + expr: config.prompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 90000) + - call: waitForCondition + saveAs: outbound + args: + - lambda: + expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && String(message.text ?? '').includes(config.expectedMarker)).at(-1)" + - expr: liveTurnTimeoutMs(env, 60000) + - expr: "env.providerMode === 'mock-openai' ? 100 : 250" + - assert: + expr: "String(outbound.text ?? '').trim().includes(config.expectedMarker)" + message: + expr: "`fallback completion marker missing from outbound QA DM: ${recentOutboundSummary(state)}`" + - if: + expr: "Boolean(env.mock)" + then: + - set: fallbackDebugRequests + value: + expr: "[...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))]" + - assert: + expr: "fallbackDebugRequests.some((request) => !request.toolOutput && /subagent direct fallback qa check/i.test(String(request.allInputText ?? '')) && request.plannedToolName === 'sessions_spawn' && request.plannedToolArgs?.label === config.expectedLabel)" + message: + expr: "`expected sessions_spawn for yielded fallback scenario, saw ${JSON.stringify(fallbackDebugRequests.map((request) => ({ plannedToolName: request.plannedToolName ?? null, plannedToolArgs: request.plannedToolArgs ?? null })))}`" + - assert: + expr: "fallbackDebugRequests.some((request) => /subagent direct fallback qa check/i.test(String(request.allInputText ?? '')) && request.plannedToolName === 'sessions_yield')" + message: + expr: "`expected sessions_yield for yielded fallback scenario, saw ${JSON.stringify(fallbackDebugRequests.map((request) => request.plannedToolName ?? null))}`" + - call: waitForCondition + saveAs: deliveredTask + args: + - lambda: + expr: "(async () => { const payload = await runQaCli(env, ['tasks', 'list', '--json', '--runtime', 'subagent'], { timeoutMs: liveTurnTimeoutMs(env, 60000), json: true }); return (payload.tasks ?? []).find((task) => task.label === config.expectedLabel && task.deliveryStatus === 'delivered' && task.status === 'succeeded') ?? null; })()" + - expr: liveTurnTimeoutMs(env, 30000) + - 250 + - assert: + expr: "deliveredTask.deliveryStatus === 'delivered'" + message: + expr: "`expected delivered task status for ${config.expectedLabel}, got ${JSON.stringify(deliveredTask)}`" + detailsExpr: "outbound.text" +```