test(qa): cover subagent completion fallback

This commit is contained in:
Vincent Koc
2026-04-26 01:54:10 -07:00
parent a1b6567059
commit a911eb748b
3 changed files with 198 additions and 0 deletions

View File

@@ -68,6 +68,7 @@ function makeUserInput(text: string) {
}
// Function-tool declarations that tests pass in the `tools` array of a mock
// request so the server sees `sessions_spawn` / `sessions_yield` as declared.
const SESSIONS_SPAWN_TOOL = { type: "function", name: "sessions_spawn" } as const;
const SESSIONS_YIELD_TOOL = { type: "function", name: "sessions_yield" } as const;
// NOTE(review): the two THREAD_SUBAGENT_* values are not used in the visible
// part of this file; presumably they belong to the sessions_spawn tool-error
// test further down — confirm against that test.
const THREAD_SUBAGENT_CHILD_ERROR_TOKEN = "QA_SUBAGENT_CHILD_ERROR";
const THREAD_SUBAGENT_TOOL_ERROR =
"thread=true requested but thread delivery is unavailable in this test harness.";
@@ -707,6 +708,75 @@ describe("qa mock openai server", () => {
});
});
it("drives yielded-parent subagent fallback QA through sessions_spawn and sessions_yield", async () => {
  // Two-turn scenario: the first request must plan a `sessions_spawn` call;
  // once the spawn result is fed back, the next request must plan
  // `sessions_yield` while the parent waits for the worker's marker.
  const server = await startMockServer();
  const prompt =
    "Subagent direct fallback QA check: spawn one worker and yield until QA-SUBAGENT-DIRECT-FALLBACK-OK is delivered.";

  // Turn 1: only the user prompt is present, no tool output yet.
  await expectResponsesText(server, {
    stream: true,
    tools: [SESSIONS_SPAWN_TOOL, SESSIONS_YIELD_TOOL],
    input: [makeUserInput(prompt)],
  });
  const afterSpawnTurn = await fetch(`${server.baseUrl}/debug/last-request`);
  await expect(afterSpawnTurn.json()).resolves.toMatchObject({
    plannedToolName: "sessions_spawn",
    plannedToolArgs: {
      label: "qa-direct-fallback-worker",
      thread: false,
      mode: "run",
    },
  });

  // Turn 2: replay the prompt together with an "accepted" spawn result.
  const spawnAcceptedOutput = JSON.stringify({
    status: "accepted",
    childSessionKey: "agent:qa:subagent:child",
    runId: "run-child-1",
  });
  const body = await expectResponsesText(server, {
    stream: true,
    tools: [SESSIONS_SPAWN_TOOL, SESSIONS_YIELD_TOOL],
    input: [
      makeUserInput(prompt),
      {
        type: "function_call_output",
        call_id: "call_mock_sessions_spawn_1",
        output: spawnAcceptedOutput,
      },
    ],
  });

  // The streamed body must carry the yield tool call and mention the marker.
  expect(body).toContain('"name":"sessions_yield"');
  expect(body).toContain("QA-SUBAGENT-DIRECT-FALLBACK-OK");
  const afterYieldTurn = await fetch(`${server.baseUrl}/debug/last-request`);
  await expect(afterYieldTurn.json()).resolves.toMatchObject({
    plannedToolName: "sessions_yield",
  });
});
it("returns no visible announce output for the direct fallback QA marker", async () => {
  // Minimal view of the non-streaming response: only the first text part matters.
  type AnnounceResponse = {
    output?: Array<{ content?: Array<{ text?: string }> }>;
  };

  const server = await startMockServer();
  // An internal completion event carrying the fallback marker as its result.
  const completionEventText = [
    "[Internal task completion event]",
    "Task: qa-direct-fallback-worker",
    "Result: QA-SUBAGENT-DIRECT-FALLBACK-OK",
  ].join("\n");

  const body = await expectResponsesJson<AnnounceResponse>(server, {
    stream: false,
    input: [makeUserInput(completionEventText)],
  });

  // The assistant text for this event must be the empty string.
  const firstText = body.output?.[0]?.content?.[0]?.text;
  expect(firstText).toBe("");
});
it("surfaces sessions_spawn tool errors instead of echoing child-task tokens", async () => {
const server = await startMockServer();

View File

@@ -147,6 +147,9 @@ const QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE = /empty response continuation qa che
// Case-insensitive matchers for the trigger phrases of individual QA scenarios.
const QA_EMPTY_RESPONSE_EXHAUSTION_PROMPT_RE = /empty response exhaustion qa check/i;
const QA_QUIET_STREAMING_PROMPT_RE = /quiet streaming qa check/i;
const QA_BLOCK_STREAMING_PROMPT_RE = /block streaming qa check/i;
// Subagent direct-fallback scenario:
// - PROMPT_RE is tested against the combined input text to drive the
//   sessions_spawn / sessions_yield tool-call planning.
// - WORKER_RE is tested against the prompt; on a match the assistant text is
//   the marker below.
// - MARKER is the token the worker must finish with; when it appears together
//   with "Internal task completion event" the server answers with empty text.
const QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE = /subagent direct fallback qa check/i;
const QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE = /subagent direct fallback worker/i;
const QA_SUBAGENT_DIRECT_FALLBACK_MARKER = "QA-SUBAGENT-DIRECT-FALLBACK-OK";
// Substring looked up in the combined input text to detect the
// reasoning-only retry instruction.
const QA_REASONING_ONLY_RETRY_NEEDLE =
"recorded reasoning but did not produce a user-visible answer";
const QA_EMPTY_RESPONSE_RETRY_NEEDLE =
@@ -784,6 +787,9 @@ function buildAssistantText(
if (/fanout worker beta/i.test(prompt)) {
return "BETA-OK";
}
if (QA_SUBAGENT_DIRECT_FALLBACK_WORKER_RE.test(prompt)) {
return QA_SUBAGENT_DIRECT_FALLBACK_MARKER;
}
if (/report the visible code/i.test(prompt) && /FORKED-CONTEXT-ALPHA/i.test(allInputText)) {
return "FORKED-CONTEXT-ALPHA";
}
@@ -1153,6 +1159,29 @@ async function buildResponsesPayload(
const hasReasoningOnlyRetryInstruction = allInputText.includes(QA_REASONING_ONLY_RETRY_NEEDLE);
const hasEmptyResponseRetryInstruction = allInputText.includes(QA_EMPTY_RESPONSE_RETRY_NEEDLE);
const canCallSessionsSpawn = hasDeclaredTool(body, "sessions_spawn");
const canCallSessionsYield = hasDeclaredTool(body, "sessions_yield");
if (
allInputText.includes(QA_SUBAGENT_DIRECT_FALLBACK_MARKER) &&
/Internal task completion event/i.test(allInputText)
) {
return buildAssistantEvents("");
}
if (QA_SUBAGENT_DIRECT_FALLBACK_PROMPT_RE.test(allInputText)) {
if (!toolOutput && canCallSessionsSpawn) {
return buildToolCallEventsWithArgs("sessions_spawn", {
task: `Subagent direct fallback worker: finish with exactly ${QA_SUBAGENT_DIRECT_FALLBACK_MARKER}.`,
label: "qa-direct-fallback-worker",
thread: false,
mode: "run",
runTimeoutSeconds: 30,
});
}
if (toolOutput && canCallSessionsYield && !/\byielded\b/i.test(toolOutput)) {
return buildToolCallEventsWithArgs("sessions_yield", {
message: `Waiting for ${QA_SUBAGENT_DIRECT_FALLBACK_MARKER}.`,
});
}
}
if (/remember this fact/i.test(prompt)) {
return buildAssistantEvents(buildAssistantText(input, body, scenarioState));
}