test(qa): add compaction retry parity scenario

This commit is contained in:
Eva
2026-04-11 05:35:08 +07:00
committed by Peter Steinberger
parent 3211aa2540
commit fd45ea2bf1
9 changed files with 230 additions and 8 deletions

View File

@@ -137,6 +137,7 @@ describe("qa agentic parity report", () => {
candidateSummary: {
scenarios: [
{ name: "Approval turn tool followthrough", status: "pass" },
{ name: "Compaction retry after mutating tool", status: "pass" },
{ name: "Model switch with tool continuity", status: "pass" },
{ name: "Source and docs discovery report", status: "pass" },
{ name: "Image understanding from attachment", status: "pass" },
@@ -145,6 +146,7 @@ describe("qa agentic parity report", () => {
baselineSummary: {
scenarios: [
{ name: "Approval turn tool followthrough", status: "pass" },
{ name: "Compaction retry after mutating tool", status: "pass" },
{ name: "Model switch with tool continuity", status: "pass" },
{ name: "Source and docs discovery report", status: "pass" },
{ name: "Image understanding from attachment", status: "pass" },

View File

@@ -17,6 +17,10 @@ export const QA_AGENTIC_PARITY_SCENARIOS = [
id: "image-understanding-attachment",
title: "Image understanding from attachment",
},
{
id: "compaction-retry-mutating-tool",
title: "Compaction retry after mutating tool",
},
] as const;
export const QA_AGENTIC_PARITY_SCENARIO_IDS = QA_AGENTIC_PARITY_SCENARIOS.map(({ id }) => id);

View File

@@ -334,6 +334,7 @@ describe("qa cli runtime", () => {
"model-switch-tool-continuity",
"source-docs-discovery-report",
"image-understanding-attachment",
"compaction-retry-mutating-tool",
],
}),
);

View File

@@ -169,6 +169,77 @@ describe("qa mock openai server", () => {
]);
});
it("drives the compaction retry mutating tool parity flow", async () => {
  // Both turns send the same user prompt; only the tool output and the
  // `stream` flag differ between the two requests.
  const parityPrompt =
    "Compaction retry mutating tool check: read COMPACTION_RETRY_CONTEXT.md, then create compaction-retry-summary.txt and keep replay safety explicit.";

  // NOTE(review): port 0 presumably asks for an ephemeral port — confirm
  // against startQaMockOpenAiServer.
  const mockServer = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await mockServer.stop();
  });

  // POSTs one /v1/responses turn made of the shared prompt followed by a
  // single function_call_output item carrying `toolResult`.
  const postTurn = (stream: boolean, toolResult: string) =>
    fetch(`${mockServer.baseUrl}/v1/responses`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        stream,
        model: "gpt-5.4",
        input: [
          { role: "user", content: [{ type: "input_text", text: parityPrompt }] },
          { type: "function_call_output", output: toolResult },
        ],
      }),
    });

  // Turn 1 (streamed): the tool output contains the evidence blocks, and the
  // response body is expected to mention a `write` call for the summary file.
  const writeTurn = await postTurn(
    true,
    "compaction retry evidence block 0000\ncompaction retry evidence block 0001",
  );
  expect(writeTurn.status).toBe(200);
  const streamedBody = await writeTurn.text();
  expect(streamedBody).toContain('"name":"write"');
  expect(streamedBody).toContain("compaction-retry-summary.txt");

  // Turn 2 (non-streamed): the tool output is the written replay-safety line,
  // and the first output text is expected to carry the replay-unsafe note.
  const replyTurn = await postTurn(false, "Replay safety: unsafe after write.\n");
  expect(replyTurn.status).toBe(200);
  type ResponsesPayload = {
    output?: Array<{ content?: Array<{ text?: string }> }>;
  };
  const parsedReply = (await replyTurn.json()) as ResponsesPayload;
  const firstText = parsedReply.output?.[0]?.content?.[0]?.text;
  expect(firstText).toContain("replay unsafe after write");
});
it("supports exact reply memory prompts and embeddings requests", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",

View File

@@ -452,6 +452,12 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
}
return `Protocol note: Lobster Invaders built at lobster-invaders.html.`;
}
// Compaction retry parity scenario: applies only when a tool output is present
// and the prompt contains "compaction retry mutating tool check"
// (case-insensitive match).
if (toolOutput && /compaction retry mutating tool check/i.test(prompt)) {
// The tool output carrying the replay-safety line yields the protocol note.
if (toolOutput.includes("Replay safety: unsafe after write.")) {
return "Protocol note: replay unsafe after write.";
}
// Any other tool output for this scenario yields an empty assistant text
// rather than reaching the generic tool-output reply below.
return "";
}
if (toolOutput) {
const snippet = toolOutput.replace(/\s+/g, " ").trim().slice(0, 220);
return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`;
@@ -541,6 +547,17 @@ async function buildResponsesPayload(body: Record<string, unknown>) {
});
}
}
// Compaction retry parity scenario, selected by a case-insensitive match on
// the prompt text.
if (/compaction retry mutating tool check/i.test(prompt)) {
// No tool output yet: emit a `read` tool call for the context file.
if (!toolOutput) {
return buildToolCallEventsWithArgs("read", { path: "COMPACTION_RETRY_CONTEXT.md" });
}
// Tool output containing the evidence marker: emit a `write` tool call that
// creates the summary file with the replay-safety line.
if (toolOutput.includes("compaction retry evidence")) {
return buildToolCallEventsWithArgs("write", {
path: "compaction-retry-summary.txt",
content: "Replay safety: unsafe after write.\n",
});
}
// Any other tool output does not return here; execution continues with the
// checks that follow this block.
}
if (/memory tools check/i.test(prompt)) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("memory_search", {