From 6e2cbe3fafc509e1c49b552d9fd8fe2ae548a51c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 21 Apr 2026 04:14:32 +0100 Subject: [PATCH] test(qa): add long-running release audit scenario --- .../workspace/long-running-release-audit.md | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 qa/scenarios/workspace/long-running-release-audit.md diff --git a/qa/scenarios/workspace/long-running-release-audit.md b/qa/scenarios/workspace/long-running-release-audit.md new file mode 100644 index 00000000000..a65a1cd4e4f --- /dev/null +++ b/qa/scenarios/workspace/long-running-release-audit.md @@ -0,0 +1,243 @@ +# Long-running release audit + +```yaml qa-scenario +id: long-running-release-audit +title: Long-running release audit +surface: workspace +coverage: + primary: + - workspace.long-running-task + secondary: + - workspace.repo-discovery + - workspace.artifacts +objective: Verify a model can complete a sustained repo audit that requires reading several scattered files, ignoring stale decoys, writing artifacts, and checking its own output. +successCriteria: + - Agent starts from the seeded project README instead of guessing. + - Agent inspects docs and source files across multiple directories. + - Agent writes a JSON audit and a Markdown handoff with all required findings. + - Agent verifies the generated JSON before finalizing. +docsRefs: + - docs/help/testing.md + - qa/scenarios/index.md +codeRefs: + - extensions/qa-lab/src/suite.ts + - extensions/qa-lab/src/scenario-flow-runner.ts +execution: + kind: flow + summary: Verify sustained repo-style auditing with scattered evidence and artifact verification. + config: + reportFile: audit-fixture/release-audit.json + handoffFile: audit-fixture/release-handoff.md + expectedFindings: + - REL-GATEWAY-417 + - REL-CHANNEL-238 + - REL-CRON-904 + - REL-MEMORY-552 + - REL-PLUGIN-319 + - REL-INSTALL-846 + - REL-DOCS-611 + workspaceFiles: + audit-fixture/README.md: |- + # Finch Relay + + Release readiness task: + + The release checklist was renamed during docs cleanup. + Find the current checklist under `docs/`, then inspect the referenced + source and notes. Produce: + + - `release-audit.json` + - `release-handoff.md` + + Do not use `docs/archive/old-release-notes.md` as current evidence. + Historical path that may appear in stale notes: `docs/release-checklist.md`. + audit-fixture/docs/current-readiness-checklist.md: |- + # Release Checklist + + Current release readiness requires checking eight areas: + + 1. gateway reconnect handling + 2. channel delivery ordering + 3. cron duplicate prevention + 4. memory recall fallback + 5. plugin runtime loading + 6. control UI status copy + 7. installer update path + 8. docs update status + + Useful current sources: + + - `src/gateway/reconnect.ts` + - `src/channels/delivery.ts` + - `src/scheduling/cron.ts` + - `src/memory/recall.ts` + - `src/plugins/runtime.ts` + - `ui/control-panel.ts` + - `install/update.ts` + - `docs/operator-notes.md` + + The archive folder contains stale notes and should not be treated as + current release evidence. + audit-fixture/docs/operator-notes.md: |- + # Operator Notes + + Current docs update status: + + Finding id: REL-DOCS-611 + Status: docs mention reconnect, cron, memory, plugin, and installer checks, + but the channel ordering and UI notes still need maintainer handoff. + audit-fixture/docs/archive/old-release-notes.md: |- + # Old Release Notes + + Stale finding id: REL-STALE-000 + This file is from a previous release and should not appear in the new + release audit. + audit-fixture/src/gateway/reconnect.ts: |- + export const gatewayReconnectReleaseFinding = { + id: "REL-GATEWAY-417", + area: "gateway reconnect handling", + status: "retry jitter verified, resume token fallback still needs manual spot check", + }; + audit-fixture/src/channels/delivery.ts: |- + export const channelDeliveryReleaseFinding = { + id: "REL-CHANNEL-238", + area: "channel delivery ordering", + status: "thread replies preserve ordering, root-channel fallback needs handoff note", + }; + audit-fixture/src/scheduling/cron.ts: |- + export const cronDuplicateReleaseFinding = { + id: "REL-CRON-904", + area: "cron duplicate prevention", + status: "single-run lock verified for restart wakeups", + }; + audit-fixture/src/memory/recall.ts: |- + export const memoryRecallReleaseFinding = { + id: "REL-MEMORY-552", + area: "memory recall fallback", + status: "fallback summary survives empty memory search, but ranking sample needs second reviewer", + }; + audit-fixture/src/plugins/runtime.ts: |- + export const pluginRuntimeReleaseFinding = { + id: "REL-PLUGIN-319", + area: "plugin runtime loading", + status: "bundled runtime manifest loads cleanly after restart", + }; + audit-fixture/install/update.ts: |- + export const installerUpdateReleaseFinding = { + id: "REL-INSTALL-846", + area: "installer update path", + status: "update smoke passed from previous stable tag", + }; + prompt: |- + Do a release readiness audit for the small project under `audit-fixture/`. + Start from `audit-fixture/README.md`, find the current checklist, inspect the referenced docs/source, then create `audit-fixture/release-audit.json` and `audit-fixture/release-handoff.md`. + + The JSON should include current finding ids, source files, statuses, and a boolean `verified`. + The Markdown handoff should summarize what is ready and what needs follow-up. + Check your generated JSON before finalizing. + Final reply exactly: RELEASE-AUDIT-COMPLETE +``` + +```yaml qa-flow +steps: + - name: completes the sustained release audit with verified artifacts + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: reset + - forEach: + items: + expr: "Object.entries(config.workspaceFiles ?? {})" + item: workspaceFile + actions: + - set: seededPath + value: + expr: "path.join(env.gateway.workspaceDir, String(workspaceFile[0]))" + - call: fs.mkdir + args: + - expr: "path.dirname(seededPath)" + - recursive: true + - call: fs.writeFile + args: + - ref: seededPath + - expr: "`${String(workspaceFile[1] ?? '').trimEnd()}\\n`" + - utf8 + - set: sessionKey + value: + expr: "`agent:qa:release-audit:${randomUUID().slice(0, 8)}`" + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + ref: sessionKey + message: + expr: config.prompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 120000) + - set: reportPath + value: + expr: "path.join(env.gateway.workspaceDir, config.reportFile)" + - set: handoffPath + value: + expr: "path.join(env.gateway.workspaceDir, config.handoffFile)" + - call: waitForCondition + saveAs: reportText + args: + - lambda: + async: true + expr: "fs.readFile(reportPath, 'utf8').then((value) => config.expectedFindings.every((finding) => value.includes(finding)) ? value : undefined).catch(() => undefined)" + - expr: liveTurnTimeoutMs(env, 60000) + - expr: "env.providerMode === 'mock-openai' ? 100 : 250" + - call: waitForCondition + saveAs: handoffText + args: + - lambda: + async: true + expr: "fs.readFile(handoffPath, 'utf8').then((value) => config.expectedFindings.every((finding) => value.includes(finding)) && !value.includes('REL-STALE-000') ? value : undefined).catch(() => undefined)" + - expr: liveTurnTimeoutMs(env, 30000) + - expr: "env.providerMode === 'mock-openai' ? 100 : 250" + - set: report + value: + expr: "JSON.parse(reportText)" + - assert: + expr: "['src/gateway/reconnect.ts', 'src/channels/delivery.ts', 'src/scheduling/cron.ts', 'src/memory/recall.ts', 'src/plugins/runtime.ts', 'install/update.ts', 'docs/operator-notes.md'].every((file) => JSON.stringify(report).includes(file))" + message: + expr: "`report missing expected source refs: ${reportText}`" + - assert: + expr: "config.expectedFindings.every((finding) => JSON.stringify(report).includes(finding))" + message: + expr: "`report missing expected finding ids: ${reportText}`" + - assert: + expr: "!JSON.stringify(report).includes('REL-STALE-000') && !handoffText.includes('REL-STALE-000')" + message: + expr: "`stale archive finding leaked into audit: report=${reportText}\\nhandoff=${handoffText}`" + - assert: + expr: "JSON.stringify(report).includes('ui/control-panel.ts') && /blocked|missing|not found/i.test(`${reportText}\\n${handoffText}`)" + message: + expr: "`missing UI evidence was not explicitly blocked: report=${reportText}\\nhandoff=${handoffText}`" + - assert: + expr: "JSON.stringify(report).includes('verified')" + message: + expr: "`report did not include a verification field: ${reportText}`" + - call: waitForCondition + saveAs: outbound + args: + - lambda: + expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && candidate.text.trim() === 'RELEASE-AUDIT-COMPLETE').at(-1)" + - expr: liveTurnTimeoutMs(env, 45000) + - expr: "env.providerMode === 'mock-openai' ? 100 : 250" + - call: readRawQaSessionStore + saveAs: store + args: + - ref: env + - set: sessionEntry + value: + expr: "store[sessionKey]" + - assert: + expr: "Boolean(sessionEntry)" + message: + expr: "`missing QA session entry for ${sessionKey}`" + detailsExpr: "`${outbound.text}\\n${reportText}\\n\\n${handoffText}`" +```