From 82fe6f50efc4b1cdcf191283829ecb42a513bcf8 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Fri, 17 Apr 2026 11:02:43 -0400 Subject: [PATCH] QA: organize scenarios by theme --- docs/concepts/qa-e2e-automation.md | 7 +- docs/help/testing.md | 2 +- docs/refactor/qa.md | 10 +-- .../qa-lab/src/bundled-plugin-staging.ts | 21 ++++-- extensions/qa-lab/src/gateway-child.test.ts | 37 ++++++++++ .../qa-lab/src/scenario-catalog.test.ts | 7 +- extensions/qa-lab/src/scenario-catalog.ts | 54 +++++++++++--- ...-runtime-agent-process.integration.test.ts | 74 +++++++++++++++++++ qa/README.md | 3 +- qa/scenarios.md | 2 +- ...instruction-followthrough-repo-contract.md | 0 .../{ => agents}/subagent-fanout-synthesis.md | 2 +- qa/scenarios/{ => agents}/subagent-handoff.md | 0 .../{ => channels}/channel-chat-baseline.md | 0 .../{ => channels}/dm-chat-baseline.md | 0 .../{ => channels}/reaction-edit-delete.md | 0 .../{ => channels}/thread-follow-up.md | 0 .../{ => character}/character-vibes-c3po.md | 0 .../{ => character}/character-vibes-gollum.md | 0 .../config-apply-restart-wakeup.md | 0 .../{ => config}/config-patch-hot-apply.md | 0 .../config-restart-capability-flip.md | 0 qa/scenarios/index.md | 22 +++++- .../{ => media}/image-generation-roundtrip.md | 0 .../image-understanding-attachment.md | 0 .../{ => media}/native-image-generation.md | 0 .../active-memory-preprompt-recall.md | 0 .../{ => memory}/memory-dreaming-sweep.md | 0 .../{ => memory}/memory-failure-fallback.md | 0 qa/scenarios/{ => memory}/memory-recall.md | 0 .../memory-tools-channel-context.md | 0 .../{ => memory}/session-memory-ranking.md | 0 .../{ => memory}/thread-memory-isolation.md | 0 .../anthropic-opus-api-key-smoke.md | 0 .../anthropic-opus-setup-token-smoke.md | 0 ...-cli-provider-capabilities-subscription.md | 0 .../claude-cli-provider-capabilities.md | 0 .../codex-harness-no-meta-leak.md | 0 .../{ => models}/model-switch-follow-up.md | 0 .../model-switch-tool-continuity.md | 0 
.../bundled-plugin-skill-runtime.md | 0 .../{ => plugins}/mcp-plugin-tools-call.md | 0 .../skill-install-hot-availability.md | 0 .../skill-visibility-invocation.md | 0 .../approval-turn-tool-followthrough.md | 0 .../compaction-retry-mutating-tool.md | 0 ...mpty-response-recovery-replay-safe-read.md | 0 .../empty-response-retry-budget-exhausted.md | 0 ...easoning-only-no-auto-retry-after-write.md | 0 ...easoning-only-recovery-replay-safe-read.md | 0 .../runtime-inventory-drift-check.md | 0 .../{ => scheduling}/cron-one-minute-ping.md | 0 .../control-ui-qa-channel-image-roundtrip.md | 0 .../{ => workspace}/lobster-invaders-build.md | 0 .../medium-game-plan-codex-harness.md | 0 .../medium-game-plan-pi-harness.md | 0 .../source-docs-discovery-report.md | 0 57 files changed, 209 insertions(+), 32 deletions(-) create mode 100644 extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts rename qa/scenarios/{ => agents}/instruction-followthrough-repo-contract.md (100%) rename qa/scenarios/{ => agents}/subagent-fanout-synthesis.md (98%) rename qa/scenarios/{ => agents}/subagent-handoff.md (100%) rename qa/scenarios/{ => channels}/channel-chat-baseline.md (100%) rename qa/scenarios/{ => channels}/dm-chat-baseline.md (100%) rename qa/scenarios/{ => channels}/reaction-edit-delete.md (100%) rename qa/scenarios/{ => channels}/thread-follow-up.md (100%) rename qa/scenarios/{ => character}/character-vibes-c3po.md (100%) rename qa/scenarios/{ => character}/character-vibes-gollum.md (100%) rename qa/scenarios/{ => config}/config-apply-restart-wakeup.md (100%) rename qa/scenarios/{ => config}/config-patch-hot-apply.md (100%) rename qa/scenarios/{ => config}/config-restart-capability-flip.md (100%) rename qa/scenarios/{ => media}/image-generation-roundtrip.md (100%) rename qa/scenarios/{ => media}/image-understanding-attachment.md (100%) rename qa/scenarios/{ => media}/native-image-generation.md (100%) rename qa/scenarios/{ => memory}/active-memory-preprompt-recall.md 
(100%) rename qa/scenarios/{ => memory}/memory-dreaming-sweep.md (100%) rename qa/scenarios/{ => memory}/memory-failure-fallback.md (100%) rename qa/scenarios/{ => memory}/memory-recall.md (100%) rename qa/scenarios/{ => memory}/memory-tools-channel-context.md (100%) rename qa/scenarios/{ => memory}/session-memory-ranking.md (100%) rename qa/scenarios/{ => memory}/thread-memory-isolation.md (100%) rename qa/scenarios/{ => models}/anthropic-opus-api-key-smoke.md (100%) rename qa/scenarios/{ => models}/anthropic-opus-setup-token-smoke.md (100%) rename qa/scenarios/{ => models}/claude-cli-provider-capabilities-subscription.md (100%) rename qa/scenarios/{ => models}/claude-cli-provider-capabilities.md (100%) rename qa/scenarios/{ => models}/codex-harness-no-meta-leak.md (100%) rename qa/scenarios/{ => models}/model-switch-follow-up.md (100%) rename qa/scenarios/{ => models}/model-switch-tool-continuity.md (100%) rename qa/scenarios/{ => plugins}/bundled-plugin-skill-runtime.md (100%) rename qa/scenarios/{ => plugins}/mcp-plugin-tools-call.md (100%) rename qa/scenarios/{ => plugins}/skill-install-hot-availability.md (100%) rename qa/scenarios/{ => plugins}/skill-visibility-invocation.md (100%) rename qa/scenarios/{ => runtime}/approval-turn-tool-followthrough.md (100%) rename qa/scenarios/{ => runtime}/compaction-retry-mutating-tool.md (100%) rename qa/scenarios/{ => runtime}/empty-response-recovery-replay-safe-read.md (100%) rename qa/scenarios/{ => runtime}/empty-response-retry-budget-exhausted.md (100%) rename qa/scenarios/{ => runtime}/reasoning-only-no-auto-retry-after-write.md (100%) rename qa/scenarios/{ => runtime}/reasoning-only-recovery-replay-safe-read.md (100%) rename qa/scenarios/{ => runtime}/runtime-inventory-drift-check.md (100%) rename qa/scenarios/{ => scheduling}/cron-one-minute-ping.md (100%) rename qa/scenarios/{ => ui}/control-ui-qa-channel-image-roundtrip.md (100%) rename qa/scenarios/{ => workspace}/lobster-invaders-build.md (100%) rename 
qa/scenarios/{ => workspace}/medium-game-plan-codex-harness.md (100%) rename qa/scenarios/{ => workspace}/medium-game-plan-pi-harness.md (100%) rename qa/scenarios/{ => workspace}/source-docs-discovery-report.md (100%) diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index 94ad02848af..14953f3aa81 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -120,7 +120,7 @@ can write back through the mounted workspace. Seed assets live in `qa/`: - `qa/scenarios/index.md` -- `qa/scenarios/*.md` +- `qa/scenarios/<theme>/*.md` These are intentionally in git so the QA plan is visible to both humans and the agent. @@ -129,6 +129,7 @@ agent. the source of truth for one test run and should define: - scenario metadata +- optional category, capability, lane, and risk metadata - docs and code refs - optional plugin requirements - optional gateway config patch @@ -139,6 +140,10 @@ and cross-cutting. For example, markdown scenarios can combine transport-side helpers with browser-side helpers that drive the embedded Control UI through the Gateway `browser.request` seam without adding a special-case runner. +Scenario files should be grouped by product capability rather than source tree +folder. Keep scenario IDs stable when files move; use `docsRefs` and `codeRefs` +for implementation traceability. + The baseline list should stay broad enough to cover: - DM and channel chat diff --git a/docs/help/testing.md b/docs/help/testing.md index 09b31f876b4..3aa61439670 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -213,7 +213,7 @@ The minimum adoption bar for a new channel is: 4. Mount the runner as `openclaw qa <runner>` instead of registering a competing root command. Runner plugins should declare `qaRunners` in `openclaw.plugin.json` and export a matching `qaRunnerCliRegistrations` array from `runtime-api.ts`. Keep `runtime-api.ts` light; lazy CLI and runner execution should stay behind separate entrypoints. -5. 
Author or adapt markdown scenarios under `qa/scenarios/`. +5. Author or adapt markdown scenarios under the themed `qa/scenarios/` directories. 6. Use the generic scenario helpers for new scenarios. 7. Keep existing compatibility aliases working unless the repo is doing an intentional migration. diff --git a/docs/refactor/qa.md b/docs/refactor/qa.md index 139eb967d30..e22a6d52fba 100644 --- a/docs/refactor/qa.md +++ b/docs/refactor/qa.md @@ -18,7 +18,7 @@ The desired end state is a generic QA harness that loads powerful scenario defin ## Current State Primary source of truth now lives in `qa/scenarios/index.md` plus one file per -scenario under `qa/scenarios/*.md`. +scenario under `qa/scenarios/<theme>/*.md`. Implemented: @@ -26,7 +26,7 @@ Implemented: - canonical QA pack metadata - operator identity - kickoff mission -- `qa/scenarios/*.md` +- `qa/scenarios/<theme>/*.md` - one markdown file per scenario - scenario metadata - handler bindings @@ -107,8 +107,8 @@ These categories matter because they drive DSL requirements. A flat list of prom ### Single source of truth -Use `qa/scenarios/index.md` plus `qa/scenarios/*.md` as the authored source of -truth. +Use `qa/scenarios/index.md` plus `qa/scenarios/<theme>/*.md` as the authored +source of truth. The pack should stay: @@ -363,7 +363,7 @@ Generated compatibility: Done. 
- added `qa/scenarios/index.md` -- split scenarios into `qa/scenarios/*.md` +- split scenarios into `qa/scenarios/<theme>/*.md` - added parser for named markdown YAML pack content - validated with zod - switched consumers to the parsed pack diff --git a/extensions/qa-lab/src/bundled-plugin-staging.ts b/extensions/qa-lab/src/bundled-plugin-staging.ts index d26b8a5363e..27d33c82757 100644 --- a/extensions/qa-lab/src/bundled-plugin-staging.ts +++ b/extensions/qa-lab/src/bundled-plugin-staging.ts @@ -10,6 +10,12 @@ const QA_ALWAYS_STAGE_RUNTIME_PLUGIN_IDS = Object.freeze([ ]); const QA_OPENAI_PLUGIN_ID = "openai"; const QA_BUNDLED_PLUGIN_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/; +const QA_CLI_METADATA_ENTRY_BASENAMES = Object.freeze([ + "cli-metadata.ts", + "cli-metadata.js", + "cli-metadata.mjs", + "cli-metadata.cjs", +]); function assertSafeQaBundledPluginId(pluginId: string) { if (!QA_BUNDLED_PLUGIN_ID_PATTERN.test(pluginId)) { @@ -69,12 +75,17 @@ export function resolveQaBundledPluginSourceDir(params: { repoRoot: string; plug path.join(params.repoRoot, "dist-runtime", "extensions", params.pluginId), path.join(params.repoRoot, "extensions", params.pluginId), ]; - for (const candidate of candidates) { - if (existsSync(candidate)) { - return candidate; - } + const existingCandidates = candidates.filter((candidate) => existsSync(candidate)); + if (existingCandidates.length === 0) { + return null; } - return null; + const cliMetadataCandidate = existingCandidates.find((candidate) => + QA_CLI_METADATA_ENTRY_BASENAMES.some((basename) => existsSync(path.join(candidate, basename))), + ); + if (cliMetadataCandidate) { + return cliMetadataCandidate; + } + return existingCandidates[0] ?? 
null; } function resolveQaBundledPluginScanRoots(repoRoot: string) { diff --git a/extensions/qa-lab/src/gateway-child.test.ts b/extensions/qa-lab/src/gateway-child.test.ts index 74008bda948..820f60c2774 100644 --- a/extensions/qa-lab/src/gateway-child.test.ts +++ b/extensions/qa-lab/src/gateway-child.test.ts @@ -714,6 +714,43 @@ describe("qa bundled plugin dir", () => { ).toBe(path.join(repoRoot, "extensions", "qa-channel")); }); + it("uses a source bundled plugin when the built copy is missing CLI metadata", async () => { + const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-cli-metadata-root-")); + cleanups.push(async () => { + await rm(repoRoot, { recursive: true, force: true }); + }); + await mkdir(path.join(repoRoot, "dist", "extensions", "memory-core"), { recursive: true }); + await writeFile( + path.join(repoRoot, "dist", "extensions", "memory-core", "package.json"), + "{}", + "utf8", + ); + await writeFile( + path.join(repoRoot, "dist", "extensions", "memory-core", "openclaw.plugin.json"), + JSON.stringify({ id: "memory-core", kind: "memory" }), + "utf8", + ); + await mkdir(path.join(repoRoot, "extensions", "memory-core"), { recursive: true }); + await writeFile(path.join(repoRoot, "extensions", "memory-core", "package.json"), "{}", "utf8"); + await writeFile( + path.join(repoRoot, "extensions", "memory-core", "openclaw.plugin.json"), + JSON.stringify({ id: "memory-core", kind: "memory" }), + "utf8", + ); + await writeFile( + path.join(repoRoot, "extensions", "memory-core", "cli-metadata.ts"), + "export default { id: 'memory-core' };\n", + "utf8", + ); + + expect( + __testing.resolveQaBundledPluginSourceDir({ + repoRoot, + pluginId: "memory-core", + }), + ).toBe(path.join(repoRoot, "extensions", "memory-core")); + }); + it("creates a scoped bundled plugin tree for allowed plugins plus always-allowed runtime facades", async () => { const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-scope-")); cleanups.push(async () => { diff 
--git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts index db630b9a143..dbebaa1182d 100644 --- a/extensions/qa-lab/src/scenario-catalog.test.ts +++ b/extensions/qa-lab/src/scenario-catalog.test.ts @@ -17,6 +17,9 @@ describe("qa scenario catalog", () => { expect(pack.agent.identityMarkdown).toContain("Dev C-3PO"); expect(pack.kickoffTask).toContain("Lobster Invaders"); expect(listQaScenarioMarkdownPaths().length).toBe(pack.scenarios.length); + expect(listQaScenarioMarkdownPaths()).toContain( + "qa/scenarios/media/image-generation-roundtrip.md", + ); expect(pack.scenarios.some((scenario) => scenario.id === "image-generation-roundtrip")).toBe( true, ); @@ -112,7 +115,7 @@ describe("qa scenario catalog", () => { (candidate) => candidate.id === "codex-harness-no-meta-leak", ); - expect(scenario?.sourcePath).toBe("qa/scenarios/codex-harness-no-meta-leak.md"); + expect(scenario?.sourcePath).toBe("qa/scenarios/models/codex-harness-no-meta-leak.md"); expect(scenario?.execution.flow?.steps.map((step) => step.name)).toContain( "keeps codex coordination chatter out of the visible reply", ); @@ -135,7 +138,7 @@ describe("qa scenario catalog", () => { } | undefined; - expect(scenario.sourcePath).toBe(`qa/scenarios/${scenarioId}.md`); + expect(scenario.sourcePath).toBe(`qa/scenarios/runtime/${scenarioId}.md`); expect(config?.requiredProvider).toBe("mock-openai"); expect(config?.prompt).toContain("check"); expect(scenario.execution.flow?.steps.length).toBeGreaterThan(0); diff --git a/extensions/qa-lab/src/scenario-catalog.ts b/extensions/qa-lab/src/scenario-catalog.ts index 82671905024..64dee666683 100644 --- a/extensions/qa-lab/src/scenario-catalog.ts +++ b/extensions/qa-lab/src/scenario-catalog.ts @@ -137,6 +137,10 @@ const qaSeedScenarioSchema = z.object({ id: z.string().trim().min(1), title: z.string().trim().min(1), surface: z.string().trim().min(1), + category: z.string().trim().min(1).optional(), + capabilities: 
z.array(z.string().trim().min(1)).optional(), + lane: z.record(z.string(), z.union([z.boolean(), z.string()])).optional(), + riskLevel: z.string().trim().min(1).optional(), objective: z.string().trim().min(1), successCriteria: z.array(z.string().trim().min(1)).min(1), plugins: z.array(z.string().trim().min(1)).optional(), @@ -225,14 +229,6 @@ function readTextFile(relativePath: string): string { return fs.readFileSync(resolved, "utf8"); } -function readDirEntries(relativePath: string): string[] { - const resolved = resolveRepoPath(relativePath, "directory"); - if (!resolved) { - return []; - } - return fs.readdirSync(resolved); -} - function extractQaPackYaml(content: string) { const match = content.match(QA_PACK_FENCE_RE); if (!match?.[1]) { @@ -324,6 +320,13 @@ export function readQaScenarioPack(): QaScenarioPack { } satisfies QaSeedScenarioWithSource; })(), ); + const seenScenarioIds = new Set(); + for (const scenario of scenarios) { + if (seenScenarioIds.has(scenario.id)) { + throw new Error(`duplicate qa scenario id: ${scenario.id}`); + } + seenScenarioIds.add(scenario.id); + } return { ...parsedPack, scenarios, @@ -331,10 +334,37 @@ export function readQaScenarioPack(): QaScenarioPack { } export function listQaScenarioMarkdownPaths(): string[] { - return readDirEntries(QA_SCENARIO_DIR_PATH) - .filter((entry) => entry.endsWith(".md") && entry !== "index.md") - .map((entry) => `${QA_SCENARIO_DIR_PATH}/${entry}`) - .toSorted(); + const resolved = resolveRepoPath(QA_SCENARIO_DIR_PATH, "directory"); + if (!resolved) { + return []; + } + return listQaScenarioMarkdownPathsInDirectory(resolved, QA_SCENARIO_DIR_PATH).toSorted(); +} + +function listQaScenarioMarkdownPathsInDirectory( + absoluteDir: string, + relativeDir: string, +): string[] { + const paths: string[] = []; + const entries = fs + .readdirSync(absoluteDir, { withFileTypes: true }) + .toSorted((left, right) => left.name.localeCompare(right.name)); + for (const entry of entries) { + if 
(entry.name.startsWith(".")) { + continue; + } + const relativePath = `${relativeDir}/${entry.name}`; + if (entry.isDirectory()) { + paths.push( + ...listQaScenarioMarkdownPathsInDirectory(path.join(absoluteDir, entry.name), relativePath), + ); + continue; + } + if (entry.isFile() && entry.name.endsWith(".md") && entry.name !== "index.md") { + paths.push(relativePath); + } + } + return paths; } export function readQaScenarioOverviewMarkdown(): string { diff --git a/extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts b/extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts new file mode 100644 index 00000000000..f1a3d04c579 --- /dev/null +++ b/extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts @@ -0,0 +1,74 @@ +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { runQaCli } from "./suite-runtime-agent-process.js"; + +const cleanups: Array<() => Promise> = []; + +afterEach(async () => { + while (cleanups.length > 0) { + await cleanups.pop()?.(); + } +}); + +describe("qa suite runtime CLI integration", () => { + it("runs the plugin-owned memory status command with staged CLI metadata", async () => { + const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-cli-memory-repo-")); + const tempRoot = await mkdtemp(path.join(os.tmpdir(), "qa-cli-memory-runtime-")); + cleanups.push(async () => { + await rm(repoRoot, { recursive: true, force: true }); + await rm(tempRoot, { recursive: true, force: true }); + }); + const distDir = path.join(repoRoot, "dist"); + const bundledPluginsDir = path.join(tempRoot, "dist", "extensions"); + await mkdir(path.join(distDir), { recursive: true }); + await mkdir(path.join(bundledPluginsDir, "memory-core"), { recursive: true }); + await writeFile( + path.join(bundledPluginsDir, "memory-core", "cli-metadata.js"), + "export default { id: 'memory-core' 
};\n", + "utf8", + ); + await writeFile( + path.join(distDir, "index.js"), + [ + "import fs from 'node:fs';", + "import path from 'node:path';", + "const [command, subcommand] = process.argv.slice(2);", + "const metadataPath = path.join(process.env.OPENCLAW_BUNDLED_PLUGINS_DIR ?? '', 'memory-core', 'cli-metadata.js');", + "if (command === 'memory' && subcommand === 'status' && fs.existsSync(metadataPath)) {", + " console.log(JSON.stringify({ command, subcommand, status: 'ok' }));", + " process.exit(0);", + "}", + "console.error(\"error: unknown command 'memory'\");", + "process.exit(1);", + "", + ].join("\n"), + "utf8", + ); + + await expect( + runQaCli( + { + repoRoot, + gateway: { + tempRoot, + runtimeEnv: { + ...process.env, + OPENCLAW_BUNDLED_PLUGINS_DIR: bundledPluginsDir, + }, + }, + primaryModel: "openai/gpt-5.4", + alternateModel: "openai/gpt-5.4", + providerMode: "mock-openai", + } as never, + ["memory", "status", "--json"], + { json: true }, + ), + ).resolves.toEqual({ + command: "memory", + subcommand: "status", + status: "ok", + }); + }); +}); diff --git a/qa/README.md b/qa/README.md index e8040c21ea0..98447b0c65c 100644 --- a/qa/README.md +++ b/qa/README.md @@ -4,7 +4,8 @@ Seed QA assets for the private `qa-lab` extension. Files: -- `scenarios.md` - canonical QA scenario pack, kickoff mission, and operator identity. +- `scenarios/index.md` - canonical QA scenario pack, kickoff mission, and operator identity. +- `scenarios/<theme>/*.md` - one runnable scenario per markdown file. - `frontier-harness-plan.md` - big-model bakeoff and tuning loop for harness work. - `convex-credential-broker/` - standalone Convex v1 lease broker for pooled live credentials. diff --git a/qa/scenarios.md b/qa/scenarios.md index 8ebab06230c..352e4cff8da 100644 --- a/qa/scenarios.md +++ b/qa/scenarios.md @@ -3,6 +3,6 @@ Canonical scenario source now lives in: - `qa/scenarios/index.md` -- `qa/scenarios/*.md` +- `qa/scenarios/<theme>/*.md` Each QA scenario has its own markdown file. 
diff --git a/qa/scenarios/instruction-followthrough-repo-contract.md b/qa/scenarios/agents/instruction-followthrough-repo-contract.md similarity index 100% rename from qa/scenarios/instruction-followthrough-repo-contract.md rename to qa/scenarios/agents/instruction-followthrough-repo-contract.md diff --git a/qa/scenarios/subagent-fanout-synthesis.md b/qa/scenarios/agents/subagent-fanout-synthesis.md similarity index 98% rename from qa/scenarios/subagent-fanout-synthesis.md rename to qa/scenarios/agents/subagent-fanout-synthesis.md index 4d142151620..60104f44de6 100644 --- a/qa/scenarios/subagent-fanout-synthesis.md +++ b/qa/scenarios/agents/subagent-fanout-synthesis.md @@ -23,7 +23,7 @@ execution: prompt: |- Subagent fanout synthesis check: delegate exactly two bounded subagents sequentially. Subagent 1: verify that `HEARTBEAT.md` exists and report `ok` if it does. - Subagent 2: verify that `repo/qa/scenarios/subagent-fanout-synthesis.md` exists and report `ok` if it does. + Subagent 2: verify that `repo/qa/scenarios/agents/subagent-fanout-synthesis.md` exists and report `ok` if it does. Wait for both subagents to finish. 
Then reply with exactly these two lines and nothing else: subagent-1: ok diff --git a/qa/scenarios/subagent-handoff.md b/qa/scenarios/agents/subagent-handoff.md similarity index 100% rename from qa/scenarios/subagent-handoff.md rename to qa/scenarios/agents/subagent-handoff.md diff --git a/qa/scenarios/channel-chat-baseline.md b/qa/scenarios/channels/channel-chat-baseline.md similarity index 100% rename from qa/scenarios/channel-chat-baseline.md rename to qa/scenarios/channels/channel-chat-baseline.md diff --git a/qa/scenarios/dm-chat-baseline.md b/qa/scenarios/channels/dm-chat-baseline.md similarity index 100% rename from qa/scenarios/dm-chat-baseline.md rename to qa/scenarios/channels/dm-chat-baseline.md diff --git a/qa/scenarios/reaction-edit-delete.md b/qa/scenarios/channels/reaction-edit-delete.md similarity index 100% rename from qa/scenarios/reaction-edit-delete.md rename to qa/scenarios/channels/reaction-edit-delete.md diff --git a/qa/scenarios/thread-follow-up.md b/qa/scenarios/channels/thread-follow-up.md similarity index 100% rename from qa/scenarios/thread-follow-up.md rename to qa/scenarios/channels/thread-follow-up.md diff --git a/qa/scenarios/character-vibes-c3po.md b/qa/scenarios/character/character-vibes-c3po.md similarity index 100% rename from qa/scenarios/character-vibes-c3po.md rename to qa/scenarios/character/character-vibes-c3po.md diff --git a/qa/scenarios/character-vibes-gollum.md b/qa/scenarios/character/character-vibes-gollum.md similarity index 100% rename from qa/scenarios/character-vibes-gollum.md rename to qa/scenarios/character/character-vibes-gollum.md diff --git a/qa/scenarios/config-apply-restart-wakeup.md b/qa/scenarios/config/config-apply-restart-wakeup.md similarity index 100% rename from qa/scenarios/config-apply-restart-wakeup.md rename to qa/scenarios/config/config-apply-restart-wakeup.md diff --git a/qa/scenarios/config-patch-hot-apply.md b/qa/scenarios/config/config-patch-hot-apply.md similarity index 100% rename from 
qa/scenarios/config-patch-hot-apply.md rename to qa/scenarios/config/config-patch-hot-apply.md diff --git a/qa/scenarios/config-restart-capability-flip.md b/qa/scenarios/config/config-restart-capability-flip.md similarity index 100% rename from qa/scenarios/config-restart-capability-flip.md rename to qa/scenarios/config/config-restart-capability-flip.md diff --git a/qa/scenarios/index.md b/qa/scenarios/index.md index 8386bf70a65..29ad2d1d6aa 100644 --- a/qa/scenarios/index.md +++ b/qa/scenarios/index.md @@ -4,12 +4,28 @@ Single source of truth for repo-backed QA suite bootstrap data. `qa-lab` should treat this directory as a generic markdown scenario pack: - `index.md` defines pack-level bootstrap data -- each `*.md` scenario defines one runnable test via `qa-scenario` + `qa-flow` -- scenario markdown may also define required plugins and gateway config patching +- each nested `*.md` scenario defines one runnable test via `qa-scenario` + `qa-flow` +- scenario markdown may also define category metadata, required plugins, lane filters, + and gateway config patching - kickoff mission - QA operator identity -- scenario files under `./` +- scenario files under one-level theme directories + +Theme directories: + +- `agents/` - agent behavior, instructions, and subagent flows +- `channels/` - DM, shared channel, thread, and message-action behavior +- `character/` - persona and style eval scenarios +- `config/` - config patch, apply, and restart behavior +- `media/` - image understanding and generation +- `memory/` - recall, ranking, active memory, and thread isolation +- `models/` - provider capabilities and model switching +- `plugins/` - plugin, skill, and MCP tool integration +- `runtime/` - turn recovery, compaction, approval, and inventory behavior +- `scheduling/` - cron and recurring work +- `ui/` - Control UI plus qa-channel flows +- `workspace/` - repo-reading and workspace artifact tasks ```yaml qa-pack version: 1 diff --git 
a/qa/scenarios/image-generation-roundtrip.md b/qa/scenarios/media/image-generation-roundtrip.md similarity index 100% rename from qa/scenarios/image-generation-roundtrip.md rename to qa/scenarios/media/image-generation-roundtrip.md diff --git a/qa/scenarios/image-understanding-attachment.md b/qa/scenarios/media/image-understanding-attachment.md similarity index 100% rename from qa/scenarios/image-understanding-attachment.md rename to qa/scenarios/media/image-understanding-attachment.md diff --git a/qa/scenarios/native-image-generation.md b/qa/scenarios/media/native-image-generation.md similarity index 100% rename from qa/scenarios/native-image-generation.md rename to qa/scenarios/media/native-image-generation.md diff --git a/qa/scenarios/active-memory-preprompt-recall.md b/qa/scenarios/memory/active-memory-preprompt-recall.md similarity index 100% rename from qa/scenarios/active-memory-preprompt-recall.md rename to qa/scenarios/memory/active-memory-preprompt-recall.md diff --git a/qa/scenarios/memory-dreaming-sweep.md b/qa/scenarios/memory/memory-dreaming-sweep.md similarity index 100% rename from qa/scenarios/memory-dreaming-sweep.md rename to qa/scenarios/memory/memory-dreaming-sweep.md diff --git a/qa/scenarios/memory-failure-fallback.md b/qa/scenarios/memory/memory-failure-fallback.md similarity index 100% rename from qa/scenarios/memory-failure-fallback.md rename to qa/scenarios/memory/memory-failure-fallback.md diff --git a/qa/scenarios/memory-recall.md b/qa/scenarios/memory/memory-recall.md similarity index 100% rename from qa/scenarios/memory-recall.md rename to qa/scenarios/memory/memory-recall.md diff --git a/qa/scenarios/memory-tools-channel-context.md b/qa/scenarios/memory/memory-tools-channel-context.md similarity index 100% rename from qa/scenarios/memory-tools-channel-context.md rename to qa/scenarios/memory/memory-tools-channel-context.md diff --git a/qa/scenarios/session-memory-ranking.md b/qa/scenarios/memory/session-memory-ranking.md similarity 
index 100% rename from qa/scenarios/session-memory-ranking.md rename to qa/scenarios/memory/session-memory-ranking.md diff --git a/qa/scenarios/thread-memory-isolation.md b/qa/scenarios/memory/thread-memory-isolation.md similarity index 100% rename from qa/scenarios/thread-memory-isolation.md rename to qa/scenarios/memory/thread-memory-isolation.md diff --git a/qa/scenarios/anthropic-opus-api-key-smoke.md b/qa/scenarios/models/anthropic-opus-api-key-smoke.md similarity index 100% rename from qa/scenarios/anthropic-opus-api-key-smoke.md rename to qa/scenarios/models/anthropic-opus-api-key-smoke.md diff --git a/qa/scenarios/anthropic-opus-setup-token-smoke.md b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md similarity index 100% rename from qa/scenarios/anthropic-opus-setup-token-smoke.md rename to qa/scenarios/models/anthropic-opus-setup-token-smoke.md diff --git a/qa/scenarios/claude-cli-provider-capabilities-subscription.md b/qa/scenarios/models/claude-cli-provider-capabilities-subscription.md similarity index 100% rename from qa/scenarios/claude-cli-provider-capabilities-subscription.md rename to qa/scenarios/models/claude-cli-provider-capabilities-subscription.md diff --git a/qa/scenarios/claude-cli-provider-capabilities.md b/qa/scenarios/models/claude-cli-provider-capabilities.md similarity index 100% rename from qa/scenarios/claude-cli-provider-capabilities.md rename to qa/scenarios/models/claude-cli-provider-capabilities.md diff --git a/qa/scenarios/codex-harness-no-meta-leak.md b/qa/scenarios/models/codex-harness-no-meta-leak.md similarity index 100% rename from qa/scenarios/codex-harness-no-meta-leak.md rename to qa/scenarios/models/codex-harness-no-meta-leak.md diff --git a/qa/scenarios/model-switch-follow-up.md b/qa/scenarios/models/model-switch-follow-up.md similarity index 100% rename from qa/scenarios/model-switch-follow-up.md rename to qa/scenarios/models/model-switch-follow-up.md diff --git a/qa/scenarios/model-switch-tool-continuity.md 
b/qa/scenarios/models/model-switch-tool-continuity.md similarity index 100% rename from qa/scenarios/model-switch-tool-continuity.md rename to qa/scenarios/models/model-switch-tool-continuity.md diff --git a/qa/scenarios/bundled-plugin-skill-runtime.md b/qa/scenarios/plugins/bundled-plugin-skill-runtime.md similarity index 100% rename from qa/scenarios/bundled-plugin-skill-runtime.md rename to qa/scenarios/plugins/bundled-plugin-skill-runtime.md diff --git a/qa/scenarios/mcp-plugin-tools-call.md b/qa/scenarios/plugins/mcp-plugin-tools-call.md similarity index 100% rename from qa/scenarios/mcp-plugin-tools-call.md rename to qa/scenarios/plugins/mcp-plugin-tools-call.md diff --git a/qa/scenarios/skill-install-hot-availability.md b/qa/scenarios/plugins/skill-install-hot-availability.md similarity index 100% rename from qa/scenarios/skill-install-hot-availability.md rename to qa/scenarios/plugins/skill-install-hot-availability.md diff --git a/qa/scenarios/skill-visibility-invocation.md b/qa/scenarios/plugins/skill-visibility-invocation.md similarity index 100% rename from qa/scenarios/skill-visibility-invocation.md rename to qa/scenarios/plugins/skill-visibility-invocation.md diff --git a/qa/scenarios/approval-turn-tool-followthrough.md b/qa/scenarios/runtime/approval-turn-tool-followthrough.md similarity index 100% rename from qa/scenarios/approval-turn-tool-followthrough.md rename to qa/scenarios/runtime/approval-turn-tool-followthrough.md diff --git a/qa/scenarios/compaction-retry-mutating-tool.md b/qa/scenarios/runtime/compaction-retry-mutating-tool.md similarity index 100% rename from qa/scenarios/compaction-retry-mutating-tool.md rename to qa/scenarios/runtime/compaction-retry-mutating-tool.md diff --git a/qa/scenarios/empty-response-recovery-replay-safe-read.md b/qa/scenarios/runtime/empty-response-recovery-replay-safe-read.md similarity index 100% rename from qa/scenarios/empty-response-recovery-replay-safe-read.md rename to 
qa/scenarios/runtime/empty-response-recovery-replay-safe-read.md diff --git a/qa/scenarios/empty-response-retry-budget-exhausted.md b/qa/scenarios/runtime/empty-response-retry-budget-exhausted.md similarity index 100% rename from qa/scenarios/empty-response-retry-budget-exhausted.md rename to qa/scenarios/runtime/empty-response-retry-budget-exhausted.md diff --git a/qa/scenarios/reasoning-only-no-auto-retry-after-write.md b/qa/scenarios/runtime/reasoning-only-no-auto-retry-after-write.md similarity index 100% rename from qa/scenarios/reasoning-only-no-auto-retry-after-write.md rename to qa/scenarios/runtime/reasoning-only-no-auto-retry-after-write.md diff --git a/qa/scenarios/reasoning-only-recovery-replay-safe-read.md b/qa/scenarios/runtime/reasoning-only-recovery-replay-safe-read.md similarity index 100% rename from qa/scenarios/reasoning-only-recovery-replay-safe-read.md rename to qa/scenarios/runtime/reasoning-only-recovery-replay-safe-read.md diff --git a/qa/scenarios/runtime-inventory-drift-check.md b/qa/scenarios/runtime/runtime-inventory-drift-check.md similarity index 100% rename from qa/scenarios/runtime-inventory-drift-check.md rename to qa/scenarios/runtime/runtime-inventory-drift-check.md diff --git a/qa/scenarios/cron-one-minute-ping.md b/qa/scenarios/scheduling/cron-one-minute-ping.md similarity index 100% rename from qa/scenarios/cron-one-minute-ping.md rename to qa/scenarios/scheduling/cron-one-minute-ping.md diff --git a/qa/scenarios/control-ui-qa-channel-image-roundtrip.md b/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.md similarity index 100% rename from qa/scenarios/control-ui-qa-channel-image-roundtrip.md rename to qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.md diff --git a/qa/scenarios/lobster-invaders-build.md b/qa/scenarios/workspace/lobster-invaders-build.md similarity index 100% rename from qa/scenarios/lobster-invaders-build.md rename to qa/scenarios/workspace/lobster-invaders-build.md diff --git 
a/qa/scenarios/medium-game-plan-codex-harness.md b/qa/scenarios/workspace/medium-game-plan-codex-harness.md similarity index 100% rename from qa/scenarios/medium-game-plan-codex-harness.md rename to qa/scenarios/workspace/medium-game-plan-codex-harness.md diff --git a/qa/scenarios/medium-game-plan-pi-harness.md b/qa/scenarios/workspace/medium-game-plan-pi-harness.md similarity index 100% rename from qa/scenarios/medium-game-plan-pi-harness.md rename to qa/scenarios/workspace/medium-game-plan-pi-harness.md diff --git a/qa/scenarios/source-docs-discovery-report.md b/qa/scenarios/workspace/source-docs-discovery-report.md similarity index 100% rename from qa/scenarios/source-docs-discovery-report.md rename to qa/scenarios/workspace/source-docs-discovery-report.md