From 82fe6f50efc4b1cdcf191283829ecb42a513bcf8 Mon Sep 17 00:00:00 2001
From: Gustavo Madeira Santana <gumadeiras@gmail.com>
Date: Fri, 17 Apr 2026 11:02:43 -0400
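Subject: [PATCH] QA: organize scenarios by theme

Move the scenario markdown files from `qa/scenarios/*.md` into per-theme
directories (`qa/scenarios/<theme>/*.md`) and update the docs to match.
Scenario discovery in qa-lab now walks the scenario directory recursively
and rejects duplicate scenario IDs, and the scenario schema accepts optional
`category`, `capabilities`, `lane`, and `riskLevel` metadata.

Also prefer a bundled plugin source directory that ships a `cli-metadata`
entry when resolving QA plugin sources, with a unit test for the resolver
and a CLI integration test that runs `memory status` against staged CLI
metadata.
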
---
 docs/concepts/qa-e2e-automation.md            |  7 +-
 docs/help/testing.md                          |  2 +-
 docs/refactor/qa.md                           | 10 +--
 .../qa-lab/src/bundled-plugin-staging.ts      | 21 ++++--
 extensions/qa-lab/src/gateway-child.test.ts   | 37 ++++++++++
 .../qa-lab/src/scenario-catalog.test.ts       |  7 +-
 extensions/qa-lab/src/scenario-catalog.ts     | 54 +++++++++++---
 ...-runtime-agent-process.integration.test.ts | 74 +++++++++++++++++++
 qa/README.md                                  |  3 +-
 qa/scenarios.md                               |  2 +-
 ...instruction-followthrough-repo-contract.md |  0
 .../{ => agents}/subagent-fanout-synthesis.md |  2 +-
 qa/scenarios/{ => agents}/subagent-handoff.md |  0
 .../{ => channels}/channel-chat-baseline.md   |  0
 .../{ => channels}/dm-chat-baseline.md        |  0
 .../{ => channels}/reaction-edit-delete.md    |  0
 .../{ => channels}/thread-follow-up.md        |  0
 .../{ => character}/character-vibes-c3po.md   |  0
 .../{ => character}/character-vibes-gollum.md |  0
 .../config-apply-restart-wakeup.md            |  0
 .../{ => config}/config-patch-hot-apply.md    |  0
 .../config-restart-capability-flip.md         |  0
 qa/scenarios/index.md                         | 22 +++++-
 .../{ => media}/image-generation-roundtrip.md |  0
 .../image-understanding-attachment.md         |  0
 .../{ => media}/native-image-generation.md    |  0
 .../active-memory-preprompt-recall.md         |  0
 .../{ => memory}/memory-dreaming-sweep.md     |  0
 .../{ => memory}/memory-failure-fallback.md   |  0
 qa/scenarios/{ => memory}/memory-recall.md    |  0
 .../memory-tools-channel-context.md           |  0
 .../{ => memory}/session-memory-ranking.md    |  0
 .../{ => memory}/thread-memory-isolation.md   |  0
 .../anthropic-opus-api-key-smoke.md           |  0
 .../anthropic-opus-setup-token-smoke.md       |  0
 ...-cli-provider-capabilities-subscription.md |  0
 .../claude-cli-provider-capabilities.md       |  0
 .../codex-harness-no-meta-leak.md             |  0
 .../{ => models}/model-switch-follow-up.md    |  0
 .../model-switch-tool-continuity.md           |  0
 .../bundled-plugin-skill-runtime.md           |  0
 .../{ => plugins}/mcp-plugin-tools-call.md    |  0
 .../skill-install-hot-availability.md         |  0
 .../skill-visibility-invocation.md            |  0
 .../approval-turn-tool-followthrough.md       |  0
 .../compaction-retry-mutating-tool.md         |  0
 ...mpty-response-recovery-replay-safe-read.md |  0
 .../empty-response-retry-budget-exhausted.md  |  0
 ...easoning-only-no-auto-retry-after-write.md |  0
 ...easoning-only-recovery-replay-safe-read.md |  0
 .../runtime-inventory-drift-check.md          |  0
 .../{ => scheduling}/cron-one-minute-ping.md  |  0
 .../control-ui-qa-channel-image-roundtrip.md  |  0
 .../{ => workspace}/lobster-invaders-build.md |  0
 .../medium-game-plan-codex-harness.md         |  0
 .../medium-game-plan-pi-harness.md            |  0
 .../source-docs-discovery-report.md           |  0
 57 files changed, 209 insertions(+), 32 deletions(-)
 create mode 100644 extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts
 rename qa/scenarios/{ => agents}/instruction-followthrough-repo-contract.md (100%)
 rename qa/scenarios/{ => agents}/subagent-fanout-synthesis.md (98%)
 rename qa/scenarios/{ => agents}/subagent-handoff.md (100%)
 rename qa/scenarios/{ => channels}/channel-chat-baseline.md (100%)
 rename qa/scenarios/{ => channels}/dm-chat-baseline.md (100%)
 rename qa/scenarios/{ => channels}/reaction-edit-delete.md (100%)
 rename qa/scenarios/{ => channels}/thread-follow-up.md (100%)
 rename qa/scenarios/{ => character}/character-vibes-c3po.md (100%)
 rename qa/scenarios/{ => character}/character-vibes-gollum.md (100%)
 rename qa/scenarios/{ => config}/config-apply-restart-wakeup.md (100%)
 rename qa/scenarios/{ => config}/config-patch-hot-apply.md (100%)
 rename qa/scenarios/{ => config}/config-restart-capability-flip.md (100%)
 rename qa/scenarios/{ => media}/image-generation-roundtrip.md (100%)
 rename qa/scenarios/{ => media}/image-understanding-attachment.md (100%)
 rename qa/scenarios/{ => media}/native-image-generation.md (100%)
 rename qa/scenarios/{ => memory}/active-memory-preprompt-recall.md (100%)
 rename qa/scenarios/{ => memory}/memory-dreaming-sweep.md (100%)
 rename qa/scenarios/{ => memory}/memory-failure-fallback.md (100%)
 rename qa/scenarios/{ => memory}/memory-recall.md (100%)
 rename qa/scenarios/{ => memory}/memory-tools-channel-context.md (100%)
 rename qa/scenarios/{ => memory}/session-memory-ranking.md (100%)
 rename qa/scenarios/{ => memory}/thread-memory-isolation.md (100%)
 rename qa/scenarios/{ => models}/anthropic-opus-api-key-smoke.md (100%)
 rename qa/scenarios/{ => models}/anthropic-opus-setup-token-smoke.md (100%)
 rename qa/scenarios/{ => models}/claude-cli-provider-capabilities-subscription.md (100%)
 rename qa/scenarios/{ => models}/claude-cli-provider-capabilities.md (100%)
 rename qa/scenarios/{ => models}/codex-harness-no-meta-leak.md (100%)
 rename qa/scenarios/{ => models}/model-switch-follow-up.md (100%)
 rename qa/scenarios/{ => models}/model-switch-tool-continuity.md (100%)
 rename qa/scenarios/{ => plugins}/bundled-plugin-skill-runtime.md (100%)
 rename qa/scenarios/{ => plugins}/mcp-plugin-tools-call.md (100%)
 rename qa/scenarios/{ => plugins}/skill-install-hot-availability.md (100%)
 rename qa/scenarios/{ => plugins}/skill-visibility-invocation.md (100%)
 rename qa/scenarios/{ => runtime}/approval-turn-tool-followthrough.md (100%)
 rename qa/scenarios/{ => runtime}/compaction-retry-mutating-tool.md (100%)
 rename qa/scenarios/{ => runtime}/empty-response-recovery-replay-safe-read.md (100%)
 rename qa/scenarios/{ => runtime}/empty-response-retry-budget-exhausted.md (100%)
 rename qa/scenarios/{ => runtime}/reasoning-only-no-auto-retry-after-write.md (100%)
 rename qa/scenarios/{ => runtime}/reasoning-only-recovery-replay-safe-read.md (100%)
 rename qa/scenarios/{ => runtime}/runtime-inventory-drift-check.md (100%)
 rename qa/scenarios/{ => scheduling}/cron-one-minute-ping.md (100%)
 rename qa/scenarios/{ => ui}/control-ui-qa-channel-image-roundtrip.md (100%)
 rename qa/scenarios/{ => workspace}/lobster-invaders-build.md (100%)
 rename qa/scenarios/{ => workspace}/medium-game-plan-codex-harness.md (100%)
 rename qa/scenarios/{ => workspace}/medium-game-plan-pi-harness.md (100%)
 rename qa/scenarios/{ => workspace}/source-docs-discovery-report.md (100%)

diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md
index 94ad02848af..14953f3aa81 100644
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -120,7 +120,7 @@ can write back through the mounted workspace.
 Seed assets live in `qa/`:
 
 - `qa/scenarios/index.md`
-- `qa/scenarios/*.md`
+- `qa/scenarios/<theme>/*.md`
 
 These are intentionally in git so the QA plan is visible to both humans and the
 agent.
@@ -129,6 +129,7 @@ agent.
 the source of truth for one test run and should define:
 
 - scenario metadata
+- optional `category`, `capabilities`, `lane`, and `riskLevel` metadata
 - docs and code refs
 - optional plugin requirements
 - optional gateway config patch
@@ -139,6 +140,10 @@ and cross-cutting. For example, markdown scenarios can combine transport-side
 helpers with browser-side helpers that drive the embedded Control UI through the
 Gateway `browser.request` seam without adding a special-case runner.
 
+Scenario files should be grouped by product capability rather than by
+source-tree folder. Keep scenario IDs stable when files move; use `docsRefs`
+and `codeRefs` for implementation traceability.
+
 The baseline list should stay broad enough to cover:
 
 - DM and channel chat
diff --git a/docs/help/testing.md b/docs/help/testing.md
index 09b31f876b4..3aa61439670 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -213,7 +213,7 @@ The minimum adoption bar for a new channel is:
 4. Mount the runner as `openclaw qa <runner>` instead of registering a competing root command.
    Runner plugins should declare `qaRunners` in `openclaw.plugin.json` and export a matching `qaRunnerCliRegistrations` array from `runtime-api.ts`.
    Keep `runtime-api.ts` light; lazy CLI and runner execution should stay behind separate entrypoints.
-5. Author or adapt markdown scenarios under `qa/scenarios/`.
+5. Author or adapt markdown scenarios under the themed `qa/scenarios/<theme>/` directories.
 6. Use the generic scenario helpers for new scenarios.
 7. Keep existing compatibility aliases working unless the repo is doing an intentional migration.
 
diff --git a/docs/refactor/qa.md b/docs/refactor/qa.md
index 139eb967d30..e22a6d52fba 100644
--- a/docs/refactor/qa.md
+++ b/docs/refactor/qa.md
@@ -18,7 +18,7 @@ The desired end state is a generic QA harness that loads powerful scenario defin
 ## Current State
 
 Primary source of truth now lives in `qa/scenarios/index.md` plus one file per
-scenario under `qa/scenarios/*.md`.
+scenario under `qa/scenarios/<theme>/*.md`.
 
 Implemented:
 
@@ -26,7 +26,7 @@ Implemented:
   - canonical QA pack metadata
   - operator identity
   - kickoff mission
-- `qa/scenarios/*.md`
+- `qa/scenarios/<theme>/*.md`
   - one markdown file per scenario
   - scenario metadata
   - handler bindings
@@ -107,8 +107,8 @@ These categories matter because they drive DSL requirements. A flat list of prom
 
 ### Single source of truth
 
-Use `qa/scenarios/index.md` plus `qa/scenarios/*.md` as the authored source of
-truth.
+Use `qa/scenarios/index.md` plus `qa/scenarios/<theme>/*.md` as the authored
+source of truth.
 
 The pack should stay:
 
@@ -363,7 +363,7 @@ Generated compatibility:
 Done.
 
 - added `qa/scenarios/index.md`
-- split scenarios into `qa/scenarios/*.md`
+- split scenarios into `qa/scenarios/<theme>/*.md`
 - added parser for named markdown YAML pack content
 - validated with zod
 - switched consumers to the parsed pack
diff --git a/extensions/qa-lab/src/bundled-plugin-staging.ts b/extensions/qa-lab/src/bundled-plugin-staging.ts
index d26b8a5363e..27d33c82757 100644
--- a/extensions/qa-lab/src/bundled-plugin-staging.ts
+++ b/extensions/qa-lab/src/bundled-plugin-staging.ts
@@ -10,6 +10,12 @@ const QA_ALWAYS_STAGE_RUNTIME_PLUGIN_IDS = Object.freeze([
 ]);
 const QA_OPENAI_PLUGIN_ID = "openai";
 const QA_BUNDLED_PLUGIN_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
+const QA_CLI_METADATA_ENTRY_BASENAMES = Object.freeze([
+  "cli-metadata.ts",
+  "cli-metadata.js",
+  "cli-metadata.mjs",
+  "cli-metadata.cjs",
+]);
 
 function assertSafeQaBundledPluginId(pluginId: string) {
   if (!QA_BUNDLED_PLUGIN_ID_PATTERN.test(pluginId)) {
@@ -69,12 +75,17 @@ export function resolveQaBundledPluginSourceDir(params: { repoRoot: string; plug
     path.join(params.repoRoot, "dist-runtime", "extensions", params.pluginId),
     path.join(params.repoRoot, "extensions", params.pluginId),
   ];
-  for (const candidate of candidates) {
-    if (existsSync(candidate)) {
-      return candidate;
-    }
+  const existingCandidates = candidates.filter((candidate) => existsSync(candidate));
+  if (existingCandidates.length === 0) {
+    return null;
   }
-  return null;
+  const cliMetadataCandidate = existingCandidates.find((candidate) =>
+    QA_CLI_METADATA_ENTRY_BASENAMES.some((basename) => existsSync(path.join(candidate, basename))),
+  );
+  if (cliMetadataCandidate) {
+    return cliMetadataCandidate;
+  }
+  return existingCandidates[0] ?? null;
 }
 
 function resolveQaBundledPluginScanRoots(repoRoot: string) {
diff --git a/extensions/qa-lab/src/gateway-child.test.ts b/extensions/qa-lab/src/gateway-child.test.ts
index 74008bda948..820f60c2774 100644
--- a/extensions/qa-lab/src/gateway-child.test.ts
+++ b/extensions/qa-lab/src/gateway-child.test.ts
@@ -714,6 +714,43 @@ describe("qa bundled plugin dir", () => {
     ).toBe(path.join(repoRoot, "extensions", "qa-channel"));
   });
 
+  it("uses a source bundled plugin when the built copy is missing CLI metadata", async () => {
+    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-cli-metadata-root-"));
+    cleanups.push(async () => {
+      await rm(repoRoot, { recursive: true, force: true });
+    });
+    await mkdir(path.join(repoRoot, "dist", "extensions", "memory-core"), { recursive: true });
+    await writeFile(
+      path.join(repoRoot, "dist", "extensions", "memory-core", "package.json"),
+      "{}",
+      "utf8",
+    );
+    await writeFile(
+      path.join(repoRoot, "dist", "extensions", "memory-core", "openclaw.plugin.json"),
+      JSON.stringify({ id: "memory-core", kind: "memory" }),
+      "utf8",
+    );
+    await mkdir(path.join(repoRoot, "extensions", "memory-core"), { recursive: true });
+    await writeFile(path.join(repoRoot, "extensions", "memory-core", "package.json"), "{}", "utf8");
+    await writeFile(
+      path.join(repoRoot, "extensions", "memory-core", "openclaw.plugin.json"),
+      JSON.stringify({ id: "memory-core", kind: "memory" }),
+      "utf8",
+    );
+    await writeFile(
+      path.join(repoRoot, "extensions", "memory-core", "cli-metadata.ts"),
+      "export default { id: 'memory-core' };\n",
+      "utf8",
+    );
+
+    expect(
+      __testing.resolveQaBundledPluginSourceDir({
+        repoRoot,
+        pluginId: "memory-core",
+      }),
+    ).toBe(path.join(repoRoot, "extensions", "memory-core"));
+  });
+
   it("creates a scoped bundled plugin tree for allowed plugins plus always-allowed runtime facades", async () => {
     const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-scope-"));
     cleanups.push(async () => {
diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts
index db630b9a143..dbebaa1182d 100644
--- a/extensions/qa-lab/src/scenario-catalog.test.ts
+++ b/extensions/qa-lab/src/scenario-catalog.test.ts
@@ -17,6 +17,9 @@ describe("qa scenario catalog", () => {
     expect(pack.agent.identityMarkdown).toContain("Dev C-3PO");
     expect(pack.kickoffTask).toContain("Lobster Invaders");
     expect(listQaScenarioMarkdownPaths().length).toBe(pack.scenarios.length);
+    expect(listQaScenarioMarkdownPaths()).toContain(
+      "qa/scenarios/media/image-generation-roundtrip.md",
+    );
     expect(pack.scenarios.some((scenario) => scenario.id === "image-generation-roundtrip")).toBe(
       true,
     );
@@ -112,7 +115,7 @@ describe("qa scenario catalog", () => {
       (candidate) => candidate.id === "codex-harness-no-meta-leak",
     );
 
-    expect(scenario?.sourcePath).toBe("qa/scenarios/codex-harness-no-meta-leak.md");
+    expect(scenario?.sourcePath).toBe("qa/scenarios/models/codex-harness-no-meta-leak.md");
     expect(scenario?.execution.flow?.steps.map((step) => step.name)).toContain(
       "keeps codex coordination chatter out of the visible reply",
     );
@@ -135,7 +138,7 @@ describe("qa scenario catalog", () => {
           }
         | undefined;
 
-      expect(scenario.sourcePath).toBe(`qa/scenarios/${scenarioId}.md`);
+      expect(scenario.sourcePath).toBe(`qa/scenarios/runtime/${scenarioId}.md`);
       expect(config?.requiredProvider).toBe("mock-openai");
       expect(config?.prompt).toContain("check");
       expect(scenario.execution.flow?.steps.length).toBeGreaterThan(0);
diff --git a/extensions/qa-lab/src/scenario-catalog.ts b/extensions/qa-lab/src/scenario-catalog.ts
index 82671905024..64dee666683 100644
--- a/extensions/qa-lab/src/scenario-catalog.ts
+++ b/extensions/qa-lab/src/scenario-catalog.ts
@@ -137,6 +137,10 @@ const qaSeedScenarioSchema = z.object({
   id: z.string().trim().min(1),
   title: z.string().trim().min(1),
   surface: z.string().trim().min(1),
+  category: z.string().trim().min(1).optional(),
+  capabilities: z.array(z.string().trim().min(1)).optional(),
+  lane: z.record(z.string(), z.union([z.boolean(), z.string()])).optional(),
+  riskLevel: z.string().trim().min(1).optional(),
   objective: z.string().trim().min(1),
   successCriteria: z.array(z.string().trim().min(1)).min(1),
   plugins: z.array(z.string().trim().min(1)).optional(),
@@ -225,14 +229,6 @@ function readTextFile(relativePath: string): string {
   return fs.readFileSync(resolved, "utf8");
 }
 
-function readDirEntries(relativePath: string): string[] {
-  const resolved = resolveRepoPath(relativePath, "directory");
-  if (!resolved) {
-    return [];
-  }
-  return fs.readdirSync(resolved);
-}
-
 function extractQaPackYaml(content: string) {
   const match = content.match(QA_PACK_FENCE_RE);
   if (!match?.[1]) {
@@ -324,6 +320,13 @@ export function readQaScenarioPack(): QaScenarioPack {
       } satisfies QaSeedScenarioWithSource;
     })(),
   );
+  const seenScenarioIds = new Set<string>();
+  for (const scenario of scenarios) {
+    if (seenScenarioIds.has(scenario.id)) {
+      throw new Error(`duplicate qa scenario id: ${scenario.id}`);
+    }
+    seenScenarioIds.add(scenario.id);
+  }
   return {
     ...parsedPack,
     scenarios,
@@ -331,10 +334,37 @@ export function readQaScenarioPack(): QaScenarioPack {
 }
 
 export function listQaScenarioMarkdownPaths(): string[] {
-  return readDirEntries(QA_SCENARIO_DIR_PATH)
-    .filter((entry) => entry.endsWith(".md") && entry !== "index.md")
-    .map((entry) => `${QA_SCENARIO_DIR_PATH}/${entry}`)
-    .toSorted();
+  const resolved = resolveRepoPath(QA_SCENARIO_DIR_PATH, "directory");
+  if (!resolved) {
+    return [];
+  }
+  return listQaScenarioMarkdownPathsInDirectory(resolved, QA_SCENARIO_DIR_PATH).toSorted();
+}
+
+function listQaScenarioMarkdownPathsInDirectory(
+  absoluteDir: string,
+  relativeDir: string,
+): string[] {
+  const paths: string[] = [];
+  const entries = fs
+    .readdirSync(absoluteDir, { withFileTypes: true })
+    .toSorted((left, right) => left.name.localeCompare(right.name));
+  for (const entry of entries) {
+    if (entry.name.startsWith(".")) {
+      continue;
+    }
+    const relativePath = `${relativeDir}/${entry.name}`;
+    if (entry.isDirectory()) {
+      paths.push(
+        ...listQaScenarioMarkdownPathsInDirectory(path.join(absoluteDir, entry.name), relativePath),
+      );
+      continue;
+    }
+    if (entry.isFile() && entry.name.endsWith(".md") && entry.name !== "index.md") {
+      paths.push(relativePath);
+    }
+  }
+  return paths;
 }
 
 export function readQaScenarioOverviewMarkdown(): string {
diff --git a/extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts b/extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts
new file mode 100644
index 00000000000..f1a3d04c579
--- /dev/null
+++ b/extensions/qa-lab/src/suite-runtime-agent-process.integration.test.ts
@@ -0,0 +1,74 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+import { runQaCli } from "./suite-runtime-agent-process.js";
+
+const cleanups: Array<() => Promise<void>> = [];
+
+afterEach(async () => {
+  while (cleanups.length > 0) {
+    await cleanups.pop()?.();
+  }
+});
+
+describe("qa suite runtime CLI integration", () => {
+  it("runs the plugin-owned memory status command with staged CLI metadata", async () => {
+    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-cli-memory-repo-"));
+    const tempRoot = await mkdtemp(path.join(os.tmpdir(), "qa-cli-memory-runtime-"));
+    cleanups.push(async () => {
+      await rm(repoRoot, { recursive: true, force: true });
+      await rm(tempRoot, { recursive: true, force: true });
+    });
+    const distDir = path.join(repoRoot, "dist");
+    const bundledPluginsDir = path.join(tempRoot, "dist", "extensions");
+    await mkdir(path.join(distDir), { recursive: true });
+    await mkdir(path.join(bundledPluginsDir, "memory-core"), { recursive: true });
+    await writeFile(
+      path.join(bundledPluginsDir, "memory-core", "cli-metadata.js"),
+      "export default { id: 'memory-core' };\n",
+      "utf8",
+    );
+    await writeFile(
+      path.join(distDir, "index.js"),
+      [
+        "import fs from 'node:fs';",
+        "import path from 'node:path';",
+        "const [command, subcommand] = process.argv.slice(2);",
+        "const metadataPath = path.join(process.env.OPENCLAW_BUNDLED_PLUGINS_DIR ?? '', 'memory-core', 'cli-metadata.js');",
+        "if (command === 'memory' && subcommand === 'status' && fs.existsSync(metadataPath)) {",
+        "  console.log(JSON.stringify({ command, subcommand, status: 'ok' }));",
+        "  process.exit(0);",
+        "}",
+        "console.error(\"error: unknown command 'memory'\");",
+        "process.exit(1);",
+        "",
+      ].join("\n"),
+      "utf8",
+    );
+
+    await expect(
+      runQaCli(
+        {
+          repoRoot,
+          gateway: {
+            tempRoot,
+            runtimeEnv: {
+              ...process.env,
+              OPENCLAW_BUNDLED_PLUGINS_DIR: bundledPluginsDir,
+            },
+          },
+          primaryModel: "openai/gpt-5.4",
+          alternateModel: "openai/gpt-5.4",
+          providerMode: "mock-openai",
+        } as never,
+        ["memory", "status", "--json"],
+        { json: true },
+      ),
+    ).resolves.toEqual({
+      command: "memory",
+      subcommand: "status",
+      status: "ok",
+    });
+  });
+});
diff --git a/qa/README.md b/qa/README.md
index e8040c21ea0..98447b0c65c 100644
--- a/qa/README.md
+++ b/qa/README.md
@@ -4,7 +4,8 @@ Seed QA assets for the private `qa-lab` extension.
 
 Files:
 
-- `scenarios.md` - canonical QA scenario pack, kickoff mission, and operator identity.
+- `scenarios/index.md` - canonical QA scenario pack, kickoff mission, and operator identity.
+- `scenarios/<theme>/*.md` - one runnable scenario per markdown file.
 - `frontier-harness-plan.md` - big-model bakeoff and tuning loop for harness work.
 - `convex-credential-broker/` - standalone Convex v1 lease broker for pooled live credentials.
 
diff --git a/qa/scenarios.md b/qa/scenarios.md
index 8ebab06230c..352e4cff8da 100644
--- a/qa/scenarios.md
+++ b/qa/scenarios.md
@@ -3,6 +3,6 @@
 Canonical scenario source now lives in:
 
 - `qa/scenarios/index.md`
-- `qa/scenarios/*.md`
+- `qa/scenarios/<theme>/*.md`
 
 Each QA scenario has its own markdown file.
diff --git a/qa/scenarios/instruction-followthrough-repo-contract.md b/qa/scenarios/agents/instruction-followthrough-repo-contract.md
similarity index 100%
rename from qa/scenarios/instruction-followthrough-repo-contract.md
rename to qa/scenarios/agents/instruction-followthrough-repo-contract.md
diff --git a/qa/scenarios/subagent-fanout-synthesis.md b/qa/scenarios/agents/subagent-fanout-synthesis.md
similarity index 98%
rename from qa/scenarios/subagent-fanout-synthesis.md
rename to qa/scenarios/agents/subagent-fanout-synthesis.md
index 4d142151620..60104f44de6 100644
--- a/qa/scenarios/subagent-fanout-synthesis.md
+++ b/qa/scenarios/agents/subagent-fanout-synthesis.md
@@ -23,7 +23,7 @@ execution:
     prompt: |-
       Subagent fanout synthesis check: delegate exactly two bounded subagents sequentially.
       Subagent 1: verify that `HEARTBEAT.md` exists and report `ok` if it does.
-      Subagent 2: verify that `repo/qa/scenarios/subagent-fanout-synthesis.md` exists and report `ok` if it does.
+      Subagent 2: verify that `repo/qa/scenarios/agents/subagent-fanout-synthesis.md` exists and report `ok` if it does.
       Wait for both subagents to finish.
       Then reply with exactly these two lines and nothing else:
       subagent-1: ok
diff --git a/qa/scenarios/subagent-handoff.md b/qa/scenarios/agents/subagent-handoff.md
similarity index 100%
rename from qa/scenarios/subagent-handoff.md
rename to qa/scenarios/agents/subagent-handoff.md
diff --git a/qa/scenarios/channel-chat-baseline.md b/qa/scenarios/channels/channel-chat-baseline.md
similarity index 100%
rename from qa/scenarios/channel-chat-baseline.md
rename to qa/scenarios/channels/channel-chat-baseline.md
diff --git a/qa/scenarios/dm-chat-baseline.md b/qa/scenarios/channels/dm-chat-baseline.md
similarity index 100%
rename from qa/scenarios/dm-chat-baseline.md
rename to qa/scenarios/channels/dm-chat-baseline.md
diff --git a/qa/scenarios/reaction-edit-delete.md b/qa/scenarios/channels/reaction-edit-delete.md
similarity index 100%
rename from qa/scenarios/reaction-edit-delete.md
rename to qa/scenarios/channels/reaction-edit-delete.md
diff --git a/qa/scenarios/thread-follow-up.md b/qa/scenarios/channels/thread-follow-up.md
similarity index 100%
rename from qa/scenarios/thread-follow-up.md
rename to qa/scenarios/channels/thread-follow-up.md
diff --git a/qa/scenarios/character-vibes-c3po.md b/qa/scenarios/character/character-vibes-c3po.md
similarity index 100%
rename from qa/scenarios/character-vibes-c3po.md
rename to qa/scenarios/character/character-vibes-c3po.md
diff --git a/qa/scenarios/character-vibes-gollum.md b/qa/scenarios/character/character-vibes-gollum.md
similarity index 100%
rename from qa/scenarios/character-vibes-gollum.md
rename to qa/scenarios/character/character-vibes-gollum.md
diff --git a/qa/scenarios/config-apply-restart-wakeup.md b/qa/scenarios/config/config-apply-restart-wakeup.md
similarity index 100%
rename from qa/scenarios/config-apply-restart-wakeup.md
rename to qa/scenarios/config/config-apply-restart-wakeup.md
diff --git a/qa/scenarios/config-patch-hot-apply.md b/qa/scenarios/config/config-patch-hot-apply.md
similarity index 100%
rename from qa/scenarios/config-patch-hot-apply.md
rename to qa/scenarios/config/config-patch-hot-apply.md
diff --git a/qa/scenarios/config-restart-capability-flip.md b/qa/scenarios/config/config-restart-capability-flip.md
similarity index 100%
rename from qa/scenarios/config-restart-capability-flip.md
rename to qa/scenarios/config/config-restart-capability-flip.md
diff --git a/qa/scenarios/index.md b/qa/scenarios/index.md
index 8386bf70a65..29ad2d1d6aa 100644
--- a/qa/scenarios/index.md
+++ b/qa/scenarios/index.md
@@ -4,12 +4,28 @@ Single source of truth for repo-backed QA suite bootstrap data.
 `qa-lab` should treat this directory as a generic markdown scenario pack:
 
 - `index.md` defines pack-level bootstrap data
-- each `*.md` scenario defines one runnable test via `qa-scenario` + `qa-flow`
-- scenario markdown may also define required plugins and gateway config patching
+- each nested `*.md` scenario defines one runnable test via `qa-scenario` + `qa-flow`
+- scenario markdown may also define category metadata, required plugins, lane filters,
+  and gateway config patching
 
 - kickoff mission
 - QA operator identity
-- scenario files under `./`
+- scenario files stored one level deep in theme directories
+
+Theme directories:
+
+- `agents/` - agent behavior, instructions, and subagent flows
+- `channels/` - DM, shared channel, thread, and message-action behavior
+- `character/` - persona and style eval scenarios
+- `config/` - config patch, apply, and restart behavior
+- `media/` - image understanding and generation
+- `memory/` - recall, ranking, active memory, and thread isolation
+- `models/` - provider capabilities and model switching
+- `plugins/` - plugin, skill, and MCP tool integration
+- `runtime/` - turn recovery, compaction, approval, and inventory behavior
+- `scheduling/` - cron and recurring work
+- `ui/` - Control UI plus qa-channel flows
+- `workspace/` - repo-reading and workspace artifact tasks
 
 ```yaml qa-pack
 version: 1
diff --git a/qa/scenarios/image-generation-roundtrip.md b/qa/scenarios/media/image-generation-roundtrip.md
similarity index 100%
rename from qa/scenarios/image-generation-roundtrip.md
rename to qa/scenarios/media/image-generation-roundtrip.md
diff --git a/qa/scenarios/image-understanding-attachment.md b/qa/scenarios/media/image-understanding-attachment.md
similarity index 100%
rename from qa/scenarios/image-understanding-attachment.md
rename to qa/scenarios/media/image-understanding-attachment.md
diff --git a/qa/scenarios/native-image-generation.md b/qa/scenarios/media/native-image-generation.md
similarity index 100%
rename from qa/scenarios/native-image-generation.md
rename to qa/scenarios/media/native-image-generation.md
diff --git a/qa/scenarios/active-memory-preprompt-recall.md b/qa/scenarios/memory/active-memory-preprompt-recall.md
similarity index 100%
rename from qa/scenarios/active-memory-preprompt-recall.md
rename to qa/scenarios/memory/active-memory-preprompt-recall.md
diff --git a/qa/scenarios/memory-dreaming-sweep.md b/qa/scenarios/memory/memory-dreaming-sweep.md
similarity index 100%
rename from qa/scenarios/memory-dreaming-sweep.md
rename to qa/scenarios/memory/memory-dreaming-sweep.md
diff --git a/qa/scenarios/memory-failure-fallback.md b/qa/scenarios/memory/memory-failure-fallback.md
similarity index 100%
rename from qa/scenarios/memory-failure-fallback.md
rename to qa/scenarios/memory/memory-failure-fallback.md
diff --git a/qa/scenarios/memory-recall.md b/qa/scenarios/memory/memory-recall.md
similarity index 100%
rename from qa/scenarios/memory-recall.md
rename to qa/scenarios/memory/memory-recall.md
diff --git a/qa/scenarios/memory-tools-channel-context.md b/qa/scenarios/memory/memory-tools-channel-context.md
similarity index 100%
rename from qa/scenarios/memory-tools-channel-context.md
rename to qa/scenarios/memory/memory-tools-channel-context.md
diff --git a/qa/scenarios/session-memory-ranking.md b/qa/scenarios/memory/session-memory-ranking.md
similarity index 100%
rename from qa/scenarios/session-memory-ranking.md
rename to qa/scenarios/memory/session-memory-ranking.md
diff --git a/qa/scenarios/thread-memory-isolation.md b/qa/scenarios/memory/thread-memory-isolation.md
similarity index 100%
rename from qa/scenarios/thread-memory-isolation.md
rename to qa/scenarios/memory/thread-memory-isolation.md
diff --git a/qa/scenarios/anthropic-opus-api-key-smoke.md b/qa/scenarios/models/anthropic-opus-api-key-smoke.md
similarity index 100%
rename from qa/scenarios/anthropic-opus-api-key-smoke.md
rename to qa/scenarios/models/anthropic-opus-api-key-smoke.md
diff --git a/qa/scenarios/anthropic-opus-setup-token-smoke.md b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md
similarity index 100%
rename from qa/scenarios/anthropic-opus-setup-token-smoke.md
rename to qa/scenarios/models/anthropic-opus-setup-token-smoke.md
diff --git a/qa/scenarios/claude-cli-provider-capabilities-subscription.md b/qa/scenarios/models/claude-cli-provider-capabilities-subscription.md
similarity index 100%
rename from qa/scenarios/claude-cli-provider-capabilities-subscription.md
rename to qa/scenarios/models/claude-cli-provider-capabilities-subscription.md
diff --git a/qa/scenarios/claude-cli-provider-capabilities.md b/qa/scenarios/models/claude-cli-provider-capabilities.md
similarity index 100%
rename from qa/scenarios/claude-cli-provider-capabilities.md
rename to qa/scenarios/models/claude-cli-provider-capabilities.md
diff --git a/qa/scenarios/codex-harness-no-meta-leak.md b/qa/scenarios/models/codex-harness-no-meta-leak.md
similarity index 100%
rename from qa/scenarios/codex-harness-no-meta-leak.md
rename to qa/scenarios/models/codex-harness-no-meta-leak.md
diff --git a/qa/scenarios/model-switch-follow-up.md b/qa/scenarios/models/model-switch-follow-up.md
similarity index 100%
rename from qa/scenarios/model-switch-follow-up.md
rename to qa/scenarios/models/model-switch-follow-up.md
diff --git a/qa/scenarios/model-switch-tool-continuity.md b/qa/scenarios/models/model-switch-tool-continuity.md
similarity index 100%
rename from qa/scenarios/model-switch-tool-continuity.md
rename to qa/scenarios/models/model-switch-tool-continuity.md
diff --git a/qa/scenarios/bundled-plugin-skill-runtime.md b/qa/scenarios/plugins/bundled-plugin-skill-runtime.md
similarity index 100%
rename from qa/scenarios/bundled-plugin-skill-runtime.md
rename to qa/scenarios/plugins/bundled-plugin-skill-runtime.md
diff --git a/qa/scenarios/mcp-plugin-tools-call.md b/qa/scenarios/plugins/mcp-plugin-tools-call.md
similarity index 100%
rename from qa/scenarios/mcp-plugin-tools-call.md
rename to qa/scenarios/plugins/mcp-plugin-tools-call.md
diff --git a/qa/scenarios/skill-install-hot-availability.md b/qa/scenarios/plugins/skill-install-hot-availability.md
similarity index 100%
rename from qa/scenarios/skill-install-hot-availability.md
rename to qa/scenarios/plugins/skill-install-hot-availability.md
diff --git a/qa/scenarios/skill-visibility-invocation.md b/qa/scenarios/plugins/skill-visibility-invocation.md
similarity index 100%
rename from qa/scenarios/skill-visibility-invocation.md
rename to qa/scenarios/plugins/skill-visibility-invocation.md
diff --git a/qa/scenarios/approval-turn-tool-followthrough.md b/qa/scenarios/runtime/approval-turn-tool-followthrough.md
similarity index 100%
rename from qa/scenarios/approval-turn-tool-followthrough.md
rename to qa/scenarios/runtime/approval-turn-tool-followthrough.md
diff --git a/qa/scenarios/compaction-retry-mutating-tool.md b/qa/scenarios/runtime/compaction-retry-mutating-tool.md
similarity index 100%
rename from qa/scenarios/compaction-retry-mutating-tool.md
rename to qa/scenarios/runtime/compaction-retry-mutating-tool.md
diff --git a/qa/scenarios/empty-response-recovery-replay-safe-read.md b/qa/scenarios/runtime/empty-response-recovery-replay-safe-read.md
similarity index 100%
rename from qa/scenarios/empty-response-recovery-replay-safe-read.md
rename to qa/scenarios/runtime/empty-response-recovery-replay-safe-read.md
diff --git a/qa/scenarios/empty-response-retry-budget-exhausted.md b/qa/scenarios/runtime/empty-response-retry-budget-exhausted.md
similarity index 100%
rename from qa/scenarios/empty-response-retry-budget-exhausted.md
rename to qa/scenarios/runtime/empty-response-retry-budget-exhausted.md
diff --git a/qa/scenarios/reasoning-only-no-auto-retry-after-write.md b/qa/scenarios/runtime/reasoning-only-no-auto-retry-after-write.md
similarity index 100%
rename from qa/scenarios/reasoning-only-no-auto-retry-after-write.md
rename to qa/scenarios/runtime/reasoning-only-no-auto-retry-after-write.md
diff --git a/qa/scenarios/reasoning-only-recovery-replay-safe-read.md b/qa/scenarios/runtime/reasoning-only-recovery-replay-safe-read.md
similarity index 100%
rename from qa/scenarios/reasoning-only-recovery-replay-safe-read.md
rename to qa/scenarios/runtime/reasoning-only-recovery-replay-safe-read.md
diff --git a/qa/scenarios/runtime-inventory-drift-check.md b/qa/scenarios/runtime/runtime-inventory-drift-check.md
similarity index 100%
rename from qa/scenarios/runtime-inventory-drift-check.md
rename to qa/scenarios/runtime/runtime-inventory-drift-check.md
diff --git a/qa/scenarios/cron-one-minute-ping.md b/qa/scenarios/scheduling/cron-one-minute-ping.md
similarity index 100%
rename from qa/scenarios/cron-one-minute-ping.md
rename to qa/scenarios/scheduling/cron-one-minute-ping.md
diff --git a/qa/scenarios/control-ui-qa-channel-image-roundtrip.md b/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.md
similarity index 100%
rename from qa/scenarios/control-ui-qa-channel-image-roundtrip.md
rename to qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.md
diff --git a/qa/scenarios/lobster-invaders-build.md b/qa/scenarios/workspace/lobster-invaders-build.md
similarity index 100%
rename from qa/scenarios/lobster-invaders-build.md
rename to qa/scenarios/workspace/lobster-invaders-build.md
diff --git a/qa/scenarios/medium-game-plan-codex-harness.md b/qa/scenarios/workspace/medium-game-plan-codex-harness.md
similarity index 100%
rename from qa/scenarios/medium-game-plan-codex-harness.md
rename to qa/scenarios/workspace/medium-game-plan-codex-harness.md
diff --git a/qa/scenarios/medium-game-plan-pi-harness.md b/qa/scenarios/workspace/medium-game-plan-pi-harness.md
similarity index 100%
rename from qa/scenarios/medium-game-plan-pi-harness.md
rename to qa/scenarios/workspace/medium-game-plan-pi-harness.md
diff --git a/qa/scenarios/source-docs-discovery-report.md b/qa/scenarios/workspace/source-docs-discovery-report.md
similarity index 100%
rename from qa/scenarios/source-docs-discovery-report.md
rename to qa/scenarios/workspace/source-docs-discovery-report.md