test(qa): expand smoke-ci and release categories and coverage (#93175)

* test(qa): add smoke ci primary coverage evidence
* test(qa): remove overstated primary coverage claims
* test(qa): make release profile include smoke ci
* test(qa): trim taxonomy formatting churn
* test(qa): avoid hardcoded profile names in coverage test
* test(qa): make release profile cover taxonomy
* test(qa): type profile fixture all category flag
* test(qa): include channel delivery in smoke ci profile
2026-06-22 12:58:09 +00:00 · 2026-06-15 18:05:52 -07:00
parent 6bc57ca73a
commit 450060d7a2
8 changed files with 53 additions and 39 deletions
--- a/extensions/qa-lab/src/coverage-report.test.ts
+++ b/extensions/qa-lab/src/coverage-report.test.ts
@@ -18,6 +18,7 @@ const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat";
 function testMaturityTaxonomy(params?: {
  categoryId?: string;
  coverageIds?: readonly string[];
+  includeAllCategories?: boolean;
  profileCategoryIds?: readonly string[];
 }) {
  const categoryId = params?.categoryId ?? TEST_EXECUTABLE_CATEGORY_ID;
@@ -31,12 +32,16 @@ function testMaturityTaxonomy(params?: {
      {
        id: "smoke-ci",
        description: "Test smoke profile.",
+        includeAllCategories: false,
        categoryIds: [],
      },
      {
        id: "release",
        description: "Test release profile.",
-        categoryIds: [...(params?.profileCategoryIds ?? [categoryId])],
+        includeAllCategories: params?.includeAllCategories ?? false,
+        categoryIds: [
+          ...(params?.includeAllCategories ? [] : (params?.profileCategoryIds ?? [categoryId])),
+        ],
      },
    ],
    surfaces: [
@@ -115,7 +120,10 @@ describe("qa coverage report", () => {
    ]);
    expect(inventory.scorecardTaxonomy.profileCount).toBe(2);
    expect(inventory.scorecardTaxonomy.categoryCount).toBeGreaterThan(200);
-    expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBe(15);
+    expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeGreaterThan(0);
+    expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeLessThanOrEqual(
+      inventory.scorecardTaxonomy.categoryCount,
+    );
    expect(inventory.scorecardTaxonomy.requiredFeatureCount).toBeGreaterThan(0);
    expect(inventory.scorecardTaxonomy.fulfilledFeatureCount).toBeGreaterThan(0);
    expect(inventory.scorecardTaxonomy.taxonomyFulfillmentPercent).toBeGreaterThan(0);
@@ -124,30 +132,15 @@ describe("qa coverage report", () => {
    expect(inventory.scorecardTaxonomy.unknownCoverageIdCount).toBe(0);
    expect(inventory.scorecardTaxonomy.validationIssues.length).toBeGreaterThan(0);
    expect(
-      inventory.scorecardTaxonomy.validationIssues.every(
+      inventory.scorecardTaxonomy.validationIssues.some((issue) =>
+        issue.code.endsWith("not-found"),
+      ),
+    ).toBe(false);
+    expect(
+      inventory.scorecardTaxonomy.validationIssues.some(
        (issue) => issue.code === "coverage-id-missing-primary-evidence",
      ),
    ).toBe(true);
-    expect(
-      inventory.scorecardTaxonomy.profiles
-        .find((profile) => profile.id === "release")
-        ?.categoryIds.toSorted(),
-    ).toEqual([
-      "agent-runtime-and-provider-execution.agent-turn-execution",
-      "automation-cron-hooks-tasks-polling.cron-jobs",
-      "browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution",
-      "browser-control-ui-and-webchat.browser-ui",
-      "media-understanding-and-media-generation.media-generation",
-      "media-understanding-and-media-generation.media-understanding",
-      "openai-codex-provider-path.responses-and-tool-compatibility",
-      "plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins",
-      "security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards",
-      "security-auth-pairing-and-secrets.credential-and-secret-hygiene",
-      "session-memory-and-context-engine.diagnostics-maintenance-and-recovery",
-      "session-memory-and-context-engine.memory",
-      "session-memory-and-context-engine.token-management",
-      "telemetry-diagnostics-and-observability.telemetry-export",
-    ]);
    expect(
      inventory.scorecardTaxonomy.categories.find(
        (category) => category.id === TEST_BROWSER_CATEGORY_ID,
@@ -349,6 +342,21 @@ describe("qa coverage report", () => {
    );
  });

+  it("resolves all-category profiles from taxonomy categories", () => {
+    const report = buildQaScorecardTaxonomyReport({
+      taxonomy: testMaturityTaxonomy({
+        includeAllCategories: true,
+      }),
+      repoRoot: process.cwd(),
+      scenarios: [],
+    });
+
+    expect(report.profiles.find((profile) => profile.id === "release")?.categoryIds).toStrictEqual([
+      TEST_EXECUTABLE_CATEGORY_ID,
+    ]);
+    expect(report.requiredCategoryCount).toBe(1);
+  });
+
  it("reports profile categories missing primary coverage evidence", () => {
    const report = buildQaScorecardTaxonomyReport({
      taxonomy: testMaturityTaxonomy(),
--- a/extensions/qa-lab/src/scorecard-taxonomy.ts
+++ b/extensions/qa-lab/src/scorecard-taxonomy.ts
@@ -25,6 +25,7 @@ const qaScorecardProfileSchema = z.object({
  id: qaScorecardIdSchema,
  description: z.string().trim().min(1),
  evidenceMode: qaScorecardEvidenceModeSchema.optional(),
+  includeAllCategories: z.boolean().default(false),
  categoryIds: z.array(qaScorecardIdSchema).default([]),
 });

@@ -67,6 +68,14 @@ const qaMaturityTaxonomySchema = z
      }
      seenProfileIds.add(profile.id);

+      if (profile.includeAllCategories && profile.categoryIds.length > 0) {
+        ctx.addIssue({
+          code: z.ZodIssueCode.custom,
+          path: ["profiles", profileIndex, "categoryIds"],
+          message: `profile ${profile.id} cannot set categoryIds when includeAllCategories is true`,
+        });
+      }
+
      const seenProfileCategoryIds = new Set<string>();
      for (const [categoryIndex, categoryId] of profile.categoryIds.entries()) {
        if (seenProfileCategoryIds.has(categoryId)) {
@@ -466,7 +475,10 @@ export function buildQaScorecardTaxonomyReport(params: {
  const profiles =
    params.taxonomy?.profiles.map((profile) => {
      const validCategoryIds: string[] = [];
-      for (const categoryId of profile.categoryIds) {
+      const selectedCategoryIds = profile.includeAllCategories
+        ? [...maturityRefs.categories.keys()]
+        : profile.categoryIds;
+      for (const categoryId of selectedCategoryIds) {
        if (!maturityRefs.categories.has(categoryId)) {
          issues.push({
            code: "profile-category-ref-not-found",
--- a/qa/scenarios/channels/thread-follow-up.yaml
+++ b/qa/scenarios/channels/thread-follow-up.yaml
@@ -6,6 +6,7 @@ scenario:
  coverage:
    primary:
      - channels.threads
+      - thread-parent-child-placement
    secondary:
      - channels.qa-channel
  objective: Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.
--- a/qa/scenarios/media/image-generation-roundtrip.yaml
+++ b/qa/scenarios/media/image-generation-roundtrip.yaml
@@ -6,6 +6,7 @@ scenario:
  coverage:
    primary:
      - media.image-generation
+      - generated-image-persistence-and-delivery
    secondary:
      - channels.qa-channel
  objective: Verify a generated image is saved as media, reattached on the next turn, and described correctly through the vision path.
--- a/qa/scenarios/runtime/docker-prometheus-smoke.yaml
+++ b/qa/scenarios/runtime/docker-prometheus-smoke.yaml
@@ -6,6 +6,7 @@ scenario:
  coverage:
    primary:
      - telemetry.prometheus
+      - gateway-authenticated-get-api-diagnostics-prometheus
    secondary:
      - harness.qa-lab
      - docker.e2e
--- a/qa/scenarios/scheduling/cron-one-minute-ping.yaml
+++ b/qa/scenarios/scheduling/cron-one-minute-ping.yaml
@@ -6,6 +6,8 @@ scenario:
  coverage:
    primary:
      - scheduling.cron
+      - cron-rpcs
+      - chat-announce-delivery
    secondary:
      - channels.qa-channel
  objective: Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.
--- a/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.yaml
+++ b/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.yaml
@@ -6,6 +6,7 @@ scenario:
  coverage:
    primary:
      - ui.control
+      - dashboard-open-auth-bootstrap
    secondary:
      - media.image-understanding
      - channels.qa-channel
--- a/taxonomy.yaml
+++ b/taxonomy.yaml
@@ -17,6 +17,8 @@ profiles:
      - security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards
      - telemetry-diagnostics-and-observability.telemetry-export
      - channel-framework.conversation-routing-and-delivery
+      - channel-framework.outbound-delivery-and-reply-pipeline
+      - channel-framework.group-thread-and-ambient-room-behavior
      - session-memory-and-context-engine.memory
      - session-memory-and-context-engine.diagnostics-maintenance-and-recovery
      - automation-cron-hooks-tasks-polling.cron-jobs
@@ -29,21 +31,7 @@ profiles:
    description: Stable/LTS proof selector for live providers, live channels, package artifacts,
      upgrade paths, and platform proof where the claim depends on real upstreams or release
      artifacts.
-    categoryIds:
-      - agent-runtime-and-provider-execution.agent-turn-execution
-      - session-memory-and-context-engine.token-management
-      - browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution
-      - security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards
-      - telemetry-diagnostics-and-observability.telemetry-export
-      - openai-codex-provider-path.responses-and-tool-compatibility
-      - session-memory-and-context-engine.memory
-      - session-memory-and-context-engine.diagnostics-maintenance-and-recovery
-      - automation-cron-hooks-tasks-polling.cron-jobs
-      - plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins
-      - media-understanding-and-media-generation.media-understanding
-      - media-understanding-and-media-generation.media-generation
-      - browser-control-ui-and-webchat.browser-ui
-      - security-auth-pairing-and-secrets.credential-and-secret-hygiene
+    includeAllCategories: true
 levels:
  - id: planned
    code: M0