From 450060d7a2bbd6999dbfe8ec4e063e5da34530e5 Mon Sep 17 00:00:00 2001 From: Dallin Romney Date: Mon, 15 Jun 2026 18:05:52 -0700 Subject: [PATCH] test(qa): expand smoke-ci and release categories and coverage (#93175) * test(qa): add smoke ci primary coverage evidence * test(qa): remove overstated primary coverage claims * test(qa): make release profile include smoke ci * test(qa): trim taxonomy formatting churn * test(qa): avoid hardcoded profile names in coverage test * test(qa): make release profile cover taxonomy * test(qa): type profile fixture all category flag * test(qa): include channel delivery in smoke ci profile --- extensions/qa-lab/src/coverage-report.test.ts | 54 +++++++++++-------- extensions/qa-lab/src/scorecard-taxonomy.ts | 14 ++++- qa/scenarios/channels/thread-follow-up.yaml | 1 + .../media/image-generation-roundtrip.yaml | 1 + .../runtime/docker-prometheus-smoke.yaml | 1 + .../scheduling/cron-one-minute-ping.yaml | 2 + ...control-ui-qa-channel-image-roundtrip.yaml | 1 + taxonomy.yaml | 18 ++----- 8 files changed, 53 insertions(+), 39 deletions(-) diff --git a/extensions/qa-lab/src/coverage-report.test.ts b/extensions/qa-lab/src/coverage-report.test.ts index f280704e590..4707e8ccb26 100644 --- a/extensions/qa-lab/src/coverage-report.test.ts +++ b/extensions/qa-lab/src/coverage-report.test.ts @@ -18,6 +18,7 @@ const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat"; function testMaturityTaxonomy(params?: { categoryId?: string; coverageIds?: readonly string[]; + includeAllCategories?: boolean; profileCategoryIds?: readonly string[]; }) { const categoryId = params?.categoryId ?? TEST_EXECUTABLE_CATEGORY_ID; @@ -31,12 +32,16 @@ function testMaturityTaxonomy(params?: { { id: "smoke-ci", description: "Test smoke profile.", + includeAllCategories: false, categoryIds: [], }, { id: "release", description: "Test release profile.", - categoryIds: [...(params?.profileCategoryIds ?? [categoryId])], + includeAllCategories: params?.includeAllCategories ?? false, + categoryIds: [ + ...(params?.includeAllCategories ? [] : (params?.profileCategoryIds ?? [categoryId])), + ], }, ], surfaces: [ @@ -115,7 +120,10 @@ describe("qa coverage report", () => { ]); expect(inventory.scorecardTaxonomy.profileCount).toBe(2); expect(inventory.scorecardTaxonomy.categoryCount).toBeGreaterThan(200); - expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBe(15); + expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeGreaterThan(0); + expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeLessThanOrEqual( + inventory.scorecardTaxonomy.categoryCount, + ); expect(inventory.scorecardTaxonomy.requiredFeatureCount).toBeGreaterThan(0); expect(inventory.scorecardTaxonomy.fulfilledFeatureCount).toBeGreaterThan(0); expect(inventory.scorecardTaxonomy.taxonomyFulfillmentPercent).toBeGreaterThan(0); @@ -124,30 +132,15 @@ describe("qa coverage report", () => { expect(inventory.scorecardTaxonomy.unknownCoverageIdCount).toBe(0); expect(inventory.scorecardTaxonomy.validationIssues.length).toBeGreaterThan(0); expect( - inventory.scorecardTaxonomy.validationIssues.every( + inventory.scorecardTaxonomy.validationIssues.some((issue) => + issue.code.endsWith("not-found"), + ), + ).toBe(false); + expect( + inventory.scorecardTaxonomy.validationIssues.some( (issue) => issue.code === "coverage-id-missing-primary-evidence", ), ).toBe(true); - expect( - inventory.scorecardTaxonomy.profiles - .find((profile) => profile.id === "release") - ?.categoryIds.toSorted(), - ).toEqual([ - "agent-runtime-and-provider-execution.agent-turn-execution", - "automation-cron-hooks-tasks-polling.cron-jobs", - "browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution", - "browser-control-ui-and-webchat.browser-ui", - "media-understanding-and-media-generation.media-generation", - "media-understanding-and-media-generation.media-understanding", - "openai-codex-provider-path.responses-and-tool-compatibility", - "plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins", - "security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards", - "security-auth-pairing-and-secrets.credential-and-secret-hygiene", - "session-memory-and-context-engine.diagnostics-maintenance-and-recovery", - "session-memory-and-context-engine.memory", - "session-memory-and-context-engine.token-management", - "telemetry-diagnostics-and-observability.telemetry-export", - ]); expect( inventory.scorecardTaxonomy.categories.find( (category) => category.id === TEST_BROWSER_CATEGORY_ID, @@ -349,6 +342,21 @@ describe("qa coverage report", () => { ); }); + it("resolves all-category profiles from taxonomy categories", () => { + const report = buildQaScorecardTaxonomyReport({ + taxonomy: testMaturityTaxonomy({ + includeAllCategories: true, + }), + repoRoot: process.cwd(), + scenarios: [], + }); + + expect(report.profiles.find((profile) => profile.id === "release")?.categoryIds).toStrictEqual([ + TEST_EXECUTABLE_CATEGORY_ID, + ]); + expect(report.requiredCategoryCount).toBe(1); + }); + it("reports profile categories missing primary coverage evidence", () => { const report = buildQaScorecardTaxonomyReport({ taxonomy: testMaturityTaxonomy(), diff --git a/extensions/qa-lab/src/scorecard-taxonomy.ts b/extensions/qa-lab/src/scorecard-taxonomy.ts index cf47f9a603a..7359c206cc9 100644 --- a/extensions/qa-lab/src/scorecard-taxonomy.ts +++ b/extensions/qa-lab/src/scorecard-taxonomy.ts @@ -25,6 +25,7 @@ const qaScorecardProfileSchema = z.object({ id: qaScorecardIdSchema, description: z.string().trim().min(1), evidenceMode: qaScorecardEvidenceModeSchema.optional(), + includeAllCategories: z.boolean().default(false), categoryIds: z.array(qaScorecardIdSchema).default([]), }); @@ -67,6 +68,14 @@ const qaMaturityTaxonomySchema = z } seenProfileIds.add(profile.id); + if (profile.includeAllCategories && profile.categoryIds.length > 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["profiles", profileIndex, "categoryIds"], + message: `profile ${profile.id} cannot set categoryIds when includeAllCategories is true`, + }); + } + const seenProfileCategoryIds = new Set(); for (const [categoryIndex, categoryId] of profile.categoryIds.entries()) { if (seenProfileCategoryIds.has(categoryId)) { @@ -466,7 +475,10 @@ export function buildQaScorecardTaxonomyReport(params: { const profiles = params.taxonomy?.profiles.map((profile) => { const validCategoryIds: string[] = []; - for (const categoryId of profile.categoryIds) { + const selectedCategoryIds = profile.includeAllCategories + ? [...maturityRefs.categories.keys()] + : profile.categoryIds; + for (const categoryId of selectedCategoryIds) { if (!maturityRefs.categories.has(categoryId)) { issues.push({ code: "profile-category-ref-not-found", diff --git a/qa/scenarios/channels/thread-follow-up.yaml b/qa/scenarios/channels/thread-follow-up.yaml index 17325da70fc..83a91d44fc5 100644 --- a/qa/scenarios/channels/thread-follow-up.yaml +++ b/qa/scenarios/channels/thread-follow-up.yaml @@ -6,6 +6,7 @@ scenario: coverage: primary: - channels.threads + - thread-parent-child-placement secondary: - channels.qa-channel objective: Verify the agent can keep follow-up work inside a thread and not leak context into the root channel. diff --git a/qa/scenarios/media/image-generation-roundtrip.yaml b/qa/scenarios/media/image-generation-roundtrip.yaml index 4ca517259f5..79b667fb983 100644 --- a/qa/scenarios/media/image-generation-roundtrip.yaml +++ b/qa/scenarios/media/image-generation-roundtrip.yaml @@ -6,6 +6,7 @@ scenario: coverage: primary: - media.image-generation + - generated-image-persistence-and-delivery secondary: - channels.qa-channel objective: Verify a generated image is saved as media, reattached on the next turn, and described correctly through the vision path. diff --git a/qa/scenarios/runtime/docker-prometheus-smoke.yaml b/qa/scenarios/runtime/docker-prometheus-smoke.yaml index 456ca13869a..6f990ca7fae 100644 --- a/qa/scenarios/runtime/docker-prometheus-smoke.yaml +++ b/qa/scenarios/runtime/docker-prometheus-smoke.yaml @@ -6,6 +6,7 @@ scenario: coverage: primary: - telemetry.prometheus + - gateway-authenticated-get-api-diagnostics-prometheus secondary: - harness.qa-lab - docker.e2e diff --git a/qa/scenarios/scheduling/cron-one-minute-ping.yaml b/qa/scenarios/scheduling/cron-one-minute-ping.yaml index 5cccc5f4d16..420fea9411f 100644 --- a/qa/scenarios/scheduling/cron-one-minute-ping.yaml +++ b/qa/scenarios/scheduling/cron-one-minute-ping.yaml @@ -6,6 +6,8 @@ scenario: coverage: primary: - scheduling.cron + - cron-rpcs + - chat-announce-delivery secondary: - channels.qa-channel objective: Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel. diff --git a/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.yaml b/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.yaml index b077a81fee3..59676a9e183 100644 --- a/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.yaml +++ b/qa/scenarios/ui/control-ui-qa-channel-image-roundtrip.yaml @@ -6,6 +6,7 @@ scenario: coverage: primary: - ui.control + - dashboard-open-auth-bootstrap secondary: - media.image-understanding - channels.qa-channel diff --git a/taxonomy.yaml b/taxonomy.yaml index f24b5e1ac1e..9e9038e1848 100644 --- a/taxonomy.yaml +++ b/taxonomy.yaml @@ -17,6 +17,8 @@ profiles: - security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards - telemetry-diagnostics-and-observability.telemetry-export - channel-framework.conversation-routing-and-delivery + - channel-framework.outbound-delivery-and-reply-pipeline + - channel-framework.group-thread-and-ambient-room-behavior - session-memory-and-context-engine.memory - session-memory-and-context-engine.diagnostics-maintenance-and-recovery - automation-cron-hooks-tasks-polling.cron-jobs @@ -29,21 +31,7 @@ profiles: description: Stable/LTS proof selector for live providers, live channels, package artifacts, upgrade paths, and platform proof where the claim depends on real upstreams or release artifacts. - categoryIds: - - agent-runtime-and-provider-execution.agent-turn-execution - - session-memory-and-context-engine.token-management - - browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution - - security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards - - telemetry-diagnostics-and-observability.telemetry-export - - openai-codex-provider-path.responses-and-tool-compatibility - - session-memory-and-context-engine.memory - - session-memory-and-context-engine.diagnostics-maintenance-and-recovery - - automation-cron-hooks-tasks-polling.cron-jobs - - plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins - - media-understanding-and-media-generation.media-understanding - - media-understanding-and-media-generation.media-generation - - browser-control-ui-and-webchat.browser-ui - - security-auth-pairing-and-secrets.credential-and-secret-hygiene + includeAllCategories: true levels: - id: planned code: M0