diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index 0590b0d1e40..e5a2f2cef7b 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -473,11 +473,11 @@ describe("qa cli runtime", () => { expect(evidence.scorecard).not.toHaveProperty("kind"); expect(evidence.scorecard).not.toHaveProperty("taxonomy"); expect(evidence.scorecard).not.toHaveProperty("profile"); - expect(evidence.scorecard?.features?.fulfilled).toBe(1); + expect(evidence.scorecard?.features?.fulfilled).toBe(0); expect(evidence.scorecard?.categoryReports?.[0]).toMatchObject({ id: "agent-runtime-and-provider-execution.agent-turn-execution", features: { - fulfilled: 1, + fulfilled: 0, }, }); expect(evidence.entries?.[0]).not.toHaveProperty("execution"); diff --git a/extensions/qa-lab/src/coverage-report.test.ts b/extensions/qa-lab/src/coverage-report.test.ts index 3b56fadd6fb..42757a619ae 100644 --- a/extensions/qa-lab/src/coverage-report.test.ts +++ b/extensions/qa-lab/src/coverage-report.test.ts @@ -18,6 +18,7 @@ const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat"; function testMaturityTaxonomy(params?: { categoryId?: string; coverageIds?: readonly string[]; + featureCoverageIds?: readonly (readonly string[])[]; includeAllCategories?: boolean; profileCategoryIds?: readonly string[]; }) { @@ -52,9 +53,14 @@ function testMaturityTaxonomy(params?: { { id: categoryLocalId, name: "Test category", - features: (params?.coverageIds ?? [TEST_EXECUTABLE_COVERAGE_ID]).map((coverageId) => ({ - name: coverageId, - coverageIds: [coverageId], + features: ( + params?.featureCoverageIds ?? + (params?.coverageIds ?? [TEST_EXECUTABLE_COVERAGE_ID]).map((coverageId) => [ + coverageId, + ]) + ).map((coverageIds) => ({ + name: coverageIds.join(" + "), + coverageIds: [...coverageIds], })), }, ], @@ -330,6 +336,33 @@ describe("qa coverage report", () => { ]); }); + it("requires every coverage ID on a taxonomy feature to have primary evidence", () => { + const report = buildQaScorecardTaxonomyReport({ + taxonomy: testMaturityTaxonomy({ + featureCoverageIds: [[TEST_EXECUTABLE_COVERAGE_ID, TEST_WEBCHAT_COVERAGE_ID]], + }), + repoRoot: process.cwd(), + scenarios: [ + scenarioWithCoverage({ + primary: [TEST_EXECUTABLE_COVERAGE_ID], + secondary: [TEST_WEBCHAT_COVERAGE_ID], + sourcePath: "qa/scenarios/channels/dm-chat-baseline.yaml", + }), + ], + }); + + expect(report.fulfilledCategoryCount).toBe(0); + expect(report.fulfilledFeatureCount).toBe(0); + expect(report.categories[0]?.coverageStatus).toBe("partial"); + expect(report.categories[0]?.fulfilledCoverageIds).toStrictEqual([TEST_EXECUTABLE_COVERAGE_ID]); + expect(report.validationIssues).toContainEqual( + expect.objectContaining({ + code: "coverage-id-missing-primary-evidence", + ref: TEST_WEBCHAT_COVERAGE_ID, + }), + ); + }); + it("uses script producer evidence as coverage fulfillment", () => { const report = buildQaScorecardTaxonomyReport({ taxonomy: testMaturityTaxonomy({ diff --git a/extensions/qa-lab/src/scorecard-evidence.ts b/extensions/qa-lab/src/scorecard-evidence.ts index 5bed4688065..965ae992082 100644 --- a/extensions/qa-lab/src/scorecard-evidence.ts +++ b/extensions/qa-lab/src/scorecard-evidence.ts @@ -79,20 +79,23 @@ export function buildQaProfileScorecardEvidence(params: { category, featureCoverageByCategoryId: params.featureCoverageByCategoryId, }); - const fulfilledFeatureCount = featureCoverageIds.filter((coverageIds) => - coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)), + const fulfilledFeatureCount = featureCoverageIds.filter( + (coverageIds) => + coverageIds.length > 0 && + coverageIds.every((coverageId) => primaryCoverageIds.has(coverageId)), ).length; const secondaryOnlyFeatureCount = featureCoverageIds.filter( (coverageIds) => - !coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)) && - coverageIds.some((coverageId) => secondaryCoverageIds.has(coverageId)), + coverageIds.some((coverageId) => !primaryCoverageIds.has(coverageId)) && + coverageIds.some( + (coverageId) => + !primaryCoverageIds.has(coverageId) && secondaryCoverageIds.has(coverageId), + ), ).length; const missingCoverageIds = uniqueSortedStrings( - featureCoverageIds - .filter( - (coverageIds) => !coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)), - ) - .flat(), + featureCoverageIds.flatMap((coverageIds) => + coverageIds.filter((coverageId) => !primaryCoverageIds.has(coverageId)), + ), ); const missingFeatureCount = featureCoverageIds.length - fulfilledFeatureCount; return { diff --git a/extensions/qa-lab/src/scorecard-taxonomy.ts b/extensions/qa-lab/src/scorecard-taxonomy.ts index b2cfde1ba2a..e726ad11b4a 100644 --- a/extensions/qa-lab/src/scorecard-taxonomy.ts +++ b/extensions/qa-lab/src/scorecard-taxonomy.ts @@ -364,24 +364,21 @@ function pushMissingPrimaryIssues(params: { coverageIdsWithSecondaryEvidence: ReadonlySet; }) { for (const feature of params.category.features) { - if ( - feature.coverageIds.some((coverageId) => - params.coverageIdsWithPrimaryEvidence.has(coverageId), - ) - ) { - continue; + for (const coverageId of feature.coverageIds) { + if (params.coverageIdsWithPrimaryEvidence.has(coverageId)) { + continue; + } + const reason = params.coverageIdsWithSecondaryEvidence.has(coverageId) + ? "only has secondary evidence" + : "has no primary evidence"; + params.issues.push({ + code: "coverage-id-missing-primary-evidence", + severity: "warning", + categoryId: params.category.id, + ref: coverageId, + message: `${params.category.id} feature ${feature.name} coverage ID ${coverageId} ${reason}`, + }); } - const hasSecondaryEvidence = feature.coverageIds.some((coverageId) => - params.coverageIdsWithSecondaryEvidence.has(coverageId), - ); - const reason = hasSecondaryEvidence ? "only has secondary evidence" : "has no primary evidence"; - params.issues.push({ - code: "coverage-id-missing-primary-evidence", - severity: "warning", - categoryId: params.category.id, - ref: feature.coverageIds.join(", ") || feature.name, - message: `${params.category.id} feature ${feature.name} ${reason}`, - }); } } @@ -582,8 +579,10 @@ export function buildQaScorecardTaxonomyReport(params: { } } - const fulfilledFeatureCountForCategory = category.features.filter((feature) => - feature.coverageIds.some((coverageId) => fulfilledCoverageIds.has(coverageId)), + const fulfilledFeatureCountForCategory = category.features.filter( + (feature) => + feature.coverageIds.length > 0 && + feature.coverageIds.every((coverageId) => fulfilledCoverageIds.has(coverageId)), ).length; if (required) { requiredFeatureCount += category.features.length; diff --git a/qa/scenarios/runtime/gateway-smoke.yaml b/qa/scenarios/runtime/gateway-smoke.yaml index 44c778cd8b6..c791b5fc323 100644 --- a/qa/scenarios/runtime/gateway-smoke.yaml +++ b/qa/scenarios/runtime/gateway-smoke.yaml @@ -5,10 +5,10 @@ scenario: surface: runtime coverage: primary: - - gateway.smoke + - websocket-transport secondary: - - gateway.health - - gateway.protocol + - health-apis + - hello-ok-snapshot objective: Exercise gateway health and WebSocket smoke assertions through QA Lab evidence. successCriteria: - Gateway health probe succeeds against a reachable local endpoint. diff --git a/taxonomy.yaml b/taxonomy.yaml index 95124908ce0..9e9038e1848 100644 --- a/taxonomy.yaml +++ b/taxonomy.yaml @@ -164,7 +164,7 @@ surfaces: id: gateway-rpc-apis-and-events features: - name: Health APIs - coverageIds: [gateway.health, health-apis] + coverageIds: [health-apis] description: '`health` and `status` RPCs.' - name: Identity and presence APIs coverageIds: [identity-and-presence-apis] @@ -504,7 +504,7 @@ surfaces: id: websocket-connection features: - name: WebSocket transport - coverageIds: [gateway.smoke, websocket-transport] + coverageIds: [websocket-transport] description: WebSocket transport with JSON text frames. - name: Connect challenge coverageIds: [connect-challenge] @@ -516,7 +516,7 @@ surfaces: coverageIds: [protocol-version-negotiation] description: Protocol range negotiation (`minProtocol`/`maxProtocol`). - name: hello-ok snapshot - coverageIds: [gateway.protocol, hello-ok-snapshot] + coverageIds: [hello-ok-snapshot] description: 'Required `hello-ok` payload structure: server identity, negotiated auth, feature discovery, snapshot, and policy limits.' - name: Startup retry coverageIds: [startup-retry]