fix: require all taxonomy coverage ids (#94296)

This commit is contained in:
Dallin Romney
2026-06-17 16:38:14 -07:00
committed by GitHub
parent 2364c7ebc0
commit e17d111990
6 changed files with 74 additions and 39 deletions

View File

@@ -473,11 +473,11 @@ describe("qa cli runtime", () => {
expect(evidence.scorecard).not.toHaveProperty("kind");
expect(evidence.scorecard).not.toHaveProperty("taxonomy");
expect(evidence.scorecard).not.toHaveProperty("profile");
expect(evidence.scorecard?.features?.fulfilled).toBe(1);
expect(evidence.scorecard?.features?.fulfilled).toBe(0);
expect(evidence.scorecard?.categoryReports?.[0]).toMatchObject({
id: "agent-runtime-and-provider-execution.agent-turn-execution",
features: {
fulfilled: 1,
fulfilled: 0,
},
});
expect(evidence.entries?.[0]).not.toHaveProperty("execution");

View File

@@ -18,6 +18,7 @@ const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat";
function testMaturityTaxonomy(params?: {
categoryId?: string;
coverageIds?: readonly string[];
featureCoverageIds?: readonly (readonly string[])[];
includeAllCategories?: boolean;
profileCategoryIds?: readonly string[];
}) {
@@ -52,9 +53,14 @@ function testMaturityTaxonomy(params?: {
{
id: categoryLocalId,
name: "Test category",
features: (params?.coverageIds ?? [TEST_EXECUTABLE_COVERAGE_ID]).map((coverageId) => ({
name: coverageId,
coverageIds: [coverageId],
features: (
params?.featureCoverageIds ??
(params?.coverageIds ?? [TEST_EXECUTABLE_COVERAGE_ID]).map((coverageId) => [
coverageId,
])
).map((coverageIds) => ({
name: coverageIds.join(" + "),
coverageIds: [...coverageIds],
})),
},
],
@@ -330,6 +336,33 @@ describe("qa coverage report", () => {
]);
});
// Regression test: a taxonomy feature that lists several coverage IDs is only
// counted as fulfilled when each of those IDs has primary evidence.
it("requires every coverage ID on a taxonomy feature to have primary evidence", () => {
const report = buildQaScorecardTaxonomyReport({
taxonomy: testMaturityTaxonomy({
// A single feature that carries two coverage IDs.
featureCoverageIds: [[TEST_EXECUTABLE_COVERAGE_ID, TEST_WEBCHAT_COVERAGE_ID]],
}),
repoRoot: process.cwd(),
scenarios: [
scenarioWithCoverage({
// Only the first ID is declared as primary; the second is secondary only.
primary: [TEST_EXECUTABLE_COVERAGE_ID],
secondary: [TEST_WEBCHAT_COVERAGE_ID],
sourcePath: "qa/scenarios/channels/dm-chat-baseline.yaml",
}),
],
});
// Neither the category nor the feature is expected to count as fulfilled.
expect(report.fulfilledCategoryCount).toBe(0);
expect(report.fulfilledFeatureCount).toBe(0);
// The category is expected to be "partial", with only the primary-backed ID fulfilled.
expect(report.categories[0]?.coverageStatus).toBe("partial");
expect(report.categories[0]?.fulfilledCoverageIds).toStrictEqual([TEST_EXECUTABLE_COVERAGE_ID]);
// The issue is expected to reference the individual coverage ID lacking primary evidence.
expect(report.validationIssues).toContainEqual(
expect.objectContaining({
code: "coverage-id-missing-primary-evidence",
ref: TEST_WEBCHAT_COVERAGE_ID,
}),
);
});
it("uses script producer evidence as coverage fulfillment", () => {
const report = buildQaScorecardTaxonomyReport({
taxonomy: testMaturityTaxonomy({

View File

@@ -79,20 +79,23 @@ export function buildQaProfileScorecardEvidence(params: {
category,
featureCoverageByCategoryId: params.featureCoverageByCategoryId,
});
const fulfilledFeatureCount = featureCoverageIds.filter((coverageIds) =>
coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)),
const fulfilledFeatureCount = featureCoverageIds.filter(
(coverageIds) =>
coverageIds.length > 0 &&
coverageIds.every((coverageId) => primaryCoverageIds.has(coverageId)),
).length;
const secondaryOnlyFeatureCount = featureCoverageIds.filter(
(coverageIds) =>
!coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)) &&
coverageIds.some((coverageId) => secondaryCoverageIds.has(coverageId)),
coverageIds.some((coverageId) => !primaryCoverageIds.has(coverageId)) &&
coverageIds.some(
(coverageId) =>
!primaryCoverageIds.has(coverageId) && secondaryCoverageIds.has(coverageId),
),
).length;
const missingCoverageIds = uniqueSortedStrings(
featureCoverageIds
.filter(
(coverageIds) => !coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)),
)
.flat(),
featureCoverageIds.flatMap((coverageIds) =>
coverageIds.filter((coverageId) => !primaryCoverageIds.has(coverageId)),
),
);
const missingFeatureCount = featureCoverageIds.length - fulfilledFeatureCount;
return {

View File

@@ -364,24 +364,21 @@ function pushMissingPrimaryIssues(params: {
coverageIdsWithSecondaryEvidence: ReadonlySet<string>;
}) {
for (const feature of params.category.features) {
if (
feature.coverageIds.some((coverageId) =>
params.coverageIdsWithPrimaryEvidence.has(coverageId),
)
) {
continue;
for (const coverageId of feature.coverageIds) {
if (params.coverageIdsWithPrimaryEvidence.has(coverageId)) {
continue;
}
const reason = params.coverageIdsWithSecondaryEvidence.has(coverageId)
? "only has secondary evidence"
: "has no primary evidence";
params.issues.push({
code: "coverage-id-missing-primary-evidence",
severity: "warning",
categoryId: params.category.id,
ref: coverageId,
message: `${params.category.id} feature ${feature.name} coverage ID ${coverageId} ${reason}`,
});
}
const hasSecondaryEvidence = feature.coverageIds.some((coverageId) =>
params.coverageIdsWithSecondaryEvidence.has(coverageId),
);
const reason = hasSecondaryEvidence ? "only has secondary evidence" : "has no primary evidence";
params.issues.push({
code: "coverage-id-missing-primary-evidence",
severity: "warning",
categoryId: params.category.id,
ref: feature.coverageIds.join(", ") || feature.name,
message: `${params.category.id} feature ${feature.name} ${reason}`,
});
}
}
@@ -582,8 +579,10 @@ export function buildQaScorecardTaxonomyReport(params: {
}
}
const fulfilledFeatureCountForCategory = category.features.filter((feature) =>
feature.coverageIds.some((coverageId) => fulfilledCoverageIds.has(coverageId)),
const fulfilledFeatureCountForCategory = category.features.filter(
(feature) =>
feature.coverageIds.length > 0 &&
feature.coverageIds.every((coverageId) => fulfilledCoverageIds.has(coverageId)),
).length;
if (required) {
requiredFeatureCount += category.features.length;

View File

@@ -5,10 +5,10 @@ scenario:
surface: runtime
coverage:
primary:
- gateway.smoke
- websocket-transport
secondary:
- gateway.health
- gateway.protocol
- health-apis
- hello-ok-snapshot
objective: Exercise gateway health and WebSocket smoke assertions through QA Lab evidence.
successCriteria:
- Gateway health probe succeeds against a reachable local endpoint.

View File

@@ -164,7 +164,7 @@ surfaces:
id: gateway-rpc-apis-and-events
features:
- name: Health APIs
coverageIds: [gateway.health, health-apis]
coverageIds: [health-apis]
description: '`health` and `status` RPCs.'
- name: Identity and presence APIs
coverageIds: [identity-and-presence-apis]
@@ -504,7 +504,7 @@ surfaces:
id: websocket-connection
features:
- name: WebSocket transport
coverageIds: [gateway.smoke, websocket-transport]
coverageIds: [websocket-transport]
description: WebSocket transport with JSON text frames.
- name: Connect challenge
coverageIds: [connect-challenge]
@@ -516,7 +516,7 @@ surfaces:
coverageIds: [protocol-version-negotiation]
description: Protocol range negotiation (`minProtocol`/`maxProtocol`).
- name: hello-ok snapshot
coverageIds: [gateway.protocol, hello-ok-snapshot]
coverageIds: [hello-ok-snapshot]
description: 'Required `hello-ok` payload structure: server identity, negotiated auth, feature discovery, snapshot, and policy limits.'
- name: Startup retry
coverageIds: [startup-retry]