mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-22 06:38:13 +00:00
fix: require all taxonomy coverage ids (#94296)
This commit is contained in:
@@ -473,11 +473,11 @@ describe("qa cli runtime", () => {
|
||||
expect(evidence.scorecard).not.toHaveProperty("kind");
|
||||
expect(evidence.scorecard).not.toHaveProperty("taxonomy");
|
||||
expect(evidence.scorecard).not.toHaveProperty("profile");
|
||||
expect(evidence.scorecard?.features?.fulfilled).toBe(1);
|
||||
expect(evidence.scorecard?.features?.fulfilled).toBe(0);
|
||||
expect(evidence.scorecard?.categoryReports?.[0]).toMatchObject({
|
||||
id: "agent-runtime-and-provider-execution.agent-turn-execution",
|
||||
features: {
|
||||
fulfilled: 1,
|
||||
fulfilled: 0,
|
||||
},
|
||||
});
|
||||
expect(evidence.entries?.[0]).not.toHaveProperty("execution");
|
||||
|
||||
@@ -18,6 +18,7 @@ const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat";
|
||||
function testMaturityTaxonomy(params?: {
|
||||
categoryId?: string;
|
||||
coverageIds?: readonly string[];
|
||||
featureCoverageIds?: readonly (readonly string[])[];
|
||||
includeAllCategories?: boolean;
|
||||
profileCategoryIds?: readonly string[];
|
||||
}) {
|
||||
@@ -52,9 +53,14 @@ function testMaturityTaxonomy(params?: {
|
||||
{
|
||||
id: categoryLocalId,
|
||||
name: "Test category",
|
||||
features: (params?.coverageIds ?? [TEST_EXECUTABLE_COVERAGE_ID]).map((coverageId) => ({
|
||||
name: coverageId,
|
||||
coverageIds: [coverageId],
|
||||
features: (
|
||||
params?.featureCoverageIds ??
|
||||
(params?.coverageIds ?? [TEST_EXECUTABLE_COVERAGE_ID]).map((coverageId) => [
|
||||
coverageId,
|
||||
])
|
||||
).map((coverageIds) => ({
|
||||
name: coverageIds.join(" + "),
|
||||
coverageIds: [...coverageIds],
|
||||
})),
|
||||
},
|
||||
],
|
||||
@@ -330,6 +336,33 @@ describe("qa coverage report", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("requires every coverage ID on a taxonomy feature to have primary evidence", () => {
|
||||
const report = buildQaScorecardTaxonomyReport({
|
||||
taxonomy: testMaturityTaxonomy({
|
||||
featureCoverageIds: [[TEST_EXECUTABLE_COVERAGE_ID, TEST_WEBCHAT_COVERAGE_ID]],
|
||||
}),
|
||||
repoRoot: process.cwd(),
|
||||
scenarios: [
|
||||
scenarioWithCoverage({
|
||||
primary: [TEST_EXECUTABLE_COVERAGE_ID],
|
||||
secondary: [TEST_WEBCHAT_COVERAGE_ID],
|
||||
sourcePath: "qa/scenarios/channels/dm-chat-baseline.yaml",
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
expect(report.fulfilledCategoryCount).toBe(0);
|
||||
expect(report.fulfilledFeatureCount).toBe(0);
|
||||
expect(report.categories[0]?.coverageStatus).toBe("partial");
|
||||
expect(report.categories[0]?.fulfilledCoverageIds).toStrictEqual([TEST_EXECUTABLE_COVERAGE_ID]);
|
||||
expect(report.validationIssues).toContainEqual(
|
||||
expect.objectContaining({
|
||||
code: "coverage-id-missing-primary-evidence",
|
||||
ref: TEST_WEBCHAT_COVERAGE_ID,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("uses script producer evidence as coverage fulfillment", () => {
|
||||
const report = buildQaScorecardTaxonomyReport({
|
||||
taxonomy: testMaturityTaxonomy({
|
||||
|
||||
@@ -79,20 +79,23 @@ export function buildQaProfileScorecardEvidence(params: {
|
||||
category,
|
||||
featureCoverageByCategoryId: params.featureCoverageByCategoryId,
|
||||
});
|
||||
const fulfilledFeatureCount = featureCoverageIds.filter((coverageIds) =>
|
||||
coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)),
|
||||
const fulfilledFeatureCount = featureCoverageIds.filter(
|
||||
(coverageIds) =>
|
||||
coverageIds.length > 0 &&
|
||||
coverageIds.every((coverageId) => primaryCoverageIds.has(coverageId)),
|
||||
).length;
|
||||
const secondaryOnlyFeatureCount = featureCoverageIds.filter(
|
||||
(coverageIds) =>
|
||||
!coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)) &&
|
||||
coverageIds.some((coverageId) => secondaryCoverageIds.has(coverageId)),
|
||||
coverageIds.some((coverageId) => !primaryCoverageIds.has(coverageId)) &&
|
||||
coverageIds.some(
|
||||
(coverageId) =>
|
||||
!primaryCoverageIds.has(coverageId) && secondaryCoverageIds.has(coverageId),
|
||||
),
|
||||
).length;
|
||||
const missingCoverageIds = uniqueSortedStrings(
|
||||
featureCoverageIds
|
||||
.filter(
|
||||
(coverageIds) => !coverageIds.some((coverageId) => primaryCoverageIds.has(coverageId)),
|
||||
)
|
||||
.flat(),
|
||||
featureCoverageIds.flatMap((coverageIds) =>
|
||||
coverageIds.filter((coverageId) => !primaryCoverageIds.has(coverageId)),
|
||||
),
|
||||
);
|
||||
const missingFeatureCount = featureCoverageIds.length - fulfilledFeatureCount;
|
||||
return {
|
||||
|
||||
@@ -364,24 +364,21 @@ function pushMissingPrimaryIssues(params: {
|
||||
coverageIdsWithSecondaryEvidence: ReadonlySet<string>;
|
||||
}) {
|
||||
for (const feature of params.category.features) {
|
||||
if (
|
||||
feature.coverageIds.some((coverageId) =>
|
||||
params.coverageIdsWithPrimaryEvidence.has(coverageId),
|
||||
)
|
||||
) {
|
||||
continue;
|
||||
for (const coverageId of feature.coverageIds) {
|
||||
if (params.coverageIdsWithPrimaryEvidence.has(coverageId)) {
|
||||
continue;
|
||||
}
|
||||
const reason = params.coverageIdsWithSecondaryEvidence.has(coverageId)
|
||||
? "only has secondary evidence"
|
||||
: "has no primary evidence";
|
||||
params.issues.push({
|
||||
code: "coverage-id-missing-primary-evidence",
|
||||
severity: "warning",
|
||||
categoryId: params.category.id,
|
||||
ref: coverageId,
|
||||
message: `${params.category.id} feature ${feature.name} coverage ID ${coverageId} ${reason}`,
|
||||
});
|
||||
}
|
||||
const hasSecondaryEvidence = feature.coverageIds.some((coverageId) =>
|
||||
params.coverageIdsWithSecondaryEvidence.has(coverageId),
|
||||
);
|
||||
const reason = hasSecondaryEvidence ? "only has secondary evidence" : "has no primary evidence";
|
||||
params.issues.push({
|
||||
code: "coverage-id-missing-primary-evidence",
|
||||
severity: "warning",
|
||||
categoryId: params.category.id,
|
||||
ref: feature.coverageIds.join(", ") || feature.name,
|
||||
message: `${params.category.id} feature ${feature.name} ${reason}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -582,8 +579,10 @@ export function buildQaScorecardTaxonomyReport(params: {
|
||||
}
|
||||
}
|
||||
|
||||
const fulfilledFeatureCountForCategory = category.features.filter((feature) =>
|
||||
feature.coverageIds.some((coverageId) => fulfilledCoverageIds.has(coverageId)),
|
||||
const fulfilledFeatureCountForCategory = category.features.filter(
|
||||
(feature) =>
|
||||
feature.coverageIds.length > 0 &&
|
||||
feature.coverageIds.every((coverageId) => fulfilledCoverageIds.has(coverageId)),
|
||||
).length;
|
||||
if (required) {
|
||||
requiredFeatureCount += category.features.length;
|
||||
|
||||
@@ -5,10 +5,10 @@ scenario:
|
||||
surface: runtime
|
||||
coverage:
|
||||
primary:
|
||||
- gateway.smoke
|
||||
- websocket-transport
|
||||
secondary:
|
||||
- gateway.health
|
||||
- gateway.protocol
|
||||
- health-apis
|
||||
- hello-ok-snapshot
|
||||
objective: Exercise gateway health and WebSocket smoke assertions through QA Lab evidence.
|
||||
successCriteria:
|
||||
- Gateway health probe succeeds against a reachable local endpoint.
|
||||
|
||||
@@ -164,7 +164,7 @@ surfaces:
|
||||
id: gateway-rpc-apis-and-events
|
||||
features:
|
||||
- name: Health APIs
|
||||
coverageIds: [gateway.health, health-apis]
|
||||
coverageIds: [health-apis]
|
||||
description: '`health` and `status` RPCs.'
|
||||
- name: Identity and presence APIs
|
||||
coverageIds: [identity-and-presence-apis]
|
||||
@@ -504,7 +504,7 @@ surfaces:
|
||||
id: websocket-connection
|
||||
features:
|
||||
- name: WebSocket transport
|
||||
coverageIds: [gateway.smoke, websocket-transport]
|
||||
coverageIds: [websocket-transport]
|
||||
description: WebSocket transport with JSON text frames.
|
||||
- name: Connect challenge
|
||||
coverageIds: [connect-challenge]
|
||||
@@ -516,7 +516,7 @@ surfaces:
|
||||
coverageIds: [protocol-version-negotiation]
|
||||
description: Protocol range negotiation (`minProtocol`/`maxProtocol`).
|
||||
- name: hello-ok snapshot
|
||||
coverageIds: [gateway.protocol, hello-ok-snapshot]
|
||||
coverageIds: [hello-ok-snapshot]
|
||||
description: 'Required `hello-ok` payload structure: server identity, negotiated auth, feature discovery, snapshot, and policy limits.'
|
||||
- name: Startup retry
|
||||
coverageIds: [startup-retry]
|
||||
|
||||
Reference in New Issue
Block a user