mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-22 12:58:09 +00:00
test(qa): expand smoke-ci and release categories and coverage (#93175)
* test(qa): add smoke ci primary coverage evidence
* test(qa): remove overstated primary coverage claims
* test(qa): make release profile include smoke ci
* test(qa): trim taxonomy formatting churn
* test(qa): avoid hardcoded profile names in coverage test
* test(qa): make release profile cover taxonomy
* test(qa): type profile fixture all category flag
* test(qa): include channel delivery in smoke ci profile
This commit is contained in:
@@ -18,6 +18,7 @@ const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat";
|
||||
function testMaturityTaxonomy(params?: {
|
||||
categoryId?: string;
|
||||
coverageIds?: readonly string[];
|
||||
includeAllCategories?: boolean;
|
||||
profileCategoryIds?: readonly string[];
|
||||
}) {
|
||||
const categoryId = params?.categoryId ?? TEST_EXECUTABLE_CATEGORY_ID;
|
||||
@@ -31,12 +32,16 @@ function testMaturityTaxonomy(params?: {
|
||||
{
|
||||
id: "smoke-ci",
|
||||
description: "Test smoke profile.",
|
||||
includeAllCategories: false,
|
||||
categoryIds: [],
|
||||
},
|
||||
{
|
||||
id: "release",
|
||||
description: "Test release profile.",
|
||||
categoryIds: [...(params?.profileCategoryIds ?? [categoryId])],
|
||||
includeAllCategories: params?.includeAllCategories ?? false,
|
||||
categoryIds: [
|
||||
...(params?.includeAllCategories ? [] : (params?.profileCategoryIds ?? [categoryId])),
|
||||
],
|
||||
},
|
||||
],
|
||||
surfaces: [
|
||||
@@ -115,7 +120,10 @@ describe("qa coverage report", () => {
|
||||
]);
|
||||
expect(inventory.scorecardTaxonomy.profileCount).toBe(2);
|
||||
expect(inventory.scorecardTaxonomy.categoryCount).toBeGreaterThan(200);
|
||||
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBe(15);
|
||||
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeGreaterThan(0);
|
||||
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeLessThanOrEqual(
|
||||
inventory.scorecardTaxonomy.categoryCount,
|
||||
);
|
||||
expect(inventory.scorecardTaxonomy.requiredFeatureCount).toBeGreaterThan(0);
|
||||
expect(inventory.scorecardTaxonomy.fulfilledFeatureCount).toBeGreaterThan(0);
|
||||
expect(inventory.scorecardTaxonomy.taxonomyFulfillmentPercent).toBeGreaterThan(0);
|
||||
@@ -124,30 +132,15 @@ describe("qa coverage report", () => {
|
||||
expect(inventory.scorecardTaxonomy.unknownCoverageIdCount).toBe(0);
|
||||
expect(inventory.scorecardTaxonomy.validationIssues.length).toBeGreaterThan(0);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.validationIssues.every(
|
||||
inventory.scorecardTaxonomy.validationIssues.some((issue) =>
|
||||
issue.code.endsWith("not-found"),
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.validationIssues.some(
|
||||
(issue) => issue.code === "coverage-id-missing-primary-evidence",
|
||||
),
|
||||
).toBe(true);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.profiles
|
||||
.find((profile) => profile.id === "release")
|
||||
?.categoryIds.toSorted(),
|
||||
).toEqual([
|
||||
"agent-runtime-and-provider-execution.agent-turn-execution",
|
||||
"automation-cron-hooks-tasks-polling.cron-jobs",
|
||||
"browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution",
|
||||
"browser-control-ui-and-webchat.browser-ui",
|
||||
"media-understanding-and-media-generation.media-generation",
|
||||
"media-understanding-and-media-generation.media-understanding",
|
||||
"openai-codex-provider-path.responses-and-tool-compatibility",
|
||||
"plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins",
|
||||
"security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards",
|
||||
"security-auth-pairing-and-secrets.credential-and-secret-hygiene",
|
||||
"session-memory-and-context-engine.diagnostics-maintenance-and-recovery",
|
||||
"session-memory-and-context-engine.memory",
|
||||
"session-memory-and-context-engine.token-management",
|
||||
"telemetry-diagnostics-and-observability.telemetry-export",
|
||||
]);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.categories.find(
|
||||
(category) => category.id === TEST_BROWSER_CATEGORY_ID,
|
||||
@@ -349,6 +342,21 @@ describe("qa coverage report", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("resolves all-category profiles from taxonomy categories", () => {
|
||||
const report = buildQaScorecardTaxonomyReport({
|
||||
taxonomy: testMaturityTaxonomy({
|
||||
includeAllCategories: true,
|
||||
}),
|
||||
repoRoot: process.cwd(),
|
||||
scenarios: [],
|
||||
});
|
||||
|
||||
expect(report.profiles.find((profile) => profile.id === "release")?.categoryIds).toStrictEqual([
|
||||
TEST_EXECUTABLE_CATEGORY_ID,
|
||||
]);
|
||||
expect(report.requiredCategoryCount).toBe(1);
|
||||
});
|
||||
|
||||
it("reports profile categories missing primary coverage evidence", () => {
|
||||
const report = buildQaScorecardTaxonomyReport({
|
||||
taxonomy: testMaturityTaxonomy(),
|
||||
|
||||
@@ -25,6 +25,7 @@ const qaScorecardProfileSchema = z.object({
|
||||
id: qaScorecardIdSchema,
|
||||
description: z.string().trim().min(1),
|
||||
evidenceMode: qaScorecardEvidenceModeSchema.optional(),
|
||||
includeAllCategories: z.boolean().default(false),
|
||||
categoryIds: z.array(qaScorecardIdSchema).default([]),
|
||||
});
|
||||
|
||||
@@ -67,6 +68,14 @@ const qaMaturityTaxonomySchema = z
|
||||
}
|
||||
seenProfileIds.add(profile.id);
|
||||
|
||||
if (profile.includeAllCategories && profile.categoryIds.length > 0) {
|
||||
ctx.addIssue({
|
||||
code: z.ZodIssueCode.custom,
|
||||
path: ["profiles", profileIndex, "categoryIds"],
|
||||
message: `profile ${profile.id} cannot set categoryIds when includeAllCategories is true`,
|
||||
});
|
||||
}
|
||||
|
||||
const seenProfileCategoryIds = new Set<string>();
|
||||
for (const [categoryIndex, categoryId] of profile.categoryIds.entries()) {
|
||||
if (seenProfileCategoryIds.has(categoryId)) {
|
||||
@@ -466,7 +475,10 @@ export function buildQaScorecardTaxonomyReport(params: {
|
||||
const profiles =
|
||||
params.taxonomy?.profiles.map((profile) => {
|
||||
const validCategoryIds: string[] = [];
|
||||
for (const categoryId of profile.categoryIds) {
|
||||
const selectedCategoryIds = profile.includeAllCategories
|
||||
? [...maturityRefs.categories.keys()]
|
||||
: profile.categoryIds;
|
||||
for (const categoryId of selectedCategoryIds) {
|
||||
if (!maturityRefs.categories.has(categoryId)) {
|
||||
issues.push({
|
||||
code: "profile-category-ref-not-found",
|
||||
|
||||
@@ -6,6 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- channels.threads
|
||||
- thread-parent-child-placement
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
objective: Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.
|
||||
|
||||
@@ -6,6 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- media.image-generation
|
||||
- generated-image-persistence-and-delivery
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
objective: Verify a generated image is saved as media, reattached on the next turn, and described correctly through the vision path.
|
||||
|
||||
@@ -6,6 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- telemetry.prometheus
|
||||
- gateway-authenticated-get-api-diagnostics-prometheus
|
||||
secondary:
|
||||
- harness.qa-lab
|
||||
- docker.e2e
|
||||
|
||||
@@ -6,6 +6,8 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- scheduling.cron
|
||||
- cron-rpcs
|
||||
- chat-announce-delivery
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
objective: Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.
|
||||
|
||||
@@ -6,6 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- ui.control
|
||||
- dashboard-open-auth-bootstrap
|
||||
secondary:
|
||||
- media.image-understanding
|
||||
- channels.qa-channel
|
||||
|
||||
@@ -17,6 +17,8 @@ profiles:
|
||||
- security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards
|
||||
- telemetry-diagnostics-and-observability.telemetry-export
|
||||
- channel-framework.conversation-routing-and-delivery
|
||||
- channel-framework.outbound-delivery-and-reply-pipeline
|
||||
- channel-framework.group-thread-and-ambient-room-behavior
|
||||
- session-memory-and-context-engine.memory
|
||||
- session-memory-and-context-engine.diagnostics-maintenance-and-recovery
|
||||
- automation-cron-hooks-tasks-polling.cron-jobs
|
||||
@@ -29,21 +31,7 @@ profiles:
|
||||
description: Stable/LTS proof selector for live providers, live channels, package artifacts,
|
||||
upgrade paths, and platform proof where the claim depends on real upstreams or release
|
||||
artifacts.
|
||||
categoryIds:
|
||||
- agent-runtime-and-provider-execution.agent-turn-execution
|
||||
- session-memory-and-context-engine.token-management
|
||||
- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution
|
||||
- security-auth-pairing-and-secrets.approval-policy-and-tool-safeguards
|
||||
- telemetry-diagnostics-and-observability.telemetry-export
|
||||
- openai-codex-provider-path.responses-and-tool-compatibility
|
||||
- session-memory-and-context-engine.memory
|
||||
- session-memory-and-context-engine.diagnostics-maintenance-and-recovery
|
||||
- automation-cron-hooks-tasks-polling.cron-jobs
|
||||
- plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins
|
||||
- media-understanding-and-media-generation.media-understanding
|
||||
- media-understanding-and-media-generation.media-generation
|
||||
- browser-control-ui-and-webchat.browser-ui
|
||||
- security-auth-pairing-and-secrets.credential-and-secret-hygiene
|
||||
includeAllCategories: true
|
||||
levels:
|
||||
- id: planned
|
||||
code: M0
|
||||
|
||||
Reference in New Issue
Block a user