Files
openclaw/extensions/qa-lab/src/suite.summary-json.test.ts
2026-04-29 02:29:01 -07:00

124 lines
4.1 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { buildQaSuiteSummaryJson } from "./suite.js";
/**
 * Suite for `buildQaSuiteSummaryJson`: checks the run metadata, the
 * handling of `scenarioIds`, the provider/model split of model refs,
 * the scenario counts, and the optional metrics payload.
 */
describe("buildQaSuiteSummaryJson", () => {
  // ISO timestamps shared by the fixture and the expected run metadata.
  const startedAtIso = "2026-04-11T00:00:00.000Z";
  const finishedAtIso = "2026-04-11T00:05:00.000Z";

  // Baseline input; each case spreads it and overrides only what it exercises.
  const baseParams = {
    // Every scenario carries `steps: []` so the fixture mirrors the real
    // suite scenario-result shape that downstream consumers (parity gate,
    // report render) rely on.
    scenarios: [
      { name: "Scenario A", status: "pass" as const, steps: [] },
      { name: "Scenario B", status: "fail" as const, details: "something broke", steps: [] },
    ],
    startedAt: new Date(startedAtIso),
    finishedAt: new Date(finishedAtIso),
    providerMode: "mock-openai" as const,
    primaryModel: "openai/gpt-5.5",
    alternateModel: "openai/gpt-5.5-alt",
    fastMode: true,
    concurrency: 2,
  };

  it("records provider/model/mode so parity gates can verify labels", () => {
    const { run } = buildQaSuiteSummaryJson(baseParams);

    // With no scenarioIds supplied, the run metadata is expected to carry null.
    const expectedRun = {
      startedAt: startedAtIso,
      finishedAt: finishedAtIso,
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
      primaryProvider: "openai",
      primaryModelName: "gpt-5.5",
      alternateModel: "openai/gpt-5.5-alt",
      alternateProvider: "openai",
      alternateModelName: "gpt-5.5-alt",
      fastMode: true,
      concurrency: 2,
      scenarioIds: null,
    };
    expect(run).toMatchObject(expectedRun);
  });

  it("includes scenarioIds in run metadata when provided", () => {
    const requestedIds = ["approval-turn-tool-followthrough", "subagent-handoff", "memory-recall"];

    const summary = buildQaSuiteSummaryJson({ ...baseParams, scenarioIds: requestedIds });

    expect(summary.run.scenarioIds).toEqual(requestedIds);
  });

  it("treats an empty scenarioIds array as unspecified (no filter)", () => {
    // When --scenario is omitted, the CLI path hands runQaSuite an empty
    // array. The summary has to record that as null; otherwise downstream
    // parity/report tooling would read a full run as an explicit empty
    // selection.
    const summary = buildQaSuiteSummaryJson({ ...baseParams, scenarioIds: [] });

    expect(summary.run.scenarioIds).toBeNull();
  });

  it("records an Anthropic baseline lane cleanly for parity runs", () => {
    const primaryModel = "anthropic/claude-opus-4-6";
    const alternateModel = "anthropic/claude-sonnet-4-6";

    const { run } = buildQaSuiteSummaryJson({ ...baseParams, primaryModel, alternateModel });

    expect(run).toMatchObject({
      primaryModel,
      primaryProvider: "anthropic",
      primaryModelName: "claude-opus-4-6",
      alternateModel,
      alternateProvider: "anthropic",
      alternateModelName: "claude-sonnet-4-6",
    });
  });

  it("leaves split fields null when a model ref is malformed", () => {
    // The raw refs must be echoed back unchanged while the derived
    // provider/model-name fields are null.
    const primaryModel = "not-a-real-ref";
    const alternateModel = "";

    const { run } = buildQaSuiteSummaryJson({ ...baseParams, primaryModel, alternateModel });

    expect(run).toMatchObject({
      primaryModel,
      primaryProvider: null,
      primaryModelName: null,
      alternateModel,
      alternateProvider: null,
      alternateModelName: null,
    });
  });

  it("keeps scenarios and counts alongside the run metadata", () => {
    const { scenarios, counts } = buildQaSuiteSummaryJson(baseParams);

    // The fixture has one passing and one failing scenario.
    expect(scenarios).toHaveLength(2);
    expect(counts).toEqual({ total: 2, passed: 1, failed: 1 });
  });

  it("records optional runtime metrics when provided", () => {
    const expectedMetrics = {
      wallMs: 12_000,
      gatewayProcessCpuMs: 3_400,
      gatewayCpuCoreRatio: 0.283,
      gatewayProcessRssStartBytes: 100_000_000,
      gatewayProcessRssEndBytes: 125_000_000,
      gatewayProcessRssDeltaBytes: 25_000_000,
    };

    // Pass a copy so the expectation stays independent of the input object.
    const summary = buildQaSuiteSummaryJson({
      ...baseParams,
      metrics: { ...expectedMetrics },
    });

    expect(summary.metrics).toEqual(expectedMetrics);
  });
});