openclaw/extensions/qa-lab/src/suite.summary-json.test.ts

import { describe, expect, it } from "vitest";
import { buildQaSuiteSummaryJson } from "./suite.js";

describe("buildQaSuiteSummaryJson", () => {
  const baseParams = {
    // Test scenarios include a `steps: []` field to match the real suite
    // scenario-result shape so downstream consumers that rely on the shape
    // (parity gate, report render) stay aligned.
    scenarios: [
      { name: "Scenario A", status: "pass" as const, steps: [] },
      { name: "Scenario B", status: "fail" as const, details: "something broke", steps: [] },
    ],
    startedAt: new Date("2026-04-11T00:00:00.000Z"),
    finishedAt: new Date("2026-04-11T00:05:00.000Z"),
    providerMode: "mock-openai" as const,
    primaryModel: "openai/gpt-5.5",
    alternateModel: "openai/gpt-5.5-alt",
    fastMode: true,
    concurrency: 2,
  };

  it("records provider/model/mode so parity gates can verify labels", () => {
    const json = buildQaSuiteSummaryJson(baseParams);
    expect(json.run.startedAt).toBe("2026-04-11T00:00:00.000Z");
    expect(json.run.finishedAt).toBe("2026-04-11T00:05:00.000Z");
    expect(json.run.providerMode).toBe("mock-openai");
    expect(json.run.primaryModel).toBe("openai/gpt-5.5");
    expect(json.run.primaryProvider).toBe("openai");
    expect(json.run.primaryModelName).toBe("gpt-5.5");
    expect(json.run.alternateModel).toBe("openai/gpt-5.5-alt");
    expect(json.run.alternateProvider).toBe("openai");
    expect(json.run.alternateModelName).toBe("gpt-5.5-alt");
    expect(json.run.fastMode).toBe(true);
    expect(json.run.concurrency).toBe(2);
    expect(json.run.scenarioIds).toBeNull();
  });

  it("includes scenarioIds in run metadata when provided", () => {
    const scenarioIds = ["approval-turn-tool-followthrough", "subagent-handoff", "memory-recall"];
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      scenarioIds,
    });
    expect(json.run.scenarioIds).toEqual(scenarioIds);
  });

  it("records the runtime pair when the suite runs the runtime axis", () => {
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      runtimePair: ["pi", "codex"],
    });

    expect(json.run.runtimePair).toEqual(["pi", "codex"]);
  });

  it("treats an empty scenarioIds array as unspecified (no filter)", () => {
    // A CLI path that omits --scenario passes an empty array to runQaSuite.
    // The summary must encode that as null so downstream parity/report
    // tooling doesn't interpret a full run as an explicit empty selection.
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      scenarioIds: [],
    });
    expect(json.run.scenarioIds).toBeNull();
  });

  it("records an Anthropic baseline lane cleanly for parity runs", () => {
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      primaryModel: "anthropic/claude-opus-4-7",
      alternateModel: "anthropic/claude-sonnet-4-6",
    });
    expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-7");
    expect(json.run.primaryProvider).toBe("anthropic");
    expect(json.run.primaryModelName).toBe("claude-opus-4-7");
    expect(json.run.alternateModel).toBe("anthropic/claude-sonnet-4-6");
    expect(json.run.alternateProvider).toBe("anthropic");
    expect(json.run.alternateModelName).toBe("claude-sonnet-4-6");
  });

  it("leaves split fields null when a model ref is malformed", () => {
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      primaryModel: "not-a-real-ref",
      alternateModel: "",
    });
    expect(json.run.primaryModel).toBe("not-a-real-ref");
    expect(json.run.primaryProvider).toBeNull();
    expect(json.run.primaryModelName).toBeNull();
    expect(json.run.alternateModel).toBe("");
    expect(json.run.alternateProvider).toBeNull();
    expect(json.run.alternateModelName).toBeNull();
  });

  it("keeps scenarios and counts alongside the run metadata", () => {
    const json = buildQaSuiteSummaryJson(baseParams);
    expect(json.scenarios).toHaveLength(2);
    expect(json.counts).toEqual({
      total: 2,
      passed: 1,
      failed: 1,
    });
  });

  it("preserves scenario-level runtime parity payloads", () => {
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      scenarios: [
        {
          name: "Scenario A",
          status: "pass" as const,
          steps: [],
          runtimeParity: {
            scenarioId: "scenario-a",
            drift: "none" as const,
            cells: {
              pi: {
                runtime: "pi" as const,
                transcriptBytes: "",
                toolCalls: [],
                finalText: "done",
                usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
                wallClockMs: 10,
                bootStateLines: [],
              },
              codex: {
                runtime: "codex" as const,
                transcriptBytes: "",
                toolCalls: [],
                finalText: "done",
                usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
                wallClockMs: 10,
                bootStateLines: [],
              },
            },
          },
        },
      ],
    });

    expect(json.scenarios[0]).toMatchObject({
      runtimeParity: {
        scenarioId: "scenario-a",
        drift: "none",
      },
    });
  });

  it("records optional runtime metrics when provided", () => {
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
      metrics: {
        wallMs: 12_000,
        gatewayProcessCpuMs: 3_400,
        gatewayCpuCoreRatio: 0.283,
        gatewayProcessRssStartBytes: 100_000_000,
        gatewayProcessRssEndBytes: 125_000_000,
        gatewayProcessRssDeltaBytes: 25_000_000,
        gatewayProcessRssPeakBytes: 140_000_000,
        gatewayProcessRssPeakDeltaBytes: 40_000_000,
        gatewayProcessRssSamples: [
          {
            label: "suite-start",
            at: "2026-04-22T12:00:00.000Z",
            gatewayProcessRssBytes: 100_000_000,
          },
          {
            label: "scenario:canary:finish",
            at: "2026-04-22T12:00:10.000Z",
            gatewayProcessRssBytes: 140_000_000,
          },
        ],
        gatewayHeapSnapshots: [
          {
            label: "suite-start",
            at: "2026-04-22T12:00:01.000Z",
            path: "artifacts/gateway-heap-snapshots/suite-start.heapsnapshot",
            bytes: 12_345,
          },
        ],
      },
    });
    expect(json.metrics).toEqual({
      wallMs: 12_000,
      gatewayProcessCpuMs: 3_400,
      gatewayCpuCoreRatio: 0.283,
      gatewayProcessRssStartBytes: 100_000_000,
      gatewayProcessRssEndBytes: 125_000_000,
      gatewayProcessRssDeltaBytes: 25_000_000,
      gatewayProcessRssPeakBytes: 140_000_000,
      gatewayProcessRssPeakDeltaBytes: 40_000_000,
      gatewayProcessRssSamples: [
        {
          label: "suite-start",
          at: "2026-04-22T12:00:00.000Z",
          gatewayProcessRssBytes: 100_000_000,
        },
        {
          label: "scenario:canary:finish",
          at: "2026-04-22T12:00:10.000Z",
          gatewayProcessRssBytes: 140_000_000,
        },
      ],
      gatewayHeapSnapshots: [
        {
          label: "suite-start",
          at: "2026-04-22T12:00:01.000Z",
          path: "artifacts/gateway-heap-snapshots/suite-start.heapsnapshot",
          bytes: 12_345,
        },
      ],
    });
  });
});