/**
 * Tests for the QA CLI runtime command wrappers.
 *
 * Every downstream runner (suite, manual lane, character eval, multipass,
 * telegram live, lab server, docker harness) is replaced with a hoisted
 * `vi.fn()` so each test can assert on the exact options object the CLI
 * layer forwards after resolving paths and defaults.
 */
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";

// Mock functions are created through `vi.hoisted` so they already exist when
// the `vi.mock` factories below reference them.
const {
  runQaManualLane,
  runQaSuiteFromRuntime,
  runQaCharacterEval,
  runQaMultipass,
  runTelegramQaLive,
  startQaLabServer,
  writeQaDockerHarnessFiles,
  buildQaDockerHarnessImage,
  runQaDockerUp,
  defaultQaRuntimeModelForMode,
} = vi.hoisted(() => ({
  runQaManualLane: vi.fn(),
  runQaSuiteFromRuntime: vi.fn(),
  runQaCharacterEval: vi.fn(),
  runQaMultipass: vi.fn(),
  runTelegramQaLive: vi.fn(),
  startQaLabServer: vi.fn(),
  writeQaDockerHarnessFiles: vi.fn(),
  buildQaDockerHarnessImage: vi.fn(),
  runQaDockerUp: vi.fn(),
  defaultQaRuntimeModelForMode:
    vi.fn<(mode: string, options?: { alternate?: boolean }) => string>(),
}));

vi.mock("./manual-lane.runtime.js", () => ({
  runQaManualLane,
}));

vi.mock("./suite-launch.runtime.js", () => ({
  runQaSuiteFromRuntime,
}));

vi.mock("./character-eval.js", () => ({
  runQaCharacterEval,
}));

vi.mock("./multipass.runtime.js", () => ({
  runQaMultipass,
}));

vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({
  runTelegramQaLive,
}));

vi.mock("./lab-server.js", () => ({
  startQaLabServer,
}));

vi.mock("./docker-harness.js", () => ({
  writeQaDockerHarnessFiles,
  buildQaDockerHarnessImage,
}));

vi.mock("./docker-up.runtime.js", () => ({
  runQaDockerUp,
}));

vi.mock("./model-selection.runtime.js", () => ({
  defaultQaRuntimeModelForMode,
}));

import { resolveRepoRelativeOutputDir } from "./cli-paths.js";
import {
  runQaLabSelfCheckCommand,
  runQaDockerBuildImageCommand,
  runQaDockerScaffoldCommand,
  runQaDockerUpCommand,
  runQaCharacterEvalCommand,
  runQaCoverageReportCommand,
  runQaManualLaneCommand,
  runQaParityReportCommand,
  runQaSuiteCommand,
} from "./cli.runtime.js";
import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js";
import { defaultQaModelForMode as defaultQaProviderModelForMode } from "./model-selection.js";
import type { QaProviderModeInput } from "./run-config.js";

/** Scenario ids the tests expect `parityPack: "agentic"` to contribute, in order. */
const AGENTIC_PARITY_SCENARIO_IDS: readonly string[] = [
  "approval-turn-tool-followthrough",
  "model-switch-tool-continuity",
  "source-docs-discovery-report",
  "image-understanding-attachment",
  "compaction-retry-mutating-tool",
  "subagent-handoff",
  "subagent-fanout-synthesis",
  "subagent-stale-child-links",
  "memory-recall",
  "thread-memory-isolation",
  "config-restart-capability-flip",
  "instruction-followthrough-repo-contract",
];

describe("qa cli runtime", () => {
  // `ReturnType` needs its type argument; the spies are created by `vi.spyOn`.
  let stdoutWrite: ReturnType<typeof vi.spyOn>;
  let stderrWrite: ReturnType<typeof vi.spyOn>;
  let suiteArtifactsDir: string;
  let suiteReportPath: string;
  let suiteSummaryPath: string;

  /** Builds the default (no failing scenarios) host suite result for the current temp artifacts. */
  const passingSuiteRun = () => ({
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [],
  });

  /**
   * Runs `body` with `process.exitCode` cleared and restores the previous
   * value afterwards, even when `body` throws.
   */
  async function withClearedExitCode(body: () => Promise<void>): Promise<void> {
    const priorExitCode = process.exitCode;
    process.exitCode = undefined;
    try {
      await body();
    } finally {
      process.exitCode = priorExitCode;
    }
  }

  /**
   * Writes a one-scenario failing summary to `suiteSummaryPath` and queues a
   * matching failing result for the next `runQaSuiteFromRuntime` call.
   */
  async function stageFailingSuiteRun(scenarioName: string): Promise<void> {
    await fs.writeFile(
      suiteSummaryPath,
      JSON.stringify({
        counts: {
          total: 1,
          passed: 0,
          failed: 1,
        },
        scenarios: [{ name: scenarioName, status: "fail" }],
      }),
      "utf8",
    );
    runQaSuiteFromRuntime.mockResolvedValueOnce({
      watchUrl: "http://127.0.0.1:43124",
      reportPath: suiteReportPath,
      summaryPath: suiteSummaryPath,
      scenarios: [{ name: scenarioName, status: "fail", steps: [] }],
    });
  }

  /** Queues a telegram live result containing a single failing scenario. */
  function stageFailingTelegramRun(): void {
    runTelegramQaLive.mockResolvedValueOnce({
      outputDir: "/tmp/telegram",
      reportPath: "/tmp/telegram/report.md",
      summaryPath: "/tmp/telegram/summary.json",
      observedMessagesPath: "/tmp/telegram/observed.json",
      scenarios: [
        {
          id: "telegram-help-command",
          title: "Telegram help command reply",
          status: "fail",
          details: "missing expected text",
        },
      ],
    });
  }

  /**
   * Creates a temp directory, optionally writes `summaryContent` to its
   * `qa-suite-summary.json`, queues a multipass result pointing at that
   * directory, runs `body`, and always removes the directory afterwards.
   *
   * @param summaryContent Raw file content; `undefined` leaves the summary file absent.
   */
  async function withMultipassSummary(
    summaryContent: string | undefined,
    body: () => Promise<void>,
  ): Promise<void> {
    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
    try {
      const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
      if (summaryContent !== undefined) {
        await fs.writeFile(summaryPath, summaryContent, "utf8");
      }
      runQaMultipass.mockResolvedValueOnce({
        outputDir: repoRoot,
        reportPath: path.join(repoRoot, "qa-suite-report.md"),
        summaryPath,
        hostLogPath: path.join(repoRoot, "multipass-host.log"),
        bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
        guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
        vmName: "openclaw-qa-test",
        scenarioIds: ["channel-chat-baseline"],
      });
      await body();
    } finally {
      await fs.rm(repoRoot, { recursive: true, force: true });
    }
  }

  beforeEach(async () => {
    // Real report/summary files on disk: the host suite path reads the summary back.
    suiteArtifactsDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-suite-runtime-"));
    suiteReportPath = path.join(suiteArtifactsDir, "qa-suite-report.md");
    suiteSummaryPath = path.join(suiteArtifactsDir, "qa-suite-summary.json");
    await fs.writeFile(suiteReportPath, "# QA Suite Report\n", "utf8");
    await fs.writeFile(
      suiteSummaryPath,
      JSON.stringify({
        counts: {
          total: 1,
          passed: 1,
          failed: 0,
        },
        scenarios: [],
      }),
      "utf8",
    );

    // Silence and capture CLI output.
    stdoutWrite = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    stderrWrite = vi.spyOn(process.stderr, "write").mockReturnValue(true);

    runQaSuiteFromRuntime.mockReset();
    runQaCharacterEval.mockReset();
    runQaManualLane.mockReset();
    runQaMultipass.mockReset();
    runTelegramQaLive.mockReset();
    startQaLabServer.mockReset();
    writeQaDockerHarnessFiles.mockReset();
    buildQaDockerHarnessImage.mockReset();
    runQaDockerUp.mockReset();

    // By default the runtime resolver defers to the real provider defaults.
    defaultQaRuntimeModelForMode.mockImplementation(
      (mode: string, options?: { alternate?: boolean }) =>
        defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
    );

    runQaSuiteFromRuntime.mockResolvedValue(passingSuiteRun());
    runQaCharacterEval.mockResolvedValue({
      reportPath: "/tmp/character-report.md",
      summaryPath: "/tmp/character-summary.json",
    });
    runQaManualLane.mockResolvedValue({
      model: "openai/gpt-5.5",
      waited: { status: "ok" },
      reply: "done",
      watchUrl: "http://127.0.0.1:43124",
    });
    runQaMultipass.mockResolvedValue({
      outputDir: "/tmp/multipass",
      reportPath: "/tmp/multipass/qa-suite-report.md",
      summaryPath: "/tmp/multipass/qa-suite-summary.json",
      hostLogPath: "/tmp/multipass/multipass-host.log",
      bootstrapLogPath: "/tmp/multipass/multipass-guest-bootstrap.log",
      guestScriptPath: "/tmp/multipass/multipass-guest-run.sh",
      vmName: "openclaw-qa-test",
      scenarioIds: ["channel-chat-baseline"],
    });
    runTelegramQaLive.mockResolvedValue({
      outputDir: "/tmp/telegram",
      reportPath: "/tmp/telegram/report.md",
      summaryPath: "/tmp/telegram/summary.json",
      observedMessagesPath: "/tmp/telegram/observed.json",
      scenarios: [],
    });
    startQaLabServer.mockResolvedValue({
      baseUrl: "http://127.0.0.1:58000",
      runSelfCheck: vi.fn().mockResolvedValue({
        outputPath: "/tmp/report.md",
      }),
      stop: vi.fn(),
    });
    writeQaDockerHarnessFiles.mockResolvedValue({
      outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker",
    });
    buildQaDockerHarnessImage.mockResolvedValue({
      imageName: "openclaw:qa-local-prebaked",
    });
    runQaDockerUp.mockResolvedValue({
      outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker",
      qaLabUrl: "http://127.0.0.1:43124",
      gatewayUrl: "http://127.0.0.1:18789/",
      stopCommand: "docker compose down",
    });
  });

  afterEach(async () => {
    stdoutWrite.mockRestore();
    stderrWrite.mockRestore();
    vi.clearAllMocks();
    await fs.rm(suiteArtifactsDir, { recursive: true, force: true });
  });

  it("resolves suite repo-root-relative paths before dispatching", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/frontier",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-sonnet-4-6",
      fastMode: true,
      thinking: "medium",
      scenarioIds: ["approval-turn-tool-followthrough"],
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/frontier"),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-sonnet-4-6",
      fastMode: true,
      thinkingDefault: "medium",
      scenarioIds: ["approval-turn-tool-followthrough"],
    });
  });

  it("resolves telegram qa repo-root-relative paths before dispatching", async () => {
    await runQaTelegramCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/telegram",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: true,
      scenarioIds: ["telegram-help-command"],
      sutAccountId: "sut-live",
    });

    expect(runTelegramQaLive).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/telegram"),
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: true,
      allowFailures: undefined,
      scenarioIds: ["telegram-help-command"],
      sutAccountId: "sut-live",
    });
  });

  it("rejects output dirs that escape the repo root", () => {
    expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow(
      "--output-dir must stay within the repo root.",
    );
    expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside")).toThrow(
      "--output-dir must be a relative path inside the repo root.",
    );
  });

  it("defaults telegram qa runs onto the live provider lane", async () => {
    await runQaTelegramCommand({
      repoRoot: "/tmp/openclaw-repo",
      scenarioIds: ["telegram-help-command"],
    });

    expect(runTelegramQaLive).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
        allowFailures: undefined,
      }),
    );
  });

  it("sets a failing exit code when telegram scenarios fail", async () => {
    await withClearedExitCode(async () => {
      stageFailingTelegramRun();

      await runQaTelegramCommand({
        repoRoot: "/tmp/openclaw-repo",
      });

      expect(process.exitCode).toBe(1);
    });
  });

  it("keeps telegram exit code clear when --allow-failures is set", async () => {
    await withClearedExitCode(async () => {
      stageFailingTelegramRun();

      await runQaTelegramCommand({
        repoRoot: "/tmp/openclaw-repo",
        allowFailures: true,
      });

      expect(process.exitCode).toBeUndefined();
    });
  });

  it("passes host suite concurrency through", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
      concurrency: 3,
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        transportId: "qa-channel",
        scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
        concurrency: 3,
      }),
    );
  });

  it("sets a failing exit code when host suite scenarios fail", async () => {
    await withClearedExitCode(async () => {
      await stageFailingSuiteRun("channel chat baseline");

      await runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
      });

      expect(process.exitCode).toBe(1);
    });
  });

  it("keeps host suite exit code clear when --allow-failures is set", async () => {
    await withClearedExitCode(async () => {
      await stageFailingSuiteRun("channel chat baseline");

      await runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        allowFailures: true,
      });

      expect(process.exitCode).toBeUndefined();
    });
  });

  it("retries host suite runs once for retryable infra failures", async () => {
    runQaSuiteFromRuntime
      .mockRejectedValueOnce(new Error("agent.wait timeout while waiting for transport ready"))
      .mockResolvedValueOnce(passingSuiteRun());

    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2);
    expect(stderrWrite).toHaveBeenCalledWith(
      expect.stringContaining("[qa-suite] infra retry 1/1: agent.wait timeout"),
    );
  });

  it("retries host suite runs once for qa-channel readiness timeouts", async () => {
    runQaSuiteFromRuntime
      .mockRejectedValueOnce(
        new Error(
          "timed out after 180000ms waiting for qa-channel ready; last status: no qa-channel accounts reported",
        ),
      )
      .mockResolvedValueOnce(passingSuiteRun());

    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2);
    expect(stderrWrite).toHaveBeenCalledWith(
      expect.stringContaining(
        "[qa-suite] infra retry 1/1: timed out after 180000ms waiting for qa-channel ready",
      ),
    );
  });

  it("does not retry host suite runs for generic timeout wording", async () => {
    runQaSuiteFromRuntime.mockRejectedValueOnce(
      new Error("approval-turn timed out waiting for post-approval read"),
    );

    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
      }),
    ).rejects.toThrow("approval-turn timed out waiting for post-approval read");

    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1);
  });

  it("does not retry host suite runs for semantic failures", async () => {
    await withClearedExitCode(async () => {
      await stageFailingSuiteRun("channel chat baseline");

      await runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
      });

      expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1);
      expect(process.exitCode).toBe(1);
    });
  });

  it("runs a host-only parity preflight against the sentinel scenario", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-opus-4-6",
      preflight: true,
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      // NOTE(review): this pattern hard-codes POSIX separators, unlike the
      // path.resolve-based expectations used by the other tests in this file.
      outputDir: expect.stringMatching(
        /^\/tmp\/openclaw-repo\/\.artifacts\/qa-e2e\/preflight\/suite-/,
      ),
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-opus-4-6",
      scenarioIds: ["approval-turn-tool-followthrough"],
      concurrency: 1,
    });
    expect(stdoutWrite).toHaveBeenCalledWith(
      expect.stringContaining("QA parity preflight summary:"),
    );
  });

  it("throws when parity preflight finds a failing sentinel scenario", async () => {
    await stageFailingSuiteRun("approval turn tool followthrough");

    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        preflight: true,
      }),
    ).rejects.toThrow("QA parity preflight failed with 1 failing scenario.");
  });

  it("keeps parity preflight exit code clear when --allow-failures is set", async () => {
    await withClearedExitCode(async () => {
      await stageFailingSuiteRun("approval turn tool followthrough");

      await runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        preflight: true,
        allowFailures: true,
      });

      expect(process.exitCode).toBeUndefined();
    });
  });

  it("rejects preflight on the multipass runner", async () => {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
        preflight: true,
      }),
    ).rejects.toThrow("--preflight requires --runner host.");
  });

  it("passes host suite CLI auth mode through", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-frontier",
      primaryModel: "claude-cli/claude-sonnet-4-6",
      alternateModel: "claude-cli/claude-sonnet-4-6",
      cliAuthMode: "subscription",
      scenarioIds: ["claude-cli-provider-capabilities-subscription"],
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
        primaryModel: "claude-cli/claude-sonnet-4-6",
        alternateModel: "claude-cli/claude-sonnet-4-6",
        claudeCliAuthMode: "subscription",
        scenarioIds: ["claude-cli-provider-capabilities-subscription"],
      }),
    );
  });

  it("expands the agentic parity pack onto the suite scenario list", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      parityPack: "agentic",
      scenarioIds: ["channel-chat-baseline"],
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        // Explicit ids come first, followed by the pack's scenarios.
        scenarioIds: ["channel-chat-baseline", ...AGENTIC_PARITY_SCENARIO_IDS],
      }),
    );
  });

  it("rejects unknown suite CLI auth modes", async () => {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        cliAuthMode: "magic",
      }),
    ).rejects.toThrow("--cli-auth-mode must be one of auto, api-key, subscription");
  });

  it("sets a failing exit code when the parity gate fails", async () => {
    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-parity-"));
    try {
      await withClearedExitCode(async () => {
        // Both summaries list the same single passing scenario, yet the gate is
        // expected to fail. NOTE(review): presumably the gate requires scenarios
        // beyond this one — confirm against runQaParityReportCommand.
        await fs.writeFile(
          path.join(repoRoot, "candidate.json"),
          JSON.stringify({
            scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
          }),
          "utf8",
        );
        await fs.writeFile(
          path.join(repoRoot, "baseline.json"),
          JSON.stringify({
            scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
          }),
          "utf8",
        );

        await runQaParityReportCommand({
          repoRoot,
          candidateSummary: "candidate.json",
          baselineSummary: "baseline.json",
        });

        expect(process.exitCode).toBe(1);
      });
    } finally {
      await fs.rm(repoRoot, { recursive: true, force: true });
    }
  });

  it("prints a markdown coverage report from scenario metadata", async () => {
    await runQaCoverageReportCommand({ repoRoot: process.cwd() });

    expect(stdoutWrite).toHaveBeenCalledWith(expect.stringContaining("# QA Coverage Inventory"));
    expect(stdoutWrite).toHaveBeenCalledWith(expect.stringContaining("memory.recall"));
  });

  it("resolves character eval paths and passes model refs through", async () => {
    await runQaCharacterEvalCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/character",
      model: [
        "openai/gpt-5.5,thinking=xhigh,fast=false",
        "codex-cli/test-model,thinking=high,fast",
      ],
      scenario: "character-vibes-gollum",
      fast: true,
      thinking: "medium",
      modelThinking: ["codex-cli/test-model=medium"],
      judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
      judgeTimeoutMs: 180_000,
      blindJudgeModels: true,
      concurrency: 4,
      judgeConcurrency: 3,
    });

    expect(runQaCharacterEval).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/character"),
      models: ["openai/gpt-5.5", "codex-cli/test-model"],
      scenarioId: "character-vibes-gollum",
      candidateFastMode: true,
      candidateThinkingDefault: "medium",
      candidateThinkingByModel: { "codex-cli/test-model": "medium" },
      candidateModelOptions: {
        "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
        "codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
      },
      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-6"],
      judgeModelOptions: {
        "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
        "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
      },
      judgeTimeoutMs: 180_000,
      judgeBlindModels: true,
      candidateConcurrency: 4,
      judgeConcurrency: 3,
      progress: expect.any(Function),
    });
  });

  it("lets character eval auto-select candidate fast mode when --fast is omitted", async () => {
    await runQaCharacterEvalCommand({
      repoRoot: "/tmp/openclaw-repo",
      model: ["openai/gpt-5.5"],
    });

    expect(runQaCharacterEval).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: undefined,
      models: ["openai/gpt-5.5"],
      scenarioId: undefined,
      candidateFastMode: undefined,
      candidateThinkingDefault: undefined,
      candidateThinkingByModel: undefined,
      candidateModelOptions: undefined,
      judgeModels: undefined,
      judgeModelOptions: undefined,
      judgeTimeoutMs: undefined,
      judgeBlindModels: undefined,
      candidateConcurrency: undefined,
      judgeConcurrency: undefined,
      progress: expect.any(Function),
    });
  });

  it("rejects invalid character eval thinking levels", async () => {
    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.5"],
        thinking: "enormous",
      }),
    ).rejects.toThrow("--thinking must be one of");

    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.5,thinking=galaxy"],
      }),
    ).rejects.toThrow("--model thinking must be one of");

    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.5,warp"],
      }),
    ).rejects.toThrow("--model options must be thinking=");

    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.5"],
        modelThinking: ["openai/gpt-5.5"],
      }),
    ).rejects.toThrow("--model-thinking must use provider/model=level");
  });

  it("passes the explicit repo root into manual runs", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: true,
      message: "read qa kickoff and reply short",
      timeoutMs: 45_000,
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: true,
      message: "read qa kickoff and reply short",
      timeoutMs: 45_000,
    });
  });

  it("routes suite runs through multipass when the runner is selected", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa-multipass",
      runner: "multipass",
      providerMode: "mock-openai",
      scenarioIds: ["channel-chat-baseline"],
      allowFailures: true,
      concurrency: 3,
      image: "lts",
      cpus: 2,
      memory: "4G",
      disk: "24G",
    });

    expect(runQaMultipass).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-multipass"),
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: undefined,
      alternateModel: undefined,
      fastMode: undefined,
      allowFailures: true,
      scenarioIds: ["channel-chat-baseline"],
      concurrency: 3,
      image: "lts",
      cpus: 2,
      memory: "4G",
      disk: "24G",
    });
    expect(runQaSuiteFromRuntime).not.toHaveBeenCalled();
  });

  it("passes live suite selection through to the multipass runner", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      runner: "multipass",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: true,
      allowFailures: true,
      scenarioIds: ["channel-chat-baseline"],
    });

    expect(runQaMultipass).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        transportId: "qa-channel",
        providerMode: "live-frontier",
        primaryModel: "openai/gpt-5.5",
        alternateModel: "openai/gpt-5.5",
        fastMode: true,
        allowFailures: true,
        scenarioIds: ["channel-chat-baseline"],
      }),
    );
  });

  it("sets a failing exit code when multipass summary reports failed scenarios", async () => {
    const summary = JSON.stringify({
      counts: {
        total: 2,
        passed: 1,
        failed: 1,
      },
    });

    await withMultipassSummary(summary, () =>
      withClearedExitCode(async () => {
        await runQaSuiteCommand({
          repoRoot: "/tmp/openclaw-repo",
          runner: "multipass",
        });

        expect(process.exitCode).toBe(1);
      }),
    );
  });

  it("rejects malformed multipass summary JSON", async () => {
    await withMultipassSummary("{not-json", async () => {
      await expect(
        runQaSuiteCommand({
          repoRoot: "/tmp/openclaw-repo",
          runner: "multipass",
        }),
      ).rejects.toThrow("Could not parse QA summary JSON");
    });
  });

  it("rejects unreadable multipass summary JSON with read/parse wording", async () => {
    // No summary file is written, so reading it must fail.
    await withMultipassSummary(undefined, async () => {
      await expect(
        runQaSuiteCommand({
          repoRoot: "/tmp/openclaw-repo",
          runner: "multipass",
        }),
      ).rejects.toThrow("Could not read QA summary JSON");
    });
  });

  it("rejects partial multipass summary JSON without failure fields", async () => {
    const summary = JSON.stringify({ counts: { total: 2, passed: 2 } });

    await withMultipassSummary(summary, async () => {
      await expect(
        runQaSuiteCommand({
          repoRoot: "/tmp/openclaw-repo",
          runner: "multipass",
        }),
      ).rejects.toThrow("did not include counts.failed or scenarios[].status");
    });
  });

  it("keeps multipass exit code clear when --allow-failures is set", async () => {
    const summary = JSON.stringify({
      counts: {
        total: 2,
        passed: 1,
        failed: 1,
      },
    });

    await withMultipassSummary(summary, () =>
      withClearedExitCode(async () => {
        await runQaSuiteCommand({
          repoRoot: "/tmp/openclaw-repo",
          runner: "multipass",
          allowFailures: true,
        });

        expect(process.exitCode).toBeUndefined();
      }),
    );
  });

  it("passes provider-qualified mock parity suite selection through to the host runner", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "mock-openai",
      parityPack: "agentic",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-opus-4-6",
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: undefined,
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-opus-4-6",
      fastMode: undefined,
      scenarioIds: [...AGENTIC_PARITY_SCENARIO_IDS],
    });
  });

  it("rejects multipass-only suite flags on the host runner", async () => {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "host",
        image: "lts",
      }),
    ).rejects.toThrow("--image, --cpus, --memory, and --disk require --runner multipass.");
  });

  it("defaults manual mock runs onto the mock-openai model lane", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "mock-openai",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "mock-openai/gpt-5.5",
      alternateModel: "mock-openai/gpt-5.5-alt",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("defaults manual aimock runs onto the aimock model lane", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "aimock",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      transportId: "qa-channel",
      providerMode: "aimock",
      primaryModel: "aimock/gpt-5.5",
      alternateModel: "aimock/gpt-5.5-alt",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("defaults manual frontier runs onto the frontier model lane", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("keeps an explicit manual primary model as the alternate default", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-frontier",
      primaryModel: "anthropic/claude-sonnet-4-6",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "anthropic/claude-sonnet-4-6",
      alternateModel: "anthropic/claude-sonnet-4-6",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => {
    // NOTE(review): the stubbed resolver returns the same model ref that the
    // default frontier lane test above already expects, so this case cannot
    // distinguish the runtime resolver from the provider default — confirm the
    // intended Codex OAuth model ref.
    defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
      mode === "live-frontier"
        ? "openai/gpt-5.5"
        : defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
    );

    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("resolves self-check repo-root-relative paths before starting the lab server", async () => {
    await runQaLabSelfCheckCommand({
      repoRoot: "/tmp/openclaw-repo",
      output: ".artifacts/qa/self-check.md",
    });

    expect(startQaLabServer).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputPath: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/self-check.md"),
    });
  });

  it("resolves docker scaffold paths relative to the explicit repo root", async () => {
    await runQaDockerScaffoldCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa-docker",
      providerBaseUrl: "http://127.0.0.1:44080/v1",
      usePrebuiltImage: true,
    });

    expect(writeQaDockerHarnessFiles).toHaveBeenCalledWith({
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-docker"),
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      gatewayPort: undefined,
      qaLabPort: undefined,
      providerBaseUrl: "http://127.0.0.1:44080/v1",
      imageName: undefined,
      usePrebuiltImage: true,
    });
  });

  it("passes the explicit repo root into docker image builds", async () => {
    await runQaDockerBuildImageCommand({
      repoRoot: "/tmp/openclaw-repo",
      image: "openclaw:qa-local-prebaked",
    });

    expect(buildQaDockerHarnessImage).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      imageName: "openclaw:qa-local-prebaked",
    });
  });

  it("resolves docker up paths relative to the explicit repo root", async () => {
    await runQaDockerUpCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa-up",
      usePrebuiltImage: true,
      skipUiBuild: true,
    });

    expect(runQaDockerUp).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-up"),
      gatewayPort: undefined,
      qaLabPort: undefined,
      providerBaseUrl: undefined,
      image: undefined,
      usePrebuiltImage: true,
      skipUiBuild: true,
    });
  });
});