// Test prologue for the QA CLI runtime suite.
//
// - `vi.hoisted` builds one `vi.fn()` stub per runtime entry point; only
//   `defaultQaRuntimeModelForMode` is given an explicit call signature,
//   `(mode, options?) => string`.
// - Each `vi.mock(path, factory)` call below returns those same stubs as the
//   named exports of the listed module, so the tests can assert on the stubs.
// - Vitest documents that `vi.mock` calls are hoisted above the file's imports;
//   `vi.hoisted` is what makes the stubs available to the factories at that point.
// - The imports that follow the `vi.mock` calls bring in the command functions
//   the tests invoke.
// - The final `import {` on this line is completed on the following line.
import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; const { runQaManualLane, runQaSuiteFromRuntime, runQaCharacterEval, runQaMultipass, listTelegramQaScenarioCatalog, runTelegramQaLive, startQaLabServer, writeQaDockerHarnessFiles, buildQaDockerHarnessImage, runQaDockerUp, defaultQaRuntimeModelForMode, } = vi.hoisted(() => ({ runQaManualLane: vi.fn(), runQaSuiteFromRuntime: vi.fn(), runQaCharacterEval: vi.fn(), runQaMultipass: vi.fn(), listTelegramQaScenarioCatalog: vi.fn(), runTelegramQaLive: vi.fn(), startQaLabServer: vi.fn(), writeQaDockerHarnessFiles: vi.fn(), buildQaDockerHarnessImage: vi.fn(), runQaDockerUp: vi.fn(), defaultQaRuntimeModelForMode: vi.fn<(mode: string, options?: { alternate?: boolean }) => string>(), })); vi.mock("./manual-lane.runtime.js", () => ({ runQaManualLane, })); vi.mock("./suite-launch.runtime.js", () => ({ runQaSuiteFromRuntime, })); vi.mock("./character-eval.js", () => ({ runQaCharacterEval, })); vi.mock("./multipass.runtime.js", () => ({ runQaMultipass, })); vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({ listTelegramQaScenarioCatalog, runTelegramQaLive, })); vi.mock("./lab-server.js", () => ({ startQaLabServer, })); vi.mock("./docker-harness.js", () => ({ writeQaDockerHarnessFiles, buildQaDockerHarnessImage, })); vi.mock("./docker-up.runtime.js", () => ({ runQaDockerUp, })); vi.mock("./model-selection.runtime.js", () => ({ defaultQaRuntimeModelForMode, })); import { resolveRepoRelativeOutputDir } from "./cli-paths.js"; import { runQaLabSelfCheckCommand, runQaDockerBuildImageCommand, runQaDockerScaffoldCommand, runQaDockerUpCommand, runQaCharacterEvalCommand, runQaCoverageReportCommand, runQaJsonlReplayCommand, runQaManualLaneCommand, runQaParityReportCommand, runQaSuiteCommand, } from "./cli.runtime.js"; import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js"; import { 
defaultQaModelForMode as defaultQaProviderModelForMode } from "./model-selection.js";
import type { QaProviderModeInput } from "./run-config.js";

/**
 * Returns the first argument of the first recorded call on a mock.
 *
 * The mock is read structurally (`mock.mock.calls`), so any object with that
 * shape is accepted; a value without recorded calls is treated as "no calls".
 *
 * @param mock - Mock function (or look-alike) whose recorded calls are inspected.
 * @returns The first argument of the first call, viewed as a string-keyed record.
 * @throws Error when there is no first argument or it is not an object.
 */
function mockFirstObjectArg(mock: unknown): Record<string, unknown> {
  const calls = (mock as { mock?: { calls?: Array<Array<unknown>> } }).mock?.calls ?? [];
  const [arg] = calls[0] ?? [];
  if (!arg || typeof arg !== "object") {
    throw new Error("expected first mock object argument");
  }
  return arg as Record<string, unknown>;
}

/**
 * Asserts that `value` is an object whose properties deep-equal every entry of
 * `expected`. Properties of `value` that are not listed in `expected` are ignored.
 *
 * @param value - Object under inspection.
 * @param expected - Property names mapped to the values they must equal.
 * @throws Error when `value` is not an object.
 */
function expectFields(value: unknown, expected: Record<string, unknown>): void {
  if (!value || typeof value !== "object") {
    throw new Error("expected fields object");
  }
  const record = value as Record<string, unknown>;
  for (const [key, expectedValue] of Object.entries(expected)) {
    // The key doubles as the assertion message so a failure names the field.
    expect(record[key], key).toEqual(expectedValue);
  }
}

/**
 * Asserts that at least one recorded call on a write mock received a first
 * argument whose string form contains `fragment`.
 *
 * @param mock - Mock function (or look-alike) whose recorded calls are searched.
 * @param fragment - Substring that must appear in some call's first argument.
 */
function expectWriteContains(mock: unknown, fragment: string): void {
  const calls = (mock as { mock?: { calls?: Array<Array<unknown>> } }).mock?.calls ?? [];
  expect(
    calls.some(([value]) => String(value).includes(fragment)),
    `write contains ${fragment}`,
  ).toBe(true);
}

describe("qa cli runtime", () => {
  let stdoutWrite: ReturnType<typeof vi.spyOn>;
  let stderrWrite: ReturnType<typeof vi.spyOn>;
  let suiteArtifactsDir: string;
  let suiteReportPath: string;
  let suiteSummaryPath: string;

  beforeEach(async () => {
    // Fresh temp directory holding a report and an all-passing summary.
    suiteArtifactsDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-suite-runtime-"));
    suiteReportPath = path.join(suiteArtifactsDir, "qa-suite-report.md");
    suiteSummaryPath = path.join(suiteArtifactsDir, "qa-suite-summary.json");
    await fs.writeFile(suiteReportPath, "# QA Suite Report\n", "utf8");
    await fs.writeFile(
      suiteSummaryPath,
      JSON.stringify({
        counts: {
          total: 1,
          passed: 1,
          failed: 0,
        },
        scenarios: [],
      }),
      "utf8",
    );
    // Spy on both output streams so tests can inspect what the commands write.
    stdoutWrite = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    stderrWrite = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    runQaSuiteFromRuntime.mockReset();
    runQaCharacterEval.mockReset();
    runQaManualLane.mockReset();
    runQaMultipass.mockReset();
    listTelegramQaScenarioCatalog.mockReset();
    runTelegramQaLive.mockReset();
    startQaLabServer.mockReset();
writeQaDockerHarnessFiles.mockReset(); buildQaDockerHarnessImage.mockReset(); runQaDockerUp.mockReset(); defaultQaRuntimeModelForMode.mockImplementation( (mode: string, options?: { alternate?: boolean }) => defaultQaProviderModelForMode(mode as QaProviderModeInput, options), ); runQaSuiteFromRuntime.mockResolvedValue({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [], }); runQaCharacterEval.mockResolvedValue({ reportPath: "/tmp/character-report.md", summaryPath: "/tmp/character-summary.json", }); runQaManualLane.mockResolvedValue({ model: "openai/gpt-5.5", waited: { status: "ok" }, reply: "done", watchUrl: "http://127.0.0.1:43124", }); runQaMultipass.mockResolvedValue({ outputDir: "/tmp/multipass", reportPath: "/tmp/multipass/qa-suite-report.md", summaryPath: "/tmp/multipass/qa-suite-summary.json", hostLogPath: "/tmp/multipass/multipass-host.log", bootstrapLogPath: "/tmp/multipass/multipass-guest-bootstrap.log", guestScriptPath: "/tmp/multipass/multipass-guest-run.sh", vmName: "openclaw-qa-test", scenarioIds: ["channel-chat-baseline"], }); runTelegramQaLive.mockResolvedValue({ outputDir: "/tmp/telegram", reportPath: "/tmp/telegram/report.md", summaryPath: "/tmp/telegram/summary.json", observedMessagesPath: "/tmp/telegram/observed.json", scenarios: [], }); listTelegramQaScenarioCatalog.mockReturnValue([ { id: "telegram-status-command", title: "Telegram status command reply", defaultEnabled: true, rationale: "status rationale", regressionRefs: ["openclaw/openclaw#74698"], }, ]); startQaLabServer.mockResolvedValue({ baseUrl: "http://127.0.0.1:58000", runSelfCheck: vi.fn().mockResolvedValue({ outputPath: "/tmp/report.md", }), stop: vi.fn(), }); writeQaDockerHarnessFiles.mockResolvedValue({ outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker", }); buildQaDockerHarnessImage.mockResolvedValue({ imageName: "openclaw:qa-local-prebaked", }); runQaDockerUp.mockResolvedValue({ outputDir: 
"/tmp/openclaw-repo/.artifacts/qa-docker", qaLabUrl: "http://127.0.0.1:43124", gatewayUrl: "http://127.0.0.1:18789/", stopCommand: "docker compose down", }); }); afterEach(async () => { stdoutWrite.mockRestore(); stderrWrite.mockRestore(); vi.clearAllMocks(); await fs.rm(suiteArtifactsDir, { recursive: true, force: true }); }); it("resolves suite repo-root-relative paths before dispatching", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", outputDir: ".artifacts/qa/frontier", providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "anthropic/claude-sonnet-4-6", fastMode: true, thinking: "medium", scenarioIds: ["approval-turn-tool-followthrough"], }); expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/frontier"), transportId: "qa-channel", providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "anthropic/claude-sonnet-4-6", fastMode: true, thinkingDefault: "medium", scenarioIds: ["approval-turn-tool-followthrough"], }); }); it("passes explicit suite plugin enablements into the host gateway run", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "mock-openai", scenarioIds: ["channel-chat-baseline"], enabledPluginIds: ["browser", "memory-core"], }); expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: undefined, transportId: "qa-channel", providerMode: "mock-openai", primaryModel: undefined, alternateModel: undefined, fastMode: undefined, scenarioIds: ["channel-chat-baseline"], enabledPluginIds: ["browser", "memory-core"], }); }); it("passes runtime-pair suite selection through to the host runner", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "mock-openai", scenarioIds: ["approval-turn-tool-followthrough"], runtimePair: "pi,codex", }); 
expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: undefined, transportId: "qa-channel", providerMode: "mock-openai", primaryModel: undefined, alternateModel: undefined, fastMode: undefined, scenarioIds: ["approval-turn-tool-followthrough"], runtimePair: ["pi", "codex"], }); }); it("drops blank suite model refs so provider defaults apply", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "mock-openai", primaryModel: " ", alternateModel: "", scenarioIds: ["thread-memory-isolation"], }); expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: undefined, transportId: "qa-channel", providerMode: "mock-openai", primaryModel: undefined, alternateModel: undefined, fastMode: undefined, scenarioIds: ["thread-memory-isolation"], }); }); it("resolves telegram qa repo-root-relative paths before dispatching", async () => { await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", outputDir: ".artifacts/qa/telegram", providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "openai/gpt-5.5", fastMode: true, scenarioIds: ["telegram-help-command"], sutAccountId: "sut-live", }); expect(runTelegramQaLive).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/telegram"), providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "openai/gpt-5.5", fastMode: true, allowFailures: undefined, scenarioIds: ["telegram-help-command"], sutAccountId: "sut-live", }); }); it("rejects output dirs that escape the repo root", () => { expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow( "--output-dir must stay within the repo root.", ); expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside")).toThrow( "--output-dir must be a relative path inside the repo root.", ); }); 
it("defaults telegram qa runs onto the live provider lane", async () => { await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", scenarioIds: ["telegram-help-command"], }); expectFields(mockFirstObjectArg(runTelegramQaLive), { repoRoot: path.resolve("/tmp/openclaw-repo"), providerMode: "live-frontier", allowFailures: undefined, }); }); it("prints telegram scenario catalog without starting the live lane", async () => { await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "mock-openai", listScenarios: true, }); expect(listTelegramQaScenarioCatalog).toHaveBeenCalledWith("mock-openai"); expect(runTelegramQaLive).not.toHaveBeenCalled(); expectWriteContains( stdoutWrite, "telegram-status-command\tdefault\tTelegram status command reply\tstatus rationale refs=openclaw/openclaw#74698", ); }); it("sets a failing exit code when telegram scenarios fail", async () => { const priorExitCode = process.exitCode; process.exitCode = undefined; runTelegramQaLive.mockResolvedValueOnce({ outputDir: "/tmp/telegram", reportPath: "/tmp/telegram/report.md", summaryPath: "/tmp/telegram/summary.json", observedMessagesPath: "/tmp/telegram/observed.json", scenarios: [ { id: "telegram-help-command", title: "Telegram help command reply", status: "fail", details: "missing expected text", }, ], }); try { await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", }); expect(process.exitCode).toBe(1); } finally { process.exitCode = priorExitCode; } }); it("keeps telegram exit code clear when --allow-failures is set", async () => { const priorExitCode = process.exitCode; process.exitCode = undefined; runTelegramQaLive.mockResolvedValueOnce({ outputDir: "/tmp/telegram", reportPath: "/tmp/telegram/report.md", summaryPath: "/tmp/telegram/summary.json", observedMessagesPath: "/tmp/telegram/observed.json", scenarios: [ { id: "telegram-help-command", title: "Telegram help command reply", status: "fail", details: "missing expected text", }, ], }); try { await 
runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", allowFailures: true, }); expect(process.exitCode).toBeUndefined(); } finally { process.exitCode = priorExitCode; } }); it("passes host suite concurrency through", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", scenarioIds: ["channel-chat-baseline", "thread-follow-up"], concurrency: 3, }); expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), { repoRoot: path.resolve("/tmp/openclaw-repo"), transportId: "qa-channel", scenarioIds: ["channel-chat-baseline", "thread-follow-up"], concurrency: 3, }); }); it("sets a failing exit code when host suite scenarios fail", async () => { const priorExitCode = process.exitCode; process.exitCode = undefined; await fs.writeFile( suiteSummaryPath, JSON.stringify({ counts: { total: 1, passed: 0, failed: 1, }, scenarios: [{ name: "channel chat baseline", status: "fail" }], }), "utf8", ); runQaSuiteFromRuntime.mockResolvedValueOnce({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [ { name: "channel chat baseline", status: "fail", steps: [], }, ], }); try { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", }); expect(process.exitCode).toBe(1); } finally { process.exitCode = priorExitCode; } }); it("keeps host suite exit code clear when --allow-failures is set", async () => { const priorExitCode = process.exitCode; process.exitCode = undefined; await fs.writeFile( suiteSummaryPath, JSON.stringify({ counts: { total: 1, passed: 0, failed: 1, }, scenarios: [{ name: "channel chat baseline", status: "fail" }], }), "utf8", ); runQaSuiteFromRuntime.mockResolvedValueOnce({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [ { name: "channel chat baseline", status: "fail", steps: [], }, ], }); try { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", allowFailures: true, }); expect(process.exitCode).toBeUndefined(); } finally { 
process.exitCode = priorExitCode; } }); it("retries host suite runs once for retryable infra failures", async () => { runQaSuiteFromRuntime .mockRejectedValueOnce(new Error("agent.wait timeout while waiting for transport ready")) .mockResolvedValueOnce({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [], }); await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", }); expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2); expectWriteContains(stderrWrite, "[qa-suite] infra retry 1/1: agent.wait timeout"); }); it("retries host suite runs once for qa-channel readiness timeouts", async () => { runQaSuiteFromRuntime .mockRejectedValueOnce( new Error( "timed out after 180000ms waiting for qa-channel ready; last status: no qa-channel accounts reported", ), ) .mockResolvedValueOnce({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [], }); await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", }); expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2); expectWriteContains( stderrWrite, "[qa-suite] infra retry 1/1: timed out after 180000ms waiting for qa-channel ready", ); }); it("does not retry host suite runs for generic timeout wording", async () => { runQaSuiteFromRuntime.mockRejectedValueOnce( new Error("approval-turn timed out waiting for post-approval read"), ); await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", }), ).rejects.toThrow("approval-turn timed out waiting for post-approval read"); expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1); }); it("does not retry host suite runs for semantic failures", async () => { const priorExitCode = process.exitCode; process.exitCode = undefined; await fs.writeFile( suiteSummaryPath, JSON.stringify({ counts: { total: 1, passed: 0, failed: 1, }, scenarios: [{ name: "channel chat baseline", status: "fail" }], }), "utf8", ); runQaSuiteFromRuntime.mockResolvedValueOnce({ watchUrl: 
"http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [ { name: "channel chat baseline", status: "fail", steps: [], }, ], }); try { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", }); expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1); expect(process.exitCode).toBe(1); } finally { process.exitCode = priorExitCode; } }); it("runs a host-only parity preflight against the sentinel scenario", async () => { const repoRoot = path.resolve("/tmp/openclaw-repo"); await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", alternateModel: "anthropic/claude-opus-4-7", preflight: true, }); const preflightArgs = mockFirstObjectArg(runQaSuiteFromRuntime); expectFields(preflightArgs, { repoRoot, transportId: "qa-channel", providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", alternateModel: "anthropic/claude-opus-4-7", scenarioIds: ["approval-turn-tool-followthrough"], concurrency: 1, }); expect(String(preflightArgs.outputDir)).toContain( path.join(repoRoot, ".artifacts", "qa-e2e", "preflight", "suite-"), ); expectWriteContains(stdoutWrite, "QA parity preflight summary:"); }); it("throws when parity preflight finds a failing sentinel scenario", async () => { await fs.writeFile( suiteSummaryPath, JSON.stringify({ counts: { total: 1, passed: 0, failed: 1, }, scenarios: [{ name: "approval turn tool followthrough", status: "fail" }], }), "utf8", ); runQaSuiteFromRuntime.mockResolvedValueOnce({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [{ name: "approval turn tool followthrough", status: "fail", steps: [] }], }); await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", preflight: true, }), ).rejects.toThrow("QA parity preflight failed with 1 failing scenario."); }); it("keeps parity preflight exit code clear when --allow-failures is set", async () => { const priorExitCode = process.exitCode; 
process.exitCode = undefined; await fs.writeFile( suiteSummaryPath, JSON.stringify({ counts: { total: 1, passed: 0, failed: 1, }, scenarios: [{ name: "approval turn tool followthrough", status: "fail" }], }), "utf8", ); runQaSuiteFromRuntime.mockResolvedValueOnce({ watchUrl: "http://127.0.0.1:43124", reportPath: suiteReportPath, summaryPath: suiteSummaryPath, scenarios: [{ name: "approval turn tool followthrough", status: "fail", steps: [] }], }); try { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", preflight: true, allowFailures: true, }); expect(process.exitCode).toBeUndefined(); } finally { process.exitCode = priorExitCode; } }); it("rejects preflight on the multipass runner", async () => { await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass", preflight: true, }), ).rejects.toThrow("--preflight requires --runner host."); }); it("passes host suite CLI auth mode through", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "live-frontier", primaryModel: "claude-cli/claude-sonnet-4-6", alternateModel: "claude-cli/claude-sonnet-4-6", cliAuthMode: "subscription", scenarioIds: ["claude-cli-provider-capabilities-subscription"], }); expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), { repoRoot: path.resolve("/tmp/openclaw-repo"), providerMode: "live-frontier", primaryModel: "claude-cli/claude-sonnet-4-6", alternateModel: "claude-cli/claude-sonnet-4-6", claudeCliAuthMode: "subscription", scenarioIds: ["claude-cli-provider-capabilities-subscription"], }); }); it("expands the agentic parity pack onto the suite scenario list", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", parityPack: "agentic", scenarioIds: ["channel-chat-baseline"], }); expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), { repoRoot: path.resolve("/tmp/openclaw-repo"), scenarioIds: [ "channel-chat-baseline", "approval-turn-tool-followthrough", "model-switch-tool-continuity", 
"source-docs-discovery-report", "image-understanding-attachment", "compaction-retry-mutating-tool", "subagent-handoff", "subagent-fanout-synthesis", "subagent-stale-child-links", "memory-recall", "thread-memory-isolation", "config-restart-capability-flip", "instruction-followthrough-repo-contract", ], }); }); it("expands the personal-agent pack onto the suite scenario list", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", pack: "personal-agent", scenarioIds: ["channel-chat-baseline"], }); expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), { repoRoot: path.resolve("/tmp/openclaw-repo"), scenarioIds: [ "channel-chat-baseline", "personal-reminder-roundtrip", "personal-channel-thread-reply", "personal-memory-preference-recall", "personal-redaction-no-secret-leak", "personal-tool-safety-followthrough", "personal-approval-denial-stop", "personal-task-followthrough-status", "personal-share-safe-diagnostics-artifact", "personal-no-fake-progress", "personal-failure-recovery", ], }); }); it("expands runtime parity tier selections onto the suite scenario list", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runtimeParityTier: ["standard"], scenarioIds: ["channel-chat-baseline", "runtime-tool-bash"], }); expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), { repoRoot: path.resolve("/tmp/openclaw-repo"), scenarioIds: [ "channel-chat-baseline", "runtime-tool-bash", "auth-profile-codex-mixed-profiles", "auth-profile-doctor-migration-safety", "codex-plugin-cold-install", "codex-plugin-install-race", "codex-plugin-pinned-new", "codex-plugin-pinned-old", "runtime-first-hour-20-turn", "runtime-tool-apply-patch", "runtime-tool-edit", "runtime-tool-exec", "runtime-tool-fs-list", "runtime-tool-fs-read", "runtime-tool-fs-write", "runtime-tool-grep", "runtime-tool-image-generate", "runtime-tool-session-status", "runtime-tool-sessions-spawn", "runtime-tool-web-fetch", "runtime-tool-web-search", ], }); }); it("accepts comma-separated 
runtime parity tier filters", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runtimeParityTier: ["optional,soak"], }); expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), { scenarioIds: [ "runtime-soak-100-turn", "runtime-tool-memory-add", "runtime-tool-memory-recall", "runtime-tool-message-tool", "runtime-tool-skill-invocation", "runtime-tool-tavily-extract", "runtime-tool-tavily-search", "runtime-tool-tts", ], }); }); it("rejects unknown runtime parity tier filters", async () => { await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runtimeParityTier: ["standardish"], }), ).rejects.toThrow( '--runtime-parity-tier must be one of standard, optional, live-only, soak, got "standardish".', ); }); it("rejects unknown suite packs", async () => { await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", pack: "personal-admin", }), ).rejects.toThrow('--pack must be one of personal-agent, observability, got "personal-admin"'); }); it("rejects unknown suite CLI auth modes", async () => { await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", cliAuthMode: "magic", }), ).rejects.toThrow("--cli-auth-mode must be one of auto, api-key, subscription"); }); it("sets a failing exit code when the parity gate fails", async () => { const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-parity-")); const priorExitCode = process.exitCode; process.exitCode = undefined; try { await fs.writeFile( path.join(repoRoot, "candidate.json"), JSON.stringify({ scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }], }), "utf8", ); await fs.writeFile( path.join(repoRoot, "baseline.json"), JSON.stringify({ scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }], }), "utf8", ); await runQaParityReportCommand({ repoRoot, candidateSummary: "candidate.json", baselineSummary: "baseline.json", }); expect(process.exitCode).toBe(1); } finally { process.exitCode = priorExitCode; await fs.rm(repoRoot, { 
recursive: true, force: true }); } }); it("writes a runtime-axis parity report from one summary", async () => { const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-runtime-parity-")); const priorExitCode = process.exitCode; process.exitCode = undefined; try { await fs.writeFile( path.join(repoRoot, "runtime-summary.json"), JSON.stringify({ scenarios: [ { name: "Approval turn tool followthrough", status: "fail", steps: [], runtimeParity: { scenarioId: "approval-turn-tool-followthrough", drift: "tool-call-shape", driftDetails: "tool call 1 differs", cells: { pi: { runtime: "pi", transcriptBytes: '{"role":"assistant"}\n', toolCalls: [{ tool: "read_file", argsHash: "a", resultHash: "r" }], finalText: "done", usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }, wallClockMs: 10, bootStateLines: [], }, codex: { runtime: "codex", transcriptBytes: '{"role":"assistant"}\n', toolCalls: [{ tool: "read_file", argsHash: "b", resultHash: "r" }], finalText: "done", usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }, wallClockMs: 10, runtimeErrorClass: "tool-error", bootStateLines: [], }, }, }, }, ], counts: { total: 1, passed: 0, failed: 1 }, run: { providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", runtimePair: ["pi", "codex"], }, }), "utf8", ); await runQaParityReportCommand({ repoRoot, runtimeAxis: true, summary: "runtime-summary.json", }); expect(process.exitCode).toBe(1); expect(stdoutWrite).toHaveBeenCalledWith( expect.stringContaining("QA runtime parity report:"), ); expect(stdoutWrite).toHaveBeenCalledWith( expect.stringContaining("QA runtime parity verdict: fail"), ); } finally { process.exitCode = priorExitCode; await fs.rm(repoRoot, { recursive: true, force: true }); } }); it("writes a runtime-axis token-efficiency report when requested", async () => { const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-runtime-token-efficiency-")); const priorExitCode = process.exitCode; process.exitCode = undefined; try { await fs.writeFile( 
path.join(repoRoot, "runtime-summary.json"), JSON.stringify({ scenarios: [ { name: "runtime-tool-fs-read", status: "pass", steps: [], runtimeParity: { scenarioId: "runtime-tool-fs-read", drift: "none", cells: { pi: { runtime: "pi", transcriptBytes: '{"role":"assistant"}\n', toolCalls: [{ tool: "fs.read", argsHash: "a", resultHash: "r" }], finalText: "done", usage: { inputTokens: 72_000, outputTokens: 381, totalTokens: 72_381 }, wallClockMs: 10, bootStateLines: [], }, codex: { runtime: "codex", transcriptBytes: '{"role":"assistant"}\n', toolCalls: Array.from({ length: 40 }, (_, index) => ({ tool: "fs.read", argsHash: `a-${index}`, resultHash: `r-${index}`, })), finalText: "done", usage: { inputTokens: 118_000, outputTokens: 1_489, totalTokens: 119_489 }, wallClockMs: 10, bootStateLines: [], }, }, }, }, ], counts: { total: 1, passed: 1, failed: 0 }, run: { providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", runtimePair: ["pi", "codex"], }, }), "utf8", ); await runQaParityReportCommand({ repoRoot, runtimeAxis: true, summary: "runtime-summary.json", tokenEfficiency: true, }); expect(process.exitCode).toBe(1); expect(stdoutWrite).toHaveBeenCalledWith( expect.stringContaining("QA runtime parity verdict: pass"), ); expect(stdoutWrite).toHaveBeenCalledWith( expect.stringContaining("QA runtime token efficiency report:"), ); expect(stdoutWrite).toHaveBeenCalledWith( expect.stringContaining("QA runtime token efficiency verdict: fail"), ); const [artifactDir] = await fs.readdir(path.join(repoRoot, ".artifacts", "qa-e2e")); const tokenSummary = JSON.parse( await fs.readFile( path.join( repoRoot, ".artifacts", "qa-e2e", artifactDir ?? 
"", "qa-runtime-token-efficiency-summary.json", ), "utf8", ), ) as { aggregate?: { flaggedScenarios?: string[] } }; expect(tokenSummary.aggregate?.flaggedScenarios).toEqual(["runtime-tool-fs-read"]); } finally { process.exitCode = priorExitCode; await fs.rm(repoRoot, { recursive: true, force: true }); } }); it("rejects token-efficiency without runtime-axis mode", async () => { await expect( runQaParityReportCommand({ repoRoot: process.cwd(), candidateSummary: "candidate.json", baselineSummary: "baseline.json", tokenEfficiency: true, }), ).rejects.toThrow("--token-efficiency requires --runtime-axis."); }); it("prints a markdown coverage report from scenario metadata", async () => { await runQaCoverageReportCommand({ repoRoot: process.cwd() }); expectWriteContains(stdoutWrite, "# QA Coverage Inventory"); expectWriteContains(stdoutWrite, "memory.recall"); }); it("prints a focused scenario match report from coverage metadata", async () => { await runQaCoverageReportCommand({ repoRoot: process.cwd(), match: ["image roundtrip"], }); expectWriteContains(stdoutWrite, "# QA Scenario Matches"); expectWriteContains(stdoutWrite, "image-generation-roundtrip"); expectWriteContains(stdoutWrite, "--scenario image-generation-roundtrip"); expect(stdoutWrite.mock.calls.flat().join("")).not.toContain("memory-recall"); }); it("rejects scenario match queries for tool coverage reports", async () => { await expect( runQaCoverageReportCommand({ repoRoot: process.cwd(), tools: true, match: ["runtime"], }), ).rejects.toThrow("--match cannot be combined with --tools."); }); it("prints a markdown tool coverage report from runtime tool fixtures", async () => { await runQaCoverageReportCommand({ repoRoot: process.cwd(), tools: true }); expectWriteContains(stdoutWrite, "# OpenClaw Runtime Tool Coverage"); expectWriteContains(stdoutWrite, "codex-native-workspace"); }); it("writes a curated mock JSONL replay report and summary", async () => { const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), 
"qa-jsonl-replay-cli-")); try { await runQaJsonlReplayCommand({ repoRoot, transcripts: path.resolve("qa/scenarios/jsonl-replay"), outputDir: "jsonl-output", runtimePair: "pi,codex", }); const report = await fs.readFile( path.join(repoRoot, "jsonl-output", "qa-jsonl-replay-report.md"), "utf8", ); const summary = JSON.parse( await fs.readFile( path.join(repoRoot, "jsonl-output", "qa-jsonl-replay-summary.json"), "utf8", ), ) as { transcripts?: Array<{ userTurnCount?: number }> }; expect(report).toContain("# OpenClaw JSONL Replay Report - pi vs codex"); expect(report).toContain("| plan-mode-boundaries.jsonl | 3 | | none, none, none |"); expect(summary.transcripts).toHaveLength(7); } finally { await fs.rm(repoRoot, { recursive: true, force: true }); } }); it("keeps JSONL replay mock-only until real runtime cell replay is wired", async () => { await expect( runQaJsonlReplayCommand({ repoRoot: process.cwd(), providerMode: "live-frontier", }), ).rejects.toThrow("qa jsonl-replay currently supports mock-openai curated fixtures only."); }); it("exits nonzero when tool coverage summary is missing a required runtime tool call", async () => { const priorExitCode = process.exitCode; const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-tool-coverage-")); try { await fs.writeFile( path.join(repoRoot, "runtime-summary.json"), JSON.stringify({ scenarios: [ { name: "runtime-tool-web-search", status: "fail", runtimeParity: { scenarioId: "runtime-tool-web-search", drift: "tool-call-shape", driftDetails: "Codex emitted no web_search call", cells: { pi: { runtime: "pi", transcriptBytes: "", toolCalls: [{ tool: "web_search", argsHash: "a", resultHash: "r" }], finalText: "", usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, wallClockMs: 1, bootStateLines: [], }, codex: { runtime: "codex", transcriptBytes: "", toolCalls: [], finalText: "", usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, wallClockMs: 1, bootStateLines: [], }, }, }, }, ], run: { runtimePair: 
["pi", "codex"] }, }), "utf8", ); await runQaCoverageReportCommand({ repoRoot, tools: true, summary: "runtime-summary.json", }); expect(process.exitCode).toBe(1); expectWriteContains(stdoutWrite, "- Verdict: fail"); expectWriteContains(stdoutWrite, "web-search missing codex tool call web_search"); } finally { process.exitCode = priorExitCode; await fs.rm(repoRoot, { recursive: true, force: true }); } }); it("resolves character eval paths and passes model refs through", async () => { await runQaCharacterEvalCommand({ repoRoot: "/tmp/openclaw-repo", outputDir: ".artifacts/qa/character", model: [ "openai/gpt-5.5,thinking=xhigh,fast=false", "codex-cli/test-model,thinking=high,fast", ], scenario: "character-vibes-gollum", fast: true, thinking: "medium", modelThinking: ["codex-cli/test-model=medium"], judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-7,thinking=high"], judgeTimeoutMs: 180_000, blindJudgeModels: true, concurrency: 4, judgeConcurrency: 3, }); const characterEvalArgs = mockFirstObjectArg(runQaCharacterEval); expect(typeof characterEvalArgs.progress).toBe("function"); expectFields(characterEvalArgs, { repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/character"), models: ["openai/gpt-5.5", "codex-cli/test-model"], scenarioId: "character-vibes-gollum", candidateFastMode: true, candidateThinkingDefault: "medium", candidateThinkingByModel: { "codex-cli/test-model": "medium" }, candidateModelOptions: { "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false }, "codex-cli/test-model": { thinkingDefault: "high", fastMode: true }, }, judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"], judgeModelOptions: { "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true }, "anthropic/claude-opus-4-7": { thinkingDefault: "high" }, }, judgeTimeoutMs: 180_000, judgeBlindModels: true, candidateConcurrency: 4, judgeConcurrency: 3, }); }); it("lets character eval auto-select 
candidate fast mode when --fast is omitted", async () => { await runQaCharacterEvalCommand({ repoRoot: "/tmp/openclaw-repo", model: ["openai/gpt-5.5"], }); const characterEvalArgs = mockFirstObjectArg(runQaCharacterEval); expect(typeof characterEvalArgs.progress).toBe("function"); expectFields(characterEvalArgs, { repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: undefined, models: ["openai/gpt-5.5"], scenarioId: undefined, candidateFastMode: undefined, candidateThinkingDefault: undefined, candidateThinkingByModel: undefined, candidateModelOptions: undefined, judgeModels: undefined, judgeModelOptions: undefined, judgeTimeoutMs: undefined, judgeBlindModels: undefined, candidateConcurrency: undefined, judgeConcurrency: undefined, }); });

  // Four malformed inputs, checked one after another; each must reject with its
  // own validation message.
  it("rejects invalid character eval thinking levels", async () => {
    const repoRootInput = "/tmp/openclaw-repo";

    // Unknown level passed through --thinking.
    const badGlobalThinking = runQaCharacterEvalCommand({
      repoRoot: repoRootInput,
      model: ["openai/gpt-5.5"],
      thinking: "enormous",
    });
    await expect(badGlobalThinking).rejects.toThrow("--thinking must be one of");

    // Unknown level embedded in a --model ref.
    const badInlineThinking = runQaCharacterEvalCommand({
      repoRoot: repoRootInput,
      model: ["openai/gpt-5.5,thinking=galaxy"],
    });
    await expect(badInlineThinking).rejects.toThrow("--model thinking must be one of");

    // Unrecognised option token embedded in a --model ref.
    const badInlineOption = runQaCharacterEvalCommand({
      repoRoot: repoRootInput,
      model: ["openai/gpt-5.5,warp"],
    });
    await expect(badInlineOption).rejects.toThrow("--model options must be thinking=");

    // --model-thinking entry without the "=level" part.
    const badModelThinking = runQaCharacterEvalCommand({
      repoRoot: repoRootInput,
      model: ["openai/gpt-5.5"],
      modelThinking: ["openai/gpt-5.5"],
    });
    await expect(badModelThinking).rejects.toThrow("--model-thinking must use provider/model=level");
  });

  it("passes the explicit repo root into manual runs", async () => { await runQaManualLaneCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "openai/gpt-5.5", fastMode: true, message: "read qa kickoff and reply short", timeoutMs: 45_000, }); expect(runQaManualLane).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), transportId: "qa-channel", providerMode:
"live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "openai/gpt-5.5", fastMode: true, message: "read qa kickoff and reply short", timeoutMs: 45_000, }); }); it("routes suite runs through multipass when the runner is selected", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", outputDir: ".artifacts/qa-multipass", runner: "multipass", providerMode: "mock-openai", scenarioIds: ["channel-chat-baseline"], allowFailures: true, concurrency: 3, image: "lts", cpus: 2, memory: "4G", disk: "24G", }); expect(runQaMultipass).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-multipass"), transportId: "qa-channel", providerMode: "mock-openai", primaryModel: undefined, alternateModel: undefined, fastMode: undefined, allowFailures: true, scenarioIds: ["channel-chat-baseline"], concurrency: 3, image: "lts", cpus: 2, memory: "4G", disk: "24G", }); expect(runQaSuiteFromRuntime).not.toHaveBeenCalled(); }); it("passes runtime-pair suite selection through to the multipass runner", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass", providerMode: "mock-openai", scenarioIds: ["approval-turn-tool-followthrough"], runtimePair: "codex,pi", allowFailures: true, }); expect(runQaMultipass).toHaveBeenCalledWith( expect.objectContaining({ repoRoot: path.resolve("/tmp/openclaw-repo"), runtimePair: ["pi", "codex"], }), ); }); it("passes live suite selection through to the multipass runner", async () => { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass", providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", alternateModel: "openai/gpt-5.5", fastMode: true, allowFailures: true, scenarioIds: ["channel-chat-baseline"], }); expectFields(mockFirstObjectArg(runQaMultipass), { repoRoot: path.resolve("/tmp/openclaw-repo"), transportId: "qa-channel", providerMode: "live-frontier", primaryModel: "openai/gpt-5.5", 
alternateModel: "openai/gpt-5.5", fastMode: true, allowFailures: true, scenarioIds: ["channel-chat-baseline"], }); });

  // Points the mocked multipass result at a summary file whose counts report one
  // failed scenario, then checks that the suite command sets process.exitCode to 1.
  it("sets a failing exit code when multipass summary reports failed scenarios", async () => {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
    const inTempDir = (fileName: string): string => path.join(tempDir, fileName);
    const summaryFile = inTempDir("qa-suite-summary.json");
    const summaryBody = JSON.stringify({
      counts: {
        total: 2,
        passed: 1,
        failed: 1,
      },
    });
    await fs.writeFile(summaryFile, summaryBody, "utf8");

    runQaMultipass.mockResolvedValueOnce({
      outputDir: tempDir,
      reportPath: inTempDir("qa-suite-report.md"),
      summaryPath: summaryFile,
      hostLogPath: inTempDir("multipass-host.log"),
      bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
      guestScriptPath: inTempDir("multipass-guest-run.sh"),
      vmName: "openclaw-qa-test",
      scenarioIds: ["channel-chat-baseline"],
    });

    // Start from a cleared exit code and put the previous value back afterwards.
    const exitCodeBefore = process.exitCode;
    process.exitCode = undefined;
    try {
      await runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
      });
      expect(process.exitCode).toBe(1);
    } finally {
      process.exitCode = exitCodeBefore;
      await fs.rm(tempDir, { recursive: true, force: true });
    }
  });

  it("rejects malformed multipass summary JSON", async () => { const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-")); const summaryPath = path.join(repoRoot, "qa-suite-summary.json"); await fs.writeFile(summaryPath, "{not-json", "utf8"); runQaMultipass.mockResolvedValueOnce({ outputDir: repoRoot, reportPath: path.join(repoRoot, "qa-suite-report.md"), summaryPath, hostLogPath: path.join(repoRoot, "multipass-host.log"), bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"), guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"), vmName: "openclaw-qa-test", scenarioIds: ["channel-chat-baseline"], }); try { await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass", }), ).rejects.toThrow("Could not parse QA summary JSON"); } finally { await
fs.rm(repoRoot, { recursive: true, force: true }); } });

  // The summary path reported by the mocked multipass run is never written, so
  // the suite command must reject with the "Could not read" message.
  it("rejects unreadable multipass summary JSON with read/parse wording", async () => {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
    const inTempDir = (fileName: string): string => path.join(tempDir, fileName);
    const summaryFile = inTempDir("qa-suite-summary.json");

    runQaMultipass.mockResolvedValueOnce({
      outputDir: tempDir,
      reportPath: inTempDir("qa-suite-report.md"),
      summaryPath: summaryFile,
      hostLogPath: inTempDir("multipass-host.log"),
      bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
      guestScriptPath: inTempDir("multipass-guest-run.sh"),
      vmName: "openclaw-qa-test",
      scenarioIds: ["channel-chat-baseline"],
    });

    try {
      const suiteRun = runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
      });
      await expect(suiteRun).rejects.toThrow("Could not read QA summary JSON");
    } finally {
      await fs.rm(tempDir, { recursive: true, force: true });
    }
  });

  // The summary file has counts without a "failed" field and no scenarios array,
  // so the suite command must reject rather than treat the run as passing.
  it("rejects partial multipass summary JSON without failure fields", async () => {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
    const inTempDir = (fileName: string): string => path.join(tempDir, fileName);
    const summaryFile = inTempDir("qa-suite-summary.json");
    const summaryBody = JSON.stringify({ counts: { total: 2, passed: 2 } });
    await fs.writeFile(summaryFile, summaryBody, "utf8");

    runQaMultipass.mockResolvedValueOnce({
      outputDir: tempDir,
      reportPath: inTempDir("qa-suite-report.md"),
      summaryPath: summaryFile,
      hostLogPath: inTempDir("multipass-host.log"),
      bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
      guestScriptPath: inTempDir("multipass-guest-run.sh"),
      vmName: "openclaw-qa-test",
      scenarioIds: ["channel-chat-baseline"],
    });

    try {
      const suiteRun = runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
      });
      await expect(suiteRun).rejects.toThrow("did not include counts.failed or scenarios[].status");
    } finally {
      await fs.rm(tempDir, { recursive: true, force: true });
    }
  });

  it("keeps multipass exit code clear when --allow-failures is set", async () => { const repoRoot = await
fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-")); const summaryPath = path.join(repoRoot, "qa-suite-summary.json"); await fs.writeFile( summaryPath, JSON.stringify({ counts: { total: 2, passed: 1, failed: 1, }, }), "utf8", ); runQaMultipass.mockResolvedValueOnce({ outputDir: repoRoot, reportPath: path.join(repoRoot, "qa-suite-report.md"), summaryPath, hostLogPath: path.join(repoRoot, "multipass-host.log"), bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"), guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"), vmName: "openclaw-qa-test", scenarioIds: ["channel-chat-baseline"], }); const priorExitCode = process.exitCode; process.exitCode = undefined; try { await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass", allowFailures: true, }); expect(process.exitCode).toBeUndefined(); } finally { process.exitCode = priorExitCode; await fs.rm(repoRoot, { recursive: true, force: true }); } });

  // With parityPack "agentic" and no explicit scenario ids, the host runner is
  // expected to receive the scenario list below, with the provider-qualified
  // model refs passed through unchanged.
  it("passes provider-qualified mock parity suite selection through to the host runner", async () => {
    const repoRootInput = "/tmp/openclaw-repo";

    await runQaSuiteCommand({
      repoRoot: repoRootInput,
      providerMode: "mock-openai",
      parityPack: "agentic",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-opus-4-7",
    });

    const expectedScenarioIds = [
      "approval-turn-tool-followthrough",
      "model-switch-tool-continuity",
      "source-docs-discovery-report",
      "image-understanding-attachment",
      "compaction-retry-mutating-tool",
      "subagent-handoff",
      "subagent-fanout-synthesis",
      "subagent-stale-child-links",
      "memory-recall",
      "thread-memory-isolation",
      "config-restart-capability-flip",
      "instruction-followthrough-repo-contract",
    ];
    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      outputDir: undefined,
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "anthropic/claude-opus-4-7",
      fastMode: undefined,
      scenarioIds: expectedScenarioIds,
    };
    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(expectedCall);
  });

  it("rejects multipass-only suite flags on the host runner", async () => {
await expect( runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "host", image: "lts", }), ).rejects.toThrow("--image, --cpus, --memory, and --disk require --runner multipass."); });

  // No models given with providerMode "mock-openai": the manual lane is expected
  // to receive the mock-openai primary and "-alt" alternate model refs.
  it("defaults manual mock runs onto the mock-openai model lane", async () => {
    const repoRootInput = "/tmp/openclaw-repo";
    const prompt = "read qa kickoff and reply short";

    await runQaManualLaneCommand({
      repoRoot: repoRootInput,
      providerMode: "mock-openai",
      message: prompt,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "mock-openai/gpt-5.5",
      alternateModel: "mock-openai/gpt-5.5-alt",
      fastMode: undefined,
      message: prompt,
      timeoutMs: undefined,
    };
    expect(runQaManualLane).toHaveBeenCalledWith(expectedCall);
  });

  // Same shape as the mock-openai case, for providerMode "aimock".
  it("defaults manual aimock runs onto the aimock model lane", async () => {
    const repoRootInput = "/tmp/openclaw-repo";
    const prompt = "read qa kickoff and reply short";

    await runQaManualLaneCommand({
      repoRoot: repoRootInput,
      providerMode: "aimock",
      message: prompt,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      transportId: "qa-channel",
      providerMode: "aimock",
      primaryModel: "aimock/gpt-5.5",
      alternateModel: "aimock/gpt-5.5-alt",
      fastMode: undefined,
      message: prompt,
      timeoutMs: undefined,
    };
    expect(runQaManualLane).toHaveBeenCalledWith(expectedCall);
  });

  // With no providerMode at all, the expected call uses "live-frontier" and the
  // same model ref for both primary and alternate.
  it("defaults manual frontier runs onto the frontier model lane", async () => {
    const repoRootInput = "/tmp/openclaw-repo";
    const prompt = "read qa kickoff and reply short";

    await runQaManualLaneCommand({
      repoRoot: repoRootInput,
      message: prompt,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: undefined,
      message: prompt,
      timeoutMs: undefined,
    };
    expect(runQaManualLane).toHaveBeenCalledWith(expectedCall);
  });

  it("keeps an explicit manual primary model as the alternate default", async () => { await runQaManualLaneCommand({ repoRoot: "/tmp/openclaw-repo", providerMode: "live-frontier", primaryModel: "anthropic/claude-sonnet-4-6", message: "read qa
kickoff and reply short", }); expect(runQaManualLane).toHaveBeenCalledWith({ repoRoot: path.resolve("/tmp/openclaw-repo"), transportId: "qa-channel", providerMode: "live-frontier", primaryModel: "anthropic/claude-sonnet-4-6", alternateModel: "anthropic/claude-sonnet-4-6", fastMode: undefined, message: "read qa kickoff and reply short", timeoutMs: undefined, }); });

  // Stubs the runtime model resolver: "live-frontier" yields a fixed model ref,
  // every other mode falls back to the real provider default. The manual lane is
  // then expected to receive that ref as both primary and alternate model.
  it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => {
    defaultQaRuntimeModelForMode.mockImplementation((mode, options) => {
      if (mode === "live-frontier") {
        return "openai/gpt-5.5";
      }
      return defaultQaProviderModelForMode(mode as QaProviderModeInput, options);
    });
    const repoRootInput = "/tmp/openclaw-repo";
    const prompt = "read qa kickoff and reply short";

    await runQaManualLaneCommand({
      repoRoot: repoRootInput,
      message: prompt,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      transportId: "qa-channel",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.5",
      alternateModel: "openai/gpt-5.5",
      fastMode: undefined,
      message: prompt,
      timeoutMs: undefined,
    };
    expect(runQaManualLane).toHaveBeenCalledWith(expectedCall);
  });

  // The relative self-check output path must be resolved against the explicit
  // repo root before it is handed to the mocked lab server starter.
  it("resolves self-check repo-root-relative paths before starting the lab server", async () => {
    const repoRootInput = "/tmp/openclaw-repo";
    const outputInput = ".artifacts/qa/self-check.md";

    await runQaLabSelfCheckCommand({
      repoRoot: repoRootInput,
      output: outputInput,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      outputPath: path.resolve(repoRootInput, outputInput),
    };
    expect(startQaLabServer).toHaveBeenCalledWith(expectedCall);
  });

  it("resolves docker scaffold paths relative to the explicit repo root", async () => { await runQaDockerScaffoldCommand({ repoRoot: "/tmp/openclaw-repo", outputDir: ".artifacts/qa-docker", providerBaseUrl: "http://127.0.0.1:44080/v1", usePrebuiltImage: true, }); expect(writeQaDockerHarnessFiles).toHaveBeenCalledWith({ outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-docker"), repoRoot: path.resolve("/tmp/openclaw-repo"), gatewayPort: undefined, qaLabPort: undefined, providerBaseUrl: "http://127.0.0.1:44080/v1",
imageName: undefined, usePrebuiltImage: true, }); });

  // The image build command must forward the resolved repo root and pass the
  // --image value on as imageName.
  it("passes the explicit repo root into docker image builds", async () => {
    const repoRootInput = "/tmp/openclaw-repo";
    const imageTag = "openclaw:qa-local-prebaked";

    await runQaDockerBuildImageCommand({
      repoRoot: repoRootInput,
      image: imageTag,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      imageName: imageTag,
    };
    expect(buildQaDockerHarnessImage).toHaveBeenCalledWith(expectedCall);
  });

  // The docker-up output directory must be resolved against the explicit repo
  // root; options that were not supplied are expected to arrive as undefined.
  it("resolves docker up paths relative to the explicit repo root", async () => {
    const repoRootInput = "/tmp/openclaw-repo";
    const outputDirInput = ".artifacts/qa-up";

    await runQaDockerUpCommand({
      repoRoot: repoRootInput,
      outputDir: outputDirInput,
      usePrebuiltImage: true,
      skipUiBuild: true,
    });

    const expectedCall = {
      repoRoot: path.resolve(repoRootInput),
      outputDir: path.resolve(repoRootInput, outputDirInput),
      gatewayPort: undefined,
      qaLabPort: undefined,
      providerBaseUrl: undefined,
      image: undefined,
      usePrebuiltImage: true,
      skipUiBuild: true,
    };
    expect(runQaDockerUp).toHaveBeenCalledWith(expectedCall);
  });
});