// QA Lab plugin module: registers the `qa` CLI commands and lazily loads their runtime.
import type { Command } from "commander";
import { parseStrictPositiveInteger } from "openclaw/plugin-sdk/number-runtime";
import { collectString } from "./cli-options.js";
import type {
  QaLabSelfCheckCommandOptions,
  QaProfileCommandOptions,
  QaSuiteCommandOptions,
} from "./cli.runtime.js";
import { listLiveTransportQaCliRegistrations } from "./live-transports/cli.js";
import { registerMantisCli } from "./mantis/cli.js";
import {
  DEFAULT_QA_LIVE_PROVIDER_MODE,
  formatQaProviderModeHelp,
  listQaStandaloneProviderCommands,
} from "./providers/index.js";
import {
  QA_FRONTIER_PARITY_BASELINE_LABEL,
  QA_FRONTIER_PARITY_CANDIDATE_LABEL,
} from "./providers/live-frontier/parity.js";
import type { QaProviderMode, QaProviderModeInput } from "./run-config.js";
import { hasQaScenarioPack } from "./scenario-catalog.js";

/** Shape of the dynamically imported runtime module that implements the commands. */
type QaLabCliRuntime = typeof import("./cli.runtime.js");

/** Scenario-run flags shared by `qa run --qa-profile` and `qa suite`, keyed by CLI option name. */
type QaScenarioRunCliOptions = {
  repoRoot?: QaSuiteCommandOptions["repoRoot"];
  outputDir?: QaSuiteCommandOptions["outputDir"];
  transport?: QaSuiteCommandOptions["transportId"];
  providerMode?: QaSuiteCommandOptions["providerMode"];
  model?: QaSuiteCommandOptions["primaryModel"];
  altModel?: QaSuiteCommandOptions["alternateModel"];
  concurrency?: QaSuiteCommandOptions["concurrency"];
  allowFailures?: QaSuiteCommandOptions["allowFailures"];
  fast?: QaSuiteCommandOptions["fastMode"];
};

/** Parsed options of `qa run`, covering both the self-check mode and the profile mode. */
type QaRunCliOptions = QaLabSelfCheckCommandOptions &
  QaScenarioRunCliOptions & {
    qaProfile?: QaProfileCommandOptions["profile"];
    surface?: QaProfileCommandOptions["surface"];
    category?: QaProfileCommandOptions["category"];
    evidenceMode?: QaProfileCommandOptions["evidenceMode"];
    excludeTestExecutionEvidence?: boolean;
  };

/** `qa run` options that are rejected unless `--qa-profile` is supplied. */
const QA_RUN_PROFILE_ONLY_OPTIONS = [
  { optionName: "outputDir", flag: "--output-dir" },
  { optionName: "surface", flag: "--surface" },
  { optionName: "category", flag: "--category" },
  { optionName: "evidenceMode", flag: "--evidence-mode" },
  { optionName: "excludeTestExecutionEvidence", flag: "--exclude-test-execution-evidence" },
  { optionName: "transport", flag: "--transport" },
  { optionName: "providerMode", flag: "--provider-mode" },
  { optionName: "model", flag: "--model" },
  { optionName: "altModel", flag: "--alt-model" },
  { optionName: "concurrency", flag: "--concurrency" },
  { optionName: "allowFailures", flag: "--allow-failures" },
  { optionName: "fast", flag: "--fast" },
] as const;

/** `qa run` options that are rejected when `--qa-profile` is supplied. */
const QA_RUN_SELF_CHECK_ONLY_OPTIONS = [{ optionName: "output", flag: "--output" }] as const;

/** Parsed options of `qa suite`, keyed by CLI option name. */
type QaSuiteCliOptions = QaScenarioRunCliOptions & {
  runner?: QaSuiteCommandOptions["runner"];
  thinking?: QaSuiteCommandOptions["thinking"];
  cliAuthMode?: QaSuiteCommandOptions["cliAuthMode"];
  parityPack?: QaSuiteCommandOptions["parityPack"];
  pack?: QaSuiteCommandOptions["pack"];
  scenario?: QaSuiteCommandOptions["scenarioIds"];
  enablePlugin?: QaSuiteCommandOptions["enabledPluginIds"];
  image?: QaSuiteCommandOptions["image"];
  cpus?: QaSuiteCommandOptions["cpus"];
  memory?: QaSuiteCommandOptions["memory"];
  disk?: QaSuiteCommandOptions["disk"];
  preflight?: QaSuiteCommandOptions["preflight"];
  runtimePair?: QaSuiteCommandOptions["runtimePair"];
  runtimeParityTier?: QaSuiteCommandOptions["runtimeParityTier"];
};

// Cached import promise so "./cli.runtime.js" is requested at most once per process.
let qaLabCliRuntimePromise: Promise<QaLabCliRuntime> | null = null;

/**
 * Loads the command runtime module on first use and reuses the same promise afterwards.
 *
 * @returns The imported `./cli.runtime.js` module namespace.
 */
async function loadQaLabCliRuntime(): Promise<QaLabCliRuntime> {
  qaLabCliRuntimePromise ??= import("./cli.runtime.js");
  return await qaLabCliRuntimePromise;
}

/**
 * Builds an error carrying the name `InvalidArgumentError`, the code
 * `commander.invalidArgument` and exit code 1.
 * NOTE(review): presumably mirrors commander's own invalid-argument error so the
 * CLI reports it as a usage failure — confirm against the commander version in use.
 *
 * @param message Human-readable description of the invalid argument.
 */
function invalidQaCliArgument(message: string): Error & { code: string; exitCode: number } {
  const error = new Error(message) as Error & { code: string; exitCode: number };
  error.name = "InvalidArgumentError";
  error.code = "commander.invalidArgument";
  error.exitCode = 1;
  return error;
}

/**
 * Parses a CLI option value that must be a strictly positive integer.
 *
 * @param value Raw option value from the command line.
 * @param flag Flag name used in the error message (for example `--concurrency`).
 * @throws When `parseStrictPositiveInteger` does not accept the value.
 */
function parseQaCliPositiveIntegerOption(value: string, flag: string): number {
  const parsed = parseStrictPositiveInteger(value);
  if (parsed === undefined) {
    throw invalidQaCliArgument(`${flag} must be a positive integer.`);
  }
  return parsed;
}

/**
 * Parses `--evidence-mode`. Surrounding whitespace is ignored, `full` and `slim`
 * are returned as-is, and `compact` is accepted and mapped to `slim`.
 *
 * @throws For any other value.
 */
function parseQaEvidenceModeOption(value: string): QaProfileCommandOptions["evidenceMode"] {
  const evidenceMode = value.trim();
  if (evidenceMode === "full" || evidenceMode === "slim") {
    return evidenceMode;
  }
  if (evidenceMode === "compact") {
    return "slim";
  }
  throw invalidQaCliArgument("--evidence-mode must be one of full, slim.");
}

/**
 * Combines `--evidence-mode` with `--exclude-test-execution-evidence`.
 * Without the exclude flag the parsed evidence mode is returned unchanged;
 * with it the result is `slim`.
 *
 * @throws When the exclude flag is combined with `--evidence-mode full`.
 */
function resolveQaEvidenceModeOptions(opts: QaRunCliOptions) {
  if (opts.excludeTestExecutionEvidence !== true) {
    return opts.evidenceMode;
  }
  if (opts.evidenceMode === "full") {
    throw invalidQaCliArgument(
      "--exclude-test-execution-evidence conflicts with --evidence-mode full.",
    );
  }
  return "slim";
}

/**
 * Returns the flags from `options` whose value source on `command` is `"cli"`,
 * i.e. excluding values that only come from option defaults.
 */
function collectCliSuppliedQaRunFlags(
  command: Command,
  options: readonly { optionName: string; flag: string }[],
): string[] {
  return options
    .filter((option) => command.getOptionValueSource(option.optionName) === "cli")
    .map((option) => option.flag);
}

/** Joins flag names with ", " for use in error messages. */
function formatFlagList(flags: readonly string[]): string {
  // join() already yields the lone element for a single-item list, so no special case is needed.
  return flags.join(", ");
}

/**
 * Rejects `qa run` flag combinations that do not match the selected mode:
 * profile mode (non-blank `--qa-profile`) forbids the self-check-only flags,
 * and self-check mode forbids the profile-only flags.
 *
 * @throws When `--qa-profile` was passed on the CLI but is blank, or when a flag
 *   belonging to the other mode was passed on the CLI.
 */
function validateQaRunMode(opts: QaRunCliOptions, command: Command) {
  const hasQaProfile = Boolean(opts.qaProfile?.trim());
  if (command.getOptionValueSource("qaProfile") === "cli" && !hasQaProfile) {
    throw new Error("--qa-profile must not be empty.");
  }
  if (hasQaProfile) {
    const selfCheckFlags = collectCliSuppliedQaRunFlags(command, QA_RUN_SELF_CHECK_ONLY_OPTIONS);
    if (selfCheckFlags.length > 0) {
      throw new Error(
        `qa run ${formatFlagList(selfCheckFlags)} is only valid for the self-check mode without --qa-profile.`,
      );
    }
    return;
  }
  const profileFlags = collectCliSuppliedQaRunFlags(command, QA_RUN_PROFILE_ONLY_OPTIONS);
  if (profileFlags.length > 0) {
    throw new Error(
      `qa run ${formatFlagList(profileFlags)} requires --qa-profile; without --qa-profile, qa run only executes the self-check.`,
    );
  }
}

// The wrappers below all follow one pattern: load the runtime module on demand,
// then forward the options unchanged to the matching runtime command.

/** Delegates to `runQaLabSelfCheckCommand` in the lazily loaded runtime. */
async function runQaSelfCheck(opts: QaLabSelfCheckCommandOptions) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaLabSelfCheckCommand(opts);
}

/** Delegates to `runQaProfileCommand` in the lazily loaded runtime. */
async function runQaProfile(opts: QaProfileCommandOptions) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaProfileCommand(opts);
}

/** Delegates to `runQaSuiteCommand` in the lazily loaded runtime. */
async function runQaSuiteCliCommand(opts: QaSuiteCommandOptions) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaSuiteCommand(opts);
}

/** Delegates to `runQaParityReportCommand` in the lazily loaded runtime. */
async function runQaParityReport(opts: {
  repoRoot?: string;
  candidateSummary?: string;
  baselineSummary?: string;
  candidateLabel?: string;
  baselineLabel?: string;
  outputDir?: string;
  runtimeAxis?: boolean;
  summary?: string;
  tokenEfficiency?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaParityReportCommand(opts);
}

/** Delegates to `runQaConfidenceReportCommand` in the lazily loaded runtime. */
async function runQaConfidenceReport(opts: {
  repoRoot?: string;
  manifest: string;
  artifactRoot?: string;
  outputDir?: string;
  strictZeroUnknowns?: boolean;
  strictGlobalPass?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaConfidenceReportCommand(opts);
}

/** Delegates to `runQaConfidenceSelfTestCommand` in the lazily loaded runtime. */
async function runQaConfidenceSelfTest(opts: { repoRoot?: string; outputDir?: string }) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaConfidenceSelfTestCommand(opts);
}

/** Delegates to `runQaCoverageReportCommand` in the lazily loaded runtime. */
async function runQaCoverageReport(opts: {
  repoRoot?: string;
  output?: string;
  json?: boolean;
  tools?: boolean;
  summary?: string;
  match?: string[];
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaCoverageReportCommand(opts);
}

/** Delegates to `runQaJsonlReplayCommand` in the lazily loaded runtime. */
async function runQaJsonlReplay(opts: {
  repoRoot?: string;
  transcripts?: string;
  outputDir?: string;
  runtimePair?: string;
  providerMode?: QaProviderModeInput;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaJsonlReplayCommand(opts);
}

/** Delegates to `runQaCharacterEvalCommand` in the lazily loaded runtime. */
async function runQaCharacterEval(opts: {
  repoRoot?: string;
  outputDir?: string;
  model?: string[];
  scenario?: string;
  fast?: boolean;
  thinking?: string;
  modelThinking?: string[];
  judgeModel?: string[];
  judgeTimeoutMs?: number;
  blindJudgeModels?: boolean;
  concurrency?: number;
  judgeConcurrency?: number;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaCharacterEvalCommand(opts);
}

/** Delegates to `runQaManualLaneCommand` in the lazily loaded runtime. */
async function runQaManualLane(opts: {
  repoRoot?: string;
  transportId?: string;
  providerMode?: QaProviderModeInput;
  primaryModel?: string;
  alternateModel?: string;
  fastMode?: boolean;
  message: string;
  timeoutMs?: number;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaManualLaneCommand(opts);
}

/** Delegates to `runQaCredentialsAddCommand` in the lazily loaded runtime. */
async function runQaCredentialsAdd(opts: {
  actorId?: string;
  endpointPrefix?: string;
  json?: boolean;
  kind: string;
  note?: string;
  payloadFile: string;
  repoRoot?: string;
  siteUrl?: string;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaCredentialsAddCommand(opts);
}

/** Delegates to `runQaCredentialsRemoveCommand` in the lazily loaded runtime. */
async function runQaCredentialsRemove(opts: {
  actorId?: string;
  credentialId: string;
  endpointPrefix?: string;
  json?: boolean;
  siteUrl?: string;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaCredentialsRemoveCommand(opts);
}

/** Delegates to `runQaCredentialsListCommand` in the lazily loaded runtime. */
async function runQaCredentialsList(opts: {
  actorId?: string;
  endpointPrefix?: string;
  json?: boolean;
  kind?: string;
  limit?: number;
  showSecrets?: boolean;
  siteUrl?: string;
  status?: string;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaCredentialsListCommand(opts);
}

/** Delegates to `runQaCredentialsDoctorCommand` in the lazily loaded runtime. */
async function runQaCredentialsDoctor(opts: {
  actorId?: string;
  endpointPrefix?: string;
  json?: boolean;
  siteUrl?: string;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaCredentialsDoctorCommand(opts);
}

/** Delegates to `runQaLabUiCommand` in the lazily loaded runtime. */
async function runQaUi(opts: {
  repoRoot?: string;
  host?: string;
  port?: number;
  advertiseHost?: string;
  advertisePort?: number;
  controlUiUrl?: string;
  controlUiProxyTarget?: string;
  uiDistDir?: string;
  autoKickoffTarget?: string;
  embeddedGateway?: string;
  sendKickoffOnStart?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaLabUiCommand(opts);
}

/** Delegates to `runQaDockerScaffoldCommand` in the lazily loaded runtime. */
async function runQaDockerScaffold(opts: {
  repoRoot?: string;
  outputDir: string;
  gatewayPort?: number;
  qaLabPort?: number;
  providerBaseUrl?: string;
  image?: string;
  usePrebuiltImage?: boolean;
  bindUiDist?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaDockerScaffoldCommand(opts);
}

/** Delegates to `runQaDockerBuildImageCommand` in the lazily loaded runtime. */
async function runQaDockerBuildImage(opts: { repoRoot?: string; image?: string }) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaDockerBuildImageCommand(opts);
}

/** Delegates to `runQaDockerUpCommand` in the lazily loaded runtime. */
async function runQaDockerUp(opts: {
  repoRoot?: string;
  outputDir?: string;
  gatewayPort?: number;
  qaLabPort?: number;
  providerBaseUrl?: string;
  image?: string;
  usePrebuiltImage?: boolean;
  bindUiDist?: boolean;
  skipUiBuild?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaDockerUpCommand(opts);
}

/** Delegates to `runQaProviderServerCommand` in the lazily loaded runtime. */
async function runQaProviderServer(
  providerMode: QaProviderMode,
  opts: { host?: string; port?: number },
) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaProviderServerCommand(providerMode, opts);
}

/** Reports whether the QA CLI can be offered; this is exactly `hasQaScenarioPack()`. */
export function isQaLabCliAvailable(): boolean {
  return hasQaScenarioPack();
}

/**
 * Guards against registering a runner command under a name that `qa` already uses.
 *
 * @throws When `qa` already has a subcommand named `commandName`.
 */
function assertNoQaSubcommandCollision(qa: Command, commandName: string) {
  if (qa.commands.some((command) => command.name() === commandName)) {
    throw new Error(`QA runner command "${commandName}" conflicts with an existing qa subcommand`);
  }
}

export function registerQaLabCli(program: Command) {
  const qa = program
    .command("qa")
    .description("Run private QA automation flows and launch the QA debugger");
  registerMantisCli(qa);
  const qaRun = qa
    .command("run")
    .description("Run the bundled QA self-check and write a Markdown report")
    .option("--repo-root ", "Repository root to target when running from a neutral cwd")
    .option("--output ", "Report output path")
    .option("--output-dir ", "Profile run artifact directory")
    .option("--qa-profile ", "Run the QA profile from taxonomy.yaml")
    .option("--surface ", "Limit --qa-profile to a taxonomy surface id")
    .option("--category ", "Limit --qa-profile to a taxonomy category id")
    .option(
      "--evidence-mode ",
      "Set profile qa-evidence.json mode: full or slim",
      parseQaEvidenceModeOption,
    )
    .option(
      "--exclude-test-execution-evidence",
"Deprecated alias for --evidence-mode slim", false, ); qaRun.options.at(-1)?.hideHelp(); qaRun .option("--transport ", "QA transport id", "qa-channel") .option("--provider-mode ", formatQaProviderModeHelp()) .option("--model ", "Primary provider/model ref") .option("--alt-model ", "Alternate provider/model ref") .option("--concurrency ", "Scenario worker concurrency", (value: string) => parseQaCliPositiveIntegerOption(value, "--concurrency"), ) .option( "--allow-failures", "Write artifacts without setting a failing exit code when scenarios fail", false, ) .option("--fast", "Enable provider fast mode where supported", false); qaRun.action(async (opts: QaRunCliOptions, command: Command) => { validateQaRunMode(opts, command); if (opts.qaProfile?.trim()) { await runQaProfile({ repoRoot: opts.repoRoot, outputDir: opts.outputDir, profile: opts.qaProfile, surface: opts.surface, category: opts.category, evidenceMode: resolveQaEvidenceModeOptions(opts), transportId: opts.transport, providerMode: opts.providerMode, primaryModel: opts.model, alternateModel: opts.altModel, concurrency: opts.concurrency, allowFailures: opts.allowFailures, fastMode: opts.fast, }); return; } await runQaSelfCheck({ repoRoot: opts.repoRoot, output: opts.output, }); }); qa.command("suite") .description("Run repo-backed QA scenarios against the QA gateway lane") .option("--repo-root ", "Repository root to target when running from a neutral cwd") .option("--output-dir ", "Suite artifact directory") .option("--runner ", "Execution runner: host or multipass", "host") .option("--transport ", "QA transport id", "qa-channel") .option("--provider-mode ", formatQaProviderModeHelp()) .option("--model ", "Primary provider/model ref") .option("--alt-model ", "Alternate provider/model ref") .option( "--cli-auth-mode ", "CLI backend auth mode for live Claude CLI runs: auto, api-key, or subscription", ) .option("--parity-pack ", 'Preset scenario pack; currently only "agentic" is supported') .option( "--pack ", 
'Scenario pack id; currently "personal-agent" and "observability" are supported', ) .option("--scenario ", "Run only the named QA scenario (repeatable)", collectString, []) .option( "--enable-plugin ", "Enable an extra bundled plugin in the QA gateway config (repeatable)", collectString, [], ) .option("--concurrency ", "Scenario worker concurrency", (value: string) => parseQaCliPositiveIntegerOption(value, "--concurrency"), ) .option("--preflight", "Run a single-scenario bootstrap preflight and stop", false) .option( "--allow-failures", "Write artifacts without setting a failing exit code when scenarios fail", false, ) .option("--fast", "Enable provider fast mode where supported", false) .option( "--thinking ", "Suite thinking default: off|minimal|low|medium|high|xhigh|adaptive|max", ) .option("--image ", "Multipass image alias") .option("--cpus ", "Multipass vCPU count", (value: string) => parseQaCliPositiveIntegerOption(value, "--cpus"), ) .option("--memory ", "Multipass memory size") .option("--disk ", "Multipass disk size") .option("--runtime-pair ", "Run each scenario under both runtimes, e.g. 
openclaw,codex") .option( "--runtime-parity-tier ", "Add scenarios tagged with runtimeParityTier (standard, optional, live-only, soak; repeatable or comma-separated)", collectString, [], ) .action(async (opts: QaSuiteCliOptions) => { await runQaSuiteCliCommand({ repoRoot: opts.repoRoot, outputDir: opts.outputDir, transportId: opts.transport, runner: opts.runner, providerMode: opts.providerMode, primaryModel: opts.model, alternateModel: opts.altModel, fastMode: opts.fast, thinking: opts.thinking, cliAuthMode: opts.cliAuthMode, parityPack: opts.parityPack, pack: opts.pack, scenarioIds: opts.scenario, enabledPluginIds: opts.enablePlugin, concurrency: opts.concurrency, allowFailures: opts.allowFailures, image: opts.image, cpus: opts.cpus, memory: opts.memory, disk: opts.disk, preflight: opts.preflight, runtimePair: opts.runtimePair, runtimeParityTier: opts.runtimeParityTier, }); }); qa.command("parity-report") .description("Write either a model-axis parity gate report or a runtime-axis parity report") .option("--candidate-summary ", "Candidate qa-suite-summary.json path") .option("--baseline-summary ", "Baseline qa-suite-summary.json path") .option("--runtime-axis", "Interpret --summary as a runtime-pair qa-suite-summary.json", false) .option("--summary ", "Runtime-axis qa-suite-summary.json path") .option( "--token-efficiency", "Also write the runtime token-efficiency report for --runtime-axis summaries", false, ) .option("--repo-root ", "Repository root to target when running from a neutral cwd") .option( "--candidate-label