openclaw/extensions/qa-lab/src/cli.runtime.test.ts

import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";

const {
  runQaManualLane,
  runQaSuiteFromRuntime,
  runQaCharacterEval,
  runQaMultipass,
  runMatrixQaLive,
  runTelegramQaLive,
  startQaLabServer,
  writeQaDockerHarnessFiles,
  buildQaDockerHarnessImage,
  runQaDockerUp,
} = vi.hoisted(() => ({
  runQaManualLane: vi.fn(),
  runQaSuiteFromRuntime: vi.fn(),
  runQaCharacterEval: vi.fn(),
  runQaMultipass: vi.fn(),
  runMatrixQaLive: vi.fn(),
  runTelegramQaLive: vi.fn(),
  startQaLabServer: vi.fn(),
  writeQaDockerHarnessFiles: vi.fn(),
  buildQaDockerHarnessImage: vi.fn(),
  runQaDockerUp: vi.fn(),
}));

vi.mock("./manual-lane.runtime.js", () => ({
  runQaManualLane,
}));

vi.mock("./suite-launch.runtime.js", () => ({
  runQaSuiteFromRuntime,
}));

vi.mock("./character-eval.js", () => ({
  runQaCharacterEval,
}));

vi.mock("./multipass.runtime.js", () => ({
  runQaMultipass,
}));

vi.mock("./live-transports/matrix/matrix-live.runtime.js", () => ({
  runMatrixQaLive,
}));

vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => ({
  runTelegramQaLive,
}));

vi.mock("./lab-server.js", () => ({
  startQaLabServer,
}));

vi.mock("./docker-harness.js", () => ({
  writeQaDockerHarnessFiles,
  buildQaDockerHarnessImage,
}));

vi.mock("./docker-up.runtime.js", () => ({
  runQaDockerUp,
}));

import { resolveRepoRelativeOutputDir } from "./cli-paths.js";
import {
  runQaLabSelfCheckCommand,
  runQaDockerBuildImageCommand,
  runQaDockerScaffoldCommand,
  runQaDockerUpCommand,
  runQaCharacterEvalCommand,
  runQaManualLaneCommand,
  runQaSuiteCommand,
} from "./cli.runtime.js";
import { runQaMatrixCommand } from "./live-transports/matrix/cli.runtime.js";
import { runQaTelegramCommand } from "./live-transports/telegram/cli.runtime.js";

describe("qa cli runtime", () => {
  let stdoutWrite: ReturnType<typeof vi.spyOn>;

  beforeEach(() => {
    stdoutWrite = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    runQaSuiteFromRuntime.mockReset();
    runQaCharacterEval.mockReset();
    runQaManualLane.mockReset();
    runQaMultipass.mockReset();
    runMatrixQaLive.mockReset();
    runTelegramQaLive.mockReset();
    startQaLabServer.mockReset();
    writeQaDockerHarnessFiles.mockReset();
    buildQaDockerHarnessImage.mockReset();
    runQaDockerUp.mockReset();
    runQaSuiteFromRuntime.mockResolvedValue({
      watchUrl: "http://127.0.0.1:43124",
      reportPath: "/tmp/report.md",
      summaryPath: "/tmp/summary.json",
    });
    runQaCharacterEval.mockResolvedValue({
      reportPath: "/tmp/character-report.md",
      summaryPath: "/tmp/character-summary.json",
    });
    runQaManualLane.mockResolvedValue({
      model: "openai/gpt-5.4",
      waited: { status: "ok" },
      reply: "done",
      watchUrl: "http://127.0.0.1:43124",
    });
    runQaMultipass.mockResolvedValue({
      outputDir: "/tmp/multipass",
      reportPath: "/tmp/multipass/qa-suite-report.md",
      summaryPath: "/tmp/multipass/qa-suite-summary.json",
      hostLogPath: "/tmp/multipass/multipass-host.log",
      bootstrapLogPath: "/tmp/multipass/multipass-guest-bootstrap.log",
      guestScriptPath: "/tmp/multipass/multipass-guest-run.sh",
      vmName: "openclaw-qa-test",
      scenarioIds: ["channel-chat-baseline"],
    });
    runMatrixQaLive.mockResolvedValue({
      outputDir: "/tmp/matrix",
      reportPath: "/tmp/matrix/report.md",
      summaryPath: "/tmp/matrix/summary.json",
      observedEventsPath: "/tmp/matrix/observed.json",
      scenarios: [],
    });
    runTelegramQaLive.mockResolvedValue({
      outputDir: "/tmp/telegram",
      reportPath: "/tmp/telegram/report.md",
      summaryPath: "/tmp/telegram/summary.json",
      observedMessagesPath: "/tmp/telegram/observed.json",
      scenarios: [],
    });
    startQaLabServer.mockResolvedValue({
      baseUrl: "http://127.0.0.1:58000",
      runSelfCheck: vi.fn().mockResolvedValue({
        outputPath: "/tmp/report.md",
      }),
      stop: vi.fn(),
    });
    writeQaDockerHarnessFiles.mockResolvedValue({
      outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker",
    });
    buildQaDockerHarnessImage.mockResolvedValue({
      imageName: "openclaw:qa-local-prebaked",
    });
    runQaDockerUp.mockResolvedValue({
      outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker",
      qaLabUrl: "http://127.0.0.1:43124",
      gatewayUrl: "http://127.0.0.1:18789/",
      stopCommand: "docker compose down",
    });
  });

  afterEach(() => {
    stdoutWrite.mockRestore();
    vi.clearAllMocks();
  });

  it("resolves suite repo-root-relative paths before dispatching", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/frontier",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "anthropic/claude-sonnet-4-6",
      fastMode: true,
      scenarioIds: ["approval-turn-tool-followthrough"],
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/frontier"),
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "anthropic/claude-sonnet-4-6",
      fastMode: true,
      scenarioIds: ["approval-turn-tool-followthrough"],
    });
  });

  it("resolves telegram qa repo-root-relative paths before dispatching", async () => {
    await runQaTelegramCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/telegram",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      scenarioIds: ["telegram-help-command"],
      sutAccountId: "sut-live",
    });

    expect(runTelegramQaLive).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/telegram"),
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      scenarioIds: ["telegram-help-command"],
      sutAccountId: "sut-live",
    });
  });

  it("resolves matrix qa repo-root-relative paths before dispatching", async () => {
    await runQaMatrixCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/matrix",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      scenarioIds: ["matrix-thread-follow-up"],
      sutAccountId: "sut-live",
    });

    expect(runMatrixQaLive).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/matrix"),
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      scenarioIds: ["matrix-thread-follow-up"],
      sutAccountId: "sut-live",
    });
  });

  it("rejects output dirs that escape the repo root", () => {
    expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside")).toThrow(
      "--output-dir must stay within the repo root.",
    );
    expect(() => resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside")).toThrow(
      "--output-dir must be a relative path inside the repo root.",
    );
  });

  it("defaults telegram qa runs onto the live provider lane", async () => {
    await runQaTelegramCommand({
      repoRoot: "/tmp/openclaw-repo",
      scenarioIds: ["telegram-help-command"],
    });

    expect(runTelegramQaLive).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
      }),
    );
  });

  it("defaults matrix qa runs onto the live provider lane", async () => {
    await runQaMatrixCommand({
      repoRoot: "/tmp/openclaw-repo",
      scenarioIds: ["matrix-thread-follow-up"],
    });

    expect(runMatrixQaLive).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
      }),
    );
  });

  it("normalizes legacy live-openai suite runs onto the frontier provider mode", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-openai",
      scenarioIds: ["approval-turn-tool-followthrough"],
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
      }),
    );
  });

  it("passes host suite concurrency through", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
      concurrency: 3,
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
        concurrency: 3,
      }),
    );
  });

  it("passes host suite CLI auth mode through", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-frontier",
      primaryModel: "claude-cli/claude-sonnet-4-6",
      alternateModel: "claude-cli/claude-sonnet-4-6",
      cliAuthMode: "subscription",
      scenarioIds: ["claude-cli-provider-capabilities-subscription"],
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
        primaryModel: "claude-cli/claude-sonnet-4-6",
        alternateModel: "claude-cli/claude-sonnet-4-6",
        claudeCliAuthMode: "subscription",
        scenarioIds: ["claude-cli-provider-capabilities-subscription"],
      }),
    );
  });

  it("rejects unknown suite CLI auth modes", async () => {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        cliAuthMode: "magic",
      }),
    ).rejects.toThrow("--cli-auth-mode must be one of auto, api-key, subscription");
  });

  it("resolves character eval paths and passes model refs through", async () => {
    await runQaCharacterEvalCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa/character",
      model: [
        "openai/gpt-5.4,thinking=xhigh,fast=false",
        "codex-cli/test-model,thinking=high,fast",
      ],
      scenario: "character-vibes-gollum",
      fast: true,
      thinking: "medium",
      modelThinking: ["codex-cli/test-model=medium"],
      judgeModel: ["openai/gpt-5.4,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
      judgeTimeoutMs: 180_000,
      blindJudgeModels: true,
      concurrency: 4,
      judgeConcurrency: 3,
    });

    expect(runQaCharacterEval).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/character"),
      models: ["openai/gpt-5.4", "codex-cli/test-model"],
      scenarioId: "character-vibes-gollum",
      candidateFastMode: true,
      candidateThinkingDefault: "medium",
      candidateThinkingByModel: { "codex-cli/test-model": "medium" },
      candidateModelOptions: {
        "openai/gpt-5.4": { thinkingDefault: "xhigh", fastMode: false },
        "codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
      },
      judgeModels: ["openai/gpt-5.4", "anthropic/claude-opus-4-6"],
      judgeModelOptions: {
        "openai/gpt-5.4": { thinkingDefault: "xhigh", fastMode: true },
        "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
      },
      judgeTimeoutMs: 180_000,
      judgeBlindModels: true,
      candidateConcurrency: 4,
      judgeConcurrency: 3,
      progress: expect.any(Function),
    });
  });

  it("lets character eval auto-select candidate fast mode when --fast is omitted", async () => {
    await runQaCharacterEvalCommand({
      repoRoot: "/tmp/openclaw-repo",
      model: ["openai/gpt-5.4"],
    });

    expect(runQaCharacterEval).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: undefined,
      models: ["openai/gpt-5.4"],
      scenarioId: undefined,
      candidateFastMode: undefined,
      candidateThinkingDefault: undefined,
      candidateThinkingByModel: undefined,
      candidateModelOptions: undefined,
      judgeModels: undefined,
      judgeModelOptions: undefined,
      judgeTimeoutMs: undefined,
      judgeBlindModels: undefined,
      candidateConcurrency: undefined,
      judgeConcurrency: undefined,
      progress: expect.any(Function),
    });
  });

  it("rejects invalid character eval thinking levels", async () => {
    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.4"],
        thinking: "enormous",
      }),
    ).rejects.toThrow("--thinking must be one of");

    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.4,thinking=galaxy"],
      }),
    ).rejects.toThrow("--model thinking must be one of");

    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.4,warp"],
      }),
    ).rejects.toThrow("--model options must be thinking=<level>");

    await expect(
      runQaCharacterEvalCommand({
        repoRoot: "/tmp/openclaw-repo",
        model: ["openai/gpt-5.4"],
        modelThinking: ["openai/gpt-5.4"],
      }),
    ).rejects.toThrow("--model-thinking must use provider/model=level");
  });

  it("passes the explicit repo root into manual runs", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      message: "read qa kickoff and reply short",
      timeoutMs: 45_000,
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      message: "read qa kickoff and reply short",
      timeoutMs: 45_000,
    });
  });

  it("routes suite runs through multipass when the runner is selected", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa-multipass",
      runner: "multipass",
      providerMode: "mock-openai",
      scenarioIds: ["channel-chat-baseline"],
      concurrency: 3,
      image: "lts",
      cpus: 2,
      memory: "4G",
      disk: "24G",
    });

    expect(runQaMultipass).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-multipass"),
      providerMode: "mock-openai",
      primaryModel: undefined,
      alternateModel: undefined,
      fastMode: undefined,
      scenarioIds: ["channel-chat-baseline"],
      concurrency: 3,
      image: "lts",
      cpus: 2,
      memory: "4G",
      disk: "24G",
    });
    expect(runQaSuiteFromRuntime).not.toHaveBeenCalled();
  });

  it("passes live suite selection through to the multipass runner", async () => {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      runner: "multipass",
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: true,
      scenarioIds: ["channel-chat-baseline"],
    });

    expect(runQaMultipass).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
        primaryModel: "openai/gpt-5.4",
        alternateModel: "openai/gpt-5.4",
        fastMode: true,
        scenarioIds: ["channel-chat-baseline"],
      }),
    );
  });

  it("rejects multipass-only suite flags on the host runner", async () => {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "host",
        image: "lts",
      }),
    ).rejects.toThrow("--image, --cpus, --memory, and --disk require --runner multipass.");
  });

  it("defaults manual mock runs onto the mock-openai model lane", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "mock-openai",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      providerMode: "mock-openai",
      primaryModel: "mock-openai/gpt-5.4",
      alternateModel: "mock-openai/gpt-5.4-alt",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("defaults manual frontier runs onto the frontier model lane", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      providerMode: "live-frontier",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("keeps an explicit manual primary model as the alternate default", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-frontier",
      primaryModel: "anthropic/claude-sonnet-4-6",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      providerMode: "live-frontier",
      primaryModel: "anthropic/claude-sonnet-4-6",
      alternateModel: "anthropic/claude-sonnet-4-6",
      fastMode: undefined,
      message: "read qa kickoff and reply short",
      timeoutMs: undefined,
    });
  });

  it("normalizes legacy live-openai manual runs onto the frontier provider mode", async () => {
    await runQaManualLaneCommand({
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "live-openai",
      message: "read qa kickoff and reply short",
    });

    expect(runQaManualLane).toHaveBeenCalledWith(
      expect.objectContaining({
        repoRoot: path.resolve("/tmp/openclaw-repo"),
        providerMode: "live-frontier",
        primaryModel: "openai/gpt-5.4",
        alternateModel: "openai/gpt-5.4",
      }),
    );
  });

  it("resolves self-check repo-root-relative paths before starting the lab server", async () => {
    await runQaLabSelfCheckCommand({
      repoRoot: "/tmp/openclaw-repo",
      output: ".artifacts/qa/self-check.md",
    });

    expect(startQaLabServer).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputPath: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/self-check.md"),
    });
  });

  it("resolves docker scaffold paths relative to the explicit repo root", async () => {
    await runQaDockerScaffoldCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa-docker",
      providerBaseUrl: "http://127.0.0.1:44080/v1",
      usePrebuiltImage: true,
    });

    expect(writeQaDockerHarnessFiles).toHaveBeenCalledWith({
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-docker"),
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      gatewayPort: undefined,
      qaLabPort: undefined,
      providerBaseUrl: "http://127.0.0.1:44080/v1",
      imageName: undefined,
      usePrebuiltImage: true,
    });
  });

  it("passes the explicit repo root into docker image builds", async () => {
    await runQaDockerBuildImageCommand({
      repoRoot: "/tmp/openclaw-repo",
      image: "openclaw:qa-local-prebaked",
    });

    expect(buildQaDockerHarnessImage).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      imageName: "openclaw:qa-local-prebaked",
    });
  });

  it("resolves docker up paths relative to the explicit repo root", async () => {
    await runQaDockerUpCommand({
      repoRoot: "/tmp/openclaw-repo",
      outputDir: ".artifacts/qa-up",
      usePrebuiltImage: true,
      skipUiBuild: true,
    });

    expect(runQaDockerUp).toHaveBeenCalledWith({
      repoRoot: path.resolve("/tmp/openclaw-repo"),
      outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-up"),
      gatewayPort: undefined,
      qaLabPort: undefined,
      providerBaseUrl: undefined,
      image: undefined,
      usePrebuiltImage: true,
      skipUiBuild: true,
    });
  });
});