openclaw/test/scripts/rtt-harness.test.ts

import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterEach, describe, expect, it } from "vitest";
import {
  appendJsonl,
  assertRequiredEnv,
  buildRttResult,
  buildRunId,
  createHarnessEnv,
  extractRtt,
  readTelegramSummary,
  safeRunLabel,
  validateOpenClawPackageSpec,
} from "../../scripts/lib/rtt-harness.ts";
import { __testing as cliTesting } from "../../scripts/rtt.ts";

const TEST_DIR = path.dirname(fileURLToPath(import.meta.url));
const FIXTURE_PATH = path.resolve(TEST_DIR, "../fixtures/telegram-qa-summary-rtt.json");
const DOCKER_SCRIPT_PATH = path.resolve(TEST_DIR, "../../scripts/e2e/npm-telegram-rtt-docker.sh");
const CREDENTIAL_SCRIPT_PATH = path.resolve(
  TEST_DIR,
  "../../scripts/e2e/npm-telegram-rtt-credentials.mjs",
);
const tempDirs: string[] = [];

afterEach(async () => {
  await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true })));
});

describe("RTT harness", () => {
  it("validates OpenClaw package specs", () => {
    expect(validateOpenClawPackageSpec("openclaw@main")).toBe("openclaw@main");
    expect(validateOpenClawPackageSpec("openclaw@alpha")).toBe("openclaw@alpha");
    expect(validateOpenClawPackageSpec("openclaw@beta")).toBe("openclaw@beta");
    expect(validateOpenClawPackageSpec("openclaw@latest")).toBe("openclaw@latest");
    expect(validateOpenClawPackageSpec("openclaw@2026.4.30")).toBe("openclaw@2026.4.30");
    expect(validateOpenClawPackageSpec("openclaw@2026.4.30-beta.2")).toBe(
      "openclaw@2026.4.30-beta.2",
    );
    expect(validateOpenClawPackageSpec("openclaw@2026.4.30-alpha.2")).toBe(
      "openclaw@2026.4.30-alpha.2",
    );

    expect(() => validateOpenClawPackageSpec("@openclaw/openclaw@beta")).toThrow(
      /Package spec must be/,
    );
    expect(() => validateOpenClawPackageSpec("openclaw@next")).toThrow(/Package spec must be/);
  });

  it("builds stable run labels", () => {
    expect(safeRunLabel("openclaw@beta")).toBe("openclaw_beta");
    expect(
      buildRunId({
        now: new Date("2026-05-01T03:04:05.678Z"),
        spec: "openclaw@beta",
        index: 1,
      }),
    ).toBe("2026-05-01T030405678Z-openclaw_beta-2");
  });

  it("constructs harness env without dropping caller env", () => {
    const env = createHarnessEnv({
      baseEnv: {
        OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
        OPENCLAW_NPM_TELEGRAM_FAST: "0",
      },
      providerMode: "mock-openai",
      rawOutputDir: ".artifacts/rtt/run/raw",
      samples: 20,
      sampleTimeoutMs: 30_000,
      scenarios: ["telegram-mentioned-message-reply"],
      spec: "openclaw@beta",
      timeoutMs: 180_000,
      version: "2026.4.30-beta.1",
    });

    expect(env.OPENCLAW_QA_TELEGRAM_GROUP_ID).toBe("-100123");
    expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC).toBe("openclaw@beta");
    expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL).toBe("openclaw@beta (2026.4.30-beta.1)");
    expect(env.OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE).toBe("mock-openai");
    expect(env.OPENCLAW_NPM_TELEGRAM_SCENARIOS).toBe("telegram-mentioned-message-reply");
    expect(env.OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR).toBe(".artifacts/rtt/run/raw");
    expect(env.OPENCLAW_NPM_TELEGRAM_FAST).toBe("0");
    expect(env.OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES).toBe("20");
    expect(env.OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS).toBe("30000");
    expect(env.OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS).toBe("180000");
    expect(env.OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS).toBe("180000");
  });

  it("forwards Convex credential controls without dropping RTT sample controls", () => {
    const env = createHarnessEnv({
      baseEnv: {
        OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-credentials.example.convex.site",
        OPENCLAW_QA_CONVEX_SECRET_MAINTAINER: "maintainer-secret",
      },
      credentialRole: "maintainer",
      credentialSource: "convex",
      providerMode: "mock-openai",
      rawOutputDir: ".artifacts/rtt/run/raw",
      samples: 7,
      sampleTimeoutMs: 45_000,
      scenarios: ["telegram-mentioned-message-reply"],
      spec: "openclaw@beta",
      timeoutMs: 180_000,
      version: "2026.4.30-beta.1",
    });

    expect(env.OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE).toBe("convex");
    expect(env.OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE).toBe("maintainer");
    expect(env.OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES).toBe("7");
    expect(env.OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS).toBe("45000");
    expect(() =>
      assertRequiredEnv(env, { credentialRole: "maintainer", credentialSource: "convex" }),
    ).not.toThrow();
  });

  it("exports the Telegram bot token after Convex credentials are sourced", async () => {
    const script = await fs.readFile(DOCKER_SCRIPT_PATH, "utf8");
    const sourceIndex = script.indexOf('source "$credential_env_file"');
    const tokenExportIndex = script.indexOf(
      'export TELEGRAM_BOT_TOKEN="${OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN:?missing OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN}"',
    );
    const installEnvSnapshotIndex = script.indexOf('install_env=("${docker_env[@]}")');
    const convexSecretForwardIndex = script.indexOf(
      "OPENCLAW_QA_CONVEX_SECRET_CI",
      installEnvSnapshotIndex,
    );
    const packageInstallIndex = script.indexOf("npm install -g");
    const credentialAcquireIndex = script.indexOf(
      "node /app/scripts/e2e/npm-telegram-rtt-credentials.mjs acquire",
    );
    const heartbeatStartIndex = script.indexOf("start_credential_heartbeat", sourceIndex);
    const driverIndex = script.indexOf("node /app/scripts/e2e/npm-telegram-rtt-driver.mjs");

    expect(sourceIndex).toBeGreaterThanOrEqual(0);
    expect(tokenExportIndex).toBeGreaterThan(sourceIndex);
    expect(installEnvSnapshotIndex).toBeGreaterThanOrEqual(0);
    expect(convexSecretForwardIndex).toBeGreaterThan(installEnvSnapshotIndex);
    expect(packageInstallIndex).toBeLessThan(credentialAcquireIndex);
    expect(heartbeatStartIndex).toBeGreaterThan(sourceIndex);
    expect(heartbeatStartIndex).toBeLessThan(driverIndex);
    expect(script).toContain("start_credential_heartbeat() {\n  (\n    set +e");
    expect(script).toContain("Convex credential heartbeat exited with status");
    expect(script).toContain('kill -TERM "$rtt_shell_pid"');
    expect(script).not.toContain('export TELEGRAM_BOT_TOKEN="$OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN"');
  });

  it("keeps broker helper heartbeat and empty-response handling aligned with QA leases", async () => {
    const script = await fs.readFile(CREDENTIAL_SCRIPT_PATH, "utf8");

    expect(script).toContain("await response.text()");
    expect(script).toContain('response.ok\n        ? { status: "ok" }');
    expect(script).toContain("leaseTtlMs: acquired.leaseTtlMs ?? config.leaseTtlMs");
    expect(script).toContain("leaseTtlMs: leaseTtlMsFromLease(config, lease)");
  });

  it("extracts RTT values from Telegram QA summaries", async () => {
    const summary = await readTelegramSummary(FIXTURE_PATH);
    expect(extractRtt(summary)).toEqual({
      canaryMs: 1234,
      mentionReplyMs: 5000,
      warmSamples: [4000, 5000, 7000],
      avgMs: 5333,
      p50Ms: 5000,
      p95Ms: 7000,
      maxMs: 7000,
      failedSamples: 0,
    });
  });

  it("builds normalized result JSON", async () => {
    const summary = await readTelegramSummary(FIXTURE_PATH);
    const result = buildRttResult({
      artifacts: {
        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
        rawReportPath: "runs/run/raw/telegram-qa-report.md",
        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
        resultPath: "runs/run/result.json",
      },
      finishedAt: new Date("2026-05-01T00:00:12.000Z"),
      providerMode: "mock-openai",
      rawSummary: summary,
      runId: "run",
      scenarios: ["telegram-mentioned-message-reply"],
      spec: "openclaw@beta",
      startedAt: new Date("2026-05-01T00:00:00.000Z"),
      version: "2026.4.30-beta.1",
    });

    expect(result).toStrictEqual({
      artifacts: {
        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
        rawReportPath: "runs/run/raw/telegram-qa-report.md",
        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
        resultPath: "runs/run/result.json",
      },
      package: { spec: "openclaw@beta", version: "2026.4.30-beta.1" },
      run: {
        durationMs: 12_000,
        finishedAt: "2026-05-01T00:00:12.000Z",
        id: "run",
        startedAt: "2026-05-01T00:00:00.000Z",
        status: "pass",
      },
      mode: {
        providerMode: "mock-openai",
        scenarios: ["telegram-mentioned-message-reply"],
      },
      rtt: {
        canaryMs: 1234,
        mentionReplyMs: 5000,
        avgMs: 5333,
        p50Ms: 5000,
        p95Ms: 7000,
        maxMs: 7000,
        failedSamples: 0,
        warmSamples: [4000, 5000, 7000],
      },
    });
  });

  it("marks failed scenario summaries as failed results", () => {
    const result = buildRttResult({
      artifacts: {
        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
        rawReportPath: "runs/run/raw/telegram-qa-report.md",
        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
        resultPath: "runs/run/result.json",
      },
      finishedAt: new Date("2026-05-01T00:00:12.000Z"),
      providerMode: "mock-openai",
      rawSummary: {
        scenarios: [
          { id: "telegram-canary", rttMs: 5948, status: "pass" },
          { id: "telegram-mentioned-message-reply", status: "fail" },
        ],
      },
      runId: "run",
      scenarios: ["telegram-mentioned-message-reply"],
      spec: "openclaw@latest",
      startedAt: new Date("2026-05-01T00:00:00.000Z"),
      version: "2026.4.29",
    });

    expect(result.run.status).toBe("fail");
    expect(result.rtt).toEqual({ canaryMs: 5948, mentionReplyMs: undefined });
  });

  it("appends JSONL rows", async () => {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-test-"));
    tempDirs.push(tempDir);
    const jsonlPath = path.join(tempDir, "data/rtt.jsonl");
    await appendJsonl(jsonlPath, { run: 1 });
    await appendJsonl(jsonlPath, { run: 2 });

    await expect(fs.readFile(jsonlPath, "utf8")).resolves.toBe('{"run":1}\n{"run":2}\n');
  });

  it("parses CLI options", () => {
    const parsed = cliTesting.parseArgs([
      "openclaw@latest",
      "--package-tgz",
      "/tmp/openclaw.tgz",
      "--provider",
      "live-frontier",
      "--credential-source",
      "convex",
      "--credential-role",
      "ci",
      "--runs",
      "3",
      "--samples",
      "5",
      "--sample-timeout-ms",
      "30000",
      "--timeout-ms",
      "240000",
      "--harness-root",
      "/tmp/openclaw",
      "--output",
      "/tmp/runs",
    ]);

    expect(parsed.spec).toBe("openclaw@latest");
    expect(parsed.options).toStrictEqual({
      packageTgz: "/tmp/openclaw.tgz",
      credentialRole: "ci",
      credentialSource: "convex",
      providerMode: "live-frontier",
      runs: 3,
      samples: 5,
      sampleTimeoutMs: 30_000,
      harnessRoot: "/tmp/openclaw",
      output: "/tmp/runs",
      scenarios: ["telegram-mentioned-message-reply"],
      timeoutMs: 240_000,
    });
  });
});