openclaw/scripts/lib/rtt-harness.ts

// RTT harness helpers for OpenClaw repository automation: credential checks, harness environment setup, and RTT result shaping.
import { execFile, spawn } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
import { promisify } from "node:util";
import {
  QA_EVIDENCE_FILENAME,
  validateQaEvidenceSummaryJson,
  type QaEvidenceSummaryJson,
  type QaEvidenceTiming,
} from "../../extensions/qa-lab/src/evidence-summary.ts";

// Promise-returning form of execFile; used in this file to run docker, npm, and git.
const execFileAsync = promisify(execFile);

/** Provider mode forwarded to the harness as OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE. */
export type RttProviderMode = "mock-openai" | "live-frontier";
/**
 * Credential source. "convex" makes assertRequiredEnv require the Convex site URL and a
 * role secret; "env" makes it require the Telegram QA env vars instead.
 */
export type RttCredentialSource = "env" | "convex";
/** Credential role; selects which Convex secret env var assertRequiredEnv requires. */
export type RttCredentialRole = "maintainer" | "ci";

/** Result record for a single RTT run, as assembled by buildRttResult. */
type RttResult = {
  // Package under test: the requested spec and the version string supplied by the caller.
  package: {
    spec: string;
    version: string;
  };
  // Run bookkeeping. startedAt/finishedAt are ISO-8601 strings; durationMs is their difference.
  run: {
    id: string;
    startedAt: string;
    finishedAt: string;
    durationMs: number;
    status: "pass" | "fail";
  };
  // Provider mode and scenario ids the run was asked to execute.
  mode: {
    providerMode: RttProviderMode;
    scenarios: string[];
  };
  // Timings pulled from the evidence summary by extractRtt.
  rtt: {
    // rttMs of the "telegram-canary" entry.
    canaryMs?: number;
    // p50Ms (falling back to rttMs) of the "telegram-mentioned-message-reply" entry.
    mentionReplyMs?: number;
    // NOTE(review): not populated by any code visible in this file — confirm where it is set.
    warmSamples?: number[];
    // The remaining fields are copied from the mention-reply entry's timing when defined.
    avgMs?: number;
    p50Ms?: number;
    p95Ms?: number;
    maxMs?: number;
    failedSamples?: number;
  };
  // Artifact paths; buildRttResult passes them through unchanged.
  artifacts: {
    rawSummaryPath: string;
    rawReportPath: string;
    rawObservedMessagesPath: string;
    resultPath: string;
  };
};

// Accepted package specs: `openclaw@` followed by either a tag (main, alpha, beta, latest)
// or a version of the form DDDD.N.N (four digits, then two positive integers without
// leading zeros), optionally suffixed with `-N`, `-alpha.N`, or `-beta.N` (N positive,
// no leading zero). Anchored at both ends, so nothing may precede or follow the spec.
const OPENCLAW_PACKAGE_SPEC_RE =
  /^openclaw@(main|alpha|beta|latest|[0-9]{4}\.[1-9][0-9]*\.[1-9][0-9]*(-[1-9][0-9]*|-(alpha|beta)\.[1-9][0-9]*)?)$/u;

// Env vars that assertRequiredEnv requires to be non-blank when the credential source is "env".
const REQUIRED_TELEGRAM_ENV = [
  "OPENCLAW_QA_TELEGRAM_GROUP_ID",
  "OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN",
  "OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN",
] as const;

/**
 * Parses a credential-source flag value.
 *
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param value Raw value as typed by the user or read from the environment.
 * @returns The normalized source, either "env" or "convex".
 * @throws Error when the value is neither "env" nor "convex"; the message echoes the raw value.
 */
export function parseRttCredentialSource(value: string): RttCredentialSource {
  const candidate = value.trim().toLowerCase();
  switch (candidate) {
    case "env":
    case "convex":
      return candidate;
    default:
      throw new Error(`--credential-source must be env or convex; got: ${value}`);
  }
}

/**
 * Parses a credential-role flag value.
 *
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param value Raw value as typed by the user or read from the environment.
 * @returns The normalized role, either "maintainer" or "ci".
 * @throws Error when the value is neither "maintainer" nor "ci"; the message echoes the raw value.
 */
export function parseRttCredentialRole(value: string): RttCredentialRole {
  const candidate = value.trim().toLowerCase();
  switch (candidate) {
    case "maintainer":
    case "ci":
      return candidate;
    default:
      throw new Error(`--credential-role must be maintainer or ci; got: ${value}`);
  }
}

/**
 * Decides which credential source applies.
 *
 * Precedence:
 * 1. The explicit `credentialSource` argument.
 * 2. The first non-blank value of OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE, then
 *    OPENCLAW_QA_CREDENTIAL_SOURCE (parsed with parseRttCredentialSource).
 * 3. "convex" when CI is set and both a Convex site URL and at least one Convex secret are
 *    non-blank.
 * 4. "env" otherwise.
 *
 * @param env Environment to inspect.
 * @param credentialSource Optional explicit override.
 * @returns The resolved credential source.
 * @throws Error when the selected env value is not a valid source.
 */
function resolveRttCredentialSource(
  env: NodeJS.ProcessEnv,
  credentialSource?: RttCredentialSource,
): RttCredentialSource {
  if (credentialSource) {
    return credentialSource;
  }
  // A variable that is defined but blank counts as unset, so it must not shadow the
  // fallback variable. `??` only skips null/undefined and would stop at an empty string.
  const rawSource = [
    env.OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE,
    env.OPENCLAW_QA_CREDENTIAL_SOURCE,
  ].find((candidate) => Boolean(candidate?.trim()));
  if (rawSource !== undefined) {
    return parseRttCredentialSource(rawSource);
  }
  if (
    env.CI &&
    env.OPENCLAW_QA_CONVEX_SITE_URL?.trim() &&
    (env.OPENCLAW_QA_CONVEX_SECRET_CI?.trim() || env.OPENCLAW_QA_CONVEX_SECRET_MAINTAINER?.trim())
  ) {
    return "convex";
  }
  return "env";
}

/**
 * Decides which credential role applies.
 *
 * Precedence:
 * 1. The explicit `credentialRole` argument.
 * 2. The first non-blank value of OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE, then
 *    OPENCLAW_QA_CREDENTIAL_ROLE (parsed with parseRttCredentialRole).
 * 3. "ci" when CI is set, otherwise "maintainer".
 *
 * @param env Environment to inspect.
 * @param credentialRole Optional explicit override.
 * @returns The resolved credential role.
 * @throws Error when the selected env value is not a valid role.
 */
function resolveRttCredentialRole(
  env: NodeJS.ProcessEnv,
  credentialRole?: RttCredentialRole,
): RttCredentialRole {
  if (credentialRole) {
    return credentialRole;
  }
  // A variable that is defined but blank counts as unset, so it must not shadow the
  // fallback variable. `??` only skips null/undefined and would stop at an empty string.
  const rawRole = [
    env.OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE,
    env.OPENCLAW_QA_CREDENTIAL_ROLE,
  ].find((candidate) => Boolean(candidate?.trim()));
  if (rawRole !== undefined) {
    return parseRttCredentialRole(rawRole);
  }
  return env.CI ? "ci" : "maintainer";
}

/**
 * Checks a package spec against OPENCLAW_PACKAGE_SPEC_RE.
 *
 * @param spec Candidate spec such as `openclaw@latest`.
 * @returns The same spec, unchanged, when it is accepted.
 * @throws Error describing the accepted forms when the spec does not match.
 */
export function validateOpenClawPackageSpec(spec: string) {
  const accepted = OPENCLAW_PACKAGE_SPEC_RE.test(spec);
  if (accepted) {
    return spec;
  }
  throw new Error(
    `Package spec must be openclaw@main, openclaw@alpha, openclaw@beta, openclaw@latest, or an exact OpenClaw release version; got: ${spec}`,
  );
}

/**
 * Turns arbitrary text into a label made only of ASCII letters, digits, `.`, `-`, and `_`.
 *
 * Each run of other characters becomes a single underscore, and underscores at the very
 * start or end of the result are dropped.
 *
 * @param input Text to sanitize.
 * @returns The sanitized label; may be empty.
 */
export function safeRunLabel(input: string) {
  const underscored = input.replace(/[^a-zA-Z0-9.-]+/gu, "_");
  const trimmed = underscored.replace(/^_+|_+$/gu, "");
  return trimmed;
}

/**
 * Builds a run identifier from a timestamp, a package spec, and an optional index.
 *
 * The timestamp is `now` in ISO-8601 form with every `:` and `.` removed. The spec is
 * passed through safeRunLabel. When `index` is given, `-<index + 1>` is appended.
 *
 * @param params.now Time of the run.
 * @param params.spec Package spec to embed in the id.
 * @param params.index Optional zero-based run index.
 * @returns The run id string.
 */
export function buildRunId(params: { now: Date; spec: string; index?: number }) {
  const { now, spec, index } = params;
  const compactTimestamp = now.toISOString().replace(/[:.]/gu, "");
  const base = `${compactTimestamp}-${safeRunLabel(spec)}`;
  if (index === undefined) {
    return base;
  }
  return `${base}-${index + 1}`;
}

/**
 * Pulls the RTT figures out of an evidence summary.
 *
 * `canaryMs` is the `rttMs` of the "telegram-canary" entry. `mentionReplyMs` is the
 * `p50Ms` of the "telegram-mentioned-message-reply" entry, falling back to its `rttMs`.
 * The remaining aggregate fields are copied from the mention-reply timing by
 * appendRttTiming.
 *
 * @param summary Evidence summary to read.
 * @returns The `rtt` section of an RttResult; fields are undefined or absent when the
 *   summary does not provide them.
 */
export function extractRtt(summary: QaEvidenceSummaryJson) {
  const allEntries = summary.entries ?? [];
  const timingFor = (scenarioId: string) =>
    allEntries.find((entry) => entry.test?.id === scenarioId)?.result?.timing;

  const canaryTiming = timingFor("telegram-canary");
  const mentionTiming = timingFor("telegram-mentioned-message-reply");

  const rtt: RttResult["rtt"] = {
    canaryMs: canaryTiming?.rttMs,
    mentionReplyMs: mentionTiming?.p50Ms ?? mentionTiming?.rttMs,
  };
  appendRttTiming(rtt, mentionTiming);
  return rtt;
}

/**
 * Copies the aggregate timing fields (avgMs, p50Ms, p95Ms, maxMs, failedSamples) from
 * `timing` onto `rtt`, in that order, skipping any field that is undefined.
 *
 * Mutates `rtt` in place and returns nothing. A missing `timing` leaves `rtt` untouched.
 *
 * @param rtt Target `rtt` section to fill in.
 * @param timing Source timing, if any.
 */
function appendRttTiming(rtt: RttResult["rtt"], timing: QaEvidenceTiming | undefined) {
  const copiedFields = ["avgMs", "p50Ms", "p95Ms", "maxMs", "failedSamples"] as const;
  for (const field of copiedFields) {
    const measured = timing?.[field];
    if (measured !== undefined) {
      rtt[field] = measured;
    }
  }
}

/**
 * Builds the environment for the harness script.
 *
 * Starts from a copy of `baseEnv` and layers the OPENCLAW_NPM_TELEGRAM_* and
 * OPENCLAW_QA_* settings on top. The tarball and credential variables are only set when
 * the matching parameter is truthy; otherwise whatever `baseEnv` holds is kept.
 * OPENCLAW_NPM_TELEGRAM_FAST keeps the `baseEnv` value when it is not null or undefined
 * and defaults to "1". Both timeout variables receive `timeoutMs`.
 *
 * @param params Run settings; see the individual fields.
 * @returns A new environment object; `baseEnv` is not modified.
 */
export function createHarnessEnv(params: {
  baseEnv: NodeJS.ProcessEnv;
  credentialRole?: RttCredentialRole;
  credentialSource?: RttCredentialSource;
  packageTgz?: string;
  providerMode: RttProviderMode;
  scenarios: string[];
  spec: string;
  version: string;
  rawOutputDir: string;
  samples: number;
  sampleTimeoutMs: number;
  timeoutMs: number;
}) {
  const { baseEnv, credentialRole, credentialSource, packageTgz } = params;

  // Optional layers: each is an empty object when its parameter is not provided.
  const tarballOverride = packageTgz ? { OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ: packageTgz } : {};
  const credentialSourceOverride = credentialSource
    ? { OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE: credentialSource }
    : {};
  const credentialRoleOverride = credentialRole
    ? { OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE: credentialRole }
    : {};
  const scenarioTimeout = String(params.timeoutMs);

  return {
    ...baseEnv,
    OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC: params.spec,
    ...tarballOverride,
    OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL: `${params.spec} (${params.version})`,
    OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE: params.providerMode,
    // The tarball path, when given, replaces the spec as the recorded package source.
    OPENCLAW_QA_PACKAGE_SOURCE: packageTgz ?? params.spec,
    OPENCLAW_QA_PACKAGE_SOURCE_KIND: packageTgz ? "packed-tarball" : "npm-package",
    ...credentialSourceOverride,
    ...credentialRoleOverride,
    OPENCLAW_NPM_TELEGRAM_SCENARIOS: params.scenarios.join(","),
    OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR: params.rawOutputDir,
    OPENCLAW_NPM_TELEGRAM_FAST: baseEnv.OPENCLAW_NPM_TELEGRAM_FAST ?? "1",
    OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES: String(params.samples),
    OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS: String(params.sampleTimeoutMs),
    OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS: scenarioTimeout,
    OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS: scenarioTimeout,
  };
}

/**
 * Verifies that the environment holds the credentials the chosen source needs.
 *
 * For the "convex" source, OPENCLAW_QA_CONVEX_SITE_URL and the secret matching the
 * resolved role must be non-blank. For any other source, every variable in
 * REQUIRED_TELEGRAM_ENV must be non-blank.
 *
 * @param env Environment to check.
 * @param options Optional explicit credential source and role overrides.
 * @throws Error listing the missing variable names, or any error raised while resolving
 *   the source or role.
 */
export function assertRequiredEnv(
  env: NodeJS.ProcessEnv,
  options: {
    credentialRole?: RttCredentialRole;
    credentialSource?: RttCredentialSource;
  } = {},
) {
  const isBlank = (key: string) => !env[key]?.trim();

  if (resolveRttCredentialSource(env, options.credentialSource) !== "convex") {
    const absent = REQUIRED_TELEGRAM_ENV.filter((key) => isBlank(key));
    if (absent.length > 0) {
      throw new Error(`Missing Telegram QA env: ${absent.join(", ")}`);
    }
    return;
  }

  // The role is resolved before any presence check so an invalid role value fails first.
  const role = resolveRttCredentialRole(env, options.credentialRole);
  const requiredKeys = ["OPENCLAW_QA_CONVEX_SITE_URL"];
  if (role === "ci") {
    requiredKeys.push("OPENCLAW_QA_CONVEX_SECRET_CI");
  }
  if (role === "maintainer") {
    requiredKeys.push("OPENCLAW_QA_CONVEX_SECRET_MAINTAINER");
  }
  const absent = requiredKeys.filter((key) => isBlank(key));
  if (absent.length > 0) {
    throw new Error(`Missing Convex Telegram QA credential env: ${absent.join(", ")}`);
  }
}

/**
 * Confirms that `harnessRoot` contains the RTT harness script
 * (`scripts/e2e/npm-telegram-rtt-docker.sh`).
 *
 * @param harnessRoot Directory expected to hold the OpenClaw checkout.
 * @throws Error naming the expected script path when it cannot be accessed.
 */
export async function assertHarnessRoot(harnessRoot: string) {
  const harnessScript = path.join(harnessRoot, "scripts/e2e/npm-telegram-rtt-docker.sh");
  // Any access failure is reported as a missing harness, whatever the underlying cause.
  await fs.access(harnessScript).catch(() => {
    throw new Error(`Missing OpenClaw Telegram npm harness: ${harnessScript}`);
  });
}

/**
 * Confirms Docker can be reached by running `docker version` for the server version,
 * with a 10-second timeout.
 *
 * @throws Error with a fixed install/start hint when the command fails for any reason.
 */
export async function assertDockerAvailable() {
  const probeArgs = ["version", "--format", "{{.Server.Version}}"];
  await execFileAsync("docker", probeArgs, { timeout: 10_000 }).catch(() => {
    throw new Error("Docker is required for RTT runs; install/start Docker and retry.");
  });
}

/**
 * Asks npm for the version that `spec` resolves to, via `npm view <spec> version --json`
 * with a 30-second timeout.
 *
 * @param spec Package spec to look up, e.g. `openclaw@beta`.
 * @returns The trimmed version string.
 * @throws Error "npm did not return a version for <spec>." when npm prints nothing, prints
 *   something that is not JSON, or prints JSON that is not a non-empty string. Errors from
 *   running npm itself propagate unchanged.
 */
export async function resolvePublishedVersion(spec: string) {
  const { stdout } = await execFileAsync("npm", ["view", spec, "version", "--json"], {
    timeout: 30_000,
  });
  const rawOutput = stdout.trim();
  let parsed: unknown;
  try {
    // Empty output would make JSON.parse throw a bare SyntaxError, so treat it, and any
    // other unparseable output, as "no version" and report it with the message below.
    parsed = rawOutput.length > 0 ? JSON.parse(rawOutput) : undefined;
  } catch {
    parsed = undefined;
  }
  if (typeof parsed !== "string" || parsed.trim().length === 0) {
    throw new Error(`npm did not return a version for ${spec}.`);
  }
  return parsed.trim();
}

/**
 * Derives a version label for a local checkout: the `version` field of
 * `<harnessRoot>/package.json`, a `+`, and the 10-character short hash of HEAD.
 *
 * @param harnessRoot Directory containing `package.json` and the git checkout.
 * @returns A string of the form `<version>+<short-hash>`.
 * @throws Error when `package.json` has no non-empty string `version`; file, JSON, and git
 *   errors propagate unchanged.
 */
export async function resolveMainVersion(harnessRoot: string) {
  const manifestPath = path.join(harnessRoot, "package.json");
  const manifestText = await fs.readFile(manifestPath, "utf8");
  const manifest = JSON.parse(manifestText) as { version?: unknown };

  const declaredVersion = typeof manifest.version === "string" ? manifest.version.trim() : "";
  if (declaredVersion.length === 0) {
    throw new Error("OpenClaw package.json must contain a non-empty version.");
  }

  const revParse = await execFileAsync("git", ["rev-parse", "--short=10", "HEAD"], {
    cwd: harnessRoot,
    timeout: 10_000,
  });
  const shortHash = revParse.stdout.trim();
  return `${declaredVersion}+${shortHash}`;
}

/**
 * Reads the file at `summaryPath` as UTF-8, parses it as JSON, and passes the result to
 * validateQaEvidenceSummaryJson.
 *
 * @param summaryPath Path of the evidence summary file.
 * @returns Whatever validateQaEvidenceSummaryJson returns for the parsed content.
 */
export async function readTelegramSummary(summaryPath: string) {
  const summaryText = await fs.readFile(summaryPath, "utf8");
  const parsedSummary = JSON.parse(summaryText);
  return validateQaEvidenceSummaryJson(parsedSummary);
}

/**
 * Returns the evidence summary path inside `outputDir`, i.e. `outputDir` joined with
 * QA_EVIDENCE_FILENAME. Declared async, so callers receive a promise.
 *
 * @param outputDir Directory holding the harness output.
 * @returns Promise resolving to the summary file path.
 */
export async function resolveTelegramSummaryPath(outputDir: string) {
  const summaryPath = path.join(outputDir, QA_EVIDENCE_FILENAME);
  return summaryPath;
}

/**
 * Writes `value` to `pathname` as JSON indented by two spaces, followed by a newline.
 * Missing parent directories are created first; an existing file is replaced.
 *
 * @param pathname Destination file path.
 * @param value Value to serialize.
 */
export async function writeJson(pathname: string, value: unknown) {
  const parentDir = path.dirname(pathname);
  await fs.mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  await fs.writeFile(pathname, `${serialized}\n`);
}

/**
 * Appends `value` to `pathname` as one line of compact JSON followed by a newline.
 * Missing parent directories are created first.
 *
 * @param pathname Destination file path.
 * @param value Value to serialize onto its own line.
 */
export async function appendJsonl(pathname: string, value: unknown) {
  const parentDir = path.dirname(pathname);
  await fs.mkdir(parentDir, { recursive: true });
  const line = `${JSON.stringify(value)}\n`;
  await fs.appendFile(pathname, line);
}

/**
 * Runs the harness script (`scripts/e2e/npm-telegram-rtt-docker.sh`) with bash from
 * `harnessRoot`, using the given environment and inheriting this process's stdio.
 *
 * @param params.env Environment for the child process.
 * @param params.harnessRoot Working directory and base path of the script.
 * @returns The child's exit code, or 1 when the reported exit code is null.
 * @throws Whatever the child emits on its "error" event (for example a spawn failure).
 */
export async function runHarness(params: { env: NodeJS.ProcessEnv; harnessRoot: string }) {
  const { env, harnessRoot } = params;
  const harnessScript = path.join(harnessRoot, "scripts/e2e/npm-telegram-rtt-docker.sh");
  const harnessProcess = spawn("bash", [harnessScript], { cwd: harnessRoot, env, stdio: "inherit" });

  const reportedCode = await new Promise<number | null>((resolve, reject) => {
    harnessProcess.once("error", (error) => reject(error));
    harnessProcess.once("exit", (code) => resolve(code));
  });

  // A null code means no numeric exit status was reported; count it as a failure.
  return reportedCode === null ? 1 : reportedCode;
}

/**
 * Decides whether an evidence summary counts as a failed RTT run.
 *
 * The run fails when any required scenario ("telegram-canary" plus every requested
 * scenario) has no entry, has a status other than "pass", or lacks a finite numeric RTT.
 * The RTT is `rttMs`, except for "telegram-mentioned-message-reply", where `p50Ms` is
 * used and `rttMs` is only the fallback. The run also fails when any entry in the
 * summary, required or not, has a status other than "pass".
 *
 * @param summary Evidence summary to inspect.
 * @param requestedScenarios Scenario ids the run was asked to execute.
 * @returns true when the run should be reported as failed.
 */
function rttSummaryFailed(summary: QaEvidenceSummaryJson, requestedScenarios: string[]) {
  const allEntries = summary.entries ?? [];

  const scenarioFailed = (scenarioId: string) => {
    const match = allEntries.find((candidate) => candidate.test?.id === scenarioId);
    if (!match || match.result?.status !== "pass") {
      return true;
    }
    const timing = match.result.timing;
    let measured;
    if (scenarioId === "telegram-mentioned-message-reply") {
      measured = timing?.p50Ms ?? timing?.rttMs;
    } else {
      measured = timing?.rttMs;
    }
    return !(typeof measured === "number" && Number.isFinite(measured));
  };

  const requiredScenarioIds = ["telegram-canary", ...requestedScenarios];
  if (requiredScenarioIds.some((scenarioId) => scenarioFailed(scenarioId))) {
    return true;
  }
  return !allEntries.every((entry) => entry.result?.status === "pass");
}

/**
 * Assembles the RttResult record for one run.
 *
 * The status is "fail" when rttSummaryFailed reports a failure for the raw summary and
 * requested scenarios, otherwise "pass". Timestamps are written in ISO-8601 form and
 * `durationMs` is `finishedAt - startedAt` in milliseconds. The `rtt` section comes from
 * extractRtt; `artifacts` and `scenarios` are passed through as given.
 *
 * @param params Run metadata, the raw evidence summary, and artifact paths.
 * @returns The assembled result record.
 */
export function buildRttResult(params: {
  artifacts: RttResult["artifacts"];
  finishedAt: Date;
  providerMode: RttProviderMode;
  rawSummary: QaEvidenceSummaryJson;
  runId: string;
  scenarios: string[];
  spec: string;
  startedAt: Date;
  version: string;
}): RttResult {
  const { finishedAt, rawSummary, scenarios, startedAt } = params;
  const status = rttSummaryFailed(rawSummary, scenarios) ? "fail" : "pass";

  return {
    package: { spec: params.spec, version: params.version },
    run: {
      id: params.runId,
      startedAt: startedAt.toISOString(),
      finishedAt: finishedAt.toISOString(),
      durationMs: finishedAt.getTime() - startedAt.getTime(),
      status,
    },
    mode: { providerMode: params.providerMode, scenarios },
    rtt: extractRtt(rawSummary),
    artifacts: params.artifacts,
  };
}