From e8810c04a46bb5b68bc441e425be77e8212309ea Mon Sep 17 00:00:00 2001
From: Ayaan Zaidi <hi@obviy.us>
Date: Fri, 1 May 2026 11:16:04 +0530
Subject: [PATCH] feat(rtt): add published package measurement harness

---
 .gitignore                                 |   2 +
 package.json                               |   1 +
 scripts/lib/rtt-harness.ts                 | 215 ++++++++++++++++++++
 scripts/rtt.ts                             | 216 +++++++++++++++++++++
 test/fixtures/telegram-qa-summary-rtt.json |  31 +++
 test/scripts/rtt-harness.test.ts           | 172 ++++++++++++++++
 6 files changed, 637 insertions(+)
 create mode 100644 scripts/lib/rtt-harness.ts
 create mode 100644 scripts/rtt.ts
 create mode 100644 test/fixtures/telegram-qa-summary-rtt.json
 create mode 100644 test/scripts/rtt-harness.test.ts

diff --git a/.gitignore b/.gitignore
index 20096c48fae..b3a56ddb49b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,6 +188,8 @@ changelog/fragments/
 test/fixtures/openclaw-vitest-unit-report.json
 analysis/
 .artifacts/qa-e2e/
+/runs/
+/data/rtt.jsonl
 extensions/qa-lab/web/dist/
 
 # Generated bundled plugin runtime dependency manifests
diff --git a/package.json b/package.json
index 4737f482cf3..eec39ace283 100644
--- a/package.json
+++ b/package.json
@@ -1439,6 +1439,7 @@
     "release:plugins:clawhub:plan": "node --import tsx scripts/plugin-clawhub-release-plan.ts",
     "release:plugins:npm:check": "node --import tsx scripts/plugin-npm-release-check.ts",
     "release:plugins:npm:plan": "node --import tsx scripts/plugin-npm-release-plan.ts",
+    "rtt": "node --import tsx scripts/rtt.ts",
     "runtime-sidecars:check": "node --import tsx scripts/generate-runtime-sidecar-paths-baseline.ts --check",
     "runtime-sidecars:gen": "node --import tsx scripts/generate-runtime-sidecar-paths-baseline.ts --write",
     "stage:bundled-plugin-runtime-deps": "node scripts/stage-bundled-plugin-runtime-deps.mjs",
diff --git a/scripts/lib/rtt-harness.ts b/scripts/lib/rtt-harness.ts
new file mode 100644
index 00000000000..822da9d1c89
--- /dev/null
+++ b/scripts/lib/rtt-harness.ts
@@ -0,0 +1,215 @@
+import { execFile, spawn } from "node:child_process";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { promisify } from "node:util";
+
+const execFileAsync = promisify(execFile); // promise form of execFile, used for the docker/npm probes below
+
+export type RttProviderMode = "mock-openai" | "live-frontier"; // forwarded as OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE
+
+export type RttCliOptions = { // NOTE(review): same keys as the options object built by scripts/rtt.ts parseArgs
+  providerMode: RttProviderMode;
+  runs: number; // presumably how many harness runs to execute — confirm against the CLI
+  harnessRoot: string; // presumably the checkout that contains scripts/e2e/npm-telegram-live-docker.sh
+  output: string; // presumably the directory that receives one sub-directory per run
+  scenarios: string[];
+  timeoutMs: number; // milliseconds
+};
+
+export type RttResult = { // record shape returned by buildRttResult
+  package: {
+    spec: string;
+    version: string;
+  };
+  run: {
+    id: string;
+    startedAt: string; // Date#toISOString() output
+    finishedAt: string; // Date#toISOString() output
+    durationMs: number; // finishedAt minus startedAt, in milliseconds
+    status: "pass" | "fail"; // "fail" when any summary scenario has status "fail"
+  };
+  mode: {
+    providerMode: RttProviderMode;
+    scenarios: string[];
+  };
+  rtt: {
+    canaryMs?: number; // rttMs of the "telegram-canary" scenario, when present
+    mentionReplyMs?: number; // rttMs of the "telegram-mentioned-message-reply" scenario, when present
+  };
+  artifacts: {
+    rawSummaryPath: string;
+    rawReportPath: string;
+    rawObservedMessagesPath: string;
+    resultPath: string;
+  };
+};
+
+export type TelegramQaSummary = { // only the fields this module reads; parsed JSON is cast to this, not validated
+  scenarios?: Array<{
+    id?: string;
+    rttMs?: number;
+    status?: string; // buildRttResult only checks for the literal "fail"
+  }>;
+};
+
+const OPENCLAW_PACKAGE_SPEC_RE = // openclaw@beta | openclaw@latest | openclaw@YYYY.N.N[-N | -beta.N]; N has no leading zero
+  /^openclaw@(beta|latest|[0-9]{4}\.[1-9][0-9]*\.[1-9][0-9]*(-[1-9][0-9]*|-beta\.[1-9][0-9]*)?)$/u;
+
+const REQUIRED_TELEGRAM_ENV = [ // assertRequiredEnv requires each of these to be set and non-blank
+  "OPENCLAW_QA_TELEGRAM_GROUP_ID",
+  "OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN",
+  "OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN",
+] as const;
+
+/** Returns `spec` unchanged when it matches OPENCLAW_PACKAGE_SPEC_RE; otherwise throws an Error. */
+export function validateOpenClawPackageSpec(spec: string) {
+  const accepted = OPENCLAW_PACKAGE_SPEC_RE.test(spec);
+  if (accepted) return spec;
+  throw new Error(
+    `Package spec must be openclaw@beta, openclaw@latest, or an exact OpenClaw release version; got: ${spec}`,
+  );
+}
+
+export function safeRunLabel(input: string) {
+  return input.replace(/[^a-zA-Z0-9.-]+/gu, "_").replace(/^_+|_+$/gu, ""); // runs outside [a-zA-Z0-9.-] -> "_", then trim edge "_"
+}
+
+/** Builds `<ISO timestamp without ":" and ".">-<safeRunLabel(spec)>`, plus `-<index + 1>` when index is given. */
+export function buildRunId(params: { now: Date; spec: string; index?: number }) {
+  const stamp = params.now.toISOString().replace(/[:.]/gu, "");
+  return `${stamp}-${safeRunLabel(params.spec)}${params.index === undefined ? "" : `-${params.index + 1}`}`;
+}
+
+/** Picks `rttMs` of the canary and mention-reply scenarios (first match by id); undefined when absent. */
+export function extractRtt(summary: TelegramQaSummary) {
+  const rttOf = (id: string) => (summary.scenarios ?? []).find((entry) => entry.id === id)?.rttMs;
+  return {
+    canaryMs: rttOf("telegram-canary"),
+    mentionReplyMs: rttOf("telegram-mentioned-message-reply"),
+  };
+}
+
+export function createHarnessEnv(params: { // builds the environment object passed to one harness run
+  baseEnv: NodeJS.ProcessEnv;
+  providerMode: RttProviderMode;
+  scenarios: string[];
+  spec: string;
+  version: string;
+  rawOutputDir: string;
+  timeoutMs: number;
+}) {
+  return {
+    ...params.baseEnv, // caller env first, so every key listed below overrides it
+    OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC: params.spec,
+    OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL: `${params.spec} (${params.version})`,
+    OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE: params.providerMode,
+    OPENCLAW_NPM_TELEGRAM_SCENARIOS: params.scenarios.join(","), // comma-separated scenario ids
+    OPENCLAW_NPM_TELEGRAM_SKIP_HOTPATH: "1",
+    OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR: params.rawOutputDir,
+    OPENCLAW_NPM_TELEGRAM_FAST: params.baseEnv.OPENCLAW_NPM_TELEGRAM_FAST ?? "1", // the one key that keeps a caller value
+    OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS: String(params.timeoutMs), // same timeout for both settings
+    OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS: String(params.timeoutMs),
+  };
+}
+
+/** Throws a single Error listing every REQUIRED_TELEGRAM_ENV key that is unset or blank in `env`. */
+export function assertRequiredEnv(env: NodeJS.ProcessEnv) {
+  const absent = REQUIRED_TELEGRAM_ENV.filter((name) => (env[name] ?? "").trim().length === 0);
+  if (absent.length === 0) return;
+  throw new Error(`Missing Telegram QA env: ${absent.join(", ")}`);
+}
+
+// Pre-flight check: the harness entry script must be accessible under `harnessRoot`.
+// Any fs.access failure is reported as the same "Missing OpenClaw Telegram npm harness" Error.
+export async function assertHarnessRoot(harnessRoot: string) {
+  const scriptPath = path.join(harnessRoot, "scripts/e2e/npm-telegram-live-docker.sh");
+  await fs.access(scriptPath).catch(() => {
+    throw new Error(`Missing OpenClaw Telegram npm harness: ${scriptPath}`);
+  });
+}
+
+/** Probes `docker version` (10s timeout); any failure is rethrown as one actionable Error. */
+export async function assertDockerAvailable() {
+  const probeArgs = ["version", "--format", "{{.Server.Version}}"];
+  try {
+    await execFileAsync("docker", probeArgs, { timeout: 10_000 });
+  } catch {
+    throw new Error("Docker is required for RTT runs; install/start Docker and retry.");
+  }
+}
+
+/** Resolves `spec` via `npm view <spec> version --json` (30s timeout); throws if no version string comes back. */
+export async function resolvePublishedVersion(spec: string) {
+  const { stdout } = await execFileAsync("npm", ["view", spec, "version", "--json"], {
+    timeout: 30_000,
+  });
+  // Empty stdout would make JSON.parse throw an opaque SyntaxError; report it as "no version" instead.
+  const parsed: unknown = stdout.trim() ? JSON.parse(stdout) : undefined;
+  if (typeof parsed === "string" && parsed.trim().length > 0) return parsed.trim();
+  throw new Error(`npm did not return a version for ${spec}.`);
+}
+
+export async function readTelegramSummary(summaryPath: string) {
+  return JSON.parse(await fs.readFile(summaryPath, "utf8")) as TelegramQaSummary; // cast only; the shape is not validated
+}
+
+export async function writeJson(pathname: string, value: unknown) { // writes 2-space-indented JSON plus a trailing newline
+  await fs.mkdir(path.dirname(pathname), { recursive: true }); // create missing parent directories first
+  await fs.writeFile(pathname, `${JSON.stringify(value, null, 2)}\n`);
+}
+
+export async function appendJsonl(pathname: string, value: unknown) { // appends `value` as one single-line JSON row
+  await fs.mkdir(path.dirname(pathname), { recursive: true }); // create missing parent directories first
+  await fs.appendFile(pathname, `${JSON.stringify(value)}\n`);
+}
+
+/**
+ * Runs the harness script under bash; resolves to its exit code (1 if none), rejects on "error".
+ */
+export async function runHarness(params: { env: NodeJS.ProcessEnv; harnessRoot: string }) {
+  const { env, harnessRoot: cwd } = params;
+  const script = path.join(cwd, "scripts/e2e/npm-telegram-live-docker.sh");
+  const harness = spawn("bash", [script], { cwd, env, stdio: "inherit" });
+  const code = await new Promise<number | null>((resolve, reject) => {
+    harness.once("error", reject);
+    harness.once("exit", resolve);
+  });
+  return code ?? 1;
+}
+
+/**
+ * Assembles the normalized record for one run from already-collected inputs.
+ * `run.status` is "fail" only when some summary scenario has status "fail"; `rtt` comes from
+ * extractRtt. Performs no I/O.
+ */
+export function buildRttResult(params: {
+  artifacts: RttResult["artifacts"];
+  finishedAt: Date;
+  providerMode: RttProviderMode;
+  rawSummary: TelegramQaSummary;
+  runId: string;
+  scenarios: string[];
+  spec: string;
+  startedAt: Date;
+  version: string;
+}): RttResult {
+  const { startedAt, finishedAt } = params;
+  const anyFailed = (params.rawSummary.scenarios ?? []).some((entry) => entry.status === "fail");
+  return {
+    package: { spec: params.spec, version: params.version },
+    run: {
+      id: params.runId,
+      startedAt: startedAt.toISOString(),
+      finishedAt: finishedAt.toISOString(),
+      durationMs: finishedAt.getTime() - startedAt.getTime(),
+      status: anyFailed ? "fail" : "pass",
+    },
+    mode: { providerMode: params.providerMode, scenarios: params.scenarios },
+    rtt: extractRtt(params.rawSummary),
+    artifacts: params.artifacts,
+  };
+}
+
+export const __testing = { // NOTE(review): exposes a module-private constant, presumably for tests — confirm usage
+  REQUIRED_TELEGRAM_ENV,
+};
diff --git a/scripts/rtt.ts b/scripts/rtt.ts
new file mode 100644
index 00000000000..5ae096ab3b8
--- /dev/null
+++ b/scripts/rtt.ts
@@ -0,0 +1,216 @@
+#!/usr/bin/env -S node --import tsx
+import fs from "node:fs/promises";
+import path from "node:path";
+import {
+  appendJsonl,
+  assertDockerAvailable,
+  assertHarnessRoot,
+  assertRequiredEnv,
+  buildRttResult,
+  buildRunId,
+  createHarnessEnv,
+  readTelegramSummary,
+  resolvePublishedVersion,
+  runHarness,
+  validateOpenClawPackageSpec,
+  writeJson,
+  type RttProviderMode,
+} from "./lib/rtt-harness.ts";
+
+const DEFAULT_SCENARIOS = ["telegram-mentioned-message-reply"]; // always used: parseArgs has no scenario flag
+const DEFAULT_PROVIDER_MODE = "mock-openai" satisfies RttProviderMode; // used unless --provider is given
+const DEFAULT_TIMEOUT_MS = 180_000; // 3 minutes; used unless --timeout-ms is given
+
+function usage() { // help text: synopsis, blank line, examples; joined with "\n", no trailing newline
+  return [
+    "Usage: pnpm rtt <openclaw@spec> [--provider mock-openai|live-frontier] [--runs N] [--timeout-ms N] [--harness-root PATH] [--output PATH]",
+    "",
+    "Examples:",
+    "  pnpm rtt openclaw@beta",
+    "  pnpm rtt openclaw@2026.4.30",
+    "  pnpm rtt openclaw@latest --provider live-frontier",
+  ].join("\n");
+}
+
+// Narrows a raw --provider value to RttProviderMode.
+// Anything other than the two known modes is rejected with an Error that echoes the input.
+function parseProviderMode(value: string): RttProviderMode {
+  if (value === "mock-openai" || value === "live-frontier") return value;
+  throw new Error(`--provider must be mock-openai or live-frontier; got: ${value}`);
+}
+
+/** Parses `value` as a base-10 positive integer for option `label`; throws an Error naming the option. */
+function parsePositiveInt(label: string, value: string) {
+  // Digits only: bare Number() also accepts "1e3", "0x10" and padded " 5 ", which are typos here.
+  const parsed = /^[0-9]+$/u.test(value) ? Number(value) : Number.NaN;
+  if (Number.isSafeInteger(parsed) && parsed >= 1) return parsed;
+  throw new Error(`${label} must be a positive integer; got: ${value}`);
+}
+
+/**
+ * Expands a leading "~" or "~/" using $HOME; every other input (and "~" when HOME is unset) passes through.
+ */
+function resolveHome(input: string) {
+  const home = process.env.HOME;
+  if (input === "~") return home ?? input;
+  if (input.startsWith("~/")) return path.join(home ?? "~", input.slice(2));
+  return input;
+}
+
+/** Parses CLI args into { spec, options }; prints usage and exits on --help/-h; throws on invalid input. */
+function parseArgs(argv: string[]) {
+  let spec: string | undefined;
+  // Annotated explicitly: a type inferred from the `satisfies` constant is not RttProviderMode.
+  let providerMode: RttProviderMode = DEFAULT_PROVIDER_MODE;
+  let runs = 1;
+  let harnessRoot = "~/Developer/clawdbot";
+  let output = "runs";
+  let timeoutMs = DEFAULT_TIMEOUT_MS;
+
+  for (let index = 0; index < argv.length; index += 1) {
+    const arg = argv[index] ?? "";
+    if (arg === "--help" || arg === "-h") {
+      process.stdout.write(`${usage()}\n`);
+      process.exit(0);
+    }
+    if (arg === "--provider") {
+      providerMode = parseProviderMode(argv[++index] ?? "");
+      continue;
+    }
+    if (arg === "--runs") {
+      runs = parsePositiveInt("--runs", argv[++index] ?? "");
+      continue;
+    }
+    if (arg === "--timeout-ms") {
+      timeoutMs = parsePositiveInt("--timeout-ms", argv[++index] ?? "");
+      continue;
+    }
+    if (arg === "--harness-root" || arg === "--output") {
+      const value = argv[++index] ?? "";
+      if (!value.trim()) {
+        throw new Error(`${arg} requires a path.`);
+      }
+      if (arg === "--output") {
+        output = value;
+      } else {
+        harnessRoot = value;
+      }
+      continue;
+    }
+    if (arg.startsWith("--")) {
+      throw new Error(`Unknown option: ${arg}`);
+    }
+    if (spec) {
+      throw new Error(`Unexpected extra argument: ${arg}`);
+    }
+    spec = arg;
+  }
+
+  if (!spec) {
+    throw new Error(`Missing package spec.\n${usage()}`);
+  }
+
+  return {
+    spec: validateOpenClawPackageSpec(spec),
+    options: {
+      providerMode,
+      runs,
+      harnessRoot: path.resolve(resolveHome(harnessRoot)),
+      output: path.resolve(resolveHome(output)),
+      scenarios: DEFAULT_SCENARIOS,
+      timeoutMs,
+    },
+  };
+}
+
+/**
+ * Runs the harness once, copies its raw artifacts into <output>/<runId>/raw, then writes result.json,
+ * appends the record to data/rtt.jsonl (cwd-relative) and prints it. Rejects if no summary is readable.
+ */
+async function runOne(params: {
+  index: number;
+  options: ReturnType<typeof parseArgs>["options"];
+  spec: string;
+  version: string;
+}) {
+  const { index, options, spec, version } = params;
+  const { harnessRoot, providerMode, scenarios, timeoutMs } = options;
+  const runId = buildRunId({ now: new Date(), spec, index });
+  const runDir = path.join(options.output, runId);
+  const rawDir = path.join(runDir, "raw");
+  const harnessRawDir = path.join(harnessRoot, ".artifacts/rtt", runId, "raw");
+  const rawOutputDir = path.relative(harnessRoot, harnessRawDir);
+  const startedAt = new Date();
+  const env = createHarnessEnv({
+    baseEnv: process.env,
+    providerMode,
+    rawOutputDir,
+    scenarios,
+    spec,
+    timeoutMs,
+    version,
+  });
+
+  process.stderr.write(`[rtt] run ${index + 1}/${options.runs}: ${spec}\n`);
+  const harnessExitCode = await runHarness({ env, harnessRoot });
+  await readTelegramSummary(path.join(harnessRawDir, "telegram-qa-summary.json"));
+  await fs.rm(rawDir, { recursive: true, force: true });
+  await fs.mkdir(path.dirname(rawDir), { recursive: true });
+  await fs.cp(harnessRawDir, rawDir, { recursive: true });
+
+  const artifacts = {
+    rawSummaryPath: path.join(rawDir, "telegram-qa-summary.json"),
+    rawReportPath: path.join(rawDir, "telegram-qa-report.md"),
+    rawObservedMessagesPath: path.join(rawDir, "telegram-qa-observed-messages.json"),
+    resultPath: path.join(runDir, "result.json"),
+  };
+  const rawSummary = await readTelegramSummary(artifacts.rawSummaryPath);
+  const finishedAt = new Date();
+  const result = buildRttResult({
+    artifacts,
+    finishedAt,
+    providerMode,
+    rawSummary,
+    runId,
+    scenarios,
+    spec,
+    startedAt,
+    version,
+  });
+
+  await writeJson(artifacts.resultPath, result);
+  await appendJsonl(path.resolve("data/rtt.jsonl"), result);
+  process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
+  return { harnessExitCode, result };
+}
+
+// CLI entry point: parse args, check prerequisites (env, harness checkout, Docker, npm version),
+// then execute the runs one after another. The exit code becomes 1 when any run failed.
+async function main() {
+  const { spec, options } = parseArgs(process.argv.slice(2));
+  assertRequiredEnv(process.env);
+  await assertHarnessRoot(options.harnessRoot);
+  await assertDockerAvailable();
+  const version = await resolvePublishedVersion(spec);
+  let anyFailed = false;
+  for (let index = 0; index < options.runs; index += 1) {
+    const { harnessExitCode, result } = await runOne({ index, options, spec, version });
+    if (harnessExitCode !== 0 || result.run.status === "fail") anyFailed = true;
+  }
+  if (anyFailed) process.exitCode = 1;
+}
+
+// Compare as URLs: import.meta.url is percent-encoded, so raw `file://${path}` fails for paths with spaces.
+if (import.meta.url === new URL(`file://${process.argv[1]}`).href) {
+  main().catch((error: unknown) => {
+    process.stderr.write(`[rtt] ${error instanceof Error ? error.message : String(error)}\n`);
+    process.exitCode = 1;
+  });
+}
+
+export const __testing = { // internals exposed for test/scripts/rtt-harness.test.ts (imported there as cliTesting)
+  parseArgs,
+  parseProviderMode,
+  parsePositiveInt,
+  resolveHome,
+};
diff --git a/test/fixtures/telegram-qa-summary-rtt.json b/test/fixtures/telegram-qa-summary-rtt.json
new file mode 100644
index 00000000000..36796aabb08
--- /dev/null
+++ b/test/fixtures/telegram-qa-summary-rtt.json
@@ -0,0 +1,31 @@
+{
+  "credentials": {
+    "kind": "telegram",
+    "source": "env"
+  },
+  "groupId": "-100123",
+  "startedAt": "2026-05-01T00:00:00.000Z",
+  "finishedAt": "2026-05-01T00:00:10.000Z",
+  "cleanupIssues": [],
+  "counts": {
+    "total": 2,
+    "passed": 2,
+    "failed": 0
+  },
+  "scenarios": [
+    {
+      "id": "telegram-canary",
+      "title": "Telegram canary",
+      "status": "pass",
+      "details": "reply matched in 1234ms",
+      "rttMs": 1234
+    },
+    {
+      "id": "telegram-mentioned-message-reply",
+      "title": "Telegram mentioned message gets a reply",
+      "status": "pass",
+      "details": "reply matched in 5678ms",
+      "rttMs": 5678
+    }
+  ]
+}
diff --git a/test/scripts/rtt-harness.test.ts b/test/scripts/rtt-harness.test.ts
new file mode 100644
index 00000000000..8643af3721e
--- /dev/null
+++ b/test/scripts/rtt-harness.test.ts
@@ -0,0 +1,172 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import { describe, expect, it } from "vitest";
+import {
+  appendJsonl,
+  buildRttResult,
+  buildRunId,
+  createHarnessEnv,
+  extractRtt,
+  readTelegramSummary,
+  safeRunLabel,
+  validateOpenClawPackageSpec,
+} from "../../scripts/lib/rtt-harness.ts";
+import { __testing as cliTesting } from "../../scripts/rtt.ts";
+
+const TEST_DIR = path.dirname(fileURLToPath(import.meta.url)); // directory containing this test file
+const FIXTURE_PATH = path.resolve(TEST_DIR, "../fixtures/telegram-qa-summary-rtt.json"); // summary with two passing scenarios
+
+describe("RTT harness", () => {
+  it("validates published OpenClaw package specs", () => {
+    expect(validateOpenClawPackageSpec("openclaw@beta")).toBe("openclaw@beta");
+    expect(validateOpenClawPackageSpec("openclaw@latest")).toBe("openclaw@latest");
+    expect(validateOpenClawPackageSpec("openclaw@2026.4.30")).toBe("openclaw@2026.4.30");
+    expect(validateOpenClawPackageSpec("openclaw@2026.4.30-beta.2")).toBe(
+      "openclaw@2026.4.30-beta.2",
+    );
+
+    expect(() => validateOpenClawPackageSpec("@openclaw/openclaw@beta")).toThrow( // scoped package name
+      /Package spec must be/,
+    );
+    expect(() => validateOpenClawPackageSpec("openclaw@next")).toThrow(/Package spec must be/); // tag other than beta/latest
+  });
+
+  it("builds stable run labels", () => {
+    expect(safeRunLabel("openclaw@beta")).toBe("openclaw_beta");
+    expect(
+      buildRunId({
+        now: new Date("2026-05-01T03:04:05.678Z"),
+        spec: "openclaw@beta",
+        index: 1,
+      }),
+    ).toBe("2026-05-01T030405678Z-openclaw_beta-2"); // ":" and "." stripped from the ISO stamp; suffix is index + 1
+  });
+
+  it("constructs harness env without dropping caller env", () => {
+    const env = createHarnessEnv({
+      baseEnv: {
+        OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
+        OPENCLAW_NPM_TELEGRAM_FAST: "0", // caller-provided value that must survive (asserted below)
+      },
+      providerMode: "mock-openai",
+      rawOutputDir: ".artifacts/rtt/run/raw",
+      scenarios: ["telegram-mentioned-message-reply"],
+      spec: "openclaw@beta",
+      timeoutMs: 180_000,
+      version: "2026.4.30-beta.1",
+    });
+
+    expect(env.OPENCLAW_QA_TELEGRAM_GROUP_ID).toBe("-100123");
+    expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC).toBe("openclaw@beta");
+    expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL).toBe("openclaw@beta (2026.4.30-beta.1)");
+    expect(env.OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE).toBe("mock-openai");
+    expect(env.OPENCLAW_NPM_TELEGRAM_SCENARIOS).toBe("telegram-mentioned-message-reply");
+    expect(env.OPENCLAW_NPM_TELEGRAM_SKIP_HOTPATH).toBe("1");
+    expect(env.OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR).toBe(".artifacts/rtt/run/raw");
+    expect(env.OPENCLAW_NPM_TELEGRAM_FAST).toBe("0");
+    expect(env.OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS).toBe("180000");
+    expect(env.OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS).toBe("180000");
+  });
+
+  it("extracts RTT values from Telegram QA summaries", async () => {
+    const summary = await readTelegramSummary(FIXTURE_PATH);
+    expect(extractRtt(summary)).toEqual({
+      canaryMs: 1234,
+      mentionReplyMs: 5678,
+    });
+  });
+
+  it("builds normalized result JSON", async () => {
+    const summary = await readTelegramSummary(FIXTURE_PATH);
+    const result = buildRttResult({
+      artifacts: {
+        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
+        rawReportPath: "runs/run/raw/telegram-qa-report.md",
+        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
+        resultPath: "runs/run/result.json",
+      },
+      finishedAt: new Date("2026-05-01T00:00:12.000Z"),
+      providerMode: "mock-openai",
+      rawSummary: summary,
+      runId: "run",
+      scenarios: ["telegram-mentioned-message-reply"],
+      spec: "openclaw@beta",
+      startedAt: new Date("2026-05-01T00:00:00.000Z"),
+      version: "2026.4.30-beta.1",
+    });
+
+    expect(result).toMatchObject({
+      package: { spec: "openclaw@beta", version: "2026.4.30-beta.1" },
+      run: { durationMs: 12_000, id: "run", status: "pass" }, // 12s between startedAt and finishedAt above
+      mode: {
+        providerMode: "mock-openai",
+        scenarios: ["telegram-mentioned-message-reply"],
+      },
+      rtt: { canaryMs: 1234, mentionReplyMs: 5678 },
+    });
+  });
+
+  it("marks failed scenario summaries as failed results", () => {
+    const result = buildRttResult({
+      artifacts: {
+        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
+        rawReportPath: "runs/run/raw/telegram-qa-report.md",
+        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
+        resultPath: "runs/run/result.json",
+      },
+      finishedAt: new Date("2026-05-01T00:00:12.000Z"),
+      providerMode: "mock-openai",
+      rawSummary: {
+        scenarios: [
+          { id: "telegram-canary", rttMs: 5948, status: "pass" },
+          { id: "telegram-mentioned-message-reply", status: "fail" }, // no rttMs on the failed scenario
+        ],
+      },
+      runId: "run",
+      scenarios: ["telegram-mentioned-message-reply"],
+      spec: "openclaw@latest",
+      startedAt: new Date("2026-05-01T00:00:00.000Z"),
+      version: "2026.4.29",
+    });
+
+    expect(result.run.status).toBe("fail");
+    expect(result.rtt).toEqual({ canaryMs: 5948, mentionReplyMs: undefined });
+  });
+
+  it("appends JSONL rows", async () => {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-test-")); // NOTE(review): never removed afterwards
+    const jsonlPath = path.join(tempDir, "data/rtt.jsonl"); // parent "data" dir does not exist yet
+    await appendJsonl(jsonlPath, { run: 1 });
+    await appendJsonl(jsonlPath, { run: 2 });
+
+    await expect(fs.readFile(jsonlPath, "utf8")).resolves.toBe('{"run":1}\n{"run":2}\n');
+  });
+
+  it("parses CLI options", () => {
+    const parsed = cliTesting.parseArgs([
+      "openclaw@latest",
+      "--provider",
+      "live-frontier",
+      "--runs",
+      "3",
+      "--timeout-ms",
+      "240000",
+      "--harness-root",
+      "/tmp/openclaw",
+      "--output",
+      "/tmp/runs",
+    ]);
+
+    expect(parsed.spec).toBe("openclaw@latest");
+    expect(parsed.options).toMatchObject({
+      providerMode: "live-frontier",
+      runs: 3,
+      harnessRoot: "/tmp/openclaw",
+      output: "/tmp/runs",
+      scenarios: ["telegram-mentioned-message-reply"],
+      timeoutMs: 240_000,
+    });
+  });
+});