feat(rtt): add published package measurement harness

2026-05-06 19:00:45 +00:00 · 2026-05-01 11:16:04 +05:30
parent ef270b7a28
commit e8810c04a4
6 changed files with 637 additions and 0 deletions
--- a/test/fixtures/telegram-qa-summary-rtt.json
+++ b/test/fixtures/telegram-qa-summary-rtt.json
@@ -0,0 +1,31 @@
+{
+  "credentials": {
+    "kind": "telegram",
+    "source": "env"
+  },
+  "groupId": "-100123",
+  "startedAt": "2026-05-01T00:00:00.000Z",
+  "finishedAt": "2026-05-01T00:00:10.000Z",
+  "cleanupIssues": [],
+  "counts": {
+    "total": 2,
+    "passed": 2,
+    "failed": 0
+  },
+  "scenarios": [
+    {
+      "id": "telegram-canary",
+      "title": "Telegram canary",
+      "status": "pass",
+      "details": "reply matched in 1234ms",
+      "rttMs": 1234
+    },
+    {
+      "id": "telegram-mentioned-message-reply",
+      "title": "Telegram mentioned message gets a reply",
+      "status": "pass",
+      "details": "reply matched in 5678ms",
+      "rttMs": 5678
+    }
+  ]
+}
--- a/test/scripts/rtt-harness.test.ts
+++ b/test/scripts/rtt-harness.test.ts
@@ -0,0 +1,172 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import { describe, expect, it } from "vitest";
+import {
+  appendJsonl,
+  buildRttResult,
+  buildRunId,
+  createHarnessEnv,
+  extractRtt,
+  readTelegramSummary,
+  safeRunLabel,
+  validateOpenClawPackageSpec,
+} from "../../scripts/lib/rtt-harness.ts";
+import { __testing as cliTesting } from "../../scripts/rtt.ts";
+
+const TEST_DIR = path.dirname(fileURLToPath(import.meta.url));
+const FIXTURE_PATH = path.resolve(TEST_DIR, "../fixtures/telegram-qa-summary-rtt.json");
+
+describe("RTT harness", () => {
+  it("validates published OpenClaw package specs", () => {
+    expect(validateOpenClawPackageSpec("openclaw@beta")).toBe("openclaw@beta");
+    expect(validateOpenClawPackageSpec("openclaw@latest")).toBe("openclaw@latest");
+    expect(validateOpenClawPackageSpec("openclaw@2026.4.30")).toBe("openclaw@2026.4.30");
+    expect(validateOpenClawPackageSpec("openclaw@2026.4.30-beta.2")).toBe(
+      "openclaw@2026.4.30-beta.2",
+    );
+
+    expect(() => validateOpenClawPackageSpec("@openclaw/openclaw@beta")).toThrow(
+      /Package spec must be/,
+    );
+    expect(() => validateOpenClawPackageSpec("openclaw@next")).toThrow(/Package spec must be/);
+  });
+
+  it("builds stable run labels", () => {
+    expect(safeRunLabel("openclaw@beta")).toBe("openclaw_beta");
+    expect(
+      buildRunId({
+        now: new Date("2026-05-01T03:04:05.678Z"),
+        spec: "openclaw@beta",
+        index: 1,
+      }),
+    ).toBe("2026-05-01T030405678Z-openclaw_beta-2");
+  });
+
+  it("constructs harness env without dropping caller env", () => {
+    const env = createHarnessEnv({
+      baseEnv: {
+        OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
+        OPENCLAW_NPM_TELEGRAM_FAST: "0",
+      },
+      providerMode: "mock-openai",
+      rawOutputDir: ".artifacts/rtt/run/raw",
+      scenarios: ["telegram-mentioned-message-reply"],
+      spec: "openclaw@beta",
+      timeoutMs: 180_000,
+      version: "2026.4.30-beta.1",
+    });
+
+    expect(env.OPENCLAW_QA_TELEGRAM_GROUP_ID).toBe("-100123");
+    expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC).toBe("openclaw@beta");
+    expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL).toBe("openclaw@beta (2026.4.30-beta.1)");
+    expect(env.OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE).toBe("mock-openai");
+    expect(env.OPENCLAW_NPM_TELEGRAM_SCENARIOS).toBe("telegram-mentioned-message-reply");
+    expect(env.OPENCLAW_NPM_TELEGRAM_SKIP_HOTPATH).toBe("1");
+    expect(env.OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR).toBe(".artifacts/rtt/run/raw");
+    expect(env.OPENCLAW_NPM_TELEGRAM_FAST).toBe("0");
+    expect(env.OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS).toBe("180000");
+    expect(env.OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS).toBe("180000");
+  });
+
+  it("extracts RTT values from Telegram QA summaries", async () => {
+    const summary = await readTelegramSummary(FIXTURE_PATH);
+    expect(extractRtt(summary)).toEqual({
+      canaryMs: 1234,
+      mentionReplyMs: 5678,
+    });
+  });
+
+  it("builds normalized result JSON", async () => {
+    const summary = await readTelegramSummary(FIXTURE_PATH);
+    const result = buildRttResult({
+      artifacts: {
+        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
+        rawReportPath: "runs/run/raw/telegram-qa-report.md",
+        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
+        resultPath: "runs/run/result.json",
+      },
+      finishedAt: new Date("2026-05-01T00:00:12.000Z"),
+      providerMode: "mock-openai",
+      rawSummary: summary,
+      runId: "run",
+      scenarios: ["telegram-mentioned-message-reply"],
+      spec: "openclaw@beta",
+      startedAt: new Date("2026-05-01T00:00:00.000Z"),
+      version: "2026.4.30-beta.1",
+    });
+
+    expect(result).toMatchObject({
+      package: { spec: "openclaw@beta", version: "2026.4.30-beta.1" },
+      run: { durationMs: 12_000, id: "run", status: "pass" },
+      mode: {
+        providerMode: "mock-openai",
+        scenarios: ["telegram-mentioned-message-reply"],
+      },
+      rtt: { canaryMs: 1234, mentionReplyMs: 5678 },
+    });
+  });
+
+  it("marks failed scenario summaries as failed results", () => {
+    const result = buildRttResult({
+      artifacts: {
+        rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
+        rawReportPath: "runs/run/raw/telegram-qa-report.md",
+        rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
+        resultPath: "runs/run/result.json",
+      },
+      finishedAt: new Date("2026-05-01T00:00:12.000Z"),
+      providerMode: "mock-openai",
+      rawSummary: {
+        scenarios: [
+          { id: "telegram-canary", rttMs: 5948, status: "pass" },
+          { id: "telegram-mentioned-message-reply", status: "fail" },
+        ],
+      },
+      runId: "run",
+      scenarios: ["telegram-mentioned-message-reply"],
+      spec: "openclaw@latest",
+      startedAt: new Date("2026-05-01T00:00:00.000Z"),
+      version: "2026.4.29",
+    });
+
+    expect(result.run.status).toBe("fail");
+    expect(result.rtt).toEqual({ canaryMs: 5948, mentionReplyMs: undefined });
+  });
+
+  it("appends JSONL rows", async () => {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-test-"));
+    const jsonlPath = path.join(tempDir, "data/rtt.jsonl");
+    await appendJsonl(jsonlPath, { run: 1 });
+    await appendJsonl(jsonlPath, { run: 2 });
+
+    await expect(fs.readFile(jsonlPath, "utf8")).resolves.toBe('{"run":1}\n{"run":2}\n');
+  });
+
+  it("parses CLI options", () => {
+    const parsed = cliTesting.parseArgs([
+      "openclaw@latest",
+      "--provider",
+      "live-frontier",
+      "--runs",
+      "3",
+      "--timeout-ms",
+      "240000",
+      "--harness-root",
+      "/tmp/openclaw",
+      "--output",
+      "/tmp/runs",
+    ]);
+
+    expect(parsed.spec).toBe("openclaw@latest");
+    expect(parsed.options).toMatchObject({
+      providerMode: "live-frontier",
+      runs: 3,
+      harnessRoot: "/tmp/openclaw",
+      output: "/tmp/runs",
+      scenarios: ["telegram-mentioned-message-reply"],
+      timeoutMs: 240_000,
+    });
+  });
+});