Files
openclaw/test/scripts/rtt-harness.test.ts
2026-05-02 18:29:13 +01:00

205 lines
6.9 KiB
TypeScript

import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
import {
appendJsonl,
buildRttResult,
buildRunId,
createHarnessEnv,
extractRtt,
readTelegramSummary,
safeRunLabel,
validateOpenClawPackageSpec,
} from "../../scripts/lib/rtt-harness.ts";
import { __testing as cliTesting } from "../../scripts/rtt.ts";
const TEST_DIR = path.dirname(fileURLToPath(import.meta.url));
const FIXTURE_PATH = path.resolve(TEST_DIR, "../fixtures/telegram-qa-summary-rtt.json");
describe("RTT harness", () => {
it("validates OpenClaw package specs", () => {
expect(validateOpenClawPackageSpec("openclaw@main")).toBe("openclaw@main");
expect(validateOpenClawPackageSpec("openclaw@alpha")).toBe("openclaw@alpha");
expect(validateOpenClawPackageSpec("openclaw@beta")).toBe("openclaw@beta");
expect(validateOpenClawPackageSpec("openclaw@latest")).toBe("openclaw@latest");
expect(validateOpenClawPackageSpec("openclaw@2026.4.30")).toBe("openclaw@2026.4.30");
expect(validateOpenClawPackageSpec("openclaw@2026.4.30-beta.2")).toBe(
"openclaw@2026.4.30-beta.2",
);
expect(validateOpenClawPackageSpec("openclaw@2026.4.30-alpha.2")).toBe(
"openclaw@2026.4.30-alpha.2",
);
expect(() => validateOpenClawPackageSpec("@openclaw/openclaw@beta")).toThrow(
/Package spec must be/,
);
expect(() => validateOpenClawPackageSpec("openclaw@next")).toThrow(/Package spec must be/);
});
it("builds stable run labels", () => {
expect(safeRunLabel("openclaw@beta")).toBe("openclaw_beta");
expect(
buildRunId({
now: new Date("2026-05-01T03:04:05.678Z"),
spec: "openclaw@beta",
index: 1,
}),
).toBe("2026-05-01T030405678Z-openclaw_beta-2");
});
it("constructs harness env without dropping caller env", () => {
const env = createHarnessEnv({
baseEnv: {
OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
OPENCLAW_NPM_TELEGRAM_FAST: "0",
},
providerMode: "mock-openai",
rawOutputDir: ".artifacts/rtt/run/raw",
samples: 20,
sampleTimeoutMs: 30_000,
scenarios: ["telegram-mentioned-message-reply"],
spec: "openclaw@beta",
timeoutMs: 180_000,
version: "2026.4.30-beta.1",
});
expect(env.OPENCLAW_QA_TELEGRAM_GROUP_ID).toBe("-100123");
expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC).toBe("openclaw@beta");
expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL).toBe("openclaw@beta (2026.4.30-beta.1)");
expect(env.OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE).toBe("mock-openai");
expect(env.OPENCLAW_NPM_TELEGRAM_SCENARIOS).toBe("telegram-mentioned-message-reply");
expect(env.OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR).toBe(".artifacts/rtt/run/raw");
expect(env.OPENCLAW_NPM_TELEGRAM_FAST).toBe("0");
expect(env.OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES).toBe("20");
expect(env.OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS).toBe("30000");
expect(env.OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS).toBe("180000");
expect(env.OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS).toBe("180000");
});
it("extracts RTT values from Telegram QA summaries", async () => {
const summary = await readTelegramSummary(FIXTURE_PATH);
expect(extractRtt(summary)).toEqual({
canaryMs: 1234,
mentionReplyMs: 5000,
warmSamples: [4000, 5000, 7000],
avgMs: 5333,
p50Ms: 5000,
p95Ms: 7000,
maxMs: 7000,
failedSamples: 0,
});
});
it("builds normalized result JSON", async () => {
const summary = await readTelegramSummary(FIXTURE_PATH);
const result = buildRttResult({
artifacts: {
rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
rawReportPath: "runs/run/raw/telegram-qa-report.md",
rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
resultPath: "runs/run/result.json",
},
finishedAt: new Date("2026-05-01T00:00:12.000Z"),
providerMode: "mock-openai",
rawSummary: summary,
runId: "run",
scenarios: ["telegram-mentioned-message-reply"],
spec: "openclaw@beta",
startedAt: new Date("2026-05-01T00:00:00.000Z"),
version: "2026.4.30-beta.1",
});
expect(result).toMatchObject({
package: { spec: "openclaw@beta", version: "2026.4.30-beta.1" },
run: { durationMs: 12_000, id: "run", status: "pass" },
mode: {
providerMode: "mock-openai",
scenarios: ["telegram-mentioned-message-reply"],
},
rtt: {
canaryMs: 1234,
mentionReplyMs: 5000,
avgMs: 5333,
p50Ms: 5000,
p95Ms: 7000,
maxMs: 7000,
failedSamples: 0,
},
});
expect(result.rtt.warmSamples).toEqual([4000, 5000, 7000]);
});
it("marks failed scenario summaries as failed results", () => {
const result = buildRttResult({
artifacts: {
rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
rawReportPath: "runs/run/raw/telegram-qa-report.md",
rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
resultPath: "runs/run/result.json",
},
finishedAt: new Date("2026-05-01T00:00:12.000Z"),
providerMode: "mock-openai",
rawSummary: {
scenarios: [
{ id: "telegram-canary", rttMs: 5948, status: "pass" },
{ id: "telegram-mentioned-message-reply", status: "fail" },
],
},
runId: "run",
scenarios: ["telegram-mentioned-message-reply"],
spec: "openclaw@latest",
startedAt: new Date("2026-05-01T00:00:00.000Z"),
version: "2026.4.29",
});
expect(result.run.status).toBe("fail");
expect(result.rtt).toEqual({ canaryMs: 5948, mentionReplyMs: undefined });
});
it("appends JSONL rows", async () => {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-test-"));
const jsonlPath = path.join(tempDir, "data/rtt.jsonl");
await appendJsonl(jsonlPath, { run: 1 });
await appendJsonl(jsonlPath, { run: 2 });
await expect(fs.readFile(jsonlPath, "utf8")).resolves.toBe('{"run":1}\n{"run":2}\n');
});
it("parses CLI options", () => {
const parsed = cliTesting.parseArgs([
"openclaw@latest",
"--package-tgz",
"/tmp/openclaw.tgz",
"--provider",
"live-frontier",
"--runs",
"3",
"--samples",
"5",
"--sample-timeout-ms",
"30000",
"--timeout-ms",
"240000",
"--harness-root",
"/tmp/openclaw",
"--output",
"/tmp/runs",
]);
expect(parsed.spec).toBe("openclaw@latest");
expect(parsed.options).toMatchObject({
packageTgz: "/tmp/openclaw.tgz",
providerMode: "live-frontier",
runs: 3,
samples: 5,
sampleTimeoutMs: 30_000,
harnessRoot: "/tmp/openclaw",
output: "/tmp/runs",
scenarios: ["telegram-mentioned-message-reply"],
timeoutMs: 240_000,
});
});
});