mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 19:00:45 +00:00
feat(rtt): add published package measurement harness
This commit is contained in:
31
test/fixtures/telegram-qa-summary-rtt.json
vendored
Normal file
31
test/fixtures/telegram-qa-summary-rtt.json
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"credentials": {
|
||||
"kind": "telegram",
|
||||
"source": "env"
|
||||
},
|
||||
"groupId": "-100123",
|
||||
"startedAt": "2026-05-01T00:00:00.000Z",
|
||||
"finishedAt": "2026-05-01T00:00:10.000Z",
|
||||
"cleanupIssues": [],
|
||||
"counts": {
|
||||
"total": 2,
|
||||
"passed": 2,
|
||||
"failed": 0
|
||||
},
|
||||
"scenarios": [
|
||||
{
|
||||
"id": "telegram-canary",
|
||||
"title": "Telegram canary",
|
||||
"status": "pass",
|
||||
"details": "reply matched in 1234ms",
|
||||
"rttMs": 1234
|
||||
},
|
||||
{
|
||||
"id": "telegram-mentioned-message-reply",
|
||||
"title": "Telegram mentioned message gets a reply",
|
||||
"status": "pass",
|
||||
"details": "reply matched in 5678ms",
|
||||
"rttMs": 5678
|
||||
}
|
||||
]
|
||||
}
|
||||
172
test/scripts/rtt-harness.test.ts
Normal file
172
test/scripts/rtt-harness.test.ts
Normal file
@@ -0,0 +1,172 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
appendJsonl,
|
||||
buildRttResult,
|
||||
buildRunId,
|
||||
createHarnessEnv,
|
||||
extractRtt,
|
||||
readTelegramSummary,
|
||||
safeRunLabel,
|
||||
validateOpenClawPackageSpec,
|
||||
} from "../../scripts/lib/rtt-harness.ts";
|
||||
import { __testing as cliTesting } from "../../scripts/rtt.ts";
|
||||
|
||||
const TEST_DIR = path.dirname(fileURLToPath(import.meta.url));
|
||||
const FIXTURE_PATH = path.resolve(TEST_DIR, "../fixtures/telegram-qa-summary-rtt.json");
|
||||
|
||||
describe("RTT harness", () => {
|
||||
it("validates published OpenClaw package specs", () => {
|
||||
expect(validateOpenClawPackageSpec("openclaw@beta")).toBe("openclaw@beta");
|
||||
expect(validateOpenClawPackageSpec("openclaw@latest")).toBe("openclaw@latest");
|
||||
expect(validateOpenClawPackageSpec("openclaw@2026.4.30")).toBe("openclaw@2026.4.30");
|
||||
expect(validateOpenClawPackageSpec("openclaw@2026.4.30-beta.2")).toBe(
|
||||
"openclaw@2026.4.30-beta.2",
|
||||
);
|
||||
|
||||
expect(() => validateOpenClawPackageSpec("@openclaw/openclaw@beta")).toThrow(
|
||||
/Package spec must be/,
|
||||
);
|
||||
expect(() => validateOpenClawPackageSpec("openclaw@next")).toThrow(/Package spec must be/);
|
||||
});
|
||||
|
||||
it("builds stable run labels", () => {
|
||||
expect(safeRunLabel("openclaw@beta")).toBe("openclaw_beta");
|
||||
expect(
|
||||
buildRunId({
|
||||
now: new Date("2026-05-01T03:04:05.678Z"),
|
||||
spec: "openclaw@beta",
|
||||
index: 1,
|
||||
}),
|
||||
).toBe("2026-05-01T030405678Z-openclaw_beta-2");
|
||||
});
|
||||
|
||||
it("constructs harness env without dropping caller env", () => {
|
||||
const env = createHarnessEnv({
|
||||
baseEnv: {
|
||||
OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
|
||||
OPENCLAW_NPM_TELEGRAM_FAST: "0",
|
||||
},
|
||||
providerMode: "mock-openai",
|
||||
rawOutputDir: ".artifacts/rtt/run/raw",
|
||||
scenarios: ["telegram-mentioned-message-reply"],
|
||||
spec: "openclaw@beta",
|
||||
timeoutMs: 180_000,
|
||||
version: "2026.4.30-beta.1",
|
||||
});
|
||||
|
||||
expect(env.OPENCLAW_QA_TELEGRAM_GROUP_ID).toBe("-100123");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC).toBe("openclaw@beta");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL).toBe("openclaw@beta (2026.4.30-beta.1)");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE).toBe("mock-openai");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_SCENARIOS).toBe("telegram-mentioned-message-reply");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_SKIP_HOTPATH).toBe("1");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR).toBe(".artifacts/rtt/run/raw");
|
||||
expect(env.OPENCLAW_NPM_TELEGRAM_FAST).toBe("0");
|
||||
expect(env.OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS).toBe("180000");
|
||||
expect(env.OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS).toBe("180000");
|
||||
});
|
||||
|
||||
it("extracts RTT values from Telegram QA summaries", async () => {
|
||||
const summary = await readTelegramSummary(FIXTURE_PATH);
|
||||
expect(extractRtt(summary)).toEqual({
|
||||
canaryMs: 1234,
|
||||
mentionReplyMs: 5678,
|
||||
});
|
||||
});
|
||||
|
||||
it("builds normalized result JSON", async () => {
|
||||
const summary = await readTelegramSummary(FIXTURE_PATH);
|
||||
const result = buildRttResult({
|
||||
artifacts: {
|
||||
rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
|
||||
rawReportPath: "runs/run/raw/telegram-qa-report.md",
|
||||
rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
|
||||
resultPath: "runs/run/result.json",
|
||||
},
|
||||
finishedAt: new Date("2026-05-01T00:00:12.000Z"),
|
||||
providerMode: "mock-openai",
|
||||
rawSummary: summary,
|
||||
runId: "run",
|
||||
scenarios: ["telegram-mentioned-message-reply"],
|
||||
spec: "openclaw@beta",
|
||||
startedAt: new Date("2026-05-01T00:00:00.000Z"),
|
||||
version: "2026.4.30-beta.1",
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
package: { spec: "openclaw@beta", version: "2026.4.30-beta.1" },
|
||||
run: { durationMs: 12_000, id: "run", status: "pass" },
|
||||
mode: {
|
||||
providerMode: "mock-openai",
|
||||
scenarios: ["telegram-mentioned-message-reply"],
|
||||
},
|
||||
rtt: { canaryMs: 1234, mentionReplyMs: 5678 },
|
||||
});
|
||||
});
|
||||
|
||||
it("marks failed scenario summaries as failed results", () => {
|
||||
const result = buildRttResult({
|
||||
artifacts: {
|
||||
rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
|
||||
rawReportPath: "runs/run/raw/telegram-qa-report.md",
|
||||
rawSummaryPath: "runs/run/raw/telegram-qa-summary.json",
|
||||
resultPath: "runs/run/result.json",
|
||||
},
|
||||
finishedAt: new Date("2026-05-01T00:00:12.000Z"),
|
||||
providerMode: "mock-openai",
|
||||
rawSummary: {
|
||||
scenarios: [
|
||||
{ id: "telegram-canary", rttMs: 5948, status: "pass" },
|
||||
{ id: "telegram-mentioned-message-reply", status: "fail" },
|
||||
],
|
||||
},
|
||||
runId: "run",
|
||||
scenarios: ["telegram-mentioned-message-reply"],
|
||||
spec: "openclaw@latest",
|
||||
startedAt: new Date("2026-05-01T00:00:00.000Z"),
|
||||
version: "2026.4.29",
|
||||
});
|
||||
|
||||
expect(result.run.status).toBe("fail");
|
||||
expect(result.rtt).toEqual({ canaryMs: 5948, mentionReplyMs: undefined });
|
||||
});
|
||||
|
||||
it("appends JSONL rows", async () => {
|
||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-test-"));
|
||||
const jsonlPath = path.join(tempDir, "data/rtt.jsonl");
|
||||
await appendJsonl(jsonlPath, { run: 1 });
|
||||
await appendJsonl(jsonlPath, { run: 2 });
|
||||
|
||||
await expect(fs.readFile(jsonlPath, "utf8")).resolves.toBe('{"run":1}\n{"run":2}\n');
|
||||
});
|
||||
|
||||
it("parses CLI options", () => {
|
||||
const parsed = cliTesting.parseArgs([
|
||||
"openclaw@latest",
|
||||
"--provider",
|
||||
"live-frontier",
|
||||
"--runs",
|
||||
"3",
|
||||
"--timeout-ms",
|
||||
"240000",
|
||||
"--harness-root",
|
||||
"/tmp/openclaw",
|
||||
"--output",
|
||||
"/tmp/runs",
|
||||
]);
|
||||
|
||||
expect(parsed.spec).toBe("openclaw@latest");
|
||||
expect(parsed.options).toMatchObject({
|
||||
providerMode: "live-frontier",
|
||||
runs: 3,
|
||||
harnessRoot: "/tmp/openclaw",
|
||||
output: "/tmp/runs",
|
||||
scenarios: ["telegram-mentioned-message-reply"],
|
||||
timeoutMs: 240_000,
|
||||
});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user