mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-24 08:19:31 +00:00
* feat: add qa evidence summary normalization * chore: rename qa evidence target environment * chore: align qa evidence profile terminology * chore: align qa evidence summary fields * chore: add qa evidence taxonomy ref * test: remove stale multipass evidence example * test(qa): normalize vitest and playwright evidence * test(qa): slim evidence summary metadata * test(qa): clarify evidence summary inputs * test(qa): rename scenario specs in evidence flow * test(qa): treat evidence profiles as mapping strings * test(qa): use neutral evidence test identity * test(qa): nest evidence summary joins * refactor(qa): normalize live evidence summaries * fix(qa): accept normalized telegram rtt summaries * fix(qa): normalize evidence lane summaries * fix(qa): align evidence summaries with requirements * refactor(qa): tighten evidence summary builders * refactor(qa): restore standard evidence ids * fix(qa): keep legacy summaries out of rtt evidence * refactor(qa): make package evidence provenance explicit * test(qa): keep script tests out of qa lab internals * refactor(qa): rename scenario evidence definitions * refactor(qa): clean evidence summary wording * test(qa): fix evidence summary test inputs * refactor(qa): simplify evidence identity fields * refactor(qa): tighten evidence summary inputs * refactor(qa): rename evidence artifact
363 lines
12 KiB
TypeScript
363 lines
12 KiB
TypeScript
// RTT harness helpers that support OpenClaw repository automation.
|
|
import { execFile, spawn } from "node:child_process";
|
|
import fs from "node:fs/promises";
|
|
import path from "node:path";
|
|
import { promisify } from "node:util";
|
|
import {
|
|
QA_EVIDENCE_FILENAME,
|
|
validateQaEvidenceSummaryJson,
|
|
type QaEvidenceSummaryJson,
|
|
type QaEvidenceTiming,
|
|
} from "../../extensions/qa-lab/src/evidence-summary.ts";
|
|
|
|
// Promise-returning form of `execFile`; awaited by the docker, npm, and git invocations in this file.
const execFileAsync = promisify(execFile);
|
|
|
|
/** Provider mode of a run; `createHarnessEnv` forwards it as `OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE`. */
export type RttProviderMode = "mock-openai" | "live-frontier";
|
|
/** Credential source; `assertRequiredEnv` checks the Telegram variables for "env" and the Convex variables for "convex". */
export type RttCredentialSource = "env" | "convex";
|
|
/** Credential role; selects which Convex secret variable `assertRequiredEnv` requires. */
export type RttCredentialRole = "maintainer" | "ci";
|
|
|
|
/** Normalized record of one RTT harness run, in the shape produced by `buildRttResult`. */
type RttResult = {
  /** Package that was exercised. */
  package: {
    spec: string;
    version: string;
  };
  /** Identity, wall-clock bounds, and overall verdict of the run. */
  run: {
    id: string;
    /** ISO timestamp (`Date#toISOString`). */
    startedAt: string;
    /** ISO timestamp (`Date#toISOString`). */
    finishedAt: string;
    /** Elapsed milliseconds between `startedAt` and `finishedAt`. */
    durationMs: number;
    status: "pass" | "fail";
  };
  /** How the run was configured. */
  mode: {
    providerMode: RttProviderMode;
    scenarios: string[];
  };
  /** Round-trip timings; every field is optional because the summary may lack it. */
  rtt: {
    /** `rttMs` of the "telegram-canary" entry. */
    canaryMs?: number;
    /** `p50Ms`, else `rttMs`, of the "telegram-mentioned-message-reply" entry. */
    mentionReplyMs?: number;
    warmSamples?: number[];
    avgMs?: number;
    p50Ms?: number;
    p95Ms?: number;
    maxMs?: number;
    failedSamples?: number;
  };
  /** Filesystem locations of the raw harness output and of this result. */
  artifacts: {
    rawSummaryPath: string;
    rawReportPath: string;
    rawObservedMessagesPath: string;
    resultPath: string;
  };
};
|
|
|
|
/**
 * Accepted package specs: `openclaw@` followed by `main`, `alpha`, `beta`, `latest`, or an exact
 * version made of a four-digit first component and two further numeric components without
 * leading zeros, optionally suffixed with `-N`, `-alpha.N`, or `-beta.N`.
 */
const OPENCLAW_PACKAGE_SPEC_RE =
  /^openclaw@(main|alpha|beta|latest|[0-9]{4}\.[1-9][0-9]*\.[1-9][0-9]*(-[1-9][0-9]*|-(alpha|beta)\.[1-9][0-9]*)?)$/u;
|
|
|
|
/** Variables `assertRequiredEnv` requires to be non-blank when the credential source is not "convex". */
const REQUIRED_TELEGRAM_ENV = [
  "OPENCLAW_QA_TELEGRAM_GROUP_ID",
  "OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN",
  "OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN",
] as const;
|
|
|
|
/**
 * Parses a credential-source value, ignoring surrounding whitespace and letter case.
 *
 * @param value Raw text, e.g. from a CLI flag or an environment variable.
 * @returns "env" or "convex".
 * @throws Error when the value is neither; the message echoes the raw input.
 */
export function parseRttCredentialSource(value: string): RttCredentialSource {
  const candidate = value.trim().toLowerCase();
  switch (candidate) {
    case "env":
    case "convex":
      return candidate;
    default:
      throw new Error(`--credential-source must be env or convex; got: ${value}`);
  }
}
|
|
|
|
/**
 * Parses a credential-role value, ignoring surrounding whitespace and letter case.
 *
 * @param value Raw text, e.g. from a CLI flag or an environment variable.
 * @returns "maintainer" or "ci".
 * @throws Error when the value is neither; the message echoes the raw input.
 */
export function parseRttCredentialRole(value: string): RttCredentialRole {
  const candidate = value.trim().toLowerCase();
  switch (candidate) {
    case "maintainer":
    case "ci":
      return candidate;
    default:
      throw new Error(`--credential-role must be maintainer or ci; got: ${value}`);
  }
}
|
|
|
|
/**
 * Decides which credential source a run uses.
 *
 * Precedence:
 * 1. the explicit `credentialSource` argument;
 * 2. the first non-blank of `OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE` and
 *    `OPENCLAW_QA_CREDENTIAL_SOURCE`;
 * 3. "convex" when `CI` is set and both a Convex site URL and at least one Convex secret are present;
 * 4. "env" otherwise.
 *
 * @throws Error (from `parseRttCredentialSource`) when the selected variable holds an unknown value.
 */
function resolveRttCredentialSource(
  env: NodeJS.ProcessEnv,
  credentialSource?: RttCredentialSource,
): RttCredentialSource {
  if (credentialSource) {
    return credentialSource;
  }
  // Select by blankness rather than with `??`: a variable that is defined but empty must not
  // shadow a populated fallback variable.
  const rawSource = [
    env.OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE,
    env.OPENCLAW_QA_CREDENTIAL_SOURCE,
  ].find((candidate) => Boolean(candidate?.trim()));
  if (rawSource !== undefined) {
    return parseRttCredentialSource(rawSource);
  }
  if (
    env.CI &&
    env.OPENCLAW_QA_CONVEX_SITE_URL?.trim() &&
    (env.OPENCLAW_QA_CONVEX_SECRET_CI?.trim() || env.OPENCLAW_QA_CONVEX_SECRET_MAINTAINER?.trim())
  ) {
    return "convex";
  }
  return "env";
}
|
|
|
|
/**
 * Decides which credential role a run uses.
 *
 * Precedence:
 * 1. the explicit `credentialRole` argument;
 * 2. the first non-blank of `OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE` and
 *    `OPENCLAW_QA_CREDENTIAL_ROLE`;
 * 3. "ci" when `CI` is set, "maintainer" otherwise.
 *
 * @throws Error (from `parseRttCredentialRole`) when the selected variable holds an unknown value.
 */
function resolveRttCredentialRole(
  env: NodeJS.ProcessEnv,
  credentialRole?: RttCredentialRole,
): RttCredentialRole {
  if (credentialRole) {
    return credentialRole;
  }
  // Select by blankness rather than with `??`: a variable that is defined but empty must not
  // shadow a populated fallback variable.
  const rawRole = [env.OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE, env.OPENCLAW_QA_CREDENTIAL_ROLE].find(
    (candidate) => Boolean(candidate?.trim()),
  );
  if (rawRole !== undefined) {
    return parseRttCredentialRole(rawRole);
  }
  return env.CI ? "ci" : "maintainer";
}
|
|
|
|
/**
 * Checks a package spec against `OPENCLAW_PACKAGE_SPEC_RE`.
 *
 * @param spec Candidate spec such as `openclaw@beta`.
 * @returns The same spec, unchanged, when it is accepted.
 * @throws Error describing the accepted forms when the spec does not match.
 */
export function validateOpenClawPackageSpec(spec: string) {
  const accepted = OPENCLAW_PACKAGE_SPEC_RE.test(spec);
  if (accepted) {
    return spec;
  }
  throw new Error(
    `Package spec must be openclaw@main, openclaw@alpha, openclaw@beta, openclaw@latest, or an exact OpenClaw release version; got: ${spec}`,
  );
}
|
|
|
|
/**
 * Turns arbitrary text into a label containing only ASCII letters, digits, `.`, `-`, and `_`.
 *
 * Each run of other characters collapses into one underscore; underscores at either end of the
 * result are then removed.
 */
export function safeRunLabel(input: string) {
  const underscored = input.replace(/[^a-zA-Z0-9.-]+/gu, "_");
  const trimmed = underscored.replace(/^_+|_+$/gu, "");
  return trimmed;
}
|
|
|
|
/**
 * Builds a run id of the form `<timestamp>-<label>[-<n>]`.
 *
 * The timestamp is `now.toISOString()` with every `:` and `.` removed, the label is
 * `safeRunLabel(spec)`, and `<n>` is `index + 1`, appended only when `index` is provided.
 */
export function buildRunId(params: { now: Date; spec: string; index?: number }) {
  const { now, spec, index } = params;
  const timestamp = now.toISOString().replace(/[:.]/g, "");
  const label = safeRunLabel(spec);
  if (index === undefined) {
    return `${timestamp}-${label}`;
  }
  return `${timestamp}-${label}-${index + 1}`;
}
|
|
|
|
/**
 * Pulls round-trip timings out of an evidence summary.
 *
 * `canaryMs` is the `rttMs` of the "telegram-canary" entry. `mentionReplyMs` is the `p50Ms` of
 * the "telegram-mentioned-message-reply" entry, falling back to its `rttMs`. The mention entry's
 * timing is then passed to `appendRttTiming` to fill in the aggregate fields. A missing entry or
 * timing leaves the corresponding values undefined.
 */
export function extractRtt(summary: QaEvidenceSummaryJson) {
  const allEntries = summary.entries ?? [];
  const timingFor = (id: string) =>
    allEntries.find((entry) => entry.test?.id === id)?.result?.timing;

  const canaryTiming = timingFor("telegram-canary");
  const mentionTiming = timingFor("telegram-mentioned-message-reply");

  const rtt: RttResult["rtt"] = {
    canaryMs: canaryTiming?.rttMs,
    mentionReplyMs: mentionTiming?.p50Ms ?? mentionTiming?.rttMs,
  };
  appendRttTiming(rtt, mentionTiming);
  return rtt;
}
|
|
|
|
/**
 * Copies the aggregate timing fields (`avgMs`, `p50Ms`, `p95Ms`, `maxMs`, `failedSamples`) from
 * `timing` onto `rtt`, mutating `rtt` in place. Fields that are undefined on `timing` are left
 * untouched on `rtt`; an absent `timing` copies nothing.
 */
function appendRttTiming(rtt: RttResult["rtt"], timing: QaEvidenceTiming | undefined) {
  if (!timing) {
    return;
  }
  const aggregateKeys = ["avgMs", "p50Ms", "p95Ms", "maxMs", "failedSamples"] as const;
  for (const key of aggregateKeys) {
    const value = timing[key];
    if (value !== undefined) {
      rtt[key] = value;
    }
  }
}
|
|
|
|
/**
 * Builds the environment for the harness process: a copy of `baseEnv` with the run's settings
 * layered on top.
 *
 * - The tarball, credential-source, and credential-role variables are added only when the
 *   corresponding parameter is truthy.
 * - `OPENCLAW_QA_PACKAGE_SOURCE` is `packageTgz` when provided, otherwise `spec`.
 * - `OPENCLAW_NPM_TELEGRAM_FAST` keeps the value from `baseEnv` and defaults to "1".
 * - `timeoutMs` feeds both the canary and the scenario timeout variables.
 * - Numeric settings are stringified; scenarios are joined with commas.
 */
export function createHarnessEnv(params: {
  baseEnv: NodeJS.ProcessEnv;
  credentialRole?: RttCredentialRole;
  credentialSource?: RttCredentialSource;
  packageTgz?: string;
  providerMode: RttProviderMode;
  scenarios: string[];
  spec: string;
  version: string;
  rawOutputDir: string;
  samples: number;
  sampleTimeoutMs: number;
  timeoutMs: number;
}) {
  const { baseEnv, credentialRole, credentialSource, packageTgz, spec } = params;

  // Optional fragments: each is an empty object unless its parameter was supplied.
  const tarballEnv = packageTgz ? { OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ: packageTgz } : {};
  const credentialSourceEnv = credentialSource
    ? { OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE: credentialSource }
    : {};
  const credentialRoleEnv = credentialRole
    ? { OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE: credentialRole }
    : {};

  return {
    ...baseEnv,
    OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC: spec,
    ...tarballEnv,
    OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL: `${spec} (${params.version})`,
    OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE: params.providerMode,
    OPENCLAW_QA_PACKAGE_SOURCE: packageTgz ?? spec,
    OPENCLAW_QA_PACKAGE_SOURCE_KIND: packageTgz ? "packed-tarball" : "npm-package",
    ...credentialSourceEnv,
    ...credentialRoleEnv,
    OPENCLAW_NPM_TELEGRAM_SCENARIOS: params.scenarios.join(","),
    OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR: params.rawOutputDir,
    OPENCLAW_NPM_TELEGRAM_FAST: baseEnv.OPENCLAW_NPM_TELEGRAM_FAST ?? "1",
    OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES: String(params.samples),
    OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS: String(params.sampleTimeoutMs),
    OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS: String(params.timeoutMs),
    OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS: String(params.timeoutMs),
  };
}
|
|
|
|
/**
 * Verifies that the environment carries the credentials the selected source needs.
 *
 * With the "convex" source, `OPENCLAW_QA_CONVEX_SITE_URL` and the secret variable matching the
 * resolved role must be non-blank. With any other source, every name in `REQUIRED_TELEGRAM_ENV`
 * must be non-blank.
 *
 * @param env Environment to inspect.
 * @param options Optional explicit source/role, passed to the resolver functions.
 * @throws Error listing every missing variable, comma-separated.
 */
export function assertRequiredEnv(
  env: NodeJS.ProcessEnv,
  options: {
    credentialRole?: RttCredentialRole;
    credentialSource?: RttCredentialSource;
  } = {},
) {
  const isBlank = (key: string) => !env[key]?.trim();

  if (resolveRttCredentialSource(env, options.credentialSource) !== "convex") {
    const missingTelegram = REQUIRED_TELEGRAM_ENV.filter(isBlank);
    if (missingTelegram.length > 0) {
      throw new Error(`Missing Telegram QA env: ${missingTelegram.join(", ")}`);
    }
    return;
  }

  // The role is resolved only on the Convex path, and before any variable is inspected.
  const role = resolveRttCredentialRole(env, options.credentialRole);
  const requiredConvex = ["OPENCLAW_QA_CONVEX_SITE_URL"];
  if (role === "ci") {
    requiredConvex.push("OPENCLAW_QA_CONVEX_SECRET_CI");
  }
  if (role === "maintainer") {
    requiredConvex.push("OPENCLAW_QA_CONVEX_SECRET_MAINTAINER");
  }
  const missingConvex = requiredConvex.filter(isBlank);
  if (missingConvex.length > 0) {
    throw new Error(`Missing Convex Telegram QA credential env: ${missingConvex.join(", ")}`);
  }
}
|
|
|
|
/**
 * Confirms that `harnessRoot` contains `scripts/e2e/npm-telegram-rtt-docker.sh`.
 *
 * @throws Error naming the expected script path when `fs.access` rejects for it.
 */
export async function assertHarnessRoot(harnessRoot: string) {
  const scriptPath = path.join(harnessRoot, "scripts/e2e/npm-telegram-rtt-docker.sh");
  let accessible = true;
  try {
    await fs.access(scriptPath);
  } catch {
    accessible = false;
  }
  if (!accessible) {
    throw new Error(`Missing OpenClaw Telegram npm harness: ${scriptPath}`);
  }
}
|
|
|
|
/**
 * Probes Docker by running `docker version --format {{.Server.Version}}` with a 10-second timeout.
 *
 * @throws Error with a fixed install/start hint when the command fails for any reason.
 */
export async function assertDockerAvailable() {
  const probeArgs = ["version", "--format", "{{.Server.Version}}"];
  let reachable = true;
  try {
    await execFileAsync("docker", probeArgs, { timeout: 10_000 });
  } catch {
    reachable = false;
  }
  if (!reachable) {
    throw new Error("Docker is required for RTT runs; install/start Docker and retry.");
  }
}
|
|
|
|
/**
 * Resolves the version npm reports for a package spec by running
 * `npm view <spec> version --json` with a 30-second timeout.
 *
 * @param spec Package spec passed to `npm view`.
 * @returns The version string, trimmed.
 * @throws Error `npm did not return a version for <spec>.` when the output is empty or is not a
 *   non-empty JSON string; errors from the npm invocation and from parsing malformed non-empty
 *   output propagate unchanged.
 */
export async function resolvePublishedVersion(spec: string) {
  const { stdout } = await execFileAsync("npm", ["view", spec, "version", "--json"], {
    timeout: 30_000,
  });
  const rawOutput = stdout.trim();
  // npm can exit successfully while printing nothing (for example when the spec matches no
  // published version). Feeding "" to JSON.parse would raise an opaque SyntaxError, so treat
  // empty output as "no version" and let the descriptive error below fire instead.
  const parsed: unknown = rawOutput.length > 0 ? JSON.parse(rawOutput) : undefined;
  if (typeof parsed !== "string" || parsed.trim().length === 0) {
    throw new Error(`npm did not return a version for ${spec}.`);
  }
  return parsed.trim();
}
|
|
|
|
/**
 * Derives a version label for a checkout: the `version` field of `<harnessRoot>/package.json`,
 * trimmed, followed by `+` and the output of `git rev-parse --short=10 HEAD` run in that directory.
 *
 * @throws Error when `package.json` has no non-empty string `version`; read, parse, and git
 *   failures propagate unchanged.
 */
export async function resolveMainVersion(harnessRoot: string) {
  const manifestPath = path.join(harnessRoot, "package.json");
  const manifestText = await fs.readFile(manifestPath, "utf8");
  const manifest = JSON.parse(manifestText) as { version?: unknown };
  const declaredVersion = manifest.version;
  if (typeof declaredVersion !== "string" || declaredVersion.trim().length === 0) {
    throw new Error("OpenClaw package.json must contain a non-empty version.");
  }

  // The manifest is validated before git is invoked.
  const revParse = await execFileAsync("git", ["rev-parse", "--short=10", "HEAD"], {
    cwd: harnessRoot,
    timeout: 10_000,
  });
  const shortSha = revParse.stdout.trim();
  return `${declaredVersion.trim()}+${shortSha}`;
}
|
|
|
|
/**
 * Reads the file at `summaryPath` as UTF-8, parses it as JSON, and returns the result of
 * passing the parsed value through `validateQaEvidenceSummaryJson`.
 */
export async function readTelegramSummary(summaryPath: string) {
  const summaryText = await fs.readFile(summaryPath, "utf8");
  const parsedSummary = JSON.parse(summaryText);
  return validateQaEvidenceSummaryJson(parsedSummary);
}
|
|
|
|
/** Resolves to the path of the evidence file (`QA_EVIDENCE_FILENAME`) inside `outputDir`. */
export async function resolveTelegramSummaryPath(outputDir: string) {
  const summaryPath = path.join(outputDir, QA_EVIDENCE_FILENAME);
  return summaryPath;
}
|
|
|
|
/**
 * Writes `value` to `pathname` as JSON indented by two spaces and terminated by a newline,
 * creating the parent directory (recursively) first. Existing content is replaced.
 */
export async function writeJson(pathname: string, value: unknown) {
  const parentDir = path.dirname(pathname);
  await fs.mkdir(parentDir, { recursive: true });
  // Serialization happens after the directory is created.
  const document = `${JSON.stringify(value, null, 2)}\n`;
  await fs.writeFile(pathname, document);
}
|
|
|
|
/**
 * Appends `value` to `pathname` as one compact JSON line, creating the parent directory
 * (recursively) first.
 */
export async function appendJsonl(pathname: string, value: unknown) {
  const parentDir = path.dirname(pathname);
  await fs.mkdir(parentDir, { recursive: true });
  // Serialization happens after the directory is created.
  const record = `${JSON.stringify(value)}\n`;
  await fs.appendFile(pathname, record);
}
|
|
|
|
/**
 * Runs `scripts/e2e/npm-telegram-rtt-docker.sh` from `harnessRoot` under `bash`, with the given
 * environment and with stdio inherited from this process.
 *
 * @returns The child's exit code, or 1 when the "exit" event reports no code.
 * @throws Rejects with the child's "error" event payload (for example when spawning fails).
 */
export async function runHarness(params: { env: NodeJS.ProcessEnv; harnessRoot: string }) {
  const { env, harnessRoot } = params;
  const harnessScript = path.join(harnessRoot, "scripts/e2e/npm-telegram-rtt-docker.sh");
  const harnessProcess = spawn("bash", [harnessScript], {
    cwd: harnessRoot,
    env,
    stdio: "inherit",
  });

  // Settle on whichever of "error" or "exit" fires first.
  const reportedCode = await new Promise<number | null>((resolve, reject) => {
    harnessProcess.once("error", reject);
    harnessProcess.once("exit", resolve);
  });
  return reportedCode ?? 1;
}
|
|
|
|
/**
 * Reports whether a summary counts as a failed RTT run.
 *
 * The run fails when any required scenario ("telegram-canary" plus every requested scenario)
 * has no entry, has a status other than "pass", or lacks a finite numeric round-trip time. For
 * "telegram-mentioned-message-reply" that time is `p50Ms` falling back to `rttMs`; for all
 * others it is `rttMs`. It also fails when any entry at all has a status other than "pass".
 */
function rttSummaryFailed(summary: QaEvidenceSummaryJson, requestedScenarios: string[]) {
  const entries = summary.entries ?? [];

  const hasPassingRtt = (scenarioId: string) => {
    const entry = entries.find((candidate) => candidate.test?.id === scenarioId);
    if (!entry || entry.result?.status !== "pass") {
      return false;
    }
    const timing = entry.result.timing;
    const usesMedian = scenarioId === "telegram-mentioned-message-reply";
    const rttMs = usesMedian ? (timing?.p50Ms ?? timing?.rttMs) : timing?.rttMs;
    return typeof rttMs === "number" && Number.isFinite(rttMs);
  };

  const requiredScenarioIds = ["telegram-canary", ...requestedScenarios];
  if (!requiredScenarioIds.every(hasPassingRtt)) {
    return true;
  }
  return entries.some((entry) => entry.result?.status !== "pass");
}
|
|
|
|
/**
 * Assembles the `RttResult` record for one run.
 *
 * `run.status` is "fail" when `rttSummaryFailed` flags the raw summary for the requested
 * scenarios and "pass" otherwise; `run.durationMs` is the difference between the two dates in
 * milliseconds; `rtt` comes from `extractRtt`. The remaining fields are copied from `params`.
 */
export function buildRttResult(params: {
  artifacts: RttResult["artifacts"];
  finishedAt: Date;
  providerMode: RttProviderMode;
  rawSummary: QaEvidenceSummaryJson;
  runId: string;
  scenarios: string[];
  spec: string;
  startedAt: Date;
  version: string;
}): RttResult {
  const { finishedAt, rawSummary, scenarios, startedAt } = params;

  const status = rttSummaryFailed(rawSummary, scenarios) ? "fail" : "pass";
  const run: RttResult["run"] = {
    id: params.runId,
    startedAt: startedAt.toISOString(),
    finishedAt: finishedAt.toISOString(),
    durationMs: finishedAt.getTime() - startedAt.getTime(),
    status,
  };

  return {
    package: { spec: params.spec, version: params.version },
    run,
    mode: { providerMode: params.providerMode, scenarios },
    rtt: extractRtt(rawSummary),
    artifacts: params.artifacts,
  };
}
|