mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-30 03:23:35 +00:00
* feat: add qa evidence summary normalization * chore: rename qa evidence target environment * chore: align qa evidence profile terminology * chore: align qa evidence summary fields * chore: add qa evidence taxonomy ref * test: remove stale multipass evidence example * test(qa): normalize vitest and playwright evidence * test(qa): slim evidence summary metadata * test(qa): clarify evidence summary inputs * test(qa): rename scenario specs in evidence flow * test(qa): treat evidence profiles as mapping strings * test(qa): use neutral evidence test identity * test(qa): nest evidence summary joins * refactor(qa): normalize live evidence summaries * fix(qa): accept normalized telegram rtt summaries * fix(qa): normalize evidence lane summaries * fix(qa): align evidence summaries with requirements * refactor(qa): tighten evidence summary builders * refactor(qa): restore standard evidence ids * fix(qa): keep legacy summaries out of rtt evidence * refactor(qa): make package evidence provenance explicit * test(qa): keep script tests out of qa lab internals * refactor(qa): rename scenario evidence definitions * refactor(qa): clean evidence summary wording * test(qa): fix evidence summary test inputs * refactor(qa): simplify evidence identity fields * refactor(qa): tighten evidence summary inputs * refactor(qa): rename evidence artifact
810 lines
28 KiB
TypeScript
810 lines
28 KiB
TypeScript
// RTT harness tests: cover the behavior of the RTT harness scripts.
|
|
import { execFile } from "node:child_process";
|
|
import fs from "node:fs/promises";
|
|
import { createServer, type Server } from "node:http";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
import { promisify } from "node:util";
|
|
import { afterEach, describe, expect, it } from "vitest";
|
|
import {
|
|
appendJsonl,
|
|
assertRequiredEnv,
|
|
buildRttResult,
|
|
buildRunId,
|
|
createHarnessEnv,
|
|
extractRtt,
|
|
readTelegramSummary,
|
|
resolveTelegramSummaryPath,
|
|
safeRunLabel,
|
|
validateOpenClawPackageSpec,
|
|
} from "../../scripts/lib/rtt-harness.ts";
|
|
import { testing as cliTesting } from "../../scripts/rtt.ts";
|
|
|
|
// Directory containing this test file; script paths below are resolved relative to it.
const TEST_DIR = path.dirname(fileURLToPath(import.meta.url));
// Folder holding the e2e helper scripts exercised by these tests.
const E2E_SCRIPTS_DIR = path.resolve(TEST_DIR, "../../scripts/e2e");
const DOCKER_SCRIPT_PATH = path.join(E2E_SCRIPTS_DIR, "npm-telegram-rtt-docker.sh");
const CREDENTIAL_SCRIPT_PATH = path.join(E2E_SCRIPTS_DIR, "npm-telegram-rtt-credentials.mjs");
const CONFIG_SCRIPT_PATH = path.join(E2E_SCRIPTS_DIR, "npm-telegram-rtt-config.mjs");
// File name expected for the evidence summary inside an RTT artifact directory.
const QA_EVIDENCE_FILENAME = "qa-evidence.json";
// Property key used to flag a chunked credential payload marker object.
const CHUNKED_PAYLOAD_MARKER = "__openclawQaCredentialPayloadChunksV1";
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);
// Temp directories created by tests; emptied and removed after each test.
const tempDirs: string[] = [];
|
|
|
|
/** Outcome recorded on a single evidence entry. */
type EvidenceStatus = "pass" | "fail" | "blocked" | "skipped";

/** Timing figures that may accompany an entry's result; every field is optional. */
type EvidenceTiming = {
  rttMs?: number;
  avgMs?: number;
  p50Ms?: number;
  p95Ms?: number;
  maxMs?: number;
  samples?: number;
  failedSamples?: number;
};

/** One entry of the evidence summary fixture used by these tests. */
type EvidenceEntryForTest = {
  test: {
    kind: string;
    id: string;
    title: string;
  };
  mapping: {
    profile: string;
    coverage: [];
  };
  execution: {
    runner: string;
    environment: { ref: null; os: string; nodeVersion: string };
    provider: {
      id: string;
      live: boolean;
      model: { name: null; ref: null };
      fixture: string;
    };
    packageSource: { kind: string; spec: string };
    artifacts: [];
  };
  result: {
    status: EvidenceStatus;
    timing?: EvidenceTiming;
  };
};

/** Evidence summary document (schema version 2) as built by the test fixture. */
type EvidenceSummaryForTest = {
  kind: "openclaw.qa.evidence-summary";
  schemaVersion: 2;
  generatedAt: string;
  entries: EvidenceEntryForTest[];
};
|
|
|
|
/**
 * Builds a two-entry evidence summary fixture: a Telegram canary entry followed by a
 * Telegram mentioned-message reply entry.
 *
 * @param options Per-entry overrides. Statuses default to "pass". A timing key that is
 *   present but set to `undefined` removes `timing` from that entry's result; a key that
 *   is absent selects the built-in default timing.
 * @returns The summary fixture with a fixed `generatedAt` timestamp.
 */
function makeTelegramRttEvidenceSummary(
  options: {
    canaryStatus?: EvidenceStatus;
    canaryTiming?: EvidenceTiming;
    mentionStatus?: EvidenceStatus;
    mentionTiming?: EvidenceTiming;
  } = {},
): EvidenceSummaryForTest {
  // Key presence (not `??`) decides the fallback so callers can pass an explicit `undefined`.
  const timingFor = (
    key: "canaryTiming" | "mentionTiming",
    fallback: EvidenceTiming,
  ): EvidenceTiming | undefined => (Object.hasOwn(options, key) ? options[key] : fallback);

  const buildEntry = (
    id: string,
    title: string,
    status: EvidenceStatus,
    timing: EvidenceTiming | undefined,
  ): EvidenceSummaryForTest["entries"][number] => ({
    test: { kind: "live-transport-check", id, title },
    mapping: { profile: "release", coverage: [] },
    execution: {
      runner: "docker",
      environment: { ref: null, os: "linux", nodeVersion: "v24.0.0" },
      provider: {
        id: "openai",
        live: false,
        model: { name: null, ref: null },
        fixture: "mock-openai",
      },
      packageSource: { kind: "npm-package", spec: "openclaw@beta" },
      artifacts: [],
    },
    // Leave the `timing` key out entirely when no timing was supplied.
    result: timing === undefined ? { status } : { status, timing },
  });

  const canaryEntry = buildEntry(
    "telegram-canary",
    "Telegram canary",
    options.canaryStatus ?? "pass",
    timingFor("canaryTiming", { rttMs: 1234 }),
  );
  const mentionEntry = buildEntry(
    "telegram-mentioned-message-reply",
    "Telegram normal reply",
    options.mentionStatus ?? "pass",
    timingFor("mentionTiming", {
      rttMs: 6000,
      avgMs: 5333,
      p50Ms: 5000,
      p95Ms: 7000,
      maxMs: 7000,
      samples: 3,
      failedSamples: 0,
    }),
  );

  return {
    kind: "openclaw.qa.evidence-summary",
    schemaVersion: 2,
    generatedAt: "2026-05-01T00:00:00.000Z",
    entries: [canaryEntry, mentionEntry],
  };
}
|
|
|
|
// After every test: empty the temp-dir registry and delete each registered directory.
afterEach(async () => {
  const registered = tempDirs.splice(0);
  const removals = registered.map((dir) => fs.rm(dir, { recursive: true, force: true }));
  await Promise.all(removals);
});
|
|
|
|
/**
 * Starts `server` listening on 127.0.0.1 with port 0 and waits until it is listening.
 *
 * @param server HTTP server that is not yet listening.
 * @returns The address object reported by the server once bound.
 * @throws Error when the server reports no address or a string address; an "error"
 *   event emitted before listening rejects the returned promise.
 */
async function listenOnLoopback(server: Server) {
  await new Promise<void>((resolve, reject) => {
    const onListening = () => {
      // Listening succeeded, so the startup error handler is no longer needed.
      server.off("error", reject);
      resolve();
    };
    server.once("error", reject);
    server.listen(0, "127.0.0.1", onListening);
  });

  const bound = server.address();
  if (bound && typeof bound !== "string") {
    return bound;
  }
  throw new Error("Expected TCP server address.");
}
|
|
|
|
/**
 * Promise wrapper around `server.close`.
 *
 * @param server Server to close.
 * @returns A promise that resolves when the close callback reports no error and rejects
 *   with the error it reports otherwise.
 */
function closeServer(server: Server) {
  return new Promise<void>((resolve, reject) => {
    server.close((error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
}
|
|
|
|
/**
 * Builds the environment for running the credential script against a local broker stub.
 *
 * @param port Port of the loopback HTTP server standing in for the broker.
 * @returns A copy of `process.env` with the broker-related variables overridden.
 */
function credentialBrokerEnv(port: number) {
  const brokerOverrides = {
    OPENCLAW_QA_ALLOW_INSECURE_HTTP: "1",
    OPENCLAW_QA_CONVEX_SECRET_MAINTAINER: "test-secret",
    OPENCLAW_QA_CONVEX_SITE_URL: `http://127.0.0.1:${port}`,
    OPENCLAW_QA_CREDENTIAL_HTTP_TIMEOUT_MS: "1000",
    OPENCLAW_QA_CREDENTIAL_OWNER_ID: "test-owner",
    OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE: "maintainer",
  };
  // Overrides are spread last so they win over any inherited values.
  return { ...process.env, ...brokerOverrides };
}
|
|
|
|
describe("RTT harness", () => {
|
|
it("validates OpenClaw package specs", () => {
  // Each accepted spec is returned unchanged.
  const acceptedSpecs = [
    "openclaw@main",
    "openclaw@alpha",
    "openclaw@beta",
    "openclaw@latest",
    "openclaw@2026.4.30",
    "openclaw@2026.4.30-beta.2",
    "openclaw@2026.4.30-alpha.2",
  ] as const;
  for (const spec of acceptedSpecs) {
    expect(validateOpenClawPackageSpec(spec)).toBe(spec);
  }

  // A scoped package name and an unlisted tag are both refused.
  const rejectedSpecs = ["@openclaw/openclaw@beta", "openclaw@next"] as const;
  for (const spec of rejectedSpecs) {
    expect(() => validateOpenClawPackageSpec(spec)).toThrow(/Package spec must be/);
  }
});
|
|
|
|
it("builds stable run labels", () => {
  const label = safeRunLabel("openclaw@beta");
  expect(label).toBe("openclaw_beta");

  // With index 1 the expected id ends in "-2".
  const runId = buildRunId({
    now: new Date("2026-05-01T03:04:05.678Z"),
    spec: "openclaw@beta",
    index: 1,
  });
  expect(runId).toBe("2026-05-01T030405678Z-openclaw_beta-2");
});
|
|
|
|
it("constructs harness env without dropping caller env", () => {
  const env = createHarnessEnv({
    baseEnv: {
      OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
      OPENCLAW_NPM_TELEGRAM_FAST: "0",
    },
    providerMode: "mock-openai",
    rawOutputDir: ".artifacts/rtt/run/raw",
    samples: 20,
    sampleTimeoutMs: 30_000,
    scenarios: ["telegram-mentioned-message-reply"],
    spec: "openclaw@beta",
    timeoutMs: 180_000,
    version: "2026.4.30-beta.1",
  });

  // Caller-provided variables survive, and the harness variables are rendered as strings.
  expect(env).toMatchObject({
    OPENCLAW_QA_TELEGRAM_GROUP_ID: "-100123",
    OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC: "openclaw@beta",
    OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL: "openclaw@beta (2026.4.30-beta.1)",
    OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE: "mock-openai",
    OPENCLAW_QA_PACKAGE_SOURCE: "openclaw@beta",
    OPENCLAW_QA_PACKAGE_SOURCE_KIND: "npm-package",
    OPENCLAW_NPM_TELEGRAM_SCENARIOS: "telegram-mentioned-message-reply",
    OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR: ".artifacts/rtt/run/raw",
    OPENCLAW_NPM_TELEGRAM_FAST: "0",
    OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES: "20",
    OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS: "30000",
    OPENCLAW_QA_TELEGRAM_CANARY_TIMEOUT_MS: "180000",
    OPENCLAW_QA_TELEGRAM_SCENARIO_TIMEOUT_MS: "180000",
  });
});
|
|
|
|
it("marks package tarball provenance in RTT evidence env", () => {
  const tarballPath = "/tmp/openclaw.tgz";
  const env = createHarnessEnv({
    baseEnv: {},
    packageTgz: tarballPath,
    providerMode: "mock-openai",
    rawOutputDir: ".artifacts/rtt/run/raw",
    samples: 20,
    sampleTimeoutMs: 30_000,
    scenarios: ["telegram-mentioned-message-reply"],
    spec: "openclaw@main",
    timeoutMs: 180_000,
    version: "2026.4.30+abc123",
  });

  // The spec is kept as given while the package source points at the tarball.
  expect(env).toMatchObject({
    OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC: "openclaw@main",
    OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ: "/tmp/openclaw.tgz",
    OPENCLAW_QA_PACKAGE_SOURCE: "/tmp/openclaw.tgz",
    OPENCLAW_QA_PACKAGE_SOURCE_KIND: "packed-tarball",
  });
});
|
|
|
|
it("forwards Convex credential controls without dropping RTT sample controls", () => {
  const env = createHarnessEnv({
    baseEnv: {
      OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-credentials.example.convex.site",
      OPENCLAW_QA_CONVEX_SECRET_MAINTAINER: "maintainer-secret",
    },
    credentialRole: "maintainer",
    credentialSource: "convex",
    providerMode: "mock-openai",
    rawOutputDir: ".artifacts/rtt/run/raw",
    samples: 7,
    sampleTimeoutMs: 45_000,
    scenarios: ["telegram-mentioned-message-reply"],
    spec: "openclaw@beta",
    timeoutMs: 180_000,
    version: "2026.4.30-beta.1",
  });

  expect(env).toMatchObject({
    OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE: "convex",
    OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE: "maintainer",
    OPENCLAW_NPM_TELEGRAM_WARM_SAMPLES: "7",
    OPENCLAW_NPM_TELEGRAM_SAMPLE_TIMEOUT_MS: "45000",
  });

  // The resulting env must also pass the required-variable check for the Convex maintainer role.
  const checkRequired = () =>
    assertRequiredEnv(env, { credentialRole: "maintainer", credentialSource: "convex" });
  expect(checkRequired).not.toThrow();
});
|
|
|
|
it("exports the Telegram bot token after Convex credentials are sourced", async () => {
  const script = await fs.readFile(DOCKER_SCRIPT_PATH, "utf8");
  // Offset of `needle` in the script, searching from `from` when given; -1 when absent.
  const offsetOf = (needle: string, from?: number) => script.indexOf(needle, from);

  const sourceIndex = offsetOf('source "$credential_env_file"');
  const tokenExportIndex = offsetOf(
    'export TELEGRAM_BOT_TOKEN="${OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN:?missing OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN}"',
  );
  const installEnvSnapshotIndex = offsetOf('install_env=("${docker_env[@]}")');
  const packageInstallIndex = offsetOf("npm install -g");
  const credentialAcquireIndex = offsetOf(
    "node /app/scripts/e2e/npm-telegram-rtt-credentials.mjs acquire",
  );
  const heartbeatStartIndex = offsetOf("start_credential_heartbeat", sourceIndex);
  const driverIndex = offsetOf("node /app/scripts/e2e/npm-telegram-rtt-driver.mjs");

  // The strict token export must come after the credential env file is sourced.
  expect(sourceIndex).toBeGreaterThanOrEqual(0);
  expect(tokenExportIndex).toBeGreaterThan(sourceIndex);

  // These variable names must appear after the install env snapshot is taken.
  expect(installEnvSnapshotIndex).toBeGreaterThanOrEqual(0);
  const namesAfterInstallSnapshot = [
    "OPENCLAW_QA_CONVEX_SECRET_CI",
    "OPENCLAW_QA_CREDENTIAL_HTTP_MAX_BODY_BYTES",
    "OPENCLAW_QA_CREDENTIAL_PAYLOAD_MAX_BYTES",
    "OPENCLAW_QA_CREDENTIAL_PAYLOAD_MAX_CHUNKS",
  ];
  for (const name of namesAfterInstallSnapshot) {
    expect(offsetOf(name, installEnvSnapshotIndex)).toBeGreaterThan(installEnvSnapshotIndex);
  }

  // Package install precedes credential acquisition; the heartbeat starts between sourcing
  // the credentials and launching the driver.
  expect(packageInstallIndex).toBeLessThan(credentialAcquireIndex);
  expect(heartbeatStartIndex).toBeGreaterThan(sourceIndex);
  expect(heartbeatStartIndex).toBeLessThan(driverIndex);

  const requiredSnippets = [
    '-e OPENCLAW_E2E_NPM_INSTALL_TIMEOUT="${OPENCLAW_E2E_NPM_INSTALL_TIMEOUT:-600s}"',
    '-e OPENCLAW_QA_PACKAGE_SOURCE="$package_install_source"',
    '-e OPENCLAW_QA_PACKAGE_SOURCE_KIND="$package_source_kind"',
    "OPENCLAW_QA_PACKAGE_SOURCE_SHA",
    '"$timeout_bin" --kill-after=30s "$npm_install_timeout" npm install -g "$install_source" --no-fund --no-audit',
    "elif command -v gtimeout >/dev/null 2>&1; then",
    'timeout_bin="gtimeout"',
    'echo "timeout or gtimeout is required for OPENCLAW_E2E_NPM_INSTALL_TIMEOUT=$npm_install_timeout" >&2',
    '"$timeout_bin" --kill-after=1s 1s true >/dev/null 2>&1',
    '"$timeout_bin" "$npm_install_timeout" npm install -g "$install_source" --no-fund --no-audit',
    "run_logged docker_e2e_docker_run_cmd run --rm",
    "source scripts/lib/openclaw-e2e-instance.sh",
    'docker_e2e_print_log "$run_log"',
    "start_credential_heartbeat() {\n (\n set +e",
    "Convex credential heartbeat exited with status",
    'kill -TERM "$rtt_shell_pid"',
    "const controller = new AbortController();",
    "const timer = setTimeout(() => controller.abort(), 1000);",
    'if [ "$mock_ready" != "1" ]; then',
    "Mock OpenAI server did not become ready",
    'openclaw_e2e_print_log "$mock_log"',
    'openclaw_e2e_print_log "$file"',
  ];
  for (const snippet of requiredSnippets) {
    expect(script).toContain(snippet);
  }

  const forbiddenSnippets = [
    "running package install without OPENCLAW_E2E_NPM_INSTALL_TIMEOUT",
    "run_logged docker run --rm",
    'cat "$run_log"',
    'cat "$mock_log"',
    "sed -n '1,260p'",
    "fetch('http://127.0.0.1:${mock_port}/health')",
    'export TELEGRAM_BOT_TOKEN="$OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN"',
  ];
  for (const snippet of forbiddenSnippets) {
    expect(script).not.toContain(snippet);
  }
});
|
|
|
|
it("rejects oversized chunked RTT credential markers before hydration", async () => {
  // The timestamped query string gives the import a unique specifier
  // (presumably to get a fresh module instance - confirm).
  const moduleUrl = `${pathToFileURL(CREDENTIAL_SCRIPT_PATH).href}?case=chunk-marker-${Date.now()}`;
  const credentialModule = (await import(moduleUrl)) as {
    parseChunkedPayloadMarker(payload: unknown): unknown;
  };

  // One marker over the chunk-count limit, one over the byte-length limit.
  const oversizedMarkers = [
    {
      byteLength: 1,
      chunkCount: 4097,
      message: "Chunked credential payload exceeds 4096 chunks.",
    },
    {
      byteLength: 64 * 1024 * 1024 + 1,
      chunkCount: 1,
      message: "Chunked credential payload exceeds 67108864 bytes.",
    },
  ];

  for (const { byteLength, chunkCount, message } of oversizedMarkers) {
    const parse = () =>
      credentialModule.parseChunkedPayloadMarker({
        [CHUNKED_PAYLOAD_MARKER]: true,
        byteLength,
        chunkCount,
      });
    expect(parse).toThrow(message);
  }
});
|
|
|
|
it("keeps RTT Docker artifacts isolated by default", async () => {
  const dockerScript = await fs.readFile(DOCKER_SCRIPT_PATH, "utf8");

  // The default output directory must include the per-run id.
  const expectedLines = [
    'RUN_ID="${OPENCLAW_NPM_TELEGRAM_RUN_ID:-$(date -u +%Y%m%dT%H%M%SZ)-$$}"',
    'OUTPUT_DIR="${OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR:-.artifacts/qa-e2e/npm-telegram-rtt/$RUN_ID}"',
    '-e OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR="$OUTPUT_DIR"',
  ];
  for (const line of expectedLines) {
    expect(dockerScript).toContain(line);
  }

  // The default without a run id must not be present.
  const sharedDefault =
    'OUTPUT_DIR="${OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR:-.artifacts/qa-e2e/npm-telegram-rtt}"';
  expect(dockerScript).not.toContain(sharedDefault);
});
|
|
|
|
it("keeps broker helper heartbeat handling aligned with QA leases", async () => {
  const helperSource = await fs.readFile(CREDENTIAL_SCRIPT_PATH, "utf8");

  // Both lease TTL expressions must appear verbatim in the credential helper.
  const leaseTtlSnippets = [
    "leaseTtlMs: acquired.leaseTtlMs ?? config.leaseTtlMs",
    "leaseTtlMs: leaseTtlMsFromLease(config, lease)",
  ];
  for (const snippet of leaseTtlSnippets) {
    expect(helperSource).toContain(snippet);
  }
});
|
|
|
|
it("bounds Convex credential broker response bodies", async () => {
  type ExecFailure = Error & { stderr?: string };

  // Create the temp dir before the server starts so a filesystem failure cannot leave
  // a listening server behind.
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-credentials-"));
  tempDirs.push(tempDir);

  // Broker stub: always answers 500 with a JSON body holding a 128-character message.
  const server = createServer((_request, response) => {
    response.writeHead(500, { "content-type": "application/json" });
    response.end(JSON.stringify({ status: "error", message: "x".repeat(128) }));
  });
  const { port } = await listenOnLoopback(server);

  // Record the failure instead of throwing a sentinel inside `try`: a sentinel would be
  // caught by the same `catch` and replaced by an unrelated assertion failure.
  let execError: ExecFailure | undefined;
  try {
    await execFileAsync(
      process.execPath,
      [
        CREDENTIAL_SCRIPT_PATH,
        "acquire",
        "--lease-file",
        path.join(tempDir, "lease.json"),
        "--credential-env-file",
        path.join(tempDir, "credentials.env"),
      ],
      {
        env: {
          ...credentialBrokerEnv(port),
          // Response body limit under test (16 bytes).
          OPENCLAW_QA_CREDENTIAL_HTTP_MAX_BODY_BYTES: "16",
        },
        maxBuffer: 128 * 1024,
      },
    );
  } catch (error) {
    execError = error as ExecFailure;
  } finally {
    await closeServer(server);
  }

  expect(execError, "Expected credential acquire to fail.").toBeDefined();
  expect(execError?.stderr).toContain("credential broker acquire response body exceeded 16 bytes");
  // A 32-character run of the broker message must not show up in stderr.
  expect(execError?.stderr).not.toContain("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
});
|
|
|
|
it("does not start another credential acquire after retry delay exhausts the deadline", async () => {
  type ExecFailure = Error & { stderr?: string };

  // Create the temp dir before the server starts so a filesystem failure cannot leave
  // a listening server behind.
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-credentials-retry-"));
  tempDirs.push(tempDir);

  // Broker stub: counts requests and always answers 503 POOL_EXHAUSTED with a 1000 ms
  // retry hint, which is longer than the 75 ms acquire deadline configured below.
  let requests = 0;
  const server = createServer((_request, response) => {
    requests += 1;
    response.writeHead(503, { "content-type": "application/json" });
    response.end(
      JSON.stringify({
        status: "error",
        code: "POOL_EXHAUSTED",
        message: "credential pool exhausted",
        retryAfterMs: 1_000,
      }),
    );
  });
  const { port } = await listenOnLoopback(server);

  // Record the failure instead of throwing a sentinel inside `try`: a sentinel would be
  // caught by the same `catch` and replaced by an unrelated assertion failure.
  let execError: ExecFailure | undefined;
  let elapsedMs: number | undefined;
  const startedAt = Date.now();
  try {
    await execFileAsync(
      process.execPath,
      [
        CREDENTIAL_SCRIPT_PATH,
        "acquire",
        "--lease-file",
        path.join(tempDir, "lease.json"),
        "--credential-env-file",
        path.join(tempDir, "credentials.env"),
      ],
      {
        env: {
          ...credentialBrokerEnv(port),
          OPENCLAW_QA_CREDENTIAL_ACQUIRE_TIMEOUT_MS: "75",
          OPENCLAW_QA_CREDENTIAL_HTTP_TIMEOUT_MS: "250",
        },
        maxBuffer: 128 * 1024,
      },
    );
  } catch (error) {
    // Measure before server teardown so the elapsed time covers only the script run.
    elapsedMs = Date.now() - startedAt;
    execError = error as ExecFailure;
  } finally {
    await closeServer(server);
  }

  expect(execError, "Expected credential acquire to fail.").toBeDefined();
  expect(execError?.stderr).toContain("credential broker acquire timed out after 75ms");
  expect(elapsedMs).toBeLessThan(500);
  // Exactly one request means no second acquire attempt was started.
  expect(requests).toBe(1);
});
|
|
|
|
it("caps credential acquire HTTP retries to the remaining acquire deadline", async () => {
  type ExecFailure = Error & { stderr?: string };

  // Create the temp dir before the server starts so a filesystem failure cannot leave
  // a listening server behind.
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-credentials-cap-"));
  tempDirs.push(tempDir);

  // Broker stub: the first request gets 503 POOL_EXHAUSTED with a 1 ms retry hint; every
  // later request is counted but never answered.
  let requests = 0;
  const server = createServer((_request, response) => {
    requests += 1;
    if (requests !== 1) {
      return;
    }
    response.writeHead(503, { "content-type": "application/json" });
    response.end(
      JSON.stringify({
        status: "error",
        code: "POOL_EXHAUSTED",
        message: "credential pool exhausted",
        retryAfterMs: 1,
      }),
    );
  });
  const { port } = await listenOnLoopback(server);

  // Record the failure instead of throwing a sentinel inside `try`: a sentinel would be
  // caught by the same `catch` and replaced by an unrelated assertion failure.
  let execError: ExecFailure | undefined;
  let elapsedMs: number | undefined;
  const startedAt = Date.now();
  try {
    await execFileAsync(
      process.execPath,
      [
        CREDENTIAL_SCRIPT_PATH,
        "acquire",
        "--lease-file",
        path.join(tempDir, "lease.json"),
        "--credential-env-file",
        path.join(tempDir, "credentials.env"),
      ],
      {
        env: {
          ...credentialBrokerEnv(port),
          // The per-request HTTP timeout (900 ms) exceeds the overall acquire deadline
          // (100 ms); the elapsed-time check below sits between the two.
          OPENCLAW_QA_CREDENTIAL_ACQUIRE_TIMEOUT_MS: "100",
          OPENCLAW_QA_CREDENTIAL_HTTP_TIMEOUT_MS: "900",
        },
        maxBuffer: 128 * 1024,
      },
    );
  } catch (error) {
    // Measure before server teardown so the elapsed time covers only the script run.
    elapsedMs = Date.now() - startedAt;
    execError = error as ExecFailure;
  } finally {
    await closeServer(server);
  }

  expect(execError, "Expected credential acquire to fail.").toBeDefined();
  expect(execError?.stderr).toContain("credential broker acquire timed out after");
  expect(elapsedMs).toBeLessThan(500);
  // One answered attempt plus one retry that was left hanging.
  expect(requests).toBe(2);
});
|
|
|
|
it("preserves empty broker responses for successful lease release", async () => {
  // Broker stub: answers every request with 204 and no body.
  const server = createServer((_request, response) => {
    response.writeHead(204);
    response.end();
  });
  const { port } = await listenOnLoopback(server);

  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-credentials-"));
  tempDirs.push(tempDir);

  // Seed a lease file for the release command to read.
  const leaseFile = path.join(tempDir, "lease.json");
  const lease = {
    kind: "telegram",
    ownerId: "test-owner",
    actorRole: "maintainer",
    credentialId: "credential",
    leaseToken: "lease",
  };
  await fs.writeFile(leaseFile, `${JSON.stringify(lease)}\n`);

  try {
    const releaseArgs = [CREDENTIAL_SCRIPT_PATH, "release", "--lease-file", leaseFile];
    await execFileAsync(process.execPath, releaseArgs, { env: credentialBrokerEnv(port) });

    // The lease file must be gone after a successful release.
    await expect(fs.stat(leaseFile)).rejects.toMatchObject({ code: "ENOENT" });
  } finally {
    await closeServer(server);
  }
});
|
|
|
|
it("generates final-only Telegram RTT delivery config for release packages", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-config-test-"));
  tempDirs.push(tempDir);
  const configPath = path.join(tempDir, "config.json");

  // Positional arguments are passed through exactly as the config script receives them.
  const scriptArgs = [
    CONFIG_SCRIPT_PATH,
    configPath,
    "12345",
    "-100123",
    "111:driver-token",
    "222:sut-token",
    "2026.5.16-beta.6",
  ];
  await execFileAsync(process.execPath, scriptArgs);

  // Only the fields asserted below are described; the rest of the file is not inspected.
  const written = await fs.readFile(configPath, "utf8");
  const config = JSON.parse(written) as {
    channels: { telegram: { replyToMode: unknown; streaming: unknown } };
    messages: { groupChat: { visibleReplies: unknown } };
  };
  const telegram = config.channels.telegram;
  expect(telegram.replyToMode).toBe("first");
  expect(telegram.streaming).toEqual({ mode: "off" });
  expect(config.messages.groupChat.visibleReplies).toBe("automatic");
});
|
|
|
|
it("extracts RTT values from evidence summaries", () => {
  // Uses the default fixture: canary rttMs 1234 and the full default mention timing.
  const extracted = extractRtt(makeTelegramRttEvidenceSummary());

  const expected = {
    canaryMs: 1234,
    mentionReplyMs: 5000,
    avgMs: 5333,
    p50Ms: 5000,
    p95Ms: 7000,
    maxMs: 7000,
    failedSamples: 0,
  };
  expect(extracted).toEqual(expected);
});
|
|
|
|
it("resolves the evidence summary path for Telegram RTT artifacts", async () => {
  const artifactDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-summary-test-"));
  tempDirs.push(artifactDir);

  // The resolved path is the evidence file name joined onto the artifact directory.
  const expectedPath = path.join(artifactDir, QA_EVIDENCE_FILENAME);
  const resolving = resolveTelegramSummaryPath(artifactDir);
  await expect(resolving).resolves.toBe(expectedPath);
});
|
|
|
|
it("builds RTT result JSON", async () => {
  // Fresh artifact paths per call so the input and the expectation never share an object.
  const artifactPaths = () => ({
    rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
    rawReportPath: "runs/run/raw/telegram-qa-report.md",
    rawSummaryPath: "runs/run/raw/qa-evidence.json",
    resultPath: "runs/run/result.json",
  });

  const result = buildRttResult({
    artifacts: artifactPaths(),
    finishedAt: new Date("2026-05-01T00:00:12.000Z"),
    providerMode: "mock-openai",
    rawSummary: makeTelegramRttEvidenceSummary(),
    runId: "run",
    scenarios: ["telegram-mentioned-message-reply"],
    spec: "openclaw@beta",
    startedAt: new Date("2026-05-01T00:00:00.000Z"),
    version: "2026.4.30-beta.1",
  });

  // Start and finish are 12 seconds apart, hence the expected durationMs of 12_000.
  const expected = {
    artifacts: artifactPaths(),
    package: { spec: "openclaw@beta", version: "2026.4.30-beta.1" },
    run: {
      durationMs: 12_000,
      finishedAt: "2026-05-01T00:00:12.000Z",
      id: "run",
      startedAt: "2026-05-01T00:00:00.000Z",
      status: "pass",
    },
    mode: {
      providerMode: "mock-openai",
      scenarios: ["telegram-mentioned-message-reply"],
    },
    rtt: {
      canaryMs: 1234,
      mentionReplyMs: 5000,
      avgMs: 5333,
      p50Ms: 5000,
      p95Ms: 7000,
      maxMs: 7000,
      failedSamples: 0,
    },
  };
  expect(result).toStrictEqual(expected);
});
|
|
|
|
it("marks failed scenario summaries as failed results", () => {
  // Canary passes with a custom timing; the mention entry fails and carries no timing.
  const rawSummary = makeTelegramRttEvidenceSummary({
    canaryTiming: { rttMs: 5948 },
    mentionStatus: "fail",
    mentionTiming: undefined,
  });

  const { run, rtt } = buildRttResult({
    artifacts: {
      rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
      rawReportPath: "runs/run/raw/telegram-qa-report.md",
      rawSummaryPath: "runs/run/raw/qa-evidence.json",
      resultPath: "runs/run/result.json",
    },
    finishedAt: new Date("2026-05-01T00:00:12.000Z"),
    providerMode: "mock-openai",
    rawSummary,
    runId: "run",
    scenarios: ["telegram-mentioned-message-reply"],
    spec: "openclaw@latest",
    startedAt: new Date("2026-05-01T00:00:00.000Z"),
    version: "2026.4.29",
  });

  expect(run.status).toBe("fail");
  expect(rtt).toEqual({ canaryMs: 5948, mentionReplyMs: undefined });
});
|
|
|
|
it("marks incomplete RTT summaries as failed results", () => {
  const baseParams = {
    artifacts: {
      rawObservedMessagesPath: "runs/run/raw/telegram-qa-observed-messages.json",
      rawReportPath: "runs/run/raw/telegram-qa-report.md",
      rawSummaryPath: "runs/run/raw/qa-evidence.json",
      resultPath: "runs/run/result.json",
    },
    finishedAt: new Date("2026-05-01T00:00:12.000Z"),
    providerMode: "mock-openai" as const,
    runId: "run",
    scenarios: ["telegram-mentioned-message-reply"],
    spec: "openclaw@latest",
    startedAt: new Date("2026-05-01T00:00:00.000Z"),
    version: "2026.4.29",
  };

  // Four incomplete shapes: no entries, canary entry only, skipped mention entry,
  // and a mention entry without timing.
  const fullSummary = makeTelegramRttEvidenceSummary();
  const incompleteSummaries = [
    { ...makeTelegramRttEvidenceSummary(), entries: [] },
    { ...fullSummary, entries: fullSummary.entries.slice(0, 1) },
    makeTelegramRttEvidenceSummary({ mentionStatus: "skipped" }),
    makeTelegramRttEvidenceSummary({ mentionTiming: undefined }),
  ];

  const statuses = incompleteSummaries.map(
    (rawSummary) => buildRttResult({ ...baseParams, rawSummary }).run.status,
  );
  expect(statuses).toEqual(["fail", "fail", "fail", "fail"]);
});
|
|
|
|
it("appends JSONL rows", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-rtt-test-"));
  tempDirs.push(tempDir);

  // The target lives in a "data" subdirectory that this test does not create itself.
  const jsonlPath = path.join(tempDir, "data/rtt.jsonl");
  for (const run of [1, 2]) {
    await appendJsonl(jsonlPath, { run });
  }

  const written = fs.readFile(jsonlPath, "utf8");
  await expect(written).resolves.toBe('{"run":1}\n{"run":2}\n');
});
|
|
|
|
it("parses CLI options", () => {
  // One positional spec followed by flag/value pairs, one pair per line.
  const argv = [
    "openclaw@latest",
    "--package-tgz", "/tmp/openclaw.tgz",
    "--provider", "live-frontier",
    "--credential-source", "convex",
    "--credential-role", "ci",
    "--runs", "3",
    "--samples", "5",
    "--sample-timeout-ms", "30000",
    "--timeout-ms", "240000",
    "--harness-root", "/tmp/openclaw",
    "--output", "/tmp/runs",
  ];

  const { spec, options } = cliTesting.parseArgs(argv);

  expect(spec).toBe("openclaw@latest");
  // `scenarios` is not passed on the command line but is expected in the parsed options.
  expect(options).toStrictEqual({
    packageTgz: "/tmp/openclaw.tgz",
    credentialRole: "ci",
    credentialSource: "convex",
    providerMode: "live-frontier",
    runs: 3,
    samples: 5,
    sampleTimeoutMs: 30_000,
    harnessRoot: "/tmp/openclaw",
    output: "/tmp/runs",
    scenarios: ["telegram-mentioned-message-reply"],
    timeoutMs: 240_000,
  });
});
|
|
|
|
it("rejects missing CLI path option values", () => {
  // Each path flag is followed directly by another flag instead of a path.
  const cases = [
    { flag: "--package-tgz", next: "--runs" },
    { flag: "--harness-root", next: "--output" },
    { flag: "--output", next: "--samples" },
  ] as const;

  for (const { flag, next } of cases) {
    const parse = () => cliTesting.parseArgs(["openclaw@latest", flag, next]);
    expect(parse).toThrow(`${flag} requires a path.`);
  }
});
|
|
});
|