chore: improve beta smoke release tooling

This commit is contained in:
Peter Steinberger
2026-05-04 07:28:47 +01:00
parent 8412b189df
commit d8da04e58e
9 changed files with 481 additions and 5 deletions

View File

@@ -72,6 +72,9 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
- For full beta validation after a tag is published, prefer one command:
- `timeout --foreground 150m pnpm test:parallels:npm-update -- --beta-validation beta3 --json`
This resolves `beta3` to the latest `*-beta.3` version, runs latest->that-version same-guest update coverage, and then runs fresh install smoke for that exact published target on the same selected OS matrix. Use `--platform macos|windows|linux` to narrow reruns.
- For beta 4 npm validation with agent turns, the known-good shape is:
- `gtimeout --foreground 150m pnpm test:parallels:npm-update -- --beta-validation beta4 --model openai/gpt-5.4 --json`
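Prefer the explicit `beta4` alias over `openclaw@beta` when validating a specific prerelease number; npm dist-tags can move. `gtimeout` is the name GNU coreutils `timeout` is commonly installed under on macOS; use plain `timeout` where that is the installed name.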
- If the wrapper fails a lane, read the auto-dumped tail first, then the full nested lane log under `.artifacts/parallels/openclaw-parallels-npm-update.*`.
- Current known macOS update-lane transport signature when the fallback is missing or bypassed: `Unable to authenticate the user. Make sure that the specified credentials are correct and try again.` Treat that as Parallels current-user authentication before blaming npm or OpenClaw.
- A macOS packaged fresh install with global package directories or bundled files mode `0777` usually means the harness used the root `prlctl exec` fallback under a permissive umask. The POSIX guest transports should prepend `umask 022`; verify the phase preflight line before blaming npm.

View File

@@ -139,6 +139,20 @@ pnpm test:docker:npm-telegram-live
- `OPENCLAW_QA_CONVEX_SITE_URL`
- `OPENCLAW_QA_CONVEX_SECRET_MAINTAINER`
- `OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE=mock-openai`
- If direct Telegram env is missing locally and `op signin` blocks, prefer dispatching the manual GitHub lane because the `qa-live-shared` environment already has Convex CI credentials:
```bash
gh workflow run "NPM Telegram Beta E2E" --repo openclaw/openclaw --ref main \
-f package_spec=openclaw@YYYY.M.D-beta.N \
-f package_label=openclaw@YYYY.M.D-beta.N \
-f provider_mode=mock-openai
```
- Poll the exact run id from the dispatch URL. `gh run view --json artifacts` is not supported; list artifacts with:
```bash
gh api repos/openclaw/openclaw/actions/runs/<run-id>/artifacts
```
## Character evals

View File

@@ -220,6 +220,23 @@ jobs:
echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT"
export OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR="${output_dir}"
# EXIT-trap handler: appends the Telegram QA report to the GitHub step summary
# and hands back the exit status that was current when the handler started.
append_telegram_summary() {
  # Must be the first statement so $? still holds the status seen on entry.
  local status=$?
  local report="${output_dir}/telegram-qa-report.md"

  # Nothing to append without a summary file target or without a report.
  if [[ -z "${GITHUB_STEP_SUMMARY:-}" || ! -f "${report}" ]]; then
    return "${status}"
  fi

  {
    printf '%s\n' \
      "## Package Telegram E2E" \
      "" \
      "- Package: ${OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL:-${OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC}}" \
      "- Provider mode: ${OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE}" \
      ""
    cat "${report}"
  } >> "${GITHUB_STEP_SUMMARY}"

  return "${status}"
}
trap append_telegram_summary EXIT
if [[ -n "${PACKAGE_ARTIFACT_NAME// }" ]]; then
mapfile -t package_tgzs < <(find .artifacts/telegram-package-under-test -type f -name "*.tgz" | sort)
if [[ "${#package_tgzs[@]}" -ne 1 ]]; then

View File

@@ -195,6 +195,9 @@ inside every shard.
`OPENCLAW_QA_CONVEX_SITE_URL` and the role secret. If
`OPENCLAW_QA_CONVEX_SITE_URL` and a Convex role secret are present in CI,
the Docker wrapper selects Convex automatically.
- The wrapper validates Telegram or Convex credential env on the host before
Docker build/install work. Set `OPENCLAW_NPM_TELEGRAM_SKIP_CREDENTIAL_PREFLIGHT=1`
only when deliberately debugging pre-credential setup.
- `OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE=ci|maintainer` overrides the shared
`OPENCLAW_QA_CREDENTIAL_ROLE` for this lane only.
- GitHub Actions exposes this lane as the manual maintainer workflow

View File

@@ -211,6 +211,7 @@ Validation` or from the `main`/release workflow ref so workflow logic and
against the published npm package using the shared leased Telegram credential
pool. Local maintainer one-offs may omit the Convex vars and pass the three
`OPENCLAW_QA_TELEGRAM_*` env credentials directly.
- To run the full post-publish beta smoke from a maintainer machine, use `pnpm release:beta-smoke -- --beta betaN`. The helper runs Parallels npm update/fresh-target validation, dispatches `NPM Telegram Beta E2E`, polls the exact workflow run, downloads the artifact, and prints the Telegram report.
- Maintainers can run the same post-publish check from GitHub Actions via the
manual `NPM Telegram Beta E2E` workflow. It is intentionally manual-only and
does not run on every merge.

View File

@@ -1477,6 +1477,7 @@
"qa:lab:watch": "vite build --watch --config extensions/qa-lab/web/vite.config.ts",
"qa:otel:smoke": "node --import tsx scripts/qa-otel-smoke.ts",
"release-metadata:check": "node scripts/check-release-metadata-only.mjs",
"release:beta-smoke": "node --import tsx scripts/release-beta-smoke.ts",
"release:check": "pnpm deps:root-ownership:check && pnpm plugins:inventory:check && pnpm check:base-config-schema && pnpm check:bundled-channel-config-metadata && pnpm config:docs:check && pnpm plugin-sdk:check-exports && pnpm plugin-sdk:api:check && node --import tsx scripts/release-check.ts",
"release:openclaw:npm:check": "node --import tsx scripts/openclaw-npm-release-check.ts",
"release:openclaw:npm:verify-published": "node --import tsx scripts/openclaw-npm-postpublish-verify.ts",

View File

@@ -88,17 +88,70 @@ if [ -z "$PACKAGE_LABEL" ]; then
fi
fi
credential_source="$(resolve_credential_source)"
credential_role="$(resolve_credential_role)"
if [ -z "$credential_role" ] && [ -n "${CI:-}" ] && [ "$credential_source" = "convex" ]; then
credential_role="ci"
fi
# Fails fast (exit 1) when the credential env required by the selected
# credential source is missing. Reads the globals credential_source and
# credential_role; OPENCLAW_NPM_TELEGRAM_SKIP_CREDENTIAL_PREFLIGHT=1 disables
# the check entirely.
validate_credential_preflight() {
  if [ "${OPENCLAW_NPM_TELEGRAM_SKIP_CREDENTIAL_PREFLIGHT:-0}" = "1" ]; then
    return 0
  fi

  if [ "$credential_source" = "convex" ]; then
    if [ -z "${OPENCLAW_QA_CONVEX_SITE_URL:-}" ]; then
      echo "Missing required env for Convex credential mode: OPENCLAW_QA_CONVEX_SITE_URL" >&2
      exit 1
    fi
    case "$credential_role" in
      ci)
        if [ -z "${OPENCLAW_QA_CONVEX_SECRET_CI:-}" ]; then
          echo "Missing required env for Convex ci credential mode: OPENCLAW_QA_CONVEX_SECRET_CI" >&2
          exit 1
        fi
        ;;
      maintainer)
        if [ -z "${OPENCLAW_QA_CONVEX_SECRET_MAINTAINER:-}" ]; then
          echo "Missing required env for Convex maintainer credential mode: OPENCLAW_QA_CONVEX_SECRET_MAINTAINER" >&2
          exit 1
        fi
        ;;
      *)
        # No explicit role: either role secret is acceptable.
        if [ -z "${OPENCLAW_QA_CONVEX_SECRET_CI:-}" ] && [ -z "${OPENCLAW_QA_CONVEX_SECRET_MAINTAINER:-}" ]; then
          echo "Missing required env for Convex credential mode: OPENCLAW_QA_CONVEX_SECRET_CI or OPENCLAW_QA_CONVEX_SECRET_MAINTAINER" >&2
          exit 1
        fi
        ;;
    esac
    return 0
  fi

  # Any other credential source needs all three direct Telegram variables.
  local missing=()
  for key in \
    OPENCLAW_QA_TELEGRAM_GROUP_ID \
    OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN \
    OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN; do
    # Indirect expansion: look up the variable whose name is stored in key.
    [ -n "${!key:-}" ] || missing+=("$key")
  done
  if [ "${#missing[@]}" -eq 0 ]; then
    return 0
  fi

  {
    echo "Missing required Telegram QA credential env before Docker work: ${missing[*]}"
    echo "Use one of:"
    echo " direct Telegram env: OPENCLAW_QA_TELEGRAM_GROUP_ID, OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN, OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN"
    echo " Convex env: OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE=convex plus OPENCLAW_QA_CONVEX_SITE_URL and a role secret"
  } >&2
  exit 1
}
validate_credential_preflight
docker_e2e_build_or_reuse "$IMAGE_NAME" npm-telegram-live "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR" "$DOCKER_TARGET"
mkdir -p "$ROOT_DIR/.artifacts/qa-e2e"
run_log="$(mktemp "${TMPDIR:-/tmp}/openclaw-npm-telegram-live.XXXXXX")"
npm_prefix_host="$(mktemp -d "$ROOT_DIR/.artifacts/qa-e2e/npm-telegram-live-prefix.XXXXXX")"
trap 'rm -f "$run_log"; rm -rf "$npm_prefix_host"' EXIT
credential_source="$(resolve_credential_source)"
credential_role="$(resolve_credential_role)"
if [ -z "$credential_role" ] && [ -n "${CI:-}" ] && [ "$credential_source" = "convex" ]; then
credential_role="ci"
fi
docker_env=(
-e COREPACK_ENABLE_DOWNLOAD_PROMPT=0

View File

@@ -65,10 +65,19 @@ interface NpmUpdateSummary {
packageSpec: string;
updateTarget: string;
updateExpected: string;
updateTargetBuildCommit: string;
updateTargetPackageVersion: string;
updateTargetTarball: string;
provider: Provider;
latestVersion: string;
currentHead: string;
runDir: string;
slowestTiming?: {
durationMs: number;
label: string;
phase: "fresh" | "fresh-target" | "update";
};
totalDurationMs: number;
fresh: Record<Platform, string>;
freshTarget: Record<Platform, string>;
freshTargetSpec: string;
@@ -184,6 +193,13 @@ function platformRecord<T>(value: T): Record<Platform, T> {
return { linux: value, macos: value, windows: value };
}
/**
 * Renders a millisecond duration as `"<m>m <s>s"`, or just `"<s>s"` when it
 * rounds to less than one minute.
 *
 * @param durationMs - Duration in milliseconds; rounded to the nearest second.
 * @returns The human-readable duration string.
 */
function formatDuration(durationMs: number): string {
  const totalSeconds = Math.round(durationMs / 1000);
  const wholeMinutes = Math.floor(totalSeconds / 60);
  const leftoverSeconds = totalSeconds % 60;
  if (wholeMinutes > 0) {
    return `${wholeMinutes}m ${leftoverSeconds}s`;
  }
  return `${leftoverSeconds}s`;
}
class NpmUpdateSmoke {
private auth: ProviderAuth;
private windowsAuth: ProviderAuth;
@@ -197,8 +213,12 @@ class NpmUpdateSmoke {
private server: HostServer | null = null;
private artifact: PackageArtifact | null = null;
private freshTargetSpec = "";
private startedAt = Date.now();
private updateTargetBuildCommit = "";
private updateTargetEffective = "";
private updateExpectedNeedle = "";
private updateTargetPackageVersion = "";
private updateTargetTarball = "";
private linuxVm = linuxVmDefault;
private freshStatus = platformRecord("skip");
@@ -221,6 +241,7 @@ class NpmUpdateSmoke {
}
async run(): Promise<void> {
this.startedAt = Date.now();
this.runDir = await makeTempDir("openclaw-parallels-npm-update.");
this.tgzDir = await makeTempDir("openclaw-parallels-npm-update-tgz.");
try {
@@ -394,12 +415,76 @@ class NpmUpdateSmoke {
});
this.updateTargetEffective = this.server.urlFor(this.artifact.path);
this.updateExpectedNeedle = this.currentHeadShort;
this.updateTargetPackageVersion = this.artifact.version ?? "";
this.updateTargetBuildCommit =
this.artifact.buildCommitShort ?? this.artifact.buildCommit ?? "";
this.updateTargetTarball = this.updateTargetEffective;
return;
}
this.updateTargetEffective = this.options.updateTarget;
this.updateExpectedNeedle = this.isExplicitPackageTarget(this.updateTargetEffective)
? ""
: resolveOpenClawRegistryVersion(this.updateTargetEffective) || this.updateTargetEffective;
const metadata = this.resolveRegistryPackageMetadata(this.updateTargetEffective);
this.updateTargetPackageVersion = metadata.version;
this.updateTargetBuildCommit =
metadata.gitHead || this.resolvePackageBuildCommit(metadata.tarball);
this.updateTargetTarball = metadata.tarball;
}
/**
 * Streams the tarball through `curl | tar` to read
 * `package/dist/build-info.json` and returns the first seven characters of
 * its `commit` field.
 *
 * @param tarball - Tarball URL; an empty value short-circuits to `""`.
 * @returns The shortened commit, or `""` when the download yields no output,
 *   the output is not valid JSON, or no commit is present.
 */
private resolvePackageBuildCommit(tarball: string): string {
  if (!tarball) {
    return "";
  }
  const extractCommand = `curl -fsSL ${shellQuote(tarball)} | tar -xzOf - package/dist/build-info.json`;
  // check: false — a failed download is treated as "unknown commit", not an error.
  const { stdout } = run("bash", ["-lc", extractCommand], {
    check: false,
    quiet: true,
  });
  const buildInfoJson = stdout.trim();
  if (!buildInfoJson) {
    return "";
  }
  try {
    const { commit } = JSON.parse(buildInfoJson) as { commit?: string };
    if (!commit) {
      return "";
    }
    return commit.slice(0, 7);
  } catch {
    return "";
  }
}
/**
 * Queries the npm registry for the version, tarball URL, and git head of an
 * update target.
 *
 * `npm view <spec> version dist.tarball gitHead --json` keys each requested
 * field by the path that was asked for, so the tarball arrives under the flat
 * key `"dist.tarball"`. A nested `dist.tarball` object is accepted as well so
 * either output shape resolves.
 *
 * @param target - Registry target (dist-tag, version, or `openclaw@...` spec).
 *   Explicit package targets are not looked up.
 * @returns The resolved metadata; every field is `""` when the lookup is
 *   skipped, produces no output, or cannot be parsed.
 */
private resolveRegistryPackageMetadata(target: string): {
  gitHead: string;
  tarball: string;
  version: string;
} {
  // Fresh object per call so callers can never share or mutate a common value.
  const unknownMetadata = () => ({ gitHead: "", tarball: "", version: "" });
  if (this.isExplicitPackageTarget(target)) {
    return unknownMetadata();
  }
  const spec = target.startsWith("openclaw@") ? target : `openclaw@${target}`;
  const output = run("npm", ["view", spec, "version", "dist.tarball", "gitHead", "--json"], {
    check: false,
    quiet: true,
  }).stdout.trim();
  if (!output) {
    return unknownMetadata();
  }
  try {
    const decoded: unknown = JSON.parse(output);
    // NOTE(review): a spec matching several versions is printed as an array;
    // the last entry is presumably the newest — confirm against npm output.
    const entry: unknown = Array.isArray(decoded) ? decoded[decoded.length - 1] : decoded;
    if (typeof entry !== "object" || entry === null) {
      return unknownMetadata();
    }
    const fields = entry as Record<string, unknown>;
    const text = (value: unknown): string => (typeof value === "string" ? value : "");
    const nestedDist = fields.dist;
    const nestedTarball =
      typeof nestedDist === "object" && nestedDist !== null
        ? (nestedDist as Record<string, unknown>).tarball
        : undefined;
    return {
      gitHead: text(fields.gitHead),
      tarball: text(fields["dist.tarball"]) || text(nestedTarball),
      version: text(fields.version),
    };
  } catch {
    return unknownMetadata();
  }
}
private async runSameGuestUpdates(): Promise<void> {
@@ -900,6 +985,7 @@ class NpmUpdateSmoke {
}
private async writeSummary(): Promise<string> {
const slowestTiming = this.timings.toSorted((a, b) => b.durationMs - a.durationMs)[0];
const summary: NpmUpdateSummary = {
currentHead: this.currentHeadShort,
fresh: this.freshStatus,
@@ -915,7 +1001,18 @@ class NpmUpdateSmoke {
windows: { status: this.updateStatus.windows, version: this.updateVersion.windows },
},
timings: this.timings,
slowestTiming: slowestTiming
? {
durationMs: slowestTiming.durationMs,
label: slowestTiming.label,
phase: slowestTiming.phase,
}
: undefined,
totalDurationMs: Date.now() - this.startedAt,
updateExpected: this.updateExpectedNeedle,
updateTargetBuildCommit: this.updateTargetBuildCommit,
updateTargetPackageVersion: this.updateTargetPackageVersion,
updateTargetTarball: this.updateTargetTarball,
updateTarget: this.updateTargetEffective,
};
const summaryPath = path.join(this.runDir, "summary.json");
@@ -924,10 +1021,14 @@ class NpmUpdateSmoke {
lines: [
`- package spec: ${summary.packageSpec}`,
`- update target: ${summary.updateTarget}`,
`- update target package: ${summary.updateTargetPackageVersion || "unknown"}${summary.updateTargetBuildCommit ? ` (${summary.updateTargetBuildCommit})` : ""}`,
`- update target tarball: ${summary.updateTargetTarball || "n/a"}`,
`- update expected: ${summary.updateExpected}`,
`- fresh: macOS=${summary.fresh.macos}, Windows=${summary.fresh.windows}, Linux=${summary.fresh.linux}`,
`- update: macOS=${summary.update.macos.status} (${summary.update.macos.version}), Windows=${summary.update.windows.status} (${summary.update.windows.version}), Linux=${summary.update.linux.status} (${summary.update.linux.version})`,
`- fresh target: ${summary.freshTargetSpec || "skip"} macOS=${summary.freshTarget.macos}, Windows=${summary.freshTarget.windows}, Linux=${summary.freshTarget.linux}`,
`- wall clock: ${formatDuration(summary.totalDurationMs)}`,
`- slowest phase: ${summary.slowestTiming ? `${summary.slowestTiming.phase}/${summary.slowestTiming.label} ${formatDuration(summary.slowestTiming.durationMs)}` : "n/a"}`,
`- logs: ${summary.runDir}`,
],
summaryPath,

View File

@@ -0,0 +1,283 @@
#!/usr/bin/env -S pnpm tsx
import { spawnSync } from "node:child_process";
import { existsSync, mkdirSync, readdirSync, readFileSync } from "node:fs";
import path from "node:path";
/** Parsed command-line options for the beta smoke helper. */
interface Options {
/** Beta target selector (`beta`, `betaN`, or a version); defaults to `beta`. */
beta: string;
/** Parallels agent-turn model, forwarded as `--model`. */
model: string;
/** Provider mode passed to the Telegram workflow as `provider_mode`. */
providerMode: string;
/** Git ref used when dispatching the GitHub workflow. */
ref: string;
/** GitHub repository in `owner/repo` form. */
repo: string;
/** When true, the Parallels beta validation is not run. */
skipParallels: boolean;
/** When true, the Telegram workflow is not dispatched. */
skipTelegram: boolean;
}
/**
 * Builds the command-line help text.
 *
 * @returns The usage message, terminated by a newline.
 */
function usage(): string {
  const helpLines = [
    "Usage: pnpm release:beta-smoke -- --beta beta4 [options]",
    "Options:",
    "--beta <beta|betaN|version> Beta target. Default: beta",
    "--model <provider/model> Parallels agent-turn model. Default: openai/gpt-5.4",
    "--provider-mode <mode> Telegram workflow provider mode. Default: mock-openai",
    "--ref <ref> GitHub workflow dispatch ref. Default: main",
    "--repo <owner/repo> GitHub repo. Default: openclaw/openclaw",
    "--skip-parallels Only run Telegram workflow",
    "--skip-telegram Only run Parallels beta validation",
    "-h, --help Show help",
  ];
  return `${helpLines.join("\n")}\n`;
}
/**
 * Parses the helper's command-line arguments.
 *
 * A bare `--` is ignored wherever it appears. `-h`/`--help` prints the usage
 * text and exits the process with status 0.
 *
 * @param argv - Arguments after the script name.
 * @returns The options, with defaults for anything not supplied.
 * @throws Error on an unknown option or a value flag without a value.
 */
function parseArgs(argv: string[]): Options {
  const parsed: Options = {
    beta: "beta",
    model: "openai/gpt-5.4",
    providerMode: "mock-openai",
    ref: "main",
    repo: "openclaw/openclaw",
    skipParallels: false,
    skipTelegram: false,
  };
  type ValueKey = "beta" | "model" | "providerMode" | "ref" | "repo";
  // Flags that consume the following argument, mapped to the option they set.
  const valueFlags = new Map<string, ValueKey>([
    ["--beta", "beta"],
    ["--model", "model"],
    ["--provider-mode", "providerMode"],
    ["--ref", "ref"],
    ["--repo", "repo"],
  ]);

  let cursor = 0;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    cursor += 1;
    if (flag === "--") {
      continue;
    }
    if (flag === "--skip-parallels") {
      parsed.skipParallels = true;
      continue;
    }
    if (flag === "--skip-telegram") {
      parsed.skipTelegram = true;
      continue;
    }
    if (flag === "-h" || flag === "--help") {
      process.stdout.write(usage());
      process.exit(0);
    }
    const optionKey = valueFlags.get(flag);
    if (optionKey === undefined) {
      throw new Error(`unknown option: ${flag}`);
    }
    // The cursor already points at the flag's value; step past it afterwards.
    parsed[optionKey] = requireValue(argv, cursor, flag);
    cursor += 1;
  }
  return parsed;
}
/**
 * Returns the argument at `index`, insisting that it is a usable flag value.
 *
 * @param argv - Full argument list.
 * @param index - Position where the value is expected.
 * @param flag - Flag name, used in the error message.
 * @returns The value at `index`.
 * @throws Error when the value is missing, empty, or starts with `-`.
 */
function requireValue(argv: string[], index: number, flag: string): string {
  const candidate = argv[index];
  const usable =
    typeof candidate === "string" && candidate.length > 0 && !candidate.startsWith("-");
  if (usable) {
    return candidate;
  }
  throw new Error(`${flag} requires a value`);
}
/**
 * Runs a command synchronously and returns its stdout.
 *
 * @param command - Executable to spawn.
 * @param args - Arguments passed to the executable.
 * @param input - With `capture: true`, stdout and stderr are piped and
 *   captured; otherwise the child inherits this process's stdio.
 * @returns Captured stdout, or `""` when stdio is inherited.
 * @throws Error when the process cannot be spawned, is terminated by a
 *   signal, or exits with a non-zero status. Captured stderr is appended.
 */
function run(command: string, args: string[], input?: { capture?: boolean }): string {
  const result = spawnSync(command, args, {
    encoding: "utf8",
    stdio: input?.capture ? ["ignore", "pipe", "pipe"] : "inherit",
  });
  const rendered = `${command} ${args.join(" ")}`;
  // spawnSync reports spawn-level failures (missing binary, buffer overflow)
  // through `error` with a null status; surface that cause directly.
  if (result.error) {
    throw new Error(`${rendered} failed: ${result.error.message}`);
  }
  if (result.status !== 0) {
    const reason =
      result.status === null ? `signal ${result.signal ?? "unknown"}` : String(result.status);
    const stderr = result.stderr ? `\n${result.stderr}` : "";
    throw new Error(`${rendered} failed with ${reason}${stderr}`);
  }
  return result.stdout ?? "";
}
/**
 * Wraps a value in single quotes for use in a POSIX shell command line.
 * Each embedded single quote becomes `'\''` (close, escaped quote, reopen).
 *
 * @param value - Raw text to quote.
 * @returns The single-quoted form of `value`.
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
/**
 * Resolves a beta selector to a concrete published `openclaw` version.
 *
 * Accepted selectors, each with an optional `openclaw@` prefix:
 * - a full `YYYY.M.D-beta.N` version, returned as-is without a registry call;
 * - `betaN` or bare `N`, resolved to the highest published version ending in
 *   `-beta.N`;
 * - anything else (for example the `beta` dist-tag), resolved with
 *   `npm view openclaw@<selector> version`.
 *
 * @param beta - Selector as given on the command line.
 * @returns The resolved version string; never empty.
 * @throws Error when the selector is empty or the registry has no match.
 */
function resolveBetaVersion(beta: string): string {
  const value = beta.trim().replace(/^openclaw@/, "");
  if (!value) {
    // `npm view openclaw@` would silently resolve to the default dist-tag.
    throw new Error("beta target must not be empty");
  }
  if (/^\d{4}\.\d+\.\d+-beta\.\d+$/u.test(value)) {
    return value;
  }

  const betaMatch = /^(?:beta)?(\d+)$/u.exec(value);
  if (!betaMatch) {
    // Dist-tags (including plain `beta`) and other specs go straight to npm.
    const resolved = run("npm", ["view", `openclaw@${value}`, "version"], {
      capture: true,
    }).trim();
    if (!resolved) {
      // Guard against building the package spec `openclaw@` from empty output.
      throw new Error(`no openclaw registry version found for ${beta}`);
    }
    return resolved;
  }

  const suffix = `-beta.${betaMatch[1]}`;
  const listed: unknown = JSON.parse(
    run("npm", ["view", "openclaw", "versions", "--json"], { capture: true }),
  );
  // Tolerate a bare JSON string in place of an array.
  const versions = (Array.isArray(listed) ? listed : [listed]).filter(
    (entry): entry is string => typeof entry === "string",
  );
  const candidates = versions
    .filter((version) => version.endsWith(suffix))
    .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }));
  const match = candidates[candidates.length - 1];
  if (!match) {
    throw new Error(`no openclaw registry version found for ${beta}`);
  }
  return match;
}
/**
 * Locates the timeout binary through a login shell, preferring `gtimeout`
 * and falling back to `timeout`.
 *
 * @returns The trimmed output of `command -v` for whichever name was found.
 */
function timeoutCommand(): string {
  const probe = "command -v gtimeout || command -v timeout";
  const located = run("bash", ["-lc", probe], { capture: true });
  return located.trim();
}
/**
 * Runs the Parallels npm-update beta validation under a 150-minute timeout.
 *
 * The command runs in a login shell that first sources `$HOME/.profile` with
 * auto-export enabled, ignoring any failure from that step.
 *
 * @param beta - Beta selector forwarded as `--beta-validation`.
 * @param model - Model forwarded as `--model`.
 */
function runParallels(beta: string, model: string): void {
  const quotedTimeout = shellQuote(timeoutCommand());
  const quotedLane = [
    "pnpm",
    "test:parallels:npm-update",
    "--",
    "--beta-validation",
    beta,
    "--model",
    model,
    "--json",
  ].map((word) => shellQuote(word));
  const profilePrelude = 'set -a; source "$HOME/.profile" >/dev/null 2>&1 || true; set +a;';
  const script = `${profilePrelude} exec ${quotedTimeout} --foreground 150m ${quotedLane.join(" ")}`;
  run("bash", ["-lc", script]);
}
/**
 * Calls `gh api repos/<repo>/<pathSuffix>` and parses the response as JSON.
 *
 * @param repo - Repository in `owner/repo` form.
 * @param pathSuffix - API path below the repository.
 * @returns The parsed JSON payload; callers narrow it themselves.
 */
function ghJson(repo: string, pathSuffix: string): unknown {
  const endpoint = `repos/${repo}/${pathSuffix}`;
  const body = run("gh", ["api", endpoint], { capture: true });
  const decoded: unknown = JSON.parse(body);
  return decoded;
}
/**
 * Dispatches the `NPM Telegram Beta E2E` workflow and extracts the run id
 * from the `/actions/runs/<id>` URL in the `gh` output.
 *
 * @param options - Supplies the repo, ref, and provider mode.
 * @param packageSpec - Sent as both `package_spec` and `package_label`.
 * @returns The numeric workflow run id as a string.
 * @throws Error when the `gh` output contains no run URL.
 */
function dispatchTelegram(options: Options, packageSpec: string): string {
  const workflowInputs = [
    `package_spec=${packageSpec}`,
    `package_label=${packageSpec}`,
    `provider_mode=${options.providerMode}`,
  ];
  const ghArgs = [
    "workflow",
    "run",
    "NPM Telegram Beta E2E",
    "--repo",
    options.repo,
    "--ref",
    options.ref,
    ...workflowInputs.flatMap((field) => ["-f", field]),
  ];
  const output = run("gh", ghArgs, { capture: true });
  const [, runId] = output.match(/\/actions\/runs\/(\d+)/u) ?? [];
  if (runId) {
    return runId;
  }
  throw new Error(`could not parse workflow run id from gh output:\n${output}`);
}
/**
 * Polls a workflow run every 30 seconds until it reports `completed`,
 * logging the status on each pass. There is no upper bound on the wait.
 *
 * @param repo - Repository in `owner/repo` form.
 * @param runId - Workflow run id to watch.
 * @returns Resolves once the run completes with conclusion `success`.
 * @throws Error when the run completes with any other conclusion.
 */
async function pollRun(repo: string, runId: string): Promise<void> {
  const pollIntervalMs = 30_000;
  while (true) {
    const {
      conclusion,
      html_url: htmlUrl,
      status,
      updated_at: updatedAt,
    } = ghJson(repo, `actions/runs/${runId}`) as {
      conclusion: string | null;
      html_url: string;
      status: string;
      updated_at: string;
    };
    const conclusionSuffix = conclusion ? `/${conclusion}` : "";
    console.log(`Telegram workflow ${runId}: ${status}${conclusionSuffix} updated=${updatedAt}`);
    if (status !== "completed") {
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
      continue;
    }
    if (conclusion !== "success") {
      throw new Error(`Telegram workflow failed: ${conclusion ?? "unknown"} ${htmlUrl}`);
    }
    console.log(htmlUrl);
    return;
  }
}
/**
 * Downloads the Telegram E2E artifact of a workflow run into
 * `.artifacts/qa-e2e/<artifact name>`.
 *
 * @param repo - Repository in `owner/repo` form.
 * @param runId - Workflow run id whose artifacts are listed.
 * @returns The directory the artifact was downloaded into.
 * @throws Error when the run has no unexpired artifact whose name starts
 *   with `npm-telegram-beta-e2e-<runId>-`.
 */
function downloadTelegramArtifact(repo: string, runId: string): string {
  const listing = ghJson(repo, `actions/runs/${runId}/artifacts`) as {
    artifacts: Array<{ expired: boolean; name: string }>;
  };
  const expectedPrefix = `npm-telegram-beta-e2e-${runId}-`;

  // First unexpired artifact with the expected prefix wins.
  let artifactName: string | undefined;
  for (const entry of listing.artifacts) {
    if (!entry.expired && entry.name.startsWith(expectedPrefix)) {
      artifactName = entry.name;
      break;
    }
  }
  if (artifactName === undefined) {
    throw new Error(`no npm Telegram artifact found for run ${runId}`);
  }

  const outputDir = path.join(".artifacts", "qa-e2e", artifactName);
  mkdirSync(outputDir, { recursive: true });
  const downloadArgs = [
    "run",
    "download",
    runId,
    "--repo",
    repo,
    "--name",
    artifactName,
    "--dir",
    outputDir,
  ];
  run("gh", downloadArgs);
  return outputDir;
}
/**
 * Depth-first search for the first regular file named `basename` under `root`.
 *
 * Entries are visited in directory-listing order, and a subdirectory is
 * searched completely before its later siblings are looked at.
 *
 * @param root - Directory to start from.
 * @param basename - Exact file name to look for.
 * @returns The path of the first match, or `""` when there is none.
 */
function findFile(root: string, basename: string): string {
  // One frame per directory currently being walked; `next` is the resume index.
  const frames = [{ dir: root, entries: readdirSync(root, { withFileTypes: true }), next: 0 }];
  while (frames.length > 0) {
    const frame = frames[frames.length - 1];
    if (frame === undefined) {
      break;
    }
    if (frame.next >= frame.entries.length) {
      frames.pop();
      continue;
    }
    const entry = frame.entries[frame.next];
    frame.next += 1;
    if (entry === undefined) {
      continue;
    }
    const filePath = path.join(frame.dir, entry.name);
    if (entry.isFile() && entry.name === basename) {
      return filePath;
    }
    if (entry.isDirectory()) {
      frames.push({
        dir: filePath,
        entries: readdirSync(filePath, { withFileTypes: true }),
        next: 0,
      });
    }
  }
  return "";
}
/**
 * Entry point for the post-publish beta smoke run.
 *
 * Resolves the beta target, runs the Parallels validation unless
 * `--skip-parallels` is set, then (unless `--skip-telegram` is set)
 * dispatches the Telegram workflow, waits for it, downloads its artifact, and
 * prints the Telegram report. A missing report produces a warning rather than
 * a failure, because the workflow itself already succeeded at that point.
 */
async function main(): Promise<void> {
  const options = parseArgs(process.argv.slice(2));
  const version = resolveBetaVersion(options.beta);
  const packageSpec = `openclaw@${version}`;
  console.log(`Resolved beta target: ${packageSpec}`);
  if (!options.skipParallels) {
    runParallels(options.beta, options.model);
  }
  if (options.skipTelegram) {
    return;
  }
  const runId = dispatchTelegram(options, packageSpec);
  await pollRun(options.repo, runId);
  const artifactDir = downloadTelegramArtifact(options.repo, runId);
  const report = findFile(artifactDir, "telegram-qa-report.md");
  if (!report || !existsSync(report)) {
    // Say so explicitly: silence here would look like an empty report.
    console.warn(`Telegram report telegram-qa-report.md not found under ${artifactDir}`);
    return;
  }
  console.log(`\nTelegram report: ${report}\n`);
  console.log(readFileSync(report, "utf8"));
}
// Script entry: run main and turn any failure into a one-line message with
// exit status 1.
try {
  await main();
} catch (error: unknown) {
  console.error(error instanceof Error ? error.message : String(error));
  process.exit(1);
}