From 2243a68a1df2c3476b37b715ab54ef4ad5c020d2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 14:24:00 +0100 Subject: [PATCH] ci: shard release live validation --- .agents/skills/openclaw-testing/SKILL.md | 13 ++ .../openclaw-live-and-e2e-checks-reusable.yml | 39 ++++- docs/ci.md | 9 +- scripts/lib/docker-e2e-plan.mjs | 8 +- scripts/lib/docker-e2e-scenarios.mjs | 14 +- scripts/test-live-shard.mjs | 144 ++++++++++++++++++ test/scripts/docker-all-scheduler.test.ts | 30 ++++ test/scripts/docker-build-helper.test.ts | 5 +- test/scripts/docker-e2e-plan.test.ts | 21 ++- .../package-acceptance-workflow.test.ts | 13 ++ test/scripts/test-live-shard.test.ts | 41 +++++ 11 files changed, 324 insertions(+), 13 deletions(-) create mode 100644 scripts/test-live-shard.mjs create mode 100644 test/scripts/test-live-shard.test.ts diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md index a727751c5b4..a043a0508bf 100644 --- a/.agents/skills/openclaw-testing/SKILL.md +++ b/.agents/skills/openclaw-testing/SKILL.md @@ -244,6 +244,19 @@ Useful knobs: targeted Docker live model job instead of the full provider matrix. - blank `live_model_providers`: run the full live-model provider matrix. +When live suites are enabled, the workflow shards broad native `pnpm test:live` +coverage through `scripts/test-live-shard.mjs` instead of one serial `live-all` +job: + +- `native-live-src-agents` +- `native-live-src-gateway` +- `native-live-test` +- `native-live-extensions-a-k` +- `native-live-extensions-l-z` + +Use `node scripts/test-live-shard.mjs <shard> --list` to see the exact files +before rerunning a failed native live shard. + For model-list or provider-selection fixes, use `live_models_only=true` plus the specific `live_model_providers` allowlist. Confirm logs show the expected `OPENCLAW_LIVE_PROVIDERS` and selected model ids before declaring proof. 
diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index d160b3d1d70..b8fedfccfe4 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -1425,30 +1425,59 @@ jobs: fail-fast: false matrix: include: - - suite_id: live-all - label: pnpm test:live - command: pnpm test:live - timeout_minutes: 180 + - suite_id: native-live-src-agents + label: Native live agents + command: node scripts/test-live-shard.mjs native-live-src-agents + timeout_minutes: 90 + needs_ffmpeg: false + profile_env_only: false + - suite_id: native-live-src-gateway + label: Native live gateway + command: node scripts/test-live-shard.mjs native-live-src-gateway + timeout_minutes: 90 + needs_ffmpeg: false + profile_env_only: false + - suite_id: native-live-test + label: Native live test harnesses + command: node scripts/test-live-shard.mjs native-live-test + timeout_minutes: 90 + needs_ffmpeg: false + profile_env_only: false + - suite_id: native-live-extensions-a-k + label: Native live plugins A-K + command: node scripts/test-live-shard.mjs native-live-extensions-a-k + timeout_minutes: 90 + needs_ffmpeg: true + profile_env_only: false + - suite_id: native-live-extensions-l-z + label: Native live plugins L-Z + command: node scripts/test-live-shard.mjs native-live-extensions-l-z + timeout_minutes: 90 + needs_ffmpeg: true profile_env_only: false - suite_id: live-gateway-docker label: Docker live gateway command: pnpm test:docker:live-gateway timeout_minutes: 120 + needs_ffmpeg: false profile_env_only: false - suite_id: live-cli-backend-docker label: Docker live CLI backend command: pnpm test:docker:live-cli-backend timeout_minutes: 120 + needs_ffmpeg: false profile_env_only: false - suite_id: live-acp-bind-docker label: Docker live ACP bind command: pnpm test:docker:live-acp-bind timeout_minutes: 120 + needs_ffmpeg: false profile_env_only: 
false - suite_id: live-codex-harness-docker label: Docker live Codex harness command: pnpm test:docker:live-codex-harness timeout_minutes: 120 + needs_ffmpeg: false profile_env_only: false env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -1516,7 +1545,7 @@ jobs: run: bash scripts/ci-hydrate-live-auth.sh - name: Install live media dependencies - if: matrix.suite_id == 'live-all' + if: matrix.needs_ffmpeg shell: bash run: | set -euo pipefail diff --git a/docs/ci.md b/docs/ci.md index 93b2e7598d4..6dee5bf16a7 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -19,6 +19,13 @@ provided. The umbrella records the dispatched child run ids, and the final child workflow is rerun and turns green, rerun only the parent verifier job to refresh the umbrella result. +The release live/E2E child keeps broad native `pnpm test:live` coverage, but it +runs it as named shards (`native-live-src-agents`, `native-live-src-gateway`, +`native-live-test`, `native-live-extensions-a-k`, and +`native-live-extensions-l-z`) through `scripts/test-live-shard.mjs` instead of +one serial job. That keeps the same file coverage while making slow live +provider failures easier to rerun and diagnose. + `Package Acceptance` is the side-run workflow for validating a package artifact without blocking the release workflow. It resolves one candidate from a published npm spec, a trusted `package_ref` built with the selected @@ -271,7 +278,7 @@ act as if every scoped area changed. CI workflow edits validate the Node CI graph plus workflow linting, but do not force Windows, Android, or macOS native builds by themselves; those platform lanes stay scoped to platform source changes. CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract helper/test-routing edits use a fast Node-only manifest path: preflight, security, and a single `checks-fast-core` task. 
That path avoids build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the changed files are limited to the routing or helper surfaces that the fast task exercises directly. Windows Node checks are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes so they do not reserve a 16-vCPU Windows worker for coverage that is already exercised by the normal test shards. -The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. 
The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image, packs OpenClaw once as an npm tarball, and builds two shared `scripts/e2e/Dockerfile` images: a bare Node/Git runner for installer/update/plugin-dependency lanes and a functional image that installs the same tarball into `/app` for normal functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`, planner logic lives in `scripts/lib/docker-e2e-plan.mjs`, and the runner only executes the selected plan. The scheduler selects the image per lane with `OPENCLAW_DOCKER_E2E_BARE_IMAGE` and `OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`, then runs lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. A single lane heavier than the effective caps can still start from an empty pool, then runs alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. 
It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=` runs exact scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. The reusable live/E2E workflow asks `scripts/test-docker-all.mjs --plan-json` which package, image kind, live image, lane, and credential coverage is required, then `scripts/docker-e2e.mjs` converts that plan into GitHub outputs and summaries. It either packs OpenClaw through `scripts/package-openclaw-for-docker.mjs`, downloads a current-run package artifact, or downloads a package artifact from `package_artifact_run_id`; validates the tarball inventory; builds and pushes package-digest-tagged bare/functional GHCR Docker E2E images through Blacksmith's Docker layer cache when the plan needs package-installed lanes; and reuses provided `docker_e2e_bare_image`/`docker_e2e_functional_image` inputs or existing package-digest images instead of rebuilding. The `Package Acceptance` workflow is the high-level package gate: it resolves a candidate from npm, a trusted `package_ref`, an HTTPS tarball plus SHA-256, or a prior workflow artifact, then passes that single `package-under-test` artifact into the reusable Docker E2E workflow. It keeps `workflow_ref` separate from `package_ref` so current acceptance logic can validate older trusted commits without checking out old workflow code. Release checks run a custom Package Acceptance delta for the target ref: bundled-channel compat, offline plugin fixtures, and Telegram package QA against the resolved tarball. 
The release-path Docker suite runs four chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls only the image kind it needs and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-runtime|bundled-channels`). OpenWebUI is folded into `plugins-runtime` when full release-path coverage requests it, and keeps a standalone `openwebui` chunk only for OpenWebUI-only dispatches. The `bundled-channels` chunk runs split `bundled-channel-*` and `bundled-channel-update-*` lanes rather than the serial all-in-one `bundled-channel-deps` lane; `plugins-integrations` remains a legacy aggregate alias for manual reruns. Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, slow-lane tables, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared images instead of the chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job and prepares, downloads, or reuses the package artifact for that run; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. Generated per-lane GitHub rerun commands include `package_artifact_run_id`, `package_artifact_name`, and prepared image inputs when those values exist, so a failed lane can reuse the exact package and images from the failed run. Use `pnpm test:docker:rerun ` to download Docker artifacts from a GitHub run and print combined/per-lane targeted rerun commands; use `pnpm test:docker:timings ` for slow-lane and phase critical-path summaries. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. 
+The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image, packs OpenClaw once as an npm tarball, and builds two shared `scripts/e2e/Dockerfile` images: a bare Node/Git runner for installer/update/plugin-dependency lanes and a functional image that installs the same tarball into `/app` for normal functionality lanes. 
Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`, planner logic lives in `scripts/lib/docker-e2e-plan.mjs`, and the runner only executes the selected plan. The scheduler selects the image per lane with `OPENCLAW_DOCKER_E2E_BARE_IMAGE` and `OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`, then runs lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. A single lane heavier than the effective caps can still start from an empty pool, then runs alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=` runs exact scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. 
The reusable live/E2E workflow asks `scripts/test-docker-all.mjs --plan-json` which package, image kind, live image, lane, and credential coverage is required, then `scripts/docker-e2e.mjs` converts that plan into GitHub outputs and summaries. It either packs OpenClaw through `scripts/package-openclaw-for-docker.mjs`, downloads a current-run package artifact, or downloads a package artifact from `package_artifact_run_id`; validates the tarball inventory; builds and pushes package-digest-tagged bare/functional GHCR Docker E2E images through Blacksmith's Docker layer cache when the plan needs package-installed lanes; and reuses provided `docker_e2e_bare_image`/`docker_e2e_functional_image` inputs or existing package-digest images instead of rebuilding. The `Package Acceptance` workflow is the high-level package gate: it resolves a candidate from npm, a trusted `package_ref`, an HTTPS tarball plus SHA-256, or a prior workflow artifact, then passes that single `package-under-test` artifact into the reusable Docker E2E workflow. It keeps `workflow_ref` separate from `package_ref` so current acceptance logic can validate older trusted commits without checking out old workflow code. Release checks run a custom Package Acceptance delta for the target ref: bundled-channel compat, offline plugin fixtures, and Telegram package QA against the resolved tarball. The release-path Docker suite runs four chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls only the image kind it needs and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-runtime|bundled-channels`). OpenWebUI is folded into `plugins-runtime` when full release-path coverage requests it, and keeps a standalone `openwebui` chunk only for OpenWebUI-only dispatches. 
The `package-update` chunk splits installer E2E into `install-e2e-openai` and `install-e2e-anthropic`; `install-e2e` remains the aggregate manual rerun alias. The `bundled-channels` chunk runs split `bundled-channel-*` and `bundled-channel-update-*` lanes rather than the serial all-in-one `bundled-channel-deps` lane; `plugins-integrations` remains a legacy aggregate alias for manual reruns. Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, slow-lane tables, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared images instead of the chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job and prepares, downloads, or reuses the package artifact for that run; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. Generated per-lane GitHub rerun commands include `package_artifact_run_id`, `package_artifact_name`, and prepared image inputs when those values exist, so a failed lane can reuse the exact package and images from the failed run. Use `pnpm test:docker:rerun ` to download Docker artifacts from a GitHub run and print combined/per-lane targeted rerun commands; use `pnpm test:docker:timings ` for slow-lane and phase critical-path summaries. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by `scripts/check-changed.mjs`. 
That local check gate is stricter about architecture boundaries than the broad CI platform scope: core production changes run core prod and core test typecheck plus core lint/guards, core test-only changes run only core test typecheck plus core lint, extension production changes run extension prod and extension test typecheck plus extension lint, and extension test-only changes run extension test typecheck plus extension lint. Public Plugin SDK or plugin-contract changes expand to extension typecheck because extensions depend on those core contracts, but Vitest extension sweeps are explicit test work. Release metadata-only version bumps run targeted version/config/root-dependency checks. Unknown root/config changes fail safe to all check lanes. diff --git a/scripts/lib/docker-e2e-plan.mjs b/scripts/lib/docker-e2e-plan.mjs index 89bd9df9171..f6ca3deee94 100644 --- a/scripts/lib/docker-e2e-plan.mjs +++ b/scripts/lib/docker-e2e-plan.mjs @@ -37,6 +37,7 @@ export function parseLaneSelection(raw) { } const laneAliases = new Map([ ["bundled-channel-deps", ["bundled-channel-deps-compat"]], + ["install-e2e", ["install-e2e-openai", "install-e2e-anthropic"]], [ "bundled-plugin-install-uninstall", Array.from( @@ -145,8 +146,11 @@ export function findLaneByName(name) { export function laneCredentialRequirements(poolLane) { const credentials = []; - if (poolLane.name === "install-e2e") { - credentials.push("openai", "anthropic"); + if (poolLane.name === "install-e2e-openai") { + credentials.push("openai"); + } + if (poolLane.name === "install-e2e-anthropic") { + credentials.push("anthropic"); } if (poolLane.name === "openwebui" || poolLane.name === "openai-web-search-minimal") { credentials.push("openai"); diff --git a/scripts/lib/docker-e2e-scenarios.mjs b/scripts/lib/docker-e2e-scenarios.mjs index e5068f21ee5..869b2be3b0e 100644 --- a/scripts/lib/docker-e2e-scenarios.mjs +++ b/scripts/lib/docker-e2e-scenarios.mjs @@ -400,11 +400,19 @@ const releasePathChunks = { ], 
"package-update": [ npmLane( - "install-e2e", - "OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=both pnpm test:install:e2e", + "install-e2e-openai", + "OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=openai OPENCLAW_INSTALL_E2E_IMAGE=openclaw-install-e2e-openai:local pnpm test:install:e2e", { resources: ["service"], - weight: 4, + weight: 3, + }, + ), + npmLane( + "install-e2e-anthropic", + "OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=anthropic OPENCLAW_INSTALL_E2E_IMAGE=openclaw-install-e2e-anthropic:local pnpm test:install:e2e", + { + resources: ["service"], + weight: 3, }, ), npmLane( diff --git a/scripts/test-live-shard.mjs b/scripts/test-live-shard.mjs new file mode 100644 index 00000000000..55a06e52959 --- /dev/null +++ b/scripts/test-live-shard.mjs @@ -0,0 +1,144 @@ +#!/usr/bin/env node +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { spawnPnpmRunner } from "./pnpm-runner.mjs"; + +const LIVE_TEST_SUFFIX = ".live.test.ts"; + +export const LIVE_TEST_SHARDS = Object.freeze([ + "native-live-src-agents", + "native-live-src-gateway", + "native-live-test", + "native-live-extensions-a-k", + "native-live-extensions-l-z", +]); + +function walkFiles(rootDir) { + const files = []; + if (!fs.existsSync(rootDir)) { + return files; + } + const stack = [rootDir]; + while (stack.length > 0) { + const current = stack.pop(); + const entries = fs.readdirSync(current, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(current, entry.name); + if (entry.isDirectory()) { + if ( + entry.name === "node_modules" || + entry.name === "dist" || + entry.name === "vendor" || + entry.name === "fixtures" + ) { + continue; + } + stack.push(fullPath); + continue; + } + if (entry.isFile()) { + files.push(fullPath); + } + } + } + return files; +} + +export function collectAllLiveTestFiles(repoRoot = process.cwd()) { + return ["src", "test", "extensions"] + .flatMap((dir) => 
walkFiles(path.join(repoRoot, dir))) + .map((file) => path.relative(repoRoot, file).split(path.sep).join("/")) + .filter((file) => file.endsWith(LIVE_TEST_SUFFIX)) + .sort((a, b) => a.localeCompare(b)); +} + +function extensionKey(file) { + const relative = file.slice("extensions/".length); + return relative.split("/", 1)[0]?.toLowerCase() ?? ""; +} + +function isExtensionInRange(file, start, end) { + if (!file.startsWith("extensions/")) { + return false; + } + const key = extensionKey(file); + if (!key) { + return false; + } + const first = key[0]; + return first >= start && first <= end; +} + +export function selectLiveShardFiles(shard, files = collectAllLiveTestFiles()) { + switch (shard) { + case "native-live-src-agents": + return files.filter((file) => file.startsWith("src/agents/")); + case "native-live-src-gateway": + return files.filter( + (file) => file.startsWith("src/gateway/") || file.startsWith("src/crestodian/"), + ); + case "native-live-test": + return files.filter((file) => file.startsWith("test/")); + case "native-live-extensions-a-k": + return files.filter((file) => isExtensionInRange(file, "a", "k")); + case "native-live-extensions-l-z": + return files.filter((file) => isExtensionInRange(file, "l", "z")); + default: + throw new Error( + `Unknown live test shard '${shard}'. Expected one of: ${LIVE_TEST_SHARDS.join(", ")}`, + ); + } +} + +function usage() { + console.error(`Usage: node scripts/test-live-shard.mjs <${LIVE_TEST_SHARDS.join("|")}> [--list]`); +} + +if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) { + const args = process.argv.slice(2); + const shard = args.find((arg) => !arg.startsWith("-")); + const listOnly = args.includes("--list"); + if (!shard) { + usage(); + process.exit(2); + } + + let files; + try { + files = selectLiveShardFiles(shard); + } catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + usage(); + process.exit(2); + } + if (files.length === 0) { + console.error(`Live test shard '${shard}' selected no files.`); + process.exit(2); + } + + if (listOnly) { + for (const file of files) { + console.log(file); + } + process.exit(0); + } + + console.log(`[test:live:shard] ${shard}: ${files.length} file(s)`); + const child = spawnPnpmRunner({ + stdio: "inherit", + pnpmArgs: ["test:live", "--", ...files], + env: process.env, + }); + child.on("exit", (code, signal) => { + if (signal) { + process.kill(process.pid, signal); + return; + } + process.exit(code ?? 1); + }); + child.on("error", (error) => { + console.error(error); + process.exit(1); + }); +} diff --git a/test/scripts/docker-all-scheduler.test.ts b/test/scripts/docker-all-scheduler.test.ts index 28f0856f1ca..772205b053e 100644 --- a/test/scripts/docker-all-scheduler.test.ts +++ b/test/scripts/docker-all-scheduler.test.ts @@ -66,6 +66,36 @@ describe("scripts/test-docker-all scheduler", () => { ).toBe(false); }); + it("can co-schedule the split installer provider lanes", () => { + expect( + canStartSchedulerLane( + { + name: "install-e2e-anthropic", + resources: ["npm", "service"], + weight: 3, + }, + activePool({ + count: 1, + resources: { + docker: 3, + npm: 3, + service: 3, + }, + weight: 3, + }), + 10, + { + resourceLimits: { + docker: 10, + npm: 10, + service: 7, + }, + weightLimit: 10, + }, + ), + ).toBe(true); + }); + it("preserves the parallelism count cap", () => { expect( canStartSchedulerLane( diff --git a/test/scripts/docker-build-helper.test.ts b/test/scripts/docker-build-helper.test.ts index 221b6e79e22..b0518aea18c 100644 --- a/test/scripts/docker-build-helper.test.ts +++ b/test/scripts/docker-build-helper.test.ts @@ -84,7 +84,10 @@ describe("docker build helper", () => { const scenarios = readFileSync(DOCKER_E2E_SCENARIOS_PATH, "utf8"); expect(scenarios).toContain( - '"OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=both pnpm test:install:e2e"', + 
'"OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=openai OPENCLAW_INSTALL_E2E_IMAGE=openclaw-install-e2e-openai:local pnpm test:install:e2e"', + ); + expect(scenarios).toContain( + '"OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=anthropic OPENCLAW_INSTALL_E2E_IMAGE=openclaw-install-e2e-anthropic:local pnpm test:install:e2e"', ); }); diff --git a/test/scripts/docker-e2e-plan.test.ts b/test/scripts/docker-e2e-plan.test.ts index 8d82309f69d..5d863589931 100644 --- a/test/scripts/docker-e2e-plan.test.ts +++ b/test/scripts/docker-e2e-plan.test.ts @@ -41,7 +41,8 @@ describe("scripts/lib/docker-e2e-plan", () => { package: true, }); expect(plan.credentials).toEqual(["anthropic", "openai"]); - expect(plan.lanes.map((lane) => lane.name)).toContain("install-e2e"); + expect(plan.lanes.map((lane) => lane.name)).toContain("install-e2e-openai"); + expect(plan.lanes.map((lane) => lane.name)).toContain("install-e2e-anthropic"); expect(plan.lanes.map((lane) => lane.name)).toContain("mcp-channels"); expect(plan.lanes.map((lane) => lane.name)).toContain("bundled-channel-feishu"); expect(plan.lanes.map((lane) => lane.name)).toContain("bundled-channel-update-acpx"); @@ -166,6 +167,24 @@ describe("scripts/lib/docker-e2e-plan", () => { ]); }); + it("maps installer E2E to provider-specific package install lanes", () => { + const selectedLaneNames = parseLaneSelection("install-e2e"); + const plan = planFor({ selectedLaneNames }); + + expect(selectedLaneNames).toEqual(["install-e2e-openai", "install-e2e-anthropic"]); + expect(plan.lanes).toEqual([ + expect.objectContaining({ + command: expect.stringContaining("OPENCLAW_E2E_MODELS=openai"), + name: "install-e2e-openai", + }), + expect.objectContaining({ + command: expect.stringContaining("OPENCLAW_E2E_MODELS=anthropic"), + name: "install-e2e-anthropic", + }), + ]); + expect(plan.credentials).toEqual(["anthropic", "openai"]); + }); + it("maps bundled plugin install/uninstall to package-backed shards", () => { const selectedLaneNames = 
parseLaneSelection("bundled-plugin-install-uninstall"); const plan = planFor({ selectedLaneNames }); diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 042dd40f910..d3703bbc1f1 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -90,6 +90,19 @@ describe("package artifact reuse", () => { expect(workflow).not.toContain("cache-to: type=gha,mode=max,scope=docker-e2e"); }); + it("shards broad native live tests instead of one serial live-all job", () => { + const workflow = readFileSync(LIVE_E2E_WORKFLOW, "utf8"); + + expect(workflow).not.toContain("suite_id: live-all"); + expect(workflow).not.toContain("command: pnpm test:live\n"); + expect(workflow).toContain("suite_id: native-live-src-agents"); + expect(workflow).toContain("command: node scripts/test-live-shard.mjs native-live-src-agents"); + expect(workflow).toContain("suite_id: native-live-src-gateway"); + expect(workflow).toContain("suite_id: native-live-extensions-a-k"); + expect(workflow).toContain("suite_id: native-live-extensions-l-z"); + expect(workflow).toContain("if: matrix.needs_ffmpeg"); + }); + it("allows the Telegram lane to run from reusable package acceptance artifacts", () => { const workflow = readFileSync(NPM_TELEGRAM_WORKFLOW, "utf8"); diff --git a/test/scripts/test-live-shard.test.ts b/test/scripts/test-live-shard.test.ts new file mode 100644 index 00000000000..9e46b3985c6 --- /dev/null +++ b/test/scripts/test-live-shard.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from "vitest"; +import { + LIVE_TEST_SHARDS, + collectAllLiveTestFiles, + selectLiveShardFiles, +} from "../../scripts/test-live-shard.mjs"; + +describe("scripts/test-live-shard", () => { + it("partitions every native live test into exactly one release shard", () => { + const allFiles = collectAllLiveTestFiles(); + const selected = LIVE_TEST_SHARDS.flatMap((shard) => + 
selectLiveShardFiles(shard, allFiles).map((file) => ({ file, shard })), + ); + const selectedFiles = selected.map(({ file }) => file); + + expect(allFiles.length).toBeGreaterThan(0); + expect(selectedFiles.toSorted()).toEqual(allFiles); + expect(new Set(selectedFiles).size).toBe(selectedFiles.length); + }); + + it("keeps media-capable extension and test harness files in their own shards", () => { + const allFiles = collectAllLiveTestFiles(); + + expect(selectLiveShardFiles("native-live-test", allFiles)).toEqual( + expect.arrayContaining([ + "test/image-generation.infer-cli.live.test.ts", + "test/image-generation.runtime.live.test.ts", + ]), + ); + expect(selectLiveShardFiles("native-live-extensions-l-z", allFiles)).toEqual( + expect.arrayContaining([ + "extensions/music-generation-providers.live.test.ts", + "extensions/video-generation-providers.live.test.ts", + ]), + ); + }); + + it("rejects unknown shard names", () => { + expect(() => selectLiveShardFiles("native-live-missing")).toThrow(/Unknown live test shard/u); + }); +});