From c9ead1b928b890b7bd7c1f1476a10dd277f8f8b9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 11:10:27 +0100 Subject: [PATCH] test: annotate Docker test-state scenarios --- docs/reference/test.md | 2 +- .../agents-delete-shared-workspace-docker.sh | 19 +++++++---- .../e2e/npm-onboard-channel-agent-docker.sh | 10 +++++- scripts/lib/docker-e2e-plan.mjs | 4 ++- scripts/lib/docker-e2e-scenarios.mjs | 10 ++++-- test/scripts/docker-e2e-plan.test.ts | 33 +++++++++++++++++++ 6 files changed, 66 insertions(+), 12 deletions(-) diff --git a/docs/reference/test.md b/docs/reference/test.md index b7a068ce5c6..a66bb141c9b 100644 --- a/docs/reference/test.md +++ b/docs/reference/test.md @@ -35,7 +35,7 @@ title: "Tests" - Gateway integration: opt-in via `OPENCLAW_TEST_INCLUDE_GATEWAY=1 pnpm test` or `pnpm test:gateway`. - `pnpm test:e2e`: Runs gateway end-to-end smoke tests (multi-instance WS/HTTP/node pairing). Defaults to `threads` + `isolate: false` with adaptive workers in `vitest.e2e.config.ts`; tune with `OPENCLAW_E2E_WORKERS=` and set `OPENCLAW_E2E_VERBOSE=1` for verbose logs. - `pnpm test:live`: Runs provider live tests (minimax/zai). Requires API keys and `LIVE=1` (or provider-specific `*_LIVE_TEST=1`) to unskip. -- `pnpm test:docker:all`: Builds the shared live-test image, packs OpenClaw once as an npm tarball, builds/reuses a bare Node/Git runner image plus a functional image that installs that tarball into `/app`, then runs Docker smoke lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1` through a weighted scheduler. The bare image (`OPENCLAW_DOCKER_E2E_BARE_IMAGE`) is used for installer/update/plugin-dependency lanes; those lanes mount the prebuilt tarball instead of using copied repo sources. The functional image (`OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`) is used for normal built-app functionality lanes. 
`scripts/package-openclaw-for-docker.mjs` is the single local/CI package packer and validates the tarball plus `dist/postinstall-inventory.json` before Docker consumes it. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. `node scripts/test-docker-all.mjs --plan-json` emits the scheduler-owned CI plan for selected lanes, image kinds, package/live-image needs, and credential checks without building or running Docker. `OPENCLAW_DOCKER_ALL_PARALLELISM=` controls process slots and defaults to 10; `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM=` controls the provider-sensitive tail pool and defaults to 10. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; provider caps default to one heavy lane per provider via `OPENCLAW_DOCKER_ALL_LIVE_CLAUDE_LIMIT=4`, `OPENCLAW_DOCKER_ALL_LIVE_CODEX_LIMIT=4`, and `OPENCLAW_DOCKER_ALL_LIVE_GEMINI_LIMIT=4`. Use `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` for larger hosts. If one lane exceeds the effective weight or resource cap on a low-parallelism host, it can still start from an empty pool and will run alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=`. The runner preflights Docker by default, cleans stale OpenClaw E2E containers, emits active-lane status every 30 seconds, shares provider CLI tool caches between compatible lanes, retries transient live-provider failures once by default (`OPENCLAW_DOCKER_ALL_LIVE_RETRIES=`), and stores lane timings in `.artifacts/docker-tests/lane-timings.json` for longest-first ordering on later runs. 
Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the lane manifest without running Docker, `OPENCLAW_DOCKER_ALL_STATUS_INTERVAL_MS=` to tune status output, or `OPENCLAW_DOCKER_ALL_TIMINGS=0` to disable timing reuse. Use `OPENCLAW_DOCKER_ALL_LIVE_MODE=skip` for deterministic/local lanes only or `OPENCLAW_DOCKER_ALL_LIVE_MODE=only` for live-provider lanes only; package aliases are `pnpm test:docker:local:all` and `pnpm test:docker:live:all`. Live-only mode merges main and tail live lanes into one longest-first pool so provider buckets can pack Claude, Codex, and Gemini work together. The runner stops scheduling new pooled lanes after the first failure unless `OPENCLAW_DOCKER_ALL_FAIL_FAST=0` is set, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. CLI backend Docker setup commands have their own timeout via `OPENCLAW_LIVE_CLI_BACKEND_SETUP_TIMEOUT_SECONDS` (default 180). Per-lane logs, `summary.json`, `failures.json`, and phase timings are written under `.artifacts/docker-tests//`; use `pnpm test:docker:timings ` to inspect slow lanes and `pnpm test:docker:rerun ` to print cheap targeted rerun commands. +- `pnpm test:docker:all`: Builds the shared live-test image, packs OpenClaw once as an npm tarball, builds/reuses a bare Node/Git runner image plus a functional image that installs that tarball into `/app`, then runs Docker smoke lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1` through a weighted scheduler. The bare image (`OPENCLAW_DOCKER_E2E_BARE_IMAGE`) is used for installer/update/plugin-dependency lanes; those lanes mount the prebuilt tarball instead of using copied repo sources. The functional image (`OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`) is used for normal built-app functionality lanes. `scripts/package-openclaw-for-docker.mjs` is the single local/CI package packer and validates the tarball plus `dist/postinstall-inventory.json` before Docker consumes it. 
Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. `node scripts/test-docker-all.mjs --plan-json` emits the scheduler-owned CI plan for selected lanes, image kinds, package/live-image needs, state scenarios, and credential checks without building or running Docker. `OPENCLAW_DOCKER_ALL_PARALLELISM=` controls process slots and defaults to 10; `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM=` controls the provider-sensitive tail pool and defaults to 10. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; per-provider heavy-lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_CLAUDE_LIMIT=4`, `OPENCLAW_DOCKER_ALL_LIVE_CODEX_LIMIT=4`, and `OPENCLAW_DOCKER_ALL_LIVE_GEMINI_LIMIT=4`. Use `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` for larger hosts. If one lane exceeds the effective weight or resource cap on a low-parallelism host, it can still start from an empty pool and will run alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=`. The runner preflights Docker by default, cleans stale OpenClaw E2E containers, emits active-lane status every 30 seconds, shares provider CLI tool caches between compatible lanes, retries transient live-provider failures once by default (`OPENCLAW_DOCKER_ALL_LIVE_RETRIES=`), and stores lane timings in `.artifacts/docker-tests/lane-timings.json` for longest-first ordering on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the lane manifest without running Docker, `OPENCLAW_DOCKER_ALL_STATUS_INTERVAL_MS=` to tune status output, or `OPENCLAW_DOCKER_ALL_TIMINGS=0` to disable timing reuse. 
Use `OPENCLAW_DOCKER_ALL_LIVE_MODE=skip` for deterministic/local lanes only or `OPENCLAW_DOCKER_ALL_LIVE_MODE=only` for live-provider lanes only; package aliases are `pnpm test:docker:local:all` and `pnpm test:docker:live:all`. Live-only mode merges main and tail live lanes into one longest-first pool so provider buckets can pack Claude, Codex, and Gemini work together. The runner stops scheduling new pooled lanes after the first failure unless `OPENCLAW_DOCKER_ALL_FAIL_FAST=0` is set, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. CLI backend Docker setup commands have their own timeout via `OPENCLAW_LIVE_CLI_BACKEND_SETUP_TIMEOUT_SECONDS` (default 180). Per-lane logs, `summary.json`, `failures.json`, and phase timings are written under `.artifacts/docker-tests//`; use `pnpm test:docker:timings ` to inspect slow lanes and `pnpm test:docker:rerun ` to print cheap targeted rerun commands. - `pnpm test:docker:browser-cdp-snapshot`: Builds a Chromium-backed source E2E container, starts raw CDP plus an isolated Gateway, runs `browser doctor --deep`, and verifies CDP role snapshots include link URLs, cursor-promoted clickables, iframe refs, and frame metadata. - CLI backend live Docker probes can be run as focused lanes, for example `pnpm test:docker:live-cli-backend:codex`, `pnpm test:docker:live-cli-backend:codex:resume`, or `pnpm test:docker:live-cli-backend:codex:mcp`. Claude and Gemini have matching `:resume` and `:mcp` aliases. - `pnpm test:docker:openwebui`: Starts Dockerized OpenClaw + Open WebUI, signs in through Open WebUI, checks `/api/models`, then runs a real proxied chat through `/api/chat/completions`. Requires a usable live model key (for example OpenAI in `~/.profile`), pulls an external Open WebUI image, and is not expected to be CI-stable like the normal unit/e2e suites. 
diff --git a/scripts/e2e/agents-delete-shared-workspace-docker.sh b/scripts/e2e/agents-delete-shared-workspace-docker.sh index 22b2dec33db..fb65ca58e8c 100644 --- a/scripts/e2e/agents-delete-shared-workspace-docker.sh +++ b/scripts/e2e/agents-delete-shared-workspace-docker.sh @@ -7,6 +7,13 @@ source "$ROOT_DIR/scripts/lib/docker-e2e-image.sh" IMAGE_NAME="$(docker_e2e_resolve_image "openclaw-agents-delete-shared-workspace-e2e:local" OPENCLAW_AGENTS_DELETE_SHARED_WORKSPACE_E2E_IMAGE)" SKIP_BUILD="${OPENCLAW_AGENTS_DELETE_SHARED_WORKSPACE_E2E_SKIP_BUILD:-0}" DOCKER_COMMAND_TIMEOUT="${OPENCLAW_AGENTS_DELETE_SHARED_WORKSPACE_DOCKER_COMMAND_TIMEOUT:-300s}" +OPENCLAW_TEST_STATE_SCRIPT_B64="$( + node "$ROOT_DIR/scripts/lib/openclaw-test-state.mjs" shell \ + --label agents-delete-shared-workspace \ + --scenario empty \ + | base64 \ + | tr -d '\n' +)" docker_cmd() { if command -v timeout >/dev/null 2>&1; then @@ -28,6 +35,7 @@ run_logged agents-delete-shared-workspace docker_cmd docker run --rm \ -e OPENCLAW_SKIP_BROWSER_CONTROL_SERVER=1 \ -e OPENCLAW_SKIP_ACPX_RUNTIME=1 \ -e OPENCLAW_SKIP_ACPX_RUNTIME_PROBE=1 \ + -e "OPENCLAW_TEST_STATE_SCRIPT_B64=$OPENCLAW_TEST_STATE_SCRIPT_B64" \ "$IMAGE_NAME" \ -lc ' set -euo pipefail @@ -45,13 +53,10 @@ run_openclaw() { exit 1 } -home_dir="$(mktemp -d /tmp/openclaw-agents-delete-e2e-home.XXXXXX)" -export HOME="$home_dir" -export OPENCLAW_HOME="$home_dir" -export OPENCLAW_STATE_DIR="$home_dir/.openclaw" -export SHARED_WORKSPACE="$home_dir/workspace-shared" -output_file="$home_dir/delete.json" -trap '\''rm -rf "$home_dir"'\'' EXIT +eval "$(printf "%s" "${OPENCLAW_TEST_STATE_SCRIPT_B64:?missing OPENCLAW_TEST_STATE_SCRIPT_B64}" | base64 -d)" +export SHARED_WORKSPACE="$HOME/workspace-shared" +output_file="$HOME/delete.json" +trap '\''rm -rf "$HOME"'\'' EXIT mkdir -p "$OPENCLAW_STATE_DIR" "$SHARED_WORKSPACE" node --input-type=module - <<'\''NODE'\'' diff --git a/scripts/e2e/npm-onboard-channel-agent-docker.sh 
b/scripts/e2e/npm-onboard-channel-agent-docker.sh index c8f35e5f233..7205d3fa0e6 100644 --- a/scripts/e2e/npm-onboard-channel-agent-docker.sh +++ b/scripts/e2e/npm-onboard-channel-agent-docker.sh @@ -40,17 +40,25 @@ prepare_package_tgz docker_e2e_package_mount_args "$PACKAGE_TGZ" docker_e2e_harness_mount_args run_log="$(docker_e2e_run_log npm-onboard-channel-agent)" +OPENCLAW_TEST_STATE_SCRIPT_B64="$( + node "$ROOT_DIR/scripts/lib/openclaw-test-state.mjs" shell \ + --label npm-onboard-channel-agent \ + --scenario empty \ + | base64 \ + | tr -d '\n' +)" echo "Running npm tarball onboard/channel/agent Docker E2E ($CHANNEL)..." if ! docker run --rm \ -e COREPACK_ENABLE_DOWNLOAD_PROMPT=0 \ -e OPENCLAW_NPM_ONBOARD_CHANNEL="$CHANNEL" \ + -e "OPENCLAW_TEST_STATE_SCRIPT_B64=$OPENCLAW_TEST_STATE_SCRIPT_B64" \ "${DOCKER_E2E_PACKAGE_ARGS[@]}" \ "${DOCKER_E2E_HARNESS_ARGS[@]}" \ -i "$IMAGE_NAME" bash -s >"$run_log" 2>&1 <<'EOF' set -euo pipefail -export HOME="$(mktemp -d "/tmp/openclaw-npm-onboard.XXXXXX")" +eval "$(printf "%s" "${OPENCLAW_TEST_STATE_SCRIPT_B64:?missing OPENCLAW_TEST_STATE_SCRIPT_B64}" | base64 -d)" export NPM_CONFIG_PREFIX="$HOME/.npm-global" export PATH="$NPM_CONFIG_PREFIX/bin:$PATH" export OPENAI_API_KEY="sk-openclaw-npm-onboard-e2e" diff --git a/scripts/lib/docker-e2e-plan.mjs b/scripts/lib/docker-e2e-plan.mjs index 07d1eefc727..6f4c5215f54 100644 --- a/scripts/lib/docker-e2e-plan.mjs +++ b/scripts/lib/docker-e2e-plan.mjs @@ -128,7 +128,8 @@ export function laneSummary(poolLane) { const retries = poolLane.retries > 0 ? ` retries=${poolLane.retries}` : ""; const cache = poolLane.cacheKey ? ` cache=${poolLane.cacheKey}` : ""; const image = poolLane.e2eImageKind ? ` image=${poolLane.e2eImageKind}` : ""; - return `${poolLane.name}(w=${laneWeight(poolLane)} r=${resources}${timeout}${noOutputTimeout}${retries}${cache}${image})`; + const state = poolLane.stateScenario ? 
` state=${poolLane.stateScenario}` : ""; + return `${poolLane.name}(w=${laneWeight(poolLane)} r=${resources}${timeout}${noOutputTimeout}${retries}${cache}${image}${state})`; } export function lanesNeedE2eImageKind(poolLanes, kind) { @@ -184,6 +185,7 @@ export function buildPlanJson(params) { name: poolLane.name, noOutputTimeoutMs: poolLane.noOutputTimeoutMs, resources: laneResources(poolLane), + stateScenario: poolLane.stateScenario, timeoutMs: poolLane.timeoutMs, weight: laneWeight(poolLane), })), diff --git a/scripts/lib/docker-e2e-scenarios.mjs b/scripts/lib/docker-e2e-scenarios.mjs index 8debc47b078..10b6d38cd34 100644 --- a/scripts/lib/docker-e2e-scenarios.mjs +++ b/scripts/lib/docker-e2e-scenarios.mjs @@ -43,6 +43,7 @@ function lane(name, command, options = {}) { retryPatterns: options.retryPatterns ?? [], retries: options.retries ?? 0, resources: options.resources ?? [], + stateScenario: options.stateScenario, timeoutMs: options.timeoutMs, weight: options.weight ?? 1, }; @@ -225,17 +226,19 @@ export const mainLanes = [ weight: 5, }), serviceLane("onboard", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:onboard", { + stateScenario: "empty", weight: 2, }), npmLane( "npm-onboard-channel-agent", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:npm-onboard-channel-agent", - { resources: ["service"], weight: 3 }, + { resources: ["service"], stateScenario: "empty", weight: 3 }, ), serviceLane("gateway-network", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:gateway-network"), serviceLane( "agents-delete-shared-workspace", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:agents-delete-shared-workspace", + { stateScenario: "empty" }, ), serviceLane("mcp-channels", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:mcp-channels", { resources: ["npm"], @@ -256,6 +259,7 @@ export const mainLanes = [ "update-channel-switch", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-channel-switch", { + stateScenario: "update-stable", timeoutMs: 30 * 60 * 1000, weight: 3, }, @@ 
-477,7 +481,7 @@ const releasePathPackageUpdateCoreLanes = [ npmLane( "npm-onboard-channel-agent", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:npm-onboard-channel-agent", - { resources: ["service"], weight: 3 }, + { resources: ["service"], stateScenario: "empty", weight: 3 }, ), npmLane("doctor-switch", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:doctor-switch", { weight: 3, @@ -486,6 +490,7 @@ const releasePathPackageUpdateCoreLanes = [ "update-channel-switch", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-channel-switch", { + stateScenario: "update-stable", timeoutMs: 30 * 60 * 1000, weight: 3, }, @@ -496,6 +501,7 @@ const primaryReleasePathChunks = { core: [ lane("qr", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:qr"), serviceLane("onboard", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:onboard", { + stateScenario: "empty", weight: 2, }), serviceLane("gateway-network", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:gateway-network"), diff --git a/test/scripts/docker-e2e-plan.test.ts b/test/scripts/docker-e2e-plan.test.ts index 6cccb8aef0f..b6ae9cdcddd 100644 --- a/test/scripts/docker-e2e-plan.test.ts +++ b/test/scripts/docker-e2e-plan.test.ts @@ -154,6 +154,18 @@ describe("scripts/lib/docker-e2e-plan", () => { "doctor-switch", "update-channel-switch", ]); + expect(packageUpdateCore.lanes).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + name: "npm-onboard-channel-agent", + stateScenario: "empty", + }), + expect.objectContaining({ + name: "update-channel-switch", + stateScenario: "update-stable", + }), + ]), + ); expect(pluginsRuntimePlugins.lanes.map((lane) => lane.name)).toEqual(["plugins"]); expect(pluginsRuntimeServices.lanes.map((lane) => lane.name)).toEqual([ "cron-mcp-cleanup", @@ -299,6 +311,27 @@ describe("scripts/lib/docker-e2e-plan", () => { }); }); + it("surfaces Docker lane test-state scenarios in plan JSON", () => { + const plan = planFor({ + selectedLaneNames: ["onboard", "agents-delete-shared-workspace", 
"update-channel-switch"], + }); + + expect(plan.lanes).toEqual([ + expect.objectContaining({ + name: "onboard", + stateScenario: "empty", + }), + expect.objectContaining({ + name: "agents-delete-shared-workspace", + stateScenario: "empty", + }), + expect.objectContaining({ + name: "update-channel-switch", + stateScenario: "update-stable", + }), + ]); + }); + it("maps the legacy bundled channel deps lane to the split compat lane", () => { const selectedLaneNames = parseLaneSelection("bundled-channel-deps"); const plan = planFor({ selectedLaneNames });