From 2500b5d4ec904cd336c75a0ca4324af8f668ba88 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 30 Apr 2026 23:17:52 -0700 Subject: [PATCH] test(e2e): expand published upgrade survivor baselines --- .../openclaw-live-and-e2e-checks-reusable.yml | 12 ++ .github/workflows/package-acceptance.yml | 48 +++++++ docs/ci.md | 2 +- docs/help/testing.md | 4 +- docs/reference/test.md | 2 +- scripts/lib/docker-e2e-plan.mjs | 125 +++++++++++++++--- .../resolve-upgrade-survivor-baselines.mjs | 115 ++++++++++++++++ scripts/test-docker-all.mjs | 8 ++ test/scripts/docker-e2e-plan.test.ts | 33 +++++ .../package-acceptance-workflow.test.ts | 17 +++ .../upgrade-survivor-baselines.test.ts | 66 +++++++++ 11 files changed, 410 insertions(+), 22 deletions(-) create mode 100644 scripts/resolve-upgrade-survivor-baselines.mjs create mode 100644 test/scripts/upgrade-survivor-baselines.test.ts diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index 9c08237f1e3..5de776f5b16 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -38,6 +38,11 @@ on: required: false default: openclaw@latest type: string + published_upgrade_survivor_baselines: + description: Optional exact baseline list for published-upgrade-survivor lane expansion + required: false + default: "" + type: string package_artifact_name: description: Existing workflow artifact containing openclaw-current.tgz; blank packs the selected ref required: false @@ -123,6 +128,11 @@ on: required: false default: openclaw@latest type: string + published_upgrade_survivor_baselines: + description: Optional exact baseline list for published-upgrade-survivor lane expansion + required: false + default: "" + type: string package_artifact_name: description: Existing workflow artifact containing openclaw-current.tgz; blank packs the selected ref required: false @@ -695,6 +705,7 @@ jobs: OPENCLAW_DOCKER_E2E_SELECTED_SHA: ${{ needs.validate_selected_ref.outputs.selected_sha }} OPENCLAW_CURRENT_PACKAGE_TGZ: .artifacts/docker-e2e-package/openclaw-current.tgz OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC: ${{ inputs.published_upgrade_survivor_baseline }} + OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS: ${{ inputs.published_upgrade_survivor_baselines }} OPENCLAW_SKIP_DOCKER_BUILD: "1" INCLUDE_OPENWEBUI: ${{ inputs.include_openwebui }} DOCKER_E2E_CHUNK: ${{ matrix.chunk_id }} @@ -929,6 +940,7 @@ jobs: OPENCLAW_DOCKER_E2E_SELECTED_SHA: ${{ needs.validate_selected_ref.outputs.selected_sha }} OPENCLAW_CURRENT_PACKAGE_TGZ: .artifacts/docker-e2e-package/openclaw-current.tgz OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC: ${{ inputs.published_upgrade_survivor_baseline }} + OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS: ${{ inputs.published_upgrade_survivor_baselines }} OPENCLAW_SKIP_DOCKER_BUILD: "1" INCLUDE_OPENWEBUI: ${{ inputs.include_openwebui }} DOCKER_E2E_LANES: ${{ matrix.group.docker_lanes }} diff --git a/.github/workflows/package-acceptance.yml b/.github/workflows/package-acceptance.yml index 41ed7ac4b8d..cfea4839843 100644 --- a/.github/workflows/package-acceptance.yml +++ b/.github/workflows/package-acceptance.yml @@ -69,6 +69,11 @@ on: required: false default: openclaw@latest type: string + published_upgrade_survivor_baselines: + description: Optional baseline list for published-upgrade-survivor; use release-history for last 6 plus key legacy releases + required: false + default: "" + type: string telegram_mode: description: Optional Telegram QA lane for the resolved package candidate required: true @@ -139,6 +144,11 @@ on: required: false default: openclaw@latest type: string + published_upgrade_survivor_baselines: + description: Optional baseline list for published-upgrade-survivor; use release-history for last 6 plus key legacy releases + required: false + default: "" + type: string telegram_mode: description: Optional Telegram QA lane for the resolved package candidate required: false @@ -275,6 +285,7 @@ jobs: package_source_sha: ${{ steps.resolve.outputs.package_source_sha }} package_sha256: ${{ steps.resolve.outputs.sha256 }} package_version: ${{ steps.resolve.outputs.package_version }} + published_upgrade_survivor_baselines: ${{ steps.upgrade_survivor_baselines.outputs.baselines }} telegram_enabled: ${{ steps.profile.outputs.telegram_enabled }} telegram_mode: ${{ steps.profile.outputs.telegram_mode }} steps: @@ -405,6 +416,40 @@ jobs: echo "package_artifact_name=${PACKAGE_ARTIFACT_NAME}" } >> "$GITHUB_OUTPUT" + - name: Resolve published upgrade survivor baselines + id: upgrade_survivor_baselines + env: + FALLBACK_BASELINE: ${{ inputs.published_upgrade_survivor_baseline }} + REQUESTED_BASELINES: ${{ inputs.published_upgrade_survivor_baselines }} + GH_TOKEN: ${{ github.token }} + shell: bash + run: | + set -euo pipefail + if [[ -z "${REQUESTED_BASELINES// }" ]]; then + echo "baselines=" >> "$GITHUB_OUTPUT" + exit 0 + fi + releases_json="" + if [[ "$REQUESTED_BASELINES" == *"release-history"* ]]; then + releases_json=".artifacts/package-candidate-input/openclaw-releases.json" + mkdir -p "$(dirname "$releases_json")" + gh release list --repo "$GITHUB_REPOSITORY" --limit 100 --json tagName,publishedAt,isPrerelease > "$releases_json" + fi + args=( + --requested "$REQUESTED_BASELINES" + --fallback "$FALLBACK_BASELINE" + --github-output "$GITHUB_OUTPUT" + ) + if [[ -n "$releases_json" ]]; then + args+=( + --releases-json "$releases_json" + --history-count 6 + --include-version 2026.4.23 + --pre-date 2026-03-15T00:00:00Z + ) + fi + node scripts/resolve-upgrade-survivor-baselines.mjs "${args[@]}" >/dev/null + - name: Upload package-under-test artifact uses: actions/upload-artifact@v7 with: @@ -424,6 +469,7 @@ jobs: SUITE_PROFILE: ${{ inputs.suite_profile }} WORKFLOW_REF: ${{ inputs.workflow_ref }} PUBLISHED_UPGRADE_SURVIVOR_BASELINE: ${{ inputs.published_upgrade_survivor_baseline }} + PUBLISHED_UPGRADE_SURVIVOR_BASELINES: ${{ steps.upgrade_survivor_baselines.outputs.baselines }} shell: bash run: | { @@ -438,6 +484,7 @@ jobs: echo "- SHA-256: \`${PACKAGE_SHA256}\`" echo "- Profile: \`${SUITE_PROFILE}\`" echo "- Published upgrade survivor baseline: \`${PUBLISHED_UPGRADE_SURVIVOR_BASELINE}\`" + echo "- Published upgrade survivor baselines: \`${PUBLISHED_UPGRADE_SURVIVOR_BASELINES}\`" } >> "$GITHUB_STEP_SUMMARY" docker_acceptance: @@ -451,6 +498,7 @@ jobs: include_openwebui: ${{ needs.resolve_package.outputs.include_openwebui == 'true' }} docker_lanes: ${{ needs.resolve_package.outputs.docker_lanes }} published_upgrade_survivor_baseline: ${{ inputs.published_upgrade_survivor_baseline }} + published_upgrade_survivor_baselines: ${{ needs.resolve_package.outputs.published_upgrade_survivor_baselines }} package_artifact_name: ${{ needs.resolve_package.outputs.package_artifact_name }} include_live_suites: ${{ needs.resolve_package.outputs.include_live_suites == 'true' }} live_models_only: false diff --git a/docs/ci.md b/docs/ci.md index 9981b1a75a2..d4a8e110ee7 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -188,7 +188,7 @@ Keep `workflow_ref` and `package_ref` separate. `workflow_ref` is the trusted wo The `package` profile uses offline plugin coverage so published-package validation is not gated on live ClawHub availability. The optional Telegram lane reuses the `package-under-test` artifact in `NPM Telegram Beta E2E`, with the published npm spec path kept for standalone dispatches. -Release checks call Package Acceptance with `source=ref`, `package_ref=`, `workflow_ref=`, `suite_profile=custom`, `docker_lanes='bundled-channel-deps-compat plugins-offline'`, and `telegram_mode=mock-openai`. Release-path Docker chunks cover the overlapping package/update/plugin lanes; Package Acceptance keeps the artifact-native bundled-channel compat, offline plugin, and Telegram proof against the same resolved package tarball. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Local runs can set `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` to an exact package such as `openclaw@2026.4.15`. The published lane configures the baseline with a baked `openclaw config set` command recipe, then records recipe steps in `summary.json`. Broader previous-version coverage should shard Package Acceptance across exact `published_upgrade_survivor_baseline` values. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4-mini`, so the install and gateway proof stays fast and deterministic. +Release checks call Package Acceptance with `source=ref`, `package_ref=`, `workflow_ref=`, `suite_profile=custom`, `docker_lanes='bundled-channel-deps-compat plugins-offline'`, and `telegram_mode=mock-openai`. Release-path Docker chunks cover the overlapping package/update/plugin lanes; Package Acceptance keeps the artifact-native bundled-channel compat, offline plugin, and Telegram proof against the same resolved package tarball. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Set `published_upgrade_survivor_baselines=release-history` to expand the lane across a deduped history matrix: the latest six stable releases, `2026.4.23`, and the latest stable release before `2026-03-15`. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, or keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`. The published lane configures the baseline with a baked `openclaw config set` command recipe, then records recipe steps in `summary.json`. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4-mini`, so the install and gateway proof stays fast and deterministic. ### Legacy compatibility windows diff --git a/docs/help/testing.md b/docs/help/testing.md index 037b24c1594..5b6f1afe042 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -600,7 +600,7 @@ These Docker runners split into two buckets: `OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=90000`. Override those env vars when you explicitly want the larger exhaustive scan. - `test:docker:all` builds the live Docker image once via `test:docker:live-build`, packs OpenClaw once as an npm tarball through `scripts/package-openclaw-for-docker.mjs`, then builds/reuses two `scripts/e2e/Dockerfile` images. The bare image is only the Node/Git runner for install/update/plugin-dependency lanes; those lanes mount the prebuilt tarball. The functional image installs the same tarball into `/app` for built-app functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. The aggregate uses a weighted local scheduler: `OPENCLAW_DOCKER_ALL_PARALLELISM` controls process slots, while resource caps keep heavy live, npm-install, and multi-service lanes from all starting at once. If a single lane is heavier than the active caps, the scheduler can still start it when the pool is empty and then keeps it running alone until capacity is available again. Defaults are 10 slots, `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; tune `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` only when the Docker host has more headroom. The runner performs a Docker preflight by default, removes stale OpenClaw E2E containers, prints status every 30 seconds, stores successful lane timings in `.artifacts/docker-tests/lane-timings.json`, and uses those timings to start longer lanes first on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the weighted lane manifest without building or running Docker, or `node scripts/test-docker-all.mjs --plan-json` to print the CI plan for selected lanes, package/image needs, and credentials. -- `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract plus the keyless upgrade-survivor fixture, the published-baseline upgrade survivor lane, and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. For `published-upgrade-survivor`, Package Acceptance always uses `package-under-test` as the candidate and `published_upgrade_survivor_baseline` as the published baseline, defaulting to `openclaw@latest`; shard broader coverage by dispatching multiple runs with exact baseline values. The published lane configures its baseline with a baked `openclaw config set` command recipe, then records recipe steps in the lane summary. Release validation runs a custom package delta (`bundled-channel-deps-compat plugins-offline`) plus Telegram package QA because the release-path Docker chunks already cover the overlapping package/update/plugin lanes. Targeted GitHub Docker rerun commands generated from artifacts include prior package artifact, prepared image inputs, and the published upgrade-survivor baseline when available, so failed lanes can avoid rebuilding the package and images. +- `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract plus the keyless upgrade-survivor fixture, the published-baseline upgrade survivor lane, and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. For `published-upgrade-survivor`, Package Acceptance always uses `package-under-test` as the candidate and `published_upgrade_survivor_baseline` as the fallback published baseline, defaulting to `openclaw@latest`; set `published_upgrade_survivor_baselines=release-history` to shard the lane across a deduped matrix of the latest six stable releases, `2026.4.23`, and the latest stable release before `2026-03-15`. The published lane configures its baseline with a baked `openclaw config set` command recipe, then records recipe steps in the lane summary. Release validation runs a custom package delta (`bundled-channel-deps-compat plugins-offline`) plus Telegram package QA because the release-path Docker chunks already cover the overlapping package/update/plugin lanes. Targeted GitHub Docker rerun commands generated from artifacts include prior package artifact, prepared image inputs, and the published upgrade-survivor baseline list when available, so failed lanes can avoid rebuilding the package and images. - Build and release checks run `scripts/check-cli-bootstrap-imports.mjs` after tsdown. The guard walks the static built graph from `dist/entry.js` and `dist/cli/run-main.js` and fails if pre-dispatch startup imports package dependencies such as Commander, prompt UI, undici, or logging before command dispatch; it also keeps the bundled gateway run chunk under budget and rejects static imports of known cold gateway paths. Packaged CLI smoke also covers root help, onboard help, doctor help, status, config schema, and a model-list command. - Package Acceptance legacy compatibility is capped at `2026.4.25` (`2026.4.25-beta.*` included). Through that cutoff, the harness tolerates only shipped-package metadata gaps: omitted private QA inventory entries, missing `gateway install --wrapper`, missing patch files in the tarball-derived git fixture, missing persisted `update.channel`, legacy plugin install-record locations, missing marketplace install-record persistence, and config metadata migration during `plugins update`. For packages after `2026.4.25`, those paths are strict failures. - Container smoke runners: `test:docker:openwebui`, `test:docker:onboard`, `test:docker:npm-onboard-channel-agent`, `test:docker:update-channel-switch`, `test:docker:upgrade-survivor`, `test:docker:published-upgrade-survivor`, `test:docker:session-runtime-context`, `test:docker:agents-delete-shared-workspace`, `test:docker:gateway-network`, `test:docker:browser-cdp-snapshot`, `test:docker:mcp-channels`, `test:docker:pi-bundle-mcp-tools`, `test:docker:cron-mcp-cleanup`, `test:docker:plugins`, `test:docker:plugin-update`, and `test:docker:config-reload` boot one or more real containers and verify higher-level integration paths. @@ -618,7 +618,7 @@ The live-model Docker runners also bind-mount only the needed CLI auth homes (or - Npm tarball onboarding/channel/agent smoke: `pnpm test:docker:npm-onboard-channel-agent` installs the packed OpenClaw tarball globally in Docker, configures OpenAI via env-ref onboarding plus Telegram by default, verifies doctor repairs activated plugin runtime deps, and runs one mocked OpenAI agent turn. Reuse a prebuilt tarball with `OPENCLAW_CURRENT_PACKAGE_TGZ=/path/to/openclaw-*.tgz`, skip the host rebuild with `OPENCLAW_NPM_ONBOARD_HOST_BUILD=0`, or switch channel with `OPENCLAW_NPM_ONBOARD_CHANNEL=discord`. - Update channel switch smoke: `pnpm test:docker:update-channel-switch` installs the packed OpenClaw tarball globally in Docker, switches from package `stable` to git `dev`, verifies the persisted channel and plugin post-update work, then switches back to package `stable` and checks update status. - Upgrade survivor smoke: `pnpm test:docker:upgrade-survivor` installs the packed OpenClaw tarball over a dirty old-user fixture with agents, channel config, plugin allowlists, stale plugin runtime-deps state, and existing workspace/session files. It runs package update plus non-interactive doctor without live provider or channel keys, then starts a loopback Gateway and checks config/state preservation plus startup/status budgets. -- Published upgrade survivor smoke: `pnpm test:docker:published-upgrade-survivor` installs `openclaw@latest` by default, seeds realistic existing-user files, configures that baseline with a baked command recipe, validates the resulting config, updates that published install to the candidate tarball, runs non-interactive doctor, writes `.artifacts/upgrade-survivor/summary.json`, then starts a loopback Gateway and checks configured intents, state preservation, startup, and status budgets. Override the baseline with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC`; Package Acceptance exposes the same value as `published_upgrade_survivor_baseline`. +- Published upgrade survivor smoke: `pnpm test:docker:published-upgrade-survivor` installs `openclaw@latest` by default, seeds realistic existing-user files, configures that baseline with a baked command recipe, validates the resulting config, updates that published install to the candidate tarball, runs non-interactive doctor, writes `.artifacts/upgrade-survivor/summary.json`, then starts a loopback Gateway and checks configured intents, state preservation, startup, and status budgets. Override one baseline with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC`, or ask the aggregate scheduler to expand exact baselines with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`; Package Acceptance exposes those as `published_upgrade_survivor_baseline` and `published_upgrade_survivor_baselines`. - Session runtime context smoke: `pnpm test:docker:session-runtime-context` verifies hidden runtime context transcript persistence plus doctor repair of affected duplicated prompt-rewrite branches. - Bun global install smoke: `bash scripts/e2e/bun-global-install-smoke.sh` packs the current tree, installs it with `bun install -g` in an isolated home, and verifies `openclaw infer image providers --json` returns bundled image providers instead of hanging. Reuse a prebuilt tarball with `OPENCLAW_BUN_GLOBAL_SMOKE_PACKAGE_TGZ=/path/to/openclaw-*.tgz`, skip the host build with `OPENCLAW_BUN_GLOBAL_SMOKE_HOST_BUILD=0`, or copy `dist/` from a built Docker image with `OPENCLAW_BUN_GLOBAL_SMOKE_DIST_IMAGE=openclaw-dockerfile-smoke:local`. - Installer Docker smoke: `bash scripts/test-install-sh-docker.sh` shares one npm cache across its root, update, and direct-npm containers. Update smoke defaults to npm `latest` as the stable baseline before upgrading to the candidate tarball. Override with `OPENCLAW_INSTALL_SMOKE_UPDATE_BASELINE=2026.4.22` locally, or with the Install Smoke workflow's `update_baseline_version` input on GitHub. Non-root installer checks keep an isolated npm cache so root-owned cache entries do not mask user-local install behavior. Set `OPENCLAW_INSTALL_SMOKE_NPM_CACHE_DIR=/path/to/cache` to reuse the root/update/direct-npm cache across local reruns. diff --git a/docs/reference/test.md b/docs/reference/test.md index 6e406448266..31a1f29254e 100644 --- a/docs/reference/test.md +++ b/docs/reference/test.md @@ -43,7 +43,7 @@ title: "Tests" - `pnpm test:docker:openwebui`: Starts Dockerized OpenClaw + Open WebUI, signs in through Open WebUI, checks `/api/models`, then runs a real proxied chat through `/api/chat/completions`. Requires a usable live model key (for example OpenAI in `~/.profile`), pulls an external Open WebUI image, and is not expected to be CI-stable like the normal unit/e2e suites. - `pnpm test:docker:mcp-channels`: Starts a seeded Gateway container and a second client container that spawns `openclaw mcp serve`, then verifies routed conversation discovery, transcript reads, attachment metadata, live event queue behavior, outbound send routing, and Claude-style channel + permission notifications over the real stdio bridge. The Claude notification assertion reads the raw stdio MCP frames directly so the smoke reflects what the bridge actually emits. - `pnpm test:docker:upgrade-survivor`: Installs the packed OpenClaw tarball over a dirty old-user fixture, runs package update plus non-interactive doctor without live provider or channel keys, then starts a loopback Gateway and checks that agents, channel config, plugin allowlists, workspace/session files, stale plugin runtime-deps state, startup, and RPC status survive. -- `pnpm test:docker:published-upgrade-survivor`: Installs `openclaw@latest` by default, seeds realistic existing-user files without live provider or channel keys, configures that baseline with a baked `openclaw config set` command recipe, updates that published install to the packed OpenClaw tarball, runs non-interactive doctor, writes `.artifacts/upgrade-survivor/summary.json`, then starts a loopback Gateway and checks that configured intents, workspace/session files, stale plugin config/runtime-deps state, startup, and RPC status survive or repair cleanly. Override the baseline with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC`; Package Acceptance exposes the same value as `published_upgrade_survivor_baseline`. +- `pnpm test:docker:published-upgrade-survivor`: Installs `openclaw@latest` by default, seeds realistic existing-user files without live provider or channel keys, configures that baseline with a baked `openclaw config set` command recipe, updates that published install to the packed OpenClaw tarball, runs non-interactive doctor, writes `.artifacts/upgrade-survivor/summary.json`, then starts a loopback Gateway and checks that configured intents, workspace/session files, stale plugin config/runtime-deps state, startup, and RPC status survive or repair cleanly. Override one baseline with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC`, or expand an exact matrix with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`; Package Acceptance exposes those as `published_upgrade_survivor_baseline` and `published_upgrade_survivor_baselines`. ## Local PR gate diff --git a/scripts/lib/docker-e2e-plan.mjs b/scripts/lib/docker-e2e-plan.mjs index 6f4c5215f54..92668da865e 100644 --- a/scripts/lib/docker-e2e-plan.mjs +++ b/scripts/lib/docker-e2e-plan.mjs @@ -57,6 +57,74 @@ export function parseLaneSelection(raw) { ]; } +function shellQuote(value) { + return `'${String(value).replaceAll("'", "'\\''")}'`; +} + +function sanitizeLaneNameSuffix(value) { + return ( + String(value) + .replace(/^openclaw@/u, "") + .replace(/[^A-Za-z0-9._-]+/g, "-") + .replace(/^-+|-+$/g, "") || "baseline" + ); +} + +export function normalizeUpgradeSurvivorBaselineSpec(raw) { + const value = String(raw ?? "").trim(); + if (!value) { + return undefined; + } + const spec = value.startsWith("openclaw@") ? value : `openclaw@${value}`; + if ( + !/^openclaw@(?:beta|latest|[0-9]{4}\.[0-9]+\.[0-9]+(?:-(?:[0-9]+|beta\.[0-9]+))?)$/u.test(spec) + ) { + throw new Error( + `invalid published upgrade survivor baseline: ${JSON.stringify( + value, + )}. Expected openclaw@latest, openclaw@beta, or openclaw@YYYY.M.D.`, + ); + } + return spec; +} + +export function parseUpgradeSurvivorBaselineSpecs(raw) { + if (!raw) { + return []; + } + return [ + ...new Set( + String(raw) + .split(/[,\s]+/u) + .map(normalizeUpgradeSurvivorBaselineSpec) + .filter(Boolean), + ), + ]; +} + +export function expandUpgradeSurvivorBaselineLanes(poolLanes, rawBaselineSpecs) { + const baselineSpecs = parseUpgradeSurvivorBaselineSpecs(rawBaselineSpecs); + if (baselineSpecs.length === 0) { + return poolLanes; + } + return poolLanes.flatMap((poolLane) => { + if (poolLane.name !== "published-upgrade-survivor") { + return [poolLane]; + } + return baselineSpecs.map((baselineSpec) => { + const suffix = sanitizeLaneNameSuffix(baselineSpec); + const name = `${poolLane.name}-${suffix}`; + return Object.assign({}, poolLane, { + cacheKey: poolLane.cacheKey ? `${poolLane.cacheKey}-${suffix}` : name, + command: `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC=${shellQuote( + baselineSpec, + )} ${poolLane.command}`, + name, + }); + }); + }); +} + export function dedupeLanes(poolLanes) { const byName = new Map(); for (const poolLane of poolLanes) { @@ -141,11 +209,12 @@ export function lanesNeedOpenClawPackage(poolLanes) { } export function findLaneByName(name) { - return dedupeLanes([ - ...allReleasePathLanes({ includeOpenWebUI: true }), - ...mainLanes, - ...tailLanes, - ]).find((poolLane) => poolLane.name === name); + return dedupeLanes( + expandUpgradeSurvivorBaselineLanes( + [...allReleasePathLanes({ includeOpenWebUI: true }), ...mainLanes, ...tailLanes], + process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS, + ), + ).find((poolLane) => poolLane.name === name); } export function laneCredentialRequirements(poolLane) { @@ -207,25 +276,45 @@ export function buildPlanJson(params) { export function resolveDockerE2ePlan(options) { const retriedMainLanes = applyLiveRetries(mainLanes, options.liveRetries); const retriedTailLanes = applyLiveRetries(tailLanes, options.liveRetries); + const upgradeSurvivorBaselines = options.upgradeSurvivorBaselines ?? ""; + const unexpandedSelectableLanes = dedupeLanes([ + ...allReleasePathLanes({ includeOpenWebUI: options.includeOpenWebUI }), + ...retriedMainLanes, + ...retriedTailLanes, + ]); + const selectableLanes = dedupeLanes( + expandUpgradeSurvivorBaselineLanes(unexpandedSelectableLanes, upgradeSurvivorBaselines), + ); const releaseLanes = options.selectedLaneNames.length === 0 && options.profile === RELEASE_PATH_PROFILE ? options.planReleaseAll - ? allReleasePathLanes({ includeOpenWebUI: options.includeOpenWebUI }) - : releasePathChunkLanes(options.releaseChunk, { - includeOpenWebUI: options.includeOpenWebUI, - }) + ? expandUpgradeSurvivorBaselineLanes( + allReleasePathLanes({ includeOpenWebUI: options.includeOpenWebUI }), + upgradeSurvivorBaselines, + ) + : expandUpgradeSurvivorBaselineLanes( + releasePathChunkLanes(options.releaseChunk, { + includeOpenWebUI: options.includeOpenWebUI, + }), + upgradeSurvivorBaselines, + ) : undefined; const selectedLanes = options.selectedLaneNames.length > 0 - ? selectNamedLanes( - dedupeLanes([ - ...allReleasePathLanes({ includeOpenWebUI: options.includeOpenWebUI }), - ...retriedMainLanes, - ...retriedTailLanes, - ]), - options.selectedLaneNames, - "OPENCLAW_DOCKER_ALL_LANES", - ) + ? options.selectedLaneNames.flatMap((selectedName) => { + const expandedLane = selectableLanes.find((poolLane) => poolLane.name === selectedName); + if (expandedLane) { + return [expandedLane]; + } + const unexpandedLane = unexpandedSelectableLanes.find( + (poolLane) => poolLane.name === selectedName, + ); + if (unexpandedLane) { + return expandUpgradeSurvivorBaselineLanes([unexpandedLane], upgradeSurvivorBaselines); + } + selectNamedLanes(selectableLanes, [selectedName], "OPENCLAW_DOCKER_ALL_LANES"); + return []; + }) : undefined; const configuredLanes = selectedLanes ? selectedLanes diff --git a/scripts/resolve-upgrade-survivor-baselines.mjs b/scripts/resolve-upgrade-survivor-baselines.mjs new file mode 100644 index 00000000000..441a3f9500a --- /dev/null +++ b/scripts/resolve-upgrade-survivor-baselines.mjs @@ -0,0 +1,115 @@ +import { readFileSync, writeFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { normalizeUpgradeSurvivorBaselineSpec } from "./lib/docker-e2e-plan.mjs"; + +function parseArgs(argv) { + const args = new Map(); + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (!arg.startsWith("--")) { + throw new Error(`unexpected argument: ${arg}`); + } + const key = arg.slice(2); + const value = argv[index + 1]; + if (value === undefined || value.startsWith("--")) { + throw new Error(`missing value for --${key}`); + } + args.set(key, value); + index += 1; + } + return args; +} + +function splitSpecs(raw) { + return String(raw ?? "") + .split(/[,\s]+/u) + .map((token) => token.trim()) + .filter(Boolean); +} + +function dedupeSpecs(specs) { + return [...new Set(specs.map(normalizeUpgradeSurvivorBaselineSpec).filter(Boolean))]; +} + +function stableVersionFromTag(tagName) { + const version = String(tagName ?? "").replace(/^v/u, ""); + if (!/^[0-9]{4}\.[0-9]+\.[0-9]+(?:-[0-9]+)?$/u.test(version)) { + return undefined; + } + return version; +} + +function readStableReleases(file) { + const ansiEscape = new RegExp(`${String.fromCharCode(27)}\\[[0-?]*[ -/]*[@-~]`, "g"); + const raw = readFileSync(file, "utf8").replace(ansiEscape, ""); + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) { + throw new Error(`release list must be a JSON array: ${file}`); + } + return parsed + .filter((release) => !release.isPrerelease) + .map((release) => ({ + publishedAt: release.publishedAt, + version: stableVersionFromTag(release.tagName), + })) + .filter((release) => release.version && release.publishedAt) + .toSorted((a, b) => String(b.publishedAt).localeCompare(String(a.publishedAt))); +} + +export function resolveReleaseHistory(args) { + const releasesJson = args.get("releases-json"); + if (!releasesJson) { + throw new Error("--releases-json is required when requested baselines include release-history"); + } + const historyCount = Number.parseInt(args.get("history-count") ?? "6", 10); + if (!Number.isInteger(historyCount) || historyCount < 1) { + throw new Error("--history-count must be a positive integer"); + } + const includeVersion = args.get("include-version") ?? "2026.4.23"; + const preDate = args.get("pre-date") ?? "2026-03-15T00:00:00Z"; + const releases = readStableReleases(releasesJson); + const versions = releases.slice(0, historyCount).map((release) => release.version); + const exact = releases.find((release) => release.version === includeVersion); + if (exact) { + versions.push(exact.version); + } + const preDateRelease = releases.find( + (release) => new Date(release.publishedAt).getTime() < new Date(preDate).getTime(), + ); + if (preDateRelease) { + versions.push(preDateRelease.version); + } + return dedupeSpecs(versions); +} + +export function resolveBaselines(args) { + const requested = args.get("requested") ?? ""; + const fallback = args.get("fallback") ?? "openclaw@latest"; + const requestedTokens = splitSpecs(requested); + if (requestedTokens.length === 0) { + return dedupeSpecs([fallback]); + } + const exactTokens = []; + const resolved = []; + for (const token of requestedTokens) { + if (token === "release-history") { + resolved.push(...resolveReleaseHistory(args)); + } else { + exactTokens.push(token); + } + } + return dedupeSpecs([...exactTokens, ...resolved]); +} + +const isMain = process.argv[1] ? fileURLToPath(import.meta.url) === process.argv[1] : false; + +if (isMain) { + const args = parseArgs(process.argv.slice(2)); + const baselines = resolveBaselines(args).join(" "); + process.stdout.write(`${baselines}\n`); + + const githubOutput = args.get("github-output"); + if (githubOutput) { + writeFileSync(githubOutput, `baselines=${baselines}\n`, { flag: "a" }); + } +} diff --git a/scripts/test-docker-all.mjs b/scripts/test-docker-all.mjs index 0fc7e632fc9..b8ce6f5225b 100644 --- a/scripts/test-docker-all.mjs +++ b/scripts/test-docker-all.mjs @@ -228,6 +228,12 @@ function githubWorkflowRerunCommand(laneNames, ref) { `published_upgrade_survivor_baseline=${shellQuote(process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC)}`, ); } + if (process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS) { + fields.push( + "-f", + `published_upgrade_survivor_baselines=${shellQuote(process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS)}`, + ); + } if (process.env.OPENCLAW_DOCKER_E2E_BARE_IMAGE) { fields.push( "-f", @@ -257,6 +263,7 @@ function buildLaneRerunCommand(name, baseEnv) { ["OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE", baseEnv.OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE], ["OPENCLAW_CURRENT_PACKAGE_TGZ", baseEnv.OPENCLAW_CURRENT_PACKAGE_TGZ], ["OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC", baseEnv.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC], + ["OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS", baseEnv.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS], ]; if (baseEnv.OPENCLAW_DOCKER_ALL_PNPM_COMMAND) { env.push(["OPENCLAW_DOCKER_ALL_PNPM_COMMAND", baseEnv.OPENCLAW_DOCKER_ALL_PNPM_COMMAND]); @@ -1125,6 +1132,7 @@ async function main() { releaseChunk, selectedLaneNames, timingStore, + upgradeSurvivorBaselines: process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS, }); if (planJson) { diff --git a/test/scripts/docker-e2e-plan.test.ts b/test/scripts/docker-e2e-plan.test.ts index c39e2a5df3a..3917adcd81b 100644 --- a/test/scripts/docker-e2e-plan.test.ts +++ b/test/scripts/docker-e2e-plan.test.ts @@ -351,6 +351,39 @@ describe("scripts/lib/docker-e2e-plan", () => { ); }); + it("expands the published upgrade survivor lane across deduped baselines", () => { + const plan = planFor({ + selectedLaneNames: ["published-upgrade-survivor"], + upgradeSurvivorBaselines: + "openclaw@2026.4.29 2026.4.23 openclaw@2026.4.23 openclaw@2026.3.13-1", + }); + + expect(plan.lanes.map((lane) => lane.name)).toEqual([ + "published-upgrade-survivor-2026.4.29", + "published-upgrade-survivor-2026.4.23", + "published-upgrade-survivor-2026.3.13-1", + ]); + expect(plan.lanes).toEqual([ + expect.objectContaining({ + command: expect.stringContaining( + "OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC='openclaw@2026.4.29'", + ), + imageKind: "bare", + stateScenario: "upgrade-survivor", + }), + expect.objectContaining({ + command: expect.stringContaining( + "OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC='openclaw@2026.4.23'", + ), + }), + expect.objectContaining({ + command: expect.stringContaining( + "OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC='openclaw@2026.3.13-1'", + ), + }), + ]); + }); + it("plans a live-only selected lane without package e2e images", () => { const plan = planFor({ selectedLaneNames: ["live-models"] }); diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 93aaba4d67a..1b47769adfa 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -42,6 +42,11 @@ describe("package acceptance workflow", () => { expect(workflow).toContain("suite_profile:"); expect(workflow).toContain("published_upgrade_survivor_baseline:"); + expect(workflow).toContain("published_upgrade_survivor_baselines:"); + expect(workflow).toContain("scripts/resolve-upgrade-survivor-baselines.mjs"); + expect(workflow).toContain("--history-count 6"); + expect(workflow).toContain("--include-version 2026.4.23"); + expect(workflow).toContain("--pre-date 2026-03-15T00:00:00Z"); expect(workflow).toContain("npm-onboard-channel-agent gateway-network config-reload"); expect(workflow).toContain("npm-onboard-channel-agent doctor-switch"); expect(workflow).toContain("update-channel-switch upgrade-survivor"); @@ -68,7 +73,11 @@ describe("package acceptance workflow", () => { expect(workflow).toContain( "published_upgrade_survivor_baseline: ${{ inputs.published_upgrade_survivor_baseline }}", ); + expect(workflow).toContain( + "published_upgrade_survivor_baselines: ${{ needs.resolve_package.outputs.published_upgrade_survivor_baselines }}", + ); expect(workflow).toContain("Published upgrade survivor baseline:"); + expect(workflow).toContain("Published upgrade survivor baselines:"); }); }); @@ -82,12 +91,16 @@ describe("package artifact reuse", () => { expect(workflow).toContain("package_artifact_name:"); expect(workflow).toContain("package_artifact_run_id:"); expect(workflow).toContain("published_upgrade_survivor_baseline:"); + expect(workflow).toContain("published_upgrade_survivor_baselines:"); expect(workflow).toContain("docker_e2e_bare_image:"); expect(workflow).toContain("docker_e2e_functional_image:"); expect(workflow).toContain("OPENCLAW_DOCKER_E2E_SELECTED_SHA:"); expect(workflow).toContain( "OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC: ${{ inputs.published_upgrade_survivor_baseline }}", ); + expect(workflow).toContain( + "OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS: ${{ inputs.published_upgrade_survivor_baselines }}", + ); expect(workflow).toContain("Download current-run OpenClaw Docker E2E package"); expect(workflow).toContain("Download previous-run OpenClaw Docker E2E package"); expect(workflow).toContain("inputs.package_artifact_name != ''"); @@ -113,9 +126,13 @@ describe("package artifact reuse", () => { expect(scheduler).toContain( "published_upgrade_survivor_baseline=${shellQuote(process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC)}", ); + expect(scheduler).toContain( + "published_upgrade_survivor_baselines=${shellQuote(process.env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS)}", + ); expect(scheduler).toContain( '["OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC", baseEnv.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC]', ); + expect(scheduler).toContain('["OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS",'); expect(packageJson).toContain("OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE=1"); expect(publishedUpgradeSurvivor).toContain("validate_baseline_package_spec"); expect(publishedUpgradeSurvivor).toContain("openclaw@(beta|latest|"); diff --git a/test/scripts/upgrade-survivor-baselines.test.ts b/test/scripts/upgrade-survivor-baselines.test.ts new file mode 100644 index 00000000000..fd340b15ab7 --- /dev/null +++ b/test/scripts/upgrade-survivor-baselines.test.ts @@ -0,0 +1,66 @@ +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { resolveBaselines } from "../../scripts/resolve-upgrade-survivor-baselines.mjs"; + +function withReleaseFixture(releases: unknown[], fn: (file: string) => T): T { + const dir = mkdtempSync(path.join(tmpdir(), "openclaw-upgrade-baselines-")); + try { + const file = path.join(dir, "releases.json"); + writeFileSync(file, `${JSON.stringify(releases)}\n`); + return fn(file); + } finally { + rmSync(dir, { force: true, recursive: true }); + } +} + +describe("scripts/resolve-upgrade-survivor-baselines", () => { + it("keeps the single fallback baseline when no expanded request is provided", () => { + expect(resolveBaselines(new Map([["fallback", "2026.4.23"]]))).toEqual(["openclaw@2026.4.23"]); + }); + + it("resolves release-history to last six stable releases plus explicit legacy anchors", () => { + const releases = ( + [ + ["v2026.4.29", "2026-04-30T00:00:00Z"], + ["v2026.4.27", "2026-04-28T00:00:00Z"], + ["v2026.4.26", "2026-04-27T00:00:00Z"], + ["v2026.4.25", "2026-04-26T00:00:00Z"], + ["v2026.4.24", "2026-04-25T00:00:00Z"], + ["v2026.4.22", "2026-04-23T00:00:00Z"], + ["v2026.4.23", "2026-04-22T00:00:00Z"], + ["v2026.3.13-1", "2026-03-14T18:04:00Z"], + ["v2026.3.12", "2026-03-12T00:00:00Z"], + ["v2026.4.30-beta.1", "2026-05-01T00:00:00Z", true], + ] as const + ).map(([tagName, publishedAt, isPrerelease = false]) => ({ + isPrerelease, + publishedAt, + tagName, + })); + + withReleaseFixture(releases, (file) => { + expect( + resolveBaselines( + new Map([ + ["requested", "release-history 2026.4.29"], + ["releases-json", file], + ["history-count", "6"], + ["include-version", "2026.4.23"], + ["pre-date", "2026-03-15T00:00:00Z"], + ]), + ), + ).toEqual([ + "openclaw@2026.4.29", + "openclaw@2026.4.27", + "openclaw@2026.4.26", + "openclaw@2026.4.25", + "openclaw@2026.4.24", + "openclaw@2026.4.22", + "openclaw@2026.4.23", + "openclaw@2026.3.13-1", + ]); + }); + }); +});