From 5e9a96fafbaf57f075b0fae5b82f76b47afff797 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 06:29:03 +0100 Subject: [PATCH] ci(docker): reuse cached e2e images for reruns --- .agents/skills/openclaw-testing/SKILL.md | 21 ++- .../openclaw-live-and-e2e-checks-reusable.yml | 147 +++++++++++++++--- docs/ci.md | 2 +- docs/help/testing.md | 2 +- docs/reference/RELEASING.md | 10 +- scripts/docker-e2e-rerun.mjs | 72 ++++++++- scripts/docker-e2e.mjs | 29 +++- scripts/lib/docker-e2e-scenarios.mjs | 30 ++-- scripts/test-docker-all.mjs | 31 +++- test/scripts/docker-e2e-plan.test.ts | 16 ++ .../package-acceptance-workflow.test.ts | 22 ++- 11 files changed, 319 insertions(+), 63 deletions(-) diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md index 16d7c3f098d..34ac9101b86 100644 --- a/.agents/skills/openclaw-testing/SKILL.md +++ b/.agents/skills/openclaw-testing/SKILL.md @@ -270,16 +270,23 @@ Multiple lanes are allowed: docker_lanes: install-e2e bundled-channel-update-acpx ``` -That skips the three chunk matrix and runs one targeted Docker job against the -prepared GHCR images and a fresh OpenClaw npm tarball for the selected ref. -Reruns usually need that new tarball because the fix being tested changed the -package contents even if the SHA-tagged GHCR Docker image can be reused. +That skips the release chunk matrix and runs one targeted Docker job against the +prepared GHCR images and the selected package artifact. Rerun commands +generated inside GitHub artifacts include `package_artifact_run_id`, +`package_artifact_name`, `docker_e2e_bare_image`, and +`docker_e2e_functional_image` when available, so failed lanes can reuse the +exact tarball and prepared images from the failed run. When the fix changes +package contents, omit those reuse inputs so the workflow packs a new tarball. Live-only targeted reruns skip the E2E images and build only the live-test -image. 
Release-path normal mode remains max three Docker chunk jobs: +image. Release-path normal mode is split into these Docker chunks: - `core` +- `package-install` - `package-update` -- `plugins-integrations` +- `plugins` +- `bundled-channel-deps` +- `service-integrations` +- `openwebui` when OpenWebUI coverage is requested ## Package Acceptance @@ -340,7 +347,7 @@ Profiles: package/update coverage. - `product`: package profile plus broader product surfaces: MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI. -- `full`: Docker release-path chunks with OpenWebUI. +- `full`: split Docker release-path chunks with OpenWebUI. - `custom`: exact `docker_lanes` list for a focused rerun. Candidate sources: diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index 753dfe7d9fe..37634da459d 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -33,6 +33,21 @@ on: required: false default: "" type: string + package_artifact_run_id: + description: Prior run id containing package_artifact_name; blank uses this run or packs the selected ref + required: false + default: "" + type: string + docker_e2e_bare_image: + description: Existing bare Docker E2E image to reuse; blank derives from package SHA/ref + required: false + default: "" + type: string + docker_e2e_functional_image: + description: Existing functional Docker E2E image to reuse; blank derives from package SHA/ref + required: false + default: "" + type: string include_live_suites: description: Whether to run live-provider coverage required: false @@ -79,6 +94,21 @@ on: required: false default: "" type: string + package_artifact_run_id: + description: Prior run id containing package_artifact_name; blank uses this run or packs the selected ref + required: false + default: "" + type: string + docker_e2e_bare_image: + description: Existing bare 
Docker E2E image to reuse; blank derives from package SHA/ref + required: false + default: "" + type: string + docker_e2e_functional_image: + description: Existing functional Docker E2E image to reuse; blank derives from package SHA/ref + required: false + default: "" + type: string include_live_suites: description: Whether to run live-provider coverage required: false @@ -398,12 +428,21 @@ jobs: - chunk_id: core label: core timeout_minutes: 120 + - chunk_id: package-install + label: package/install + timeout_minutes: 180 - chunk_id: package-update label: package/update - timeout_minutes: 180 - - chunk_id: plugins-integrations - label: plugins/integrations - timeout_minutes: 180 + timeout_minutes: 90 + - chunk_id: plugins + label: plugins + timeout_minutes: 90 + - chunk_id: bundled-channel-deps + label: bundled/channel deps + timeout_minutes: 120 + - chunk_id: service-integrations + label: service integrations + timeout_minutes: 90 env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} @@ -452,6 +491,7 @@ jobs: OPENCLAW_DOCKER_E2E_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.image }} OPENCLAW_DOCKER_E2E_BARE_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.bare_image }} OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.functional_image }} + OPENCLAW_DOCKER_E2E_PACKAGE_ARTIFACT_NAME: ${{ inputs.package_artifact_name || 'docker-e2e-package' }} OPENCLAW_CURRENT_PACKAGE_TGZ: .artifacts/docker-e2e-package/openclaw-current.tgz OPENCLAW_SKIP_DOCKER_BUILD: "1" INCLUDE_OPENWEBUI: ${{ inputs.include_openwebui }} @@ -579,6 +619,7 @@ jobs: OPENCLAW_DOCKER_E2E_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.image }} OPENCLAW_DOCKER_E2E_BARE_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.bare_image }} OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.functional_image }} + OPENCLAW_DOCKER_E2E_PACKAGE_ARTIFACT_NAME: ${{ inputs.package_artifact_name || 
'docker-e2e-package' }} OPENCLAW_CURRENT_PACKAGE_TGZ: .artifacts/docker-e2e-package/openclaw-current.tgz OPENCLAW_SKIP_DOCKER_BUILD: "1" INCLUDE_OPENWEBUI: ${{ inputs.include_openwebui }} @@ -656,7 +697,8 @@ jobs: validate_docker_openwebui: needs: [validate_selected_ref, prepare_docker_e2e_image] - if: inputs.include_openwebui && !inputs.include_release_path_suites && inputs.docker_lanes == '' + if: inputs.include_openwebui && inputs.docker_lanes == '' + name: Docker E2E (openwebui) runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 75 env: @@ -664,6 +706,8 @@ jobs: OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} OPENCLAW_DOCKER_E2E_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.image }} OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.functional_image }} + OPENCLAW_DOCKER_E2E_PACKAGE_ARTIFACT_NAME: ${{ inputs.package_artifact_name || 'docker-e2e-package' }} + OPENCLAW_CURRENT_PACKAGE_TGZ: .artifacts/docker-e2e-package/openclaw-current.tgz OPENCLAW_SKIP_DOCKER_BUILD: "1" steps: - name: Checkout selected ref @@ -695,8 +739,50 @@ jobs: exit 1 } - - name: Run Open WebUI Docker E2E - run: pnpm test:docker:openwebui + - name: Plan and hydrate Open WebUI Docker E2E chunk + id: plan + uses: ./.github/actions/docker-e2e-plan + with: + mode: chunk + chunk: openwebui + include-openwebui: "true" + package-artifact-name: ${{ inputs.package_artifact_name || 'docker-e2e-package' }} + + - name: Run Open WebUI Docker E2E chunk + shell: bash + run: | + set -euo pipefail + export OPENCLAW_DOCKER_ALL_PROFILE=release-path + export OPENCLAW_DOCKER_ALL_CHUNK=openwebui + export OPENCLAW_DOCKER_ALL_BUILD=0 + export OPENCLAW_DOCKER_ALL_PREFLIGHT=0 + export OPENCLAW_DOCKER_ALL_FAIL_FAST=0 + export OPENCLAW_DOCKER_ALL_INCLUDE_OPENWEBUI=1 + export OPENCLAW_DOCKER_ALL_LOG_DIR=".artifacts/docker-tests/release-openwebui" + export OPENCLAW_DOCKER_ALL_TIMINGS_FILE=".artifacts/docker-tests/release-openwebui-timings.json" + export 
OPENCLAW_DOCKER_ALL_PNPM_COMMAND="$(command -v pnpm)" + + pnpm test:docker:all + + - name: Summarize Open WebUI Docker E2E chunk + if: always() + shell: bash + run: | + set -euo pipefail + summary=".artifacts/docker-tests/release-openwebui/summary.json" + if [[ ! -f "$summary" ]]; then + echo "Docker Open WebUI summary missing: \`$summary\`" >> "$GITHUB_STEP_SUMMARY" + exit 0 + fi + node scripts/docker-e2e.mjs summary "$summary" "Docker E2E chunk: openwebui" >> "$GITHUB_STEP_SUMMARY" + + - name: Upload Open WebUI Docker E2E artifacts + if: always() + uses: actions/upload-artifact@v7 + with: + name: docker-e2e-openwebui + path: .artifacts/docker-tests/ + if-no-files-found: ignore prepare_docker_e2e_image: needs: validate_selected_ref @@ -704,6 +790,7 @@ jobs: runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 90 permissions: + actions: read contents: read packages: write outputs: @@ -736,22 +823,31 @@ jobs: hydrate-artifacts: "false" - name: Setup Node environment - if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name == '' + if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name == '' && inputs.package_artifact_run_id == '' uses: ./.github/actions/setup-node-env with: node-version: ${{ env.NODE_VERSION }} pnpm-version: ${{ env.PNPM_VERSION }} install-bun: "true" - - name: Download provided OpenClaw Docker E2E package - if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name != '' + - name: Download current-run OpenClaw Docker E2E package + if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name != '' && inputs.package_artifact_run_id == '' uses: actions/download-artifact@v8 with: name: ${{ inputs.package_artifact_name }} path: .artifacts/docker-e2e-package + - name: Download previous-run OpenClaw Docker E2E package + if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_run_id != '' + uses: actions/download-artifact@v8 + with: + name: ${{ 
inputs.package_artifact_name || 'docker-e2e-package' }} + path: .artifacts/docker-e2e-package + run-id: ${{ inputs.package_artifact_run_id }} + github-token: ${{ github.token }} + - name: Pack OpenClaw package for Docker E2E - if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name == '' + if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name == '' && inputs.package_artifact_run_id == '' shell: bash run: | set -euo pipefail @@ -788,10 +884,10 @@ jobs: } >> "$GITHUB_STEP_SUMMARY" - name: Upload OpenClaw Docker E2E package - if: steps.plan.outputs.needs_package == '1' && inputs.package_artifact_name == '' + if: steps.plan.outputs.needs_package == '1' && (inputs.package_artifact_name == '' || inputs.package_artifact_run_id != '') uses: actions/upload-artifact@v7 with: - name: docker-e2e-package + name: ${{ inputs.package_artifact_name || 'docker-e2e-package' }} path: .artifacts/docker-e2e-package/openclaw-current.tgz if-no-files-found: error @@ -801,12 +897,14 @@ jobs: env: PACKAGE_TAG: ${{ steps.package.outputs.tag }} SELECTED_SHA: ${{ needs.validate_selected_ref.outputs.selected_sha }} + PROVIDED_BARE_IMAGE: ${{ inputs.docker_e2e_bare_image }} + PROVIDED_FUNCTIONAL_IMAGE: ${{ inputs.docker_e2e_functional_image }} run: | set -euo pipefail repository="${GITHUB_REPOSITORY,,}" image_tag="${PACKAGE_TAG:-$SELECTED_SHA}" - bare_image="ghcr.io/${repository}-docker-e2e-bare:${image_tag}" - functional_image="ghcr.io/${repository}-docker-e2e-functional:${image_tag}" + bare_image="${PROVIDED_BARE_IMAGE:-ghcr.io/${repository}-docker-e2e-bare:${image_tag}}" + functional_image="${PROVIDED_FUNCTIONAL_IMAGE:-ghcr.io/${repository}-docker-e2e-functional:${image_tag}}" image="$functional_image" echo "image=$image" >> "$GITHUB_OUTPUT" echo "bare_image=$bare_image" >> "$GITHUB_OUTPUT" @@ -826,6 +924,9 @@ jobs: id: image_exists if: steps.plan.outputs.needs_e2e_image == '1' shell: bash + env: + PROVIDED_BARE_IMAGE: ${{ inputs.docker_e2e_bare_image 
}} + PROVIDED_FUNCTIONAL_IMAGE: ${{ inputs.docker_e2e_functional_image }} run: | set -euo pipefail bare_exists=0 @@ -836,6 +937,9 @@ jobs: if docker manifest inspect "${{ steps.image.outputs.bare_image }}" >/dev/null 2>&1; then bare_exists=1 echo "Shared Docker E2E bare image already exists: ${{ steps.image.outputs.bare_image }}" + elif [[ -n "$PROVIDED_BARE_IMAGE" ]]; then + echo "Provided bare Docker E2E image does not exist: $PROVIDED_BARE_IMAGE" >&2 + exit 1 else needs_build=1 fi @@ -845,6 +949,9 @@ jobs: if docker manifest inspect "${{ steps.image.outputs.functional_image }}" >/dev/null 2>&1; then functional_exists=1 echo "Shared Docker E2E functional image already exists: ${{ steps.image.outputs.functional_image }}" + elif [[ -n "$PROVIDED_FUNCTIONAL_IMAGE" ]]; then + echo "Provided functional Docker E2E image does not exist: $PROVIDED_FUNCTIONAL_IMAGE" >&2 + exit 1 else needs_build=1 fi @@ -860,14 +967,12 @@ jobs: - name: Build and push bare Docker E2E image if: steps.plan.outputs.needs_bare_image == '1' && steps.image_exists.outputs.bare_exists != '1' - uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 + uses: useblacksmith/build-push-action@cbd1f60d194a98cb3be5523b15134501eaf0fbf3 # v2 with: context: . file: ./scripts/e2e/Dockerfile target: bare platforms: linux/amd64 - cache-from: type=gha,scope=docker-e2e-bare - cache-to: type=gha,mode=max,scope=docker-e2e-bare tags: ${{ steps.image.outputs.bare_image }} sbom: true provenance: mode=max @@ -875,7 +980,7 @@ jobs: - name: Build and push functional Docker E2E image if: steps.plan.outputs.needs_functional_image == '1' && steps.image_exists.outputs.functional_exists != '1' - uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 + uses: useblacksmith/build-push-action@cbd1f60d194a98cb3be5523b15134501eaf0fbf3 # v2 with: context: . 
file: ./scripts/e2e/Dockerfile @@ -883,10 +988,6 @@ jobs: build-contexts: | openclaw_package=.artifacts/docker-e2e-package platforms: linux/amd64 - cache-from: | - type=gha,scope=docker-e2e-bare - type=gha,scope=docker-e2e-functional - cache-to: type=gha,mode=max,scope=docker-e2e-functional tags: ${{ steps.image.outputs.functional_image }} sbom: true provenance: mode=max diff --git a/docs/ci.md b/docs/ci.md index e5659bafd54..d0dbde46fe7 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -248,7 +248,7 @@ act as if every scoped area changed. CI workflow edits validate the Node CI graph plus workflow linting, but do not force Windows, Android, or macOS native builds by themselves; those platform lanes stay scoped to platform source changes. CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract helper/test-routing edits use a fast Node-only manifest path: preflight, security, and a single `checks-fast-core` task. That path avoids build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the changed files are limited to the routing or helper surfaces that the fast task exercises directly. Windows Node checks are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes so they do not reserve a 16-vCPU Windows worker for coverage that is already exercised by the normal test shards. -The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. 
Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image, packs OpenClaw once as an npm tarball, and builds two shared `scripts/e2e/Dockerfile` images: a bare Node/Git runner for installer/update/plugin-dependency lanes and a functional image that installs the same tarball into `/app` for normal functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`, planner logic lives in `scripts/lib/docker-e2e-plan.mjs`, and the runner only executes the selected plan. 
The scheduler selects the image per lane with `OPENCLAW_DOCKER_E2E_BARE_IMAGE` and `OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`, then runs lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. A single lane heavier than the effective caps can still start from an empty pool, then runs alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=` runs exact scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. The reusable live/E2E workflow asks `scripts/test-docker-all.mjs --plan-json` which package, image kind, live image, lane, and credential coverage is required, then `scripts/docker-e2e.mjs` converts that plan into GitHub outputs and summaries. 
It either packs OpenClaw through `scripts/package-openclaw-for-docker.mjs` or downloads a caller-provided package artifact, validates the tarball inventory, builds and pushes package-digest-tagged bare/functional GHCR Docker E2E images when the plan needs package-installed lanes, and reuses those images when the same package digest has already been prepared. The `Package Acceptance` workflow is the high-level package gate: it resolves a candidate from npm, a trusted `package_ref`, an HTTPS tarball plus SHA-256, or a prior workflow artifact, then passes that single `package-under-test` artifact into the reusable Docker E2E workflow. It keeps `workflow_ref` separate from `package_ref` so current acceptance logic can validate older trusted commits without checking out old workflow code. Release checks run the `package` acceptance profile for the target ref; that profile covers package/update/plugin contracts and is the default GitHub-native replacement for most Parallels package/update coverage. The release-path Docker suite runs as at most three chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls only the image kind it needs and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-integrations`). Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared images instead of the three chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job and prepares or downloads the package artifact for that run; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. 
Use `pnpm test:docker:rerun <run-id>` to download Docker artifacts from a GitHub run and print combined/per-lane targeted rerun commands; use `pnpm test:docker:timings <summary.json>` for slow-lane and phase critical-path summaries. When Open WebUI is requested with the release-path suite, it runs inside the plugins/integrations chunk instead of reserving a fourth Docker worker; Open WebUI keeps a standalone job only for openwebui-only dispatches. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. +The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation.
The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image, packs OpenClaw once as an npm tarball, and builds two shared `scripts/e2e/Dockerfile` images: a bare Node/Git runner for installer/update/plugin-dependency lanes and a functional image that installs the same tarball into `/app` for normal functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`, planner logic lives in `scripts/lib/docker-e2e-plan.mjs`, and the runner only executes the selected plan. The scheduler selects the image per lane with `OPENCLAW_DOCKER_E2E_BARE_IMAGE` and `OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`, then runs lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. A single lane heavier than the effective caps can still start from an empty pool, then runs alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. 
It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=` runs exact scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. The reusable live/E2E workflow asks `scripts/test-docker-all.mjs --plan-json` which package, image kind, live image, lane, and credential coverage is required, then `scripts/docker-e2e.mjs` converts that plan into GitHub outputs and summaries. It either packs OpenClaw through `scripts/package-openclaw-for-docker.mjs`, downloads a current-run package artifact, or downloads a package artifact from `package_artifact_run_id`; validates the tarball inventory; builds and pushes package-digest-tagged bare/functional GHCR Docker E2E images through Blacksmith's Docker layer cache when the plan needs package-installed lanes; and reuses provided `docker_e2e_bare_image`/`docker_e2e_functional_image` inputs or existing package-digest images instead of rebuilding. The `Package Acceptance` workflow is the high-level package gate: it resolves a candidate from npm, a trusted `package_ref`, an HTTPS tarball plus SHA-256, or a prior workflow artifact, then passes that single `package-under-test` artifact into the reusable Docker E2E workflow. It keeps `workflow_ref` separate from `package_ref` so current acceptance logic can validate older trusted commits without checking out old workflow code. Release checks run the `package` acceptance profile for the target ref; that profile covers package/update/plugin contracts and is the default GitHub-native replacement for most Parallels package/update coverage. 
The release-path Docker suite runs chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls only the image kind it needs and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-install|package-update|plugins|bundled-channel-deps|service-integrations`). OpenWebUI runs as its own `openwebui` chunk when requested. Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, slow-lane tables, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared images instead of the chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job and prepares, downloads, or reuses the package artifact for that run; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. Generated per-lane GitHub rerun commands include `package_artifact_run_id`, `package_artifact_name`, and prepared image inputs when those values exist, so a failed lane can reuse the exact package and images from the failed run. Use `pnpm test:docker:rerun <run-id>` to download Docker artifacts from a GitHub run and print combined/per-lane targeted rerun commands; use `pnpm test:docker:timings <summary.json>` for slow-lane and phase critical-path summaries. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by `scripts/check-changed.mjs`.
That local check gate is stricter about architecture boundaries than the broad CI platform scope: core production changes run core prod and core test typecheck plus core lint/guards, core test-only changes run only core test typecheck plus core lint, extension production changes run extension prod and extension test typecheck plus extension lint, and extension test-only changes run extension test typecheck plus extension lint. Public Plugin SDK or plugin-contract changes expand to extension typecheck because extensions depend on those core contracts, but Vitest extension sweeps are explicit test work. Release metadata-only version bumps run targeted version/config/root-dependency checks. Unknown root/config changes fail safe to all check lanes. diff --git a/docs/help/testing.md b/docs/help/testing.md index 5110034dd5e..d2a60a29134 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -656,7 +656,7 @@ These Docker runners split into two buckets: `OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=90000`. Override those env vars when you explicitly want the larger exhaustive scan. - `test:docker:all` builds the live Docker image once via `test:docker:live-build`, packs OpenClaw once as an npm tarball through `scripts/package-openclaw-for-docker.mjs`, then builds/reuses two `scripts/e2e/Dockerfile` images. The bare image is only the Node/Git runner for install/update/plugin-dependency lanes; those lanes mount the prebuilt tarball. The functional image installs the same tarball into `/app` for built-app functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. The aggregate uses a weighted local scheduler: `OPENCLAW_DOCKER_ALL_PARALLELISM` controls process slots, while resource caps keep heavy live, npm-install, and multi-service lanes from all starting at once. 
If a single lane is heavier than the active caps, the scheduler can still start it when the pool is empty and then keeps it running alone until capacity is available again. Defaults are 10 slots, `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; tune `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` only when the Docker host has more headroom. The runner performs a Docker preflight by default, removes stale OpenClaw E2E containers, prints status every 30 seconds, stores successful lane timings in `.artifacts/docker-tests/lane-timings.json`, and uses those timings to start longer lanes first on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the weighted lane manifest without building or running Docker, or `node scripts/test-docker-all.mjs --plan-json` to print the CI plan for selected lanes, package/image needs, and credentials. -- `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. Release validation runs the `package` profile for the target ref with Telegram package QA enabled. 
+- `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the split release-path Docker chunks with OpenWebUI. Release validation runs the `package` profile for the target ref with Telegram package QA enabled. Targeted GitHub Docker rerun commands generated from artifacts include prior package artifact and prepared image inputs when available, so failed lanes can avoid rebuilding the package and images. - Container smoke runners: `test:docker:openwebui`, `test:docker:onboard`, `test:docker:npm-onboard-channel-agent`, `test:docker:update-channel-switch`, `test:docker:session-runtime-context`, `test:docker:agents-delete-shared-workspace`, `test:docker:gateway-network`, `test:docker:browser-cdp-snapshot`, `test:docker:mcp-channels`, `test:docker:pi-bundle-mcp-tools`, `test:docker:cron-mcp-cleanup`, `test:docker:plugins`, `test:docker:plugin-update`, and `test:docker:config-reload` boot one or more real containers and verify higher-level integration paths. 
The live-model Docker runners also bind-mount only the needed CLI auth homes (or all supported ones when the run is not narrowed), then copy them into the container home before the run so external-CLI OAuth can refresh tokens without mutating the host auth store: diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index e3b8fc869b7..a225d40d8da 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -319,9 +319,9 @@ Release Docker coverage includes: - full install smoke with the slow Bun global install smoke enabled - repository E2E lanes -- release-path Docker chunks: `core`, `package-update`, and - `plugins-integrations` -- OpenWebUI coverage inside the plugins/integrations chunk +- release-path Docker chunks: `core`, `package-install`, `package-update`, + `plugins`, `bundled-channel-deps`, and `service-integrations` +- OpenWebUI coverage as the `openwebui` Docker chunk when requested - live/E2E provider suites and Docker live model coverage when release checks include live suites @@ -329,7 +329,9 @@ Use Docker artifacts before rerunning. The release-path scheduler uploads `.artifacts/docker-tests/` with lane logs, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, and rerun commands. For focused recovery, use `docker_lanes=` on the reusable live/E2E workflow instead of -rerunning all release chunks. +rerunning all release chunks. Generated rerun commands include prior +`package_artifact_run_id` and prepared Docker image inputs when available, so a +failed lane can reuse the same tarball and GHCR images. ### QA Lab diff --git a/scripts/docker-e2e-rerun.mjs b/scripts/docker-e2e-rerun.mjs index 710cb8194a2..81a691e2871 100644 --- a/scripts/docker-e2e-rerun.mjs +++ b/scripts/docker-e2e-rerun.mjs @@ -1,8 +1,9 @@ #!/usr/bin/env node // Builds cheap rerun commands from a Docker E2E GitHub run or local summary. 
// For GitHub runs, the script downloads Docker E2E artifacts, reads -// summary/failures JSON, and prints targeted workflow commands that prepare a -// fresh OpenClaw tarball for the same ref before running only failed lanes. +// summary/failures JSON, and prints targeted workflow commands for failed +// lanes, reusing package artifacts and prepared GHCR images when artifacts +// expose them. import { spawnSync } from "node:child_process"; import fs from "node:fs"; import os from "node:os"; @@ -76,8 +77,44 @@ function shellQuote(value) { return `'${String(value).replaceAll("'", "'\\''")}'`; } -function ghWorkflowCommand(lanes, ref, workflow) { - return [ +function maybeGhcrImage(value) { + return typeof value === "string" && value.startsWith("ghcr.io/") ? value : ""; +} + +function reuseInputsFromJson(parsed) { + const packageArtifactRunId = parsed.github?.runId || ""; + if (!packageArtifactRunId) { + return {}; + } + return { + bareImage: maybeGhcrImage(parsed.images?.bare), + functionalImage: maybeGhcrImage(parsed.images?.functional), + packageArtifactName: + parsed.packageArtifactName || parsed.artifacts?.packageName || "docker-e2e-package", + packageArtifactRunId, + }; +} + +function sameReuseInputs(left, right) { + return ( + (left?.packageArtifactRunId || "") === (right?.packageArtifactRunId || "") && + (left?.packageArtifactName || "") === (right?.packageArtifactName || "") && + (left?.bareImage || "") === (right?.bareImage || "") && + (left?.functionalImage || "") === (right?.functionalImage || "") + ); +} + +function commonReuseInputs(entries) { + const inputs = entries.map((entry) => entry.reuseInputs).filter(Boolean); + if (inputs.length === 0) { + return {}; + } + const [first] = inputs; + return inputs.every((input) => sameReuseInputs(first, input)) ? 
first : {}; +} + +function ghWorkflowCommand(lanes, ref, workflow, reuseInputs = {}) { + const fields = [ "gh workflow run", shellQuote(workflow), "-f", @@ -94,7 +131,21 @@ function ghWorkflowCommand(lanes, ref, workflow) { "include_live_suites=false", "-f", "live_models_only=false", - ].join(" "); + ]; + if (reuseInputs.packageArtifactRunId) { + fields.push("-f", `package_artifact_run_id=${shellQuote(reuseInputs.packageArtifactRunId)}`); + fields.push( + "-f", + `package_artifact_name=${shellQuote(reuseInputs.packageArtifactName || "docker-e2e-package")}`, + ); + } + if (reuseInputs.bareImage) { + fields.push("-f", `docker_e2e_bare_image=${shellQuote(reuseInputs.bareImage)}`); + } + if (reuseInputs.functionalImage) { + fields.push("-f", `docker_e2e_functional_image=${shellQuote(reuseInputs.functionalImage)}`); + } + return fields.join(" "); } function detectRepo() { @@ -115,15 +166,18 @@ function findFiles(rootDir, basenames, out = []) { function failedLaneEntriesFromJson(file, ref, workflow) { const parsed = readJson(file); + const reuseInputs = reuseInputsFromJson(parsed); const source = path.basename(file); if (source === "failures.json" && Array.isArray(parsed.lanes)) { return parsed.lanes .filter((lane) => lane.name) .map((lane) => ({ - ghWorkflowCommand: lane.ghWorkflowCommand, + ghWorkflowCommand: + lane.ghWorkflowCommand || ghWorkflowCommand([lane.name], ref, workflow, reuseInputs), lane: lane.name, localRerunCommand: lane.rerunCommand, logFile: lane.logFile, + reuseInputs, source: file, status: lane.status, })); @@ -133,10 +187,11 @@ function failedLaneEntriesFromJson(file, ref, workflow) { return lanes .filter((lane) => lane.status !== 0 && lane.name) .map((lane) => ({ - ghWorkflowCommand: ghWorkflowCommand([lane.name], ref, workflow), + ghWorkflowCommand: ghWorkflowCommand([lane.name], ref, workflow, reuseInputs), lane: lane.name, localRerunCommand: lane.rerunCommand, logFile: lane.logFile, + reuseInputs, source: file, status: lane.status, })); @@ 
-201,7 +256,7 @@ function printEntries(entries, ref, workflow, run) { } console.log(`Ref: ${ref}`); console.log( - "Targeted GitHub reruns prepare a fresh OpenClaw npm tarball for that ref before lane execution.", + "Targeted GitHub reruns reuse package artifacts and prepared GHCR images when the downloaded artifacts expose them.", ); if (entries.length === 0) { console.log("No failed Docker E2E lanes found."); @@ -215,6 +270,7 @@ function printEntries(entries, ref, workflow, run) { entries.map((entry) => entry.lane), ref, workflow, + commonReuseInputs(entries), ), ); console.log(""); diff --git a/scripts/docker-e2e.mjs b/scripts/docker-e2e.mjs index 13ff391f1d6..9e3b8d18f52 100644 --- a/scripts/docker-e2e.mjs +++ b/scripts/docker-e2e.mjs @@ -40,8 +40,23 @@ function inlineCode(value) { return `\`${String(value ?? "").replaceAll("`", "\\`")}\``; } +function formatSeconds(value) { + const seconds = Number(value); + if (!Number.isFinite(seconds) || seconds < 0) { + return ""; + } + const rounded = Math.round(seconds); + const minutes = Math.floor(rounded / 60); + const rest = rounded % 60; + return minutes > 0 ? `${minutes}m ${rest}s` : `${rest}s`; +} + function summaryMarkdown(summary, title) { const lanes = Array.isArray(summary.lanes) ? summary.lanes : []; + const slowest = lanes + .filter((lane) => Number.isFinite(Number(lane.elapsedSeconds))) + .toSorted((a, b) => Number(b.elapsedSeconds) - Number(a.elapsedSeconds)) + .slice(0, 8); const lines = [ `### ${title}`, "", @@ -57,12 +72,22 @@ function summaryMarkdown(summary, title) { ); } + if (slowest.length > 0) { + lines.push("", "| Slowest lane | Duration | Status |", "| --- | ---: | --- |"); + for (const lane of slowest) { + const status = lane.status === 0 ? "pass" : `fail ${lane.status}`; + lines.push( + `| ${inlineCode(lane.name)} | ${markdownCell(formatSeconds(lane.elapsedSeconds))} | ${markdownCell(status)} |`, + ); + } + } + const phases = Array.isArray(summary.phases) ? 
summary.phases : []; if (phases.length > 0) { - lines.push("", "| Phase | Seconds | Status | Image kind |", "| --- | ---: | --- | --- |"); + lines.push("", "| Phase | Duration | Status | Image kind |", "| --- | ---: | --- | --- |"); for (const phase of phases) { lines.push( - `| ${inlineCode(phase.name)} | ${markdownCell(phase.elapsedSeconds)} | ${markdownCell(phase.status)} | ${markdownCell(phase.imageKind)} |`, + `| ${inlineCode(phase.name)} | ${markdownCell(formatSeconds(phase.elapsedSeconds))} | ${markdownCell(phase.status)} | ${markdownCell(phase.imageKind)} |`, ); } } diff --git a/scripts/lib/docker-e2e-scenarios.mjs b/scripts/lib/docker-e2e-scenarios.mjs index 61d77e3b52d..1607d449b56 100644 --- a/scripts/lib/docker-e2e-scenarios.mjs +++ b/scripts/lib/docker-e2e-scenarios.mjs @@ -356,7 +356,7 @@ const releasePathChunks = { weight: 3, }), ], - "package-update": [ + "package-install": [ npmLane( "install-e2e", "OPENCLAW_INSTALL_TAG=beta OPENCLAW_E2E_MODELS=both pnpm test:install:e2e", @@ -370,6 +370,8 @@ const releasePathChunks = { "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:npm-onboard-channel-agent", { resources: ["service"], weight: 3 }, ), + ], + "package-update": [ npmLane("doctor-switch", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:doctor-switch", { weight: 3, }), @@ -382,17 +384,21 @@ const releasePathChunks = { }, ), ], - "plugins-integrations": [ + plugins: [ lane("plugins", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:plugins", { resources: ["npm", "service"], weight: 6, }), npmLane("plugin-update", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:plugin-update"), + ], + "bundled-channel-deps": [ npmLane( "bundled-channel-deps", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:bundled-channel-deps", { resources: ["service"], weight: 3 }, ), + ], + "service-integrations": [ serviceLane( "cron-mcp-cleanup", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:cron-mcp-cleanup", @@ -407,6 +413,12 @@ const releasePathChunks = { { timeoutMs: 8 * 60 * 1000 
}, ), ], + openwebui: [ + serviceLane("openwebui", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:openwebui", { + timeoutMs: OPENWEBUI_TIMEOUT_MS, + weight: 5, + }), + ], }; export function releasePathChunkLanes(chunk, options = {}) { @@ -416,22 +428,16 @@ export function releasePathChunkLanes(chunk, options = {}) { `OPENCLAW_DOCKER_ALL_CHUNK must be one of: ${Object.keys(releasePathChunks).join(", ")}. Got: ${JSON.stringify(chunk)}`, ); } - if (chunk !== "plugins-integrations" || !options.includeOpenWebUI) { - return base; + if (chunk === "openwebui" && !options.includeOpenWebUI) { + return []; } - return [ - ...base, - serviceLane("openwebui", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:openwebui", { - timeoutMs: OPENWEBUI_TIMEOUT_MS, - weight: 5, - }), - ]; + return base; } export function allReleasePathLanes(options = {}) { return Object.keys(releasePathChunks).flatMap((chunk) => releasePathChunkLanes(chunk, { - includeOpenWebUI: chunk === "plugins-integrations" && options.includeOpenWebUI, + includeOpenWebUI: options.includeOpenWebUI, }), ); } diff --git a/scripts/test-docker-all.mjs b/scripts/test-docker-all.mjs index 3a6c7b6abac..7fe0130581c 100644 --- a/scripts/test-docker-all.mjs +++ b/scripts/test-docker-all.mjs @@ -194,7 +194,7 @@ function shellQuote(value) { } function githubWorkflowRerunCommand(laneNames, ref) { - return [ + const fields = [ "gh workflow run", shellQuote(process.env.OPENCLAW_DOCKER_E2E_WORKFLOW || DEFAULT_GITHUB_WORKFLOW), "-f", @@ -211,7 +211,29 @@ function githubWorkflowRerunCommand(laneNames, ref) { "include_live_suites=false", "-f", "live_models_only=false", - ].join(" "); + ]; + if (process.env.GITHUB_RUN_ID) { + fields.push("-f", `package_artifact_run_id=${shellQuote(process.env.GITHUB_RUN_ID)}`); + fields.push( + "-f", + `package_artifact_name=${shellQuote( + process.env.OPENCLAW_DOCKER_E2E_PACKAGE_ARTIFACT_NAME || "docker-e2e-package", + )}`, + ); + } + if (process.env.OPENCLAW_DOCKER_E2E_BARE_IMAGE) { + fields.push( + "-f", 
+ `docker_e2e_bare_image=${shellQuote(process.env.OPENCLAW_DOCKER_E2E_BARE_IMAGE)}`, + ); + } + if (process.env.OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE) { + fields.push( + "-f", + `docker_e2e_functional_image=${shellQuote(process.env.OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE)}`, + ); + } + return fields.join(" "); } function buildLaneRerunCommand(name, baseEnv) { @@ -301,6 +323,7 @@ async function writeRunSummary(logDir, summary) { const file = path.join(logDir, "summary.json"); const payload = { ...summary, + packageArtifactName: process.env.OPENCLAW_DOCKER_E2E_PACKAGE_ARTIFACT_NAME || undefined, finishedAt: new Date().toISOString(), github: { ref: process.env.GITHUB_REF_NAME || undefined, @@ -346,7 +369,9 @@ async function writeFailureIndex(logDir, summary) { : undefined, generatedAt: new Date().toISOString(), lanes, - note: "Targeted GitHub reruns prepare a fresh OpenClaw npm tarball for the selected ref before lane execution.", + note: "Targeted GitHub reruns reuse this run's package artifact and shared Docker images when the generated command includes package_artifact_run_id and docker_e2e_*_image inputs.", + images: summary.images, + packageArtifactName: process.env.OPENCLAW_DOCKER_E2E_PACKAGE_ARTIFACT_NAME || undefined, ref, runUrl: summary.github?.runUrl, status: summary.status, diff --git a/test/scripts/docker-e2e-plan.test.ts b/test/scripts/docker-e2e-plan.test.ts index a762809c445..723a27152d9 100644 --- a/test/scripts/docker-e2e-plan.test.ts +++ b/test/scripts/docker-e2e-plan.test.ts @@ -45,6 +45,22 @@ describe("scripts/lib/docker-e2e-plan", () => { expect(plan.lanes.map((lane) => lane.name)).not.toContain("openwebui"); }); + it("plans Open WebUI only when release-path coverage requests it", () => { + const withoutOpenWebUI = planFor({ + includeOpenWebUI: false, + planReleaseAll: true, + profile: RELEASE_PATH_PROFILE, + }); + const withOpenWebUI = planFor({ + includeOpenWebUI: true, + planReleaseAll: true, + profile: RELEASE_PATH_PROFILE, + }); + + 
expect(withoutOpenWebUI.lanes.map((lane) => lane.name)).not.toContain("openwebui"); + expect(withOpenWebUI.lanes.map((lane) => lane.name)).toContain("openwebui"); + }); + it("plans a live-only selected lane without package e2e images", () => { const plan = planFor({ selectedLaneNames: ["live-models"] }); diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 09a3c505dbc..6b3df86148a 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -60,9 +60,18 @@ describe("package artifact reuse", () => { const action = readFileSync(DOCKER_E2E_PLAN_ACTION, "utf8"); expect(workflow).toContain("package_artifact_name:"); - expect(workflow).toContain("Download provided OpenClaw Docker E2E package"); + expect(workflow).toContain("package_artifact_run_id:"); + expect(workflow).toContain("docker_e2e_bare_image:"); + expect(workflow).toContain("docker_e2e_functional_image:"); + expect(workflow).toContain("Download current-run OpenClaw Docker E2E package"); + expect(workflow).toContain("Download previous-run OpenClaw Docker E2E package"); expect(workflow).toContain("inputs.package_artifact_name != ''"); - expect(workflow).toContain('image_tag="${PACKAGE_TAG:-$SELECTED_SHA}"'); + expect(workflow).toContain( + 'bare_image="${PROVIDED_BARE_IMAGE:-ghcr.io/${repository}-docker-e2e-bare:${image_tag}}"', + ); + expect(workflow).toContain( + 'functional_image="${PROVIDED_FUNCTIONAL_IMAGE:-ghcr.io/${repository}-docker-e2e-functional:${image_tag}}"', + ); expect(workflow).toContain( "package-artifact-name: ${{ inputs.package_artifact_name || 'docker-e2e-package' }}", ); @@ -70,6 +79,15 @@ describe("package artifact reuse", () => { expect(action).toContain("name: ${{ inputs.package-artifact-name }}"); }); + it("uses Blacksmith Docker build caching for prepared E2E images", () => { + const workflow = readFileSync(LIVE_E2E_WORKFLOW, "utf8"); + + 
expect(workflow).toContain("uses: useblacksmith/setup-docker-builder@"); + expect(workflow).toContain("uses: useblacksmith/build-push-action@"); + expect(workflow).not.toContain("cache-from: type=gha,scope=docker-e2e"); + expect(workflow).not.toContain("cache-to: type=gha,mode=max,scope=docker-e2e"); + }); + it("allows the Telegram lane to run from reusable package acceptance artifacts", () => { const workflow = readFileSync(NPM_TELEGRAM_WORKFLOW, "utf8");