From 206b5f78a28f83df82167e181342835cb21c0cec Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 1 May 2026 03:19:44 +0100 Subject: [PATCH] ci: tighten full release validation --- .github/workflows/full-release-validation.yml | 36 +++- .../openclaw-live-and-e2e-checks-reusable.yml | 89 ++++++++-- .github/workflows/openclaw-release-checks.yml | 2 +- .github/workflows/plugin-prerelease.yml | 1 + docs/ci.md | 20 ++- docs/docs.json | 8 +- docs/reference/RELEASING.md | 5 +- docs/reference/full-release-validation.md | 164 ++++++++++++++++++ .../package-acceptance-workflow.test.ts | 23 ++- .../plugin-prerelease-test-plan.test.ts | 3 +- 10 files changed, 316 insertions(+), 35 deletions(-) create mode 100644 docs/reference/full-release-validation.md diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml index c765ddb6522..b1128de43b9 100644 --- a/.github/workflows/full-release-validation.yml +++ b/.github/workflows/full-release-validation.yml @@ -29,7 +29,7 @@ on: release_profile: description: Release coverage profile for live/Docker/provider breadth required: false - default: full + default: stable type: choice options: - minimum @@ -88,7 +88,7 @@ permissions: concurrency: group: full-release-validation-${{ inputs.ref }}-${{ inputs.rerun_group }} - cancel-in-progress: false + cancel-in-progress: ${{ inputs.ref == 'main' && inputs.rerun_group == 'all' }} env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" @@ -222,6 +222,14 @@ jobs: echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" echo "run_id=${run_id}" >> "$GITHUB_OUTPUT" + cancel_child() { + if [[ -n "${run_id:-}" ]]; then + echo "Cancelling child workflow ${workflow}: ${run_id}" >&2 + gh run cancel "$run_id" >/dev/null 2>&1 || true + fi + } + trap cancel_child EXIT INT TERM + while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then @@ -307,6 +315,14 @@ jobs: 
echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" echo "run_id=${run_id}" >> "$GITHUB_OUTPUT" + cancel_child() { + if [[ -n "${run_id:-}" ]]; then + echo "Cancelling child workflow ${workflow}: ${run_id}" >&2 + gh run cancel "$run_id" >/dev/null 2>&1 || true + fi + } + trap cancel_child EXIT INT TERM + while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then @@ -397,6 +413,14 @@ jobs: echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" echo "run_id=${run_id}" >> "$GITHUB_OUTPUT" + cancel_child() { + if [[ -n "${run_id:-}" ]]; then + echo "Cancelling child workflow ${workflow}: ${run_id}" >&2 + gh run cancel "$run_id" >/dev/null 2>&1 || true + fi + } + trap cancel_child EXIT INT TERM + while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then @@ -501,6 +525,14 @@ jobs: echo "Dispatched npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" echo "run_id=${run_id}" >> "$GITHUB_OUTPUT" + cancel_child() { + if [[ -n "${run_id:-}" ]]; then + echo "Cancelling child workflow npm-telegram-beta-e2e.yml: ${run_id}" >&2 + gh run cancel "$run_id" >/dev/null 2>&1 || true + fi + } + trap cancel_child EXIT INT TERM + while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index 253d718c59e..774cc31d8fe 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -28,6 +28,11 @@ on: required: false default: "" type: string + targeted_docker_lane_group_size: + description: Number of targeted Docker lanes to batch into one runner job + required: false + default: 1 + type: 
number package_artifact_name: description: Existing workflow artifact containing openclaw-current.tgz; blank packs the selected ref required: false @@ -71,7 +76,7 @@ on: release_test_profile: description: Release coverage profile for live/Docker/provider breadth required: false - default: full + default: stable type: choice options: - minimum @@ -103,6 +108,11 @@ on: required: false default: "" type: string + targeted_docker_lane_group_size: + description: Number of targeted Docker lanes to batch into one runner job + required: false + default: 1 + type: number package_artifact_name: description: Existing workflow artifact containing openclaw-current.tgz; blank packs the selected ref required: false @@ -146,7 +156,7 @@ on: release_test_profile: description: Release coverage profile for live/Docker/provider breadth required: false - default: full + default: stable type: string secrets: OPENAI_API_KEY: @@ -374,6 +384,10 @@ jobs: add_profile_suite native-live-extensions-xai "full" add_profile_suite live-gateway-docker "minimum stable full" + add_profile_suite live-gateway-anthropic-docker "stable full" + add_profile_suite live-gateway-google-docker "stable full" + add_profile_suite live-gateway-minimax-docker "stable full" + add_profile_suite live-gateway-advisory-docker "full" add_profile_suite live-cli-backend-docker "stable full" add_profile_suite live-acp-bind-docker "stable full" add_profile_suite live-codex-harness-docker "stable full" @@ -815,16 +829,27 @@ jobs: shell: bash env: LANES: ${{ inputs.docker_lanes }} + GROUP_SIZE: ${{ inputs.targeted_docker_lane_group_size }} run: | set -euo pipefail groups_json="$( - LANES="$LANES" node <<'NODE' + LANES="$LANES" GROUP_SIZE="$GROUP_SIZE" node <<'NODE' const lanes = [...new Set(String(process.env.LANES || "").split(/[,\s]+/u).map((lane) => lane.trim()).filter(Boolean))]; if (lanes.length === 0) { throw new Error("docker_lanes is required when planning targeted Docker lane groups."); } + const rawGroupSize = 
Number.parseInt(process.env.GROUP_SIZE || "1", 10); + const groupSize = Number.isFinite(rawGroupSize) && rawGroupSize > 0 ? rawGroupSize : 1; const sanitize = (lane) => lane.replace(/[^A-Za-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "targeted"; - process.stdout.write(JSON.stringify(lanes.map((lane) => ({ label: sanitize(lane), docker_lanes: lane })))); + const groups = []; + for (let index = 0; index < lanes.length; index += groupSize) { + const groupLanes = lanes.slice(index, index + groupSize); + const first = sanitize(groupLanes[0]); + const last = sanitize(groupLanes[groupLanes.length - 1]); + const label = groupLanes.length === 1 ? first : `${first}--${last}`; + groups.push({ label, docker_lanes: groupLanes.join(" ") }); + } + process.stdout.write(JSON.stringify(groups)); NODE )" echo "groups_json=${groups_json}" >> "$GITHUB_OUTPUT" @@ -834,7 +859,7 @@ jobs: if: inputs.docker_lanes != '' name: Docker E2E targeted lanes (${{ matrix.group.label }}) runs-on: blacksmith-32vcpu-ubuntu-2404 - timeout-minutes: 180 + timeout-minutes: 90 strategy: fail-fast: false matrix: @@ -1468,7 +1493,7 @@ jobs: needs: [validate_selected_ref, prepare_live_test_image] if: inputs.include_live_suites && inputs.live_model_providers == '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models') runs-on: blacksmith-32vcpu-ubuntu-2404 - timeout-minutes: 75 + timeout-minutes: 45 strategy: fail-fast: false matrix: @@ -1536,6 +1561,8 @@ jobs: FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} OPENCLAW_LIVE_PROVIDERS: ${{ matrix.providers }} OPENCLAW_LIVE_IMAGE: ${{ needs.prepare_live_test_image.outputs.live_image }} + OPENCLAW_LIVE_MAX_MODELS: "6" + OPENCLAW_LIVE_MODEL_TIMEOUT_MS: "45000" OPENCLAW_SKIP_DOCKER_BUILD: "1" OPENCLAW_VITEST_MAX_WORKERS: "2" steps: @@ -1611,14 +1638,14 @@ jobs: - name: Run Docker live model sweep if: contains(matrix.profiles, inputs.release_test_profile) - run: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash 
.release-harness/scripts/test-live-models-docker.sh + run: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-models-docker.sh validate_live_models_docker_targeted: name: Docker live models (selected providers) needs: [validate_selected_ref, prepare_live_test_image] if: inputs.include_live_suites && inputs.live_model_providers != '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models') runs-on: blacksmith-32vcpu-ubuntu-2404 - timeout-minutes: 75 + timeout-minutes: 45 env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} @@ -1655,6 +1682,8 @@ jobs: FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} REQUESTED_LIVE_MODEL_PROVIDERS: ${{ inputs.live_model_providers }} OPENCLAW_LIVE_IMAGE: ${{ needs.prepare_live_test_image.outputs.live_image }} + OPENCLAW_LIVE_MAX_MODELS: "6" + OPENCLAW_LIVE_MODEL_TIMEOUT_MS: "45000" OPENCLAW_SKIP_DOCKER_BUILD: "1" OPENCLAW_VITEST_MAX_WORKERS: "2" steps: @@ -1785,7 +1814,7 @@ jobs: done - name: Run Docker live model sweep - run: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-models-docker.sh + run: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-models-docker.sh validate_live_provider_suites: needs: validate_selected_ref @@ -2099,27 +2128,51 @@ jobs: matrix: include: - suite_id: live-gateway-docker - label: Docker live gateway - command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-gateway-models-docker.sh - timeout_minutes: 120 + label: Docker live gateway OpenAI + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=openai OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground 
--kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh + timeout_minutes: 30 profile_env_only: false profiles: minimum stable full + - suite_id: live-gateway-anthropic-docker + label: Docker live gateway Anthropic + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh + timeout_minutes: 30 + profile_env_only: false + profiles: stable full + - suite_id: live-gateway-google-docker + label: Docker live gateway Google + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=google OPENCLAW_LIVE_GATEWAY_MODELS=google/gemini-3.1-pro-preview,google/gemini-3-flash-preview OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh + timeout_minutes: 30 + profile_env_only: false + profiles: stable full + - suite_id: live-gateway-minimax-docker + label: Docker live gateway MiniMax + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=minimax,minimax-portal OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh + timeout_minutes: 30 + profile_env_only: false + profiles: stable full + - suite_id: live-gateway-advisory-docker + label: Docker live gateway advisory providers + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=deepseek,fireworks,opencode-go,openrouter,xai,zai OPENCLAW_LIVE_GATEWAY_MAX_MODELS=6 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 
OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh + timeout_minutes: 40 + profile_env_only: false + profiles: full - suite_id: live-cli-backend-docker label: Docker live CLI backend - command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-cli-backend-docker.sh - timeout_minutes: 120 + command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 45m bash .release-harness/scripts/test-live-cli-backend-docker.sh + timeout_minutes: 50 profile_env_only: false profiles: stable full - suite_id: live-acp-bind-docker label: Docker live ACP bind - command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-acp-bind-docker.sh - timeout_minutes: 120 + command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 45m bash .release-harness/scripts/test-live-acp-bind-docker.sh + timeout_minutes: 50 profile_env_only: false profiles: stable full - suite_id: live-codex-harness-docker label: Docker live Codex harness - command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-codex-harness-docker.sh - timeout_minutes: 120 + command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-codex-harness-docker.sh + timeout_minutes: 40 profile_env_only: false profiles: stable full env: diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 12e325aa822..48cf8fd94a1 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -33,7 +33,7 @@ on: release_profile: description: Release coverage profile for live/Docker/provider breadth required: false - default: 
full + default: stable type: choice options: - minimum diff --git a/.github/workflows/plugin-prerelease.yml b/.github/workflows/plugin-prerelease.yml index 20dc8e503f8..952e741ed59 100644 --- a/.github/workflows/plugin-prerelease.yml +++ b/.github/workflows/plugin-prerelease.yml @@ -362,6 +362,7 @@ jobs: include_release_path_suites: false include_openwebui: false docker_lanes: ${{ needs.preflight.outputs.plugin_prerelease_docker_lanes }} + targeted_docker_lane_group_size: 4 include_live_suites: false live_models_only: false diff --git a/docs/ci.md b/docs/ci.md index 9dafe4a3f5e..0fec3e992b5 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -112,7 +112,13 @@ pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifac `Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the manual `CI` workflow with that target, dispatches `Plugin Prerelease` for release-only plugin/package/static/Docker proof, and dispatches `OpenClaw Release Checks` for install smoke, package acceptance, Docker release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram lanes. It can also run the post-publish `NPM Telegram Beta E2E` workflow when a published package spec is provided. -`release_profile` controls live/provider breadth passed into release checks: +See [Full release validation](/reference/full-release-validation) for the +stage matrix, exact workflow job names, profile differences, artifacts, and +focused rerun handles. + +`release_profile` controls live/provider breadth passed into release checks. The +manual release workflows default to `stable`; use `full` only when you +intentionally want the broad advisory provider/media matrix. - `minimum` keeps the fastest OpenAI/core release-critical lanes. - `stable` adds the stable provider/backend set. 
@@ -120,10 +126,16 @@ pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifac The umbrella records the dispatched child run ids, and the final `Verify full validation` job re-checks current child run conclusions and appends slowest-job tables for each child run. If a child workflow is rerun and turns green, rerun only the parent verifier job to refresh the umbrella result and timing summary. -For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. +For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `plugin-prerelease` for only the plugin prerelease child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. `OpenClaw Release Checks` uses the trusted workflow ref to resolve the selected ref once into a `release-package-under-test` tarball, then passes that artifact to both the live/E2E release-path Docker workflow and the package acceptance shard. That keeps the package bytes consistent across release boxes and avoids repacking the same candidate in multiple child jobs. +Duplicate `Full Release Validation` runs for `ref=main` and `rerun_group=all` +supersede the older umbrella. The parent monitor cancels any child workflow it +has already dispatched when the parent is cancelled, so newer main validation +does not sit behind a stale two-hour release-check run. 
Release branch/tag +validation and focused rerun groups keep `cancel-in-progress: false`. + ## Live and E2E shards The release live/E2E child keeps broad native `pnpm test:live` coverage, but it runs it as named shards through `scripts/test-live-shard.mjs` instead of one serial job: @@ -144,7 +156,7 @@ That keeps the same file coverage while making slow live provider failures easie The native live media shards run in `ghcr.io/openclaw/openclaw-live-media-runner:ubuntu-24.04`, built by the `Live Media Runner Image` workflow. That image preinstalls `ffmpeg` and `ffprobe`; media jobs only verify the binaries before setup. Keep Docker-backed live suites on normal Blacksmith runners — container jobs are the wrong place to launch nested Docker tests. -Docker-backed live model/backend shards use a separate shared `ghcr.io/openclaw/openclaw-live-test:<sha>` image per selected commit. The live release workflow builds and pushes that image once, then the Docker live model, gateway, CLI backend, ACP bind, and Codex harness shards run with `OPENCLAW_SKIP_DOCKER_BUILD=1`. If those shards rebuild the full source Docker target independently, the release run is misconfigured and will waste wall clock on duplicate image builds. +Docker-backed live model/backend shards use a separate shared `ghcr.io/openclaw/openclaw-live-test:<sha>` image per selected commit. The live release workflow builds and pushes that image once, then the Docker live model, provider-sharded gateway, CLI backend, ACP bind, and Codex harness shards run with `OPENCLAW_SKIP_DOCKER_BUILD=1`. Gateway Docker shards carry explicit script-level `timeout` caps below the workflow job timeout so a stuck container or cleanup path fails fast instead of consuming the whole release-check budget. If those shards rebuild the full source Docker target independently, the release run is misconfigured and will waste wall clock on duplicate image builds. 
## Package Acceptance @@ -295,7 +307,7 @@ The scheduled live/E2E workflow runs the full release-path Docker suite daily. ## Plugin Prerelease -`Plugin Prerelease` is more expensive product/package coverage, so it is a separate workflow dispatched by `Full Release Validation` or by an explicit operator. Normal pull requests, `main` pushes, and standalone manual CI dispatches keep that suite off. It balances bundled plugin tests across eight extension workers; those extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. +`Plugin Prerelease` is more expensive product/package coverage, so it is a separate workflow dispatched by `Full Release Validation` or by an explicit operator. Normal pull requests, `main` pushes, and standalone manual CI dispatches keep that suite off. It balances bundled plugin tests across eight extension workers; those extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The release-only Docker prerelease path batches targeted Docker lanes in small groups to avoid reserving dozens of runners for one-to-three-minute jobs. 
## QA Lab diff --git a/docs/docs.json b/docs/docs.json index 13c6c1995dc..abfbfd02896 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1708,7 +1708,13 @@ }, { "group": "Release and CI", - "pages": ["reference/RELEASING", "reference/test", "ci", "help/scripts"] + "pages": [ + "reference/RELEASING", + "reference/full-release-validation", + "reference/test", + "ci", + "help/scripts" + ] } ] }, diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index 4fba316f3c8..f7f89d03dbe 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -240,7 +240,7 @@ gh workflow run full-release-validation.yml \ -f ref=release/YYYY.M.D \ -f provider=openai \ -f mode=both \ - -f release_profile=full \ + -f release_profile=stable \ -f evidence_package_spec=openclaw@YYYY.M.D-beta.N ``` @@ -255,6 +255,9 @@ summary shows `normal_ci` and `release_checks` as successful, and any optional `npm_telegram` child is either successful or intentionally skipped. The final verifier summary includes slowest-job tables for each child run, so the release manager can see the current critical path without downloading logs. +See [Full release validation](/reference/full-release-validation) for the +complete stage matrix, exact workflow job names, stable versus full profile +differences, artifacts, and focused rerun handles. Child workflows are dispatched from the trusted ref that runs `Full Release Validation`, normally `--ref main`, even when the target `ref` points at an older release branch or tag. 
There is no separate Full Release Validation diff --git a/docs/reference/full-release-validation.md b/docs/reference/full-release-validation.md new file mode 100644 index 00000000000..b73cc36763d --- /dev/null +++ b/docs/reference/full-release-validation.md @@ -0,0 +1,164 @@ +--- +summary: "Full Release Validation stages, child workflows, release profiles, rerun handles, and evidence" +title: "Full release validation" +read_when: + - Running or rerunning Full Release Validation + - Comparing stable and full release validation profiles + - Debugging release validation stage failures +--- + +`Full Release Validation` is the release umbrella. It is the single manual +entrypoint for pre-release proof, but most work happens in child workflows so a +failed box can be rerun without restarting the whole release. + +Run it from a trusted workflow ref, normally `main`, and pass the release branch, +tag, or full commit SHA as `ref`: + +```bash +gh workflow run full-release-validation.yml \ + --ref main \ + -f ref=release/YYYY.M.D \ + -f provider=openai \ + -f mode=both \ + -f release_profile=stable +``` + +Child workflows use the trusted workflow ref for the harness and the input +`ref` for the candidate under test. That keeps new validation logic available +when validating an older release branch or tag. + +## Top-level stages + +| Stage | Workflow job name | Child workflow | What it proves | Rerun handle | +| --------------------- | --------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------- | +| Target resolution | `Resolve target ref` | none | Resolves the release branch, tag, or full commit SHA and records selected inputs. 
| Rerun the umbrella if this fails. | +| Vitest and normal CI | `Run normal full CI` | `CI` | Manual full CI graph against the target ref, including Linux Node lanes, bundled plugin shards, channel contracts, Node 22 compatibility, `check`, `check-additional`, build smoke, docs checks, Python skills, Windows, macOS, Control UI i18n, and Android via the umbrella. | `rerun_group=ci` | +| Plugin prerelease | `Run plugin prerelease validation` | `Plugin Prerelease` | Release-only plugin static checks, agentic plugin coverage, full extension batch shards, and plugin prerelease Docker lanes. | `rerun_group=plugin-prerelease` | +| Release checks | `Run release/live/Docker/QA validation` | `OpenClaw Release Checks` | Install smoke, cross-OS package checks, live/E2E suites, Docker release-path chunks, Package Acceptance, QA Lab parity, live Matrix, and live Telegram. | `rerun_group=release-checks` or a narrower release-checks handle | +| Post-publish Telegram | `Run post-publish Telegram E2E` | `NPM Telegram Beta E2E` | Optional published-package Telegram proof when `npm_telegram_package_spec` is set. | `rerun_group=npm-telegram` | +| Umbrella verifier | `Verify full validation` | none | Re-checks recorded child run conclusions and appends slowest-job tables from child workflows. | Rerun only this job after rerunning a failed child to green. | + +For `ref=main` and `rerun_group=all`, a newer umbrella supersedes an older one. +When the parent is cancelled, its monitor cancels any child workflow it already +dispatched. Release branch and tag validation runs do not cancel each other by +default. + +## Release checks stages + +`OpenClaw Release Checks` is the largest child workflow. It resolves the target +once and prepares a shared `release-package-under-test` artifact when package +or Docker-facing stages need it. 
+ +| Stage | Workflow job name | Backing workflow or jobs | What it tests | Rerun handle | +| ------------------- | ------------------------------------------------------- | --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------- | +| Release target | `Resolve target ref` | none | Validates the selected ref, optional expected SHA, profile, rerun group, and focused live suite filter. | Rerun `release-checks`. | +| Package artifact | `Prepare release package artifact` | none | Packs or resolves one candidate tarball and uploads `release-package-under-test` for downstream package-facing checks. | Rerun the affected package, cross-OS, or live/E2E group. | +| Install smoke | `Run install smoke` | `Install Smoke` | Full install path with root Dockerfile smoke image reuse, QR package install, root and gateway Docker smokes, installer Docker tests, Bun global install image-provider smoke, and fast bundled-plugin Docker E2E. | `rerun_group=install-smoke` | +| Cross-OS | `cross_os_release_checks` | `OpenClaw Cross-OS Release Checks (Reusable)` | Fresh and upgrade lanes on Linux, Windows, and macOS for the selected provider and mode, using the candidate tarball plus a baseline package. | `rerun_group=cross-os` | +| Repo and live E2E | `Run repo/live E2E validation` | `OpenClaw Live And E2E Checks (Reusable)` | Repository E2E, live cache, OpenAI websocket streaming, native live provider and plugin shards, and Docker-backed live model/backend/gateway harnesses selected by `release_profile`. 
| `rerun_group=live-e2e`, optionally with `live_suite_filter` | +| Docker release path | `Run Docker release-path validation` | `OpenClaw Live And E2E Checks (Reusable)` | Release-path Docker chunks against the shared package artifact. | `rerun_group=live-e2e` | +| Package Acceptance | `Run package acceptance` | `Package Acceptance` | Artifact-native bundled-channel dependency compatibility, offline plugin package fixtures, and mock-OpenAI Telegram package acceptance against the same tarball. | `rerun_group=package` | +| QA parity | `Run QA Lab parity lane` and `Run QA Lab parity report` | direct jobs | Candidate and baseline agentic parity packs, then the parity report. | `rerun_group=qa-parity` or `rerun_group=qa` | +| QA live Matrix | `Run QA Lab live Matrix lane` | direct job | Fast live Matrix QA profile in the `qa-live-shared` environment. | `rerun_group=qa-live` or `rerun_group=qa` | +| QA live Telegram | `Run QA Lab live Telegram lane` | direct job | Live Telegram QA with Convex CI credential leases. | `rerun_group=qa-live` or `rerun_group=qa` | +| Release verifier | `Verify release checks` | none | Verifies required release-check jobs for the selected rerun group. | Rerun after focused child jobs pass. | + +## Docker release-path chunks + +The Docker release-path stage runs these chunks when `live_suite_filter` is +empty: + +| Chunk | Coverage | +| ------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- | +| `core` | Core Docker release-path smoke lanes. | +| `package-update-openai` | OpenAI package install and update behavior. | +| `package-update-anthropic` | Anthropic package install and update behavior. | +| `package-update-core` | Provider-neutral package and update behavior. | +| `plugins-runtime-plugins` | Plugin runtime lanes that exercise plugin behavior. 
| +| `plugins-runtime-services` | Service-backed plugin runtime lanes; includes OpenWebUI when requested. | +| `plugins-runtime-install-a` through `plugins-runtime-install-h` | Plugin install/runtime batches split for parallel release validation. | +| `bundled-channels-core` | Bundled channel Docker behavior. | +| `bundled-channels-update-a`, `bundled-channels-update-discord`, `bundled-channels-update-b` | Bundled channel update behavior. | +| `bundled-channels-contracts` | Bundled channel contract checks in the Docker release path. | + +Use targeted `docker_lanes=` on the reusable live/E2E workflow when +only one Docker lane failed. The release artifacts include per-lane rerun +commands with package artifact and image reuse inputs when available. + +## Release profiles + +`release_profile` only controls live/provider breadth inside release checks. It +does not remove normal full CI, Plugin Prerelease, install smoke, package +acceptance, QA Lab, or Docker release-path chunks. + +| Profile | Intended use | Included live/provider coverage | +| --------- | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `minimum` | Fastest release-critical smoke. | OpenAI/core live path, Docker live models for OpenAI, native gateway core, native OpenAI gateway profile, native OpenAI plugin, and Docker live gateway OpenAI. | +| `stable` | Default release approval profile. | `minimum` plus Anthropic, Google, MiniMax, backend, native live test harness, Docker live CLI backend, Docker ACP bind, Docker Codex harness, and an OpenCode Go smoke shard. | +| `full` | Broad advisory sweep. | `stable` plus advisory providers, plugin live shards, and media live shards. 
| + +## Full-only additions + +These suites are skipped by `stable` and included by `full`: + +| Area | Full-only coverage | +| -------------------------------- | ------------------------------------------------------------------------------- | +| Docker live models | OpenCode Go, OpenRouter, xAI, Z.ai, and Fireworks. | +| Docker live gateway | Advisory shard for DeepSeek, Fireworks, OpenCode Go, OpenRouter, xAI, and Z.ai. | +| Native gateway provider profiles | Fireworks, DeepSeek, full OpenCode Go model shards, OpenRouter, xAI, and Z.ai. | +| Native plugin live shards | Plugins A-K, L-N, O-Z other, Moonshot, and xAI. | +| Native media live shards | Audio, Google music, MiniMax music, and video groups A-D. | + +`stable` includes `native-live-src-gateway-profiles-opencode-go-smoke`; `full` +uses the broader OpenCode Go model shards instead. + +## Focused reruns + +Use `rerun_group` to avoid repeating unrelated release boxes: + +| Handle | Scope | +| ------------------- | ------------------------------------------------- | +| `all` | All Full Release Validation stages. | +| `ci` | Manual full CI child only. | +| `plugin-prerelease` | Plugin Prerelease child only. | +| `release-checks` | All OpenClaw Release Checks stages. | +| `install-smoke` | Install Smoke through release checks. | +| `cross-os` | Cross-OS release checks. | +| `live-e2e` | Repo/live E2E and Docker release-path validation. | +| `package` | Package Acceptance. | +| `qa` | QA parity plus QA live lanes. | +| `qa-parity` | QA parity lanes and report only. | +| `qa-live` | QA live Matrix and Telegram only. | +| `npm-telegram` | Optional post-publish Telegram E2E only. | + +Use `live_suite_filter` with `rerun_group=live-e2e` when one live suite failed. 
+Valid filter ids are defined in the reusable live/E2E workflow, including +`docker-live-models`, `live-gateway-docker`, +`live-gateway-anthropic-docker`, `live-gateway-google-docker`, +`live-gateway-minimax-docker`, `live-gateway-advisory-docker`, +`live-cli-backend-docker`, `live-acp-bind-docker`, and +`live-codex-harness-docker`. + +## Evidence to keep + +Keep the `Full Release Validation` summary as the release-level index. It links +child run ids and includes slowest-job tables. For failures, inspect the child +workflow first, then rerun the smallest matching handle above. + +Useful artifacts: + +- `release-package-under-test` from `OpenClaw Release Checks` +- Docker release-path artifacts under `.artifacts/docker-tests/` +- Package Acceptance `package-under-test` and Docker acceptance artifacts +- Cross-OS release-check artifacts for each OS and suite +- QA parity, Matrix, and Telegram artifacts + +## Workflow files + +- `.github/workflows/full-release-validation.yml` +- `.github/workflows/openclaw-release-checks.yml` +- `.github/workflows/openclaw-live-and-e2e-checks-reusable.yml` +- `.github/workflows/plugin-prerelease.yml` +- `.github/workflows/install-smoke.yml` +- `.github/workflows/openclaw-cross-os-release-checks-reusable.yml` +- `.github/workflows/package-acceptance.yml` diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index ef9744b98d1..5445e47dbbf 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -90,6 +90,7 @@ describe("package artifact reuse", () => { expect(workflow).toContain("node .release-harness/scripts/docker-e2e.mjs github-outputs"); expect(workflow).toContain("bash .release-harness/scripts/ci-docker-pull-retry.sh"); expect(workflow).toContain("plan_docker_lane_groups:"); + expect(workflow).toContain("targeted_docker_lane_group_size:"); expect(workflow).toContain("Docker E2E targeted lanes (${{ 
matrix.group.label }})"); expect(workflow).toContain("LANES: ${{ matrix.group.docker_lanes }}"); expect(workflow).toContain("DOCKER_E2E_LANES: ${{ matrix.group.docker_lanes }}"); @@ -142,6 +143,9 @@ describe("package artifact reuse", () => { expect(workflow).toContain( 'add_profile_suite native-live-src-gateway-core "minimum stable full"', ); + expect(workflow).toContain('add_profile_suite live-gateway-docker "minimum stable full"'); + expect(workflow).toContain('add_profile_suite live-gateway-anthropic-docker "stable full"'); + expect(workflow).toContain('add_profile_suite live-gateway-advisory-docker "full"'); expect(workflow).toContain('add_profile_suite live-cli-backend-docker "stable full"'); expect(workflow).toContain( "inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id", @@ -169,6 +173,9 @@ describe("package artifact reuse", () => { expect(workflow).not.toContain( "OPENCLAW_LIVE_GATEWAY_PROVIDERS=deepseek,opencode-go,openrouter,xai,zai", ); + expect(workflow).toContain("suite_id: live-gateway-anthropic-docker"); + expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2"); + expect(workflow).toContain("timeout --foreground --kill-after=30s 25m"); expect(workflow).toContain("suite_id: native-live-extensions-a-k"); expect(workflow).toContain("suite_id: native-live-extensions-l-n"); expect(workflow).toContain("suite_id: native-live-extensions-moonshot"); @@ -220,19 +227,19 @@ describe("package artifact reuse", () => { const stage = readFileSync("scripts/lib/live-docker-stage.sh", "utf8"); expect(workflow).toContain( - 'run: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-models-docker.sh', + 'run: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-models-docker.sh', ); expect(workflow).toContain( - 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash 
.release-harness/scripts/test-live-gateway-models-docker.sh', + "command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=openai OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2", ); expect(workflow).toContain( - 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-cli-backend-docker.sh', + 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 45m bash .release-harness/scripts/test-live-cli-backend-docker.sh', ); expect(workflow).toContain( - 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-acp-bind-docker.sh', + 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 45m bash .release-harness/scripts/test-live-acp-bind-docker.sh', ); expect(workflow).toContain( - 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" bash .release-harness/scripts/test-live-codex-harness-docker.sh', + 'command: OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-codex-harness-docker.sh', ); expect(scenarios).toContain("function liveDockerScriptCommand"); expect(scenarios).toContain( @@ -385,8 +392,10 @@ describe("package artifact reuse", () => { expect(workflow).toContain("child_rerun_group=all"); expect(workflow).toContain('-f rerun_group="$child_rerun_group"'); expect(workflow).toContain('args+=(-f live_suite_filter="$LIVE_SUITE_FILTER")'); - expect(workflow).toContain("cancel-in-progress: false"); - expect(workflow).not.toContain("gh run cancel"); + expect(workflow).toContain( + "cancel-in-progress: ${{ inputs.ref == 'main' && inputs.rerun_group == 'all' }}", + ); + expect(workflow).toContain("gh run cancel"); expect(workflow).not.toContain("force-cancel"); expect(workflow).toContain("NORMAL_CI_RESULT: ${{ needs.normal_ci.result }}"); expect(workflow).not.toContain("workflow_ref:"); diff --git 
a/test/scripts/plugin-prerelease-test-plan.test.ts b/test/scripts/plugin-prerelease-test-plan.test.ts index db23f51e8b9..8184bbb617a 100644 --- a/test/scripts/plugin-prerelease-test-plan.test.ts +++ b/test/scripts/plugin-prerelease-test-plan.test.ts @@ -295,6 +295,7 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { include_repo_e2e: false, live_models_only: false, ref: "${{ needs.preflight.outputs.checkout_revision }}", + targeted_docker_lane_group_size: 4, }, }); expect(dockerSuite.secrets).toBeUndefined(); @@ -320,7 +321,7 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { }); expect(fullReleaseWorkflow.concurrency).toEqual({ group: "full-release-validation-${{ inputs.ref }}-${{ inputs.rerun_group }}", - "cancel-in-progress": false, + "cancel-in-progress": "${{ inputs.ref == 'main' && inputs.rerun_group == 'all' }}", }); expect(releaseChecksWorkflow.jobs.resolve_target["runs-on"]).toBe("ubuntu-24.04"); expect(releaseChecksWorkflow.jobs.prepare_release_package["runs-on"]).toBe("ubuntu-24.04");