diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml index bbf39f3d773..ac58b612c8a 100644 --- a/.github/workflows/full-release-validation.yml +++ b/.github/workflows/full-release-validation.yml @@ -35,6 +35,11 @@ on: - minimum - stable - full + run_release_soak: + description: Run exhaustive live/Docker and upgrade-survivor soak lanes; forced on for release_profile=full + required: false + default: false + type: boolean rerun_group: description: Validation group to run required: false @@ -136,6 +141,7 @@ jobs: EVIDENCE_PACKAGE_SPEC: ${{ inputs.evidence_package_spec }} PACKAGE_ACCEPTANCE_PACKAGE_SPEC: ${{ inputs.package_acceptance_package_spec }} RELEASE_PROFILE: ${{ inputs.release_profile }} + RUN_RELEASE_SOAK: ${{ inputs.run_release_soak || inputs.release_profile == 'full' }} RERUN_GROUP: ${{ inputs.rerun_group }} LIVE_SUITE_FILTER: ${{ inputs.live_suite_filter }} run: | @@ -145,6 +151,7 @@ jobs: echo "- Target ref: \`${TARGET_REF}\`" echo "- Target SHA: \`${TARGET_SHA}\`" echo "- Child workflow ref: \`${CHILD_WORKFLOW_REF}\`" + echo "- Release soak lanes: \`${RUN_RELEASE_SOAK}\`" echo "- Rerun group: \`${RERUN_GROUP}\`" if [[ -n "${LIVE_SUITE_FILTER// }" ]]; then echo "- Live suite filter: \`${LIVE_SUITE_FILTER}\`" @@ -206,7 +213,7 @@ jobs: local workflow="$1" shift - local before_json dispatch_output run_id status conclusion url + local before_json dispatch_output run_id status conclusion url poll_count before_json="$(gh run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')" dispatch_output="$(gh workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@" 2>&1)" @@ -246,11 +253,17 @@ jobs: } trap cancel_child EXIT INT TERM + poll_count=0 while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then break fi + poll_count=$((poll_count + 1)) + if (( poll_count % 10 == 0 )); then + echo "Still 
waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" + gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true + fi sleep 30 done trap - EXIT INT TERM @@ -299,7 +312,7 @@ jobs: local workflow="$1" shift - local before_json dispatch_output run_id status conclusion url + local before_json dispatch_output run_id status conclusion url poll_count before_json="$(gh run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')" dispatch_output="$(gh workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@" 2>&1)" @@ -339,11 +352,17 @@ jobs: } trap cancel_child EXIT INT TERM + poll_count=0 while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then break fi + poll_count=$((poll_count + 1)) + if (( poll_count % 10 == 0 )); then + echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" + gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true + fi sleep 30 done trap - EXIT INT TERM @@ -388,6 +407,7 @@ jobs: PROVIDER: ${{ inputs.provider }} MODE: ${{ inputs.mode }} RELEASE_PROFILE: ${{ inputs.release_profile }} + RUN_RELEASE_SOAK: ${{ inputs.run_release_soak || inputs.release_profile == 'full' }} RERUN_GROUP: ${{ inputs.rerun_group }} LIVE_SUITE_FILTER: ${{ inputs.live_suite_filter }} PACKAGE_ACCEPTANCE_PACKAGE_SPEC: ${{ inputs.package_acceptance_package_spec }} @@ -398,7 +418,7 @@ jobs: local workflow="$1" shift - local before_json dispatch_output run_id status conclusion url + local before_json dispatch_output run_id status conclusion url poll_count before_json="$(gh run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')" dispatch_output="$(gh workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@" 2>&1)" @@ -438,11 
+458,17 @@ jobs: } trap cancel_child EXIT INT TERM + poll_count=0 while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then break fi + poll_count=$((poll_count + 1)) + if (( poll_count % 10 == 0 )); then + echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" + gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true + fi sleep 30 done trap - EXIT INT TERM @@ -465,6 +491,7 @@ jobs: echo "- Provider: \`${PROVIDER}\`" echo "- Cross-OS mode: \`${MODE}\`" echo "- Release profile: \`${RELEASE_PROFILE}\`" + echo "- Release soak lanes: \`${RUN_RELEASE_SOAK}\`" echo "- Rerun group: \`${RERUN_GROUP}\`" if [[ -n "${LIVE_SUITE_FILTER// }" ]]; then echo "- Live suite filter: \`${LIVE_SUITE_FILTER}\`" @@ -485,6 +512,7 @@ jobs: -f provider="$PROVIDER" -f mode="$MODE" -f release_profile="$RELEASE_PROFILE" + -f run_release_soak="$RUN_RELEASE_SOAK" -f rerun_group="$child_rerun_group" ) if [[ -n "${LIVE_SUITE_FILTER// }" ]]; then @@ -640,11 +668,17 @@ jobs: } trap cancel_child EXIT INT TERM + poll_count=0 while true; do status="$(gh run view "$run_id" --json status --jq '.status')" if [[ "$status" == "completed" ]]; then break fi + poll_count=$((poll_count + 1)) + if (( poll_count % 10 == 0 )); then + echo "Still waiting on npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" + gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true + fi sleep 30 done trap - EXIT INT TERM diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 92686c9c7ac..d1469c5e8c7 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -39,6 +39,11 @@ on: - minimum - stable - full + run_release_soak: + description: Run exhaustive live/Docker and 
upgrade-survivor soak lanes; forced on for release_profile=full + required: false + default: false + type: boolean rerun_group: description: Release check group to run required: false @@ -86,6 +91,7 @@ jobs: provider: ${{ steps.inputs.outputs.provider }} mode: ${{ steps.inputs.outputs.mode }} release_profile: ${{ steps.inputs.outputs.release_profile }} + run_release_soak: ${{ steps.inputs.outputs.run_release_soak }} rerun_group: ${{ steps.inputs.outputs.rerun_group }} live_suite_filter: ${{ steps.inputs.outputs.live_suite_filter }} qa_live_matrix_enabled: ${{ steps.inputs.outputs.qa_live_matrix_enabled }} @@ -206,6 +212,7 @@ jobs: RELEASE_PROVIDER_INPUT: ${{ inputs.provider }} RELEASE_MODE_INPUT: ${{ inputs.mode }} RELEASE_PROFILE_INPUT: ${{ inputs.release_profile }} + RELEASE_RUN_RELEASE_SOAK_INPUT: ${{ inputs.run_release_soak }} RELEASE_RERUN_GROUP_INPUT: ${{ inputs.rerun_group }} RELEASE_LIVE_SUITE_FILTER_INPUT: ${{ inputs.live_suite_filter }} RELEASE_QA_SLACK_LIVE_CI_ENABLED: ${{ vars.OPENCLAW_QA_SLACK_LIVE_CI_ENABLED || 'false' }} @@ -221,6 +228,15 @@ jobs: else qa_live_slack_ci_enabled=true fi + run_release_soak="$(printf '%s' "$RELEASE_RUN_RELEASE_SOAK_INPUT" | tr '[:upper:]' '[:lower:]')" + if [[ "$run_release_soak" != "true" && "$run_release_soak" != "1" && "$run_release_soak" != "yes" ]]; then + run_release_soak=false + else + run_release_soak=true + fi + if [[ "$RELEASE_PROFILE_INPUT" == "full" ]]; then + run_release_soak=true + fi filter="$(printf '%s' "$RELEASE_LIVE_SUITE_FILTER_INPUT" | tr '[:upper:]' '[:lower:]')" if [[ -n "${filter// }" ]]; then @@ -273,6 +289,7 @@ jobs: printf 'provider=%s\n' "$RELEASE_PROVIDER_INPUT" printf 'mode=%s\n' "$RELEASE_MODE_INPUT" printf 'release_profile=%s\n' "$RELEASE_PROFILE_INPUT" + printf 'run_release_soak=%s\n' "$run_release_soak" printf 'rerun_group=%s\n' "$RELEASE_RERUN_GROUP_INPUT" printf 'live_suite_filter=%s\n' "$RELEASE_LIVE_SUITE_FILTER_INPUT" printf 'qa_live_matrix_enabled=%s\n' "$qa_live_matrix_enabled" @@ 
-289,6 +306,7 @@ jobs: RELEASE_PROVIDER: ${{ inputs.provider }} RELEASE_MODE: ${{ inputs.mode }} RELEASE_PROFILE: ${{ inputs.release_profile }} + RUN_RELEASE_SOAK: ${{ steps.inputs.outputs.run_release_soak }} RELEASE_RERUN_GROUP: ${{ inputs.rerun_group }} RELEASE_LIVE_SUITE_FILTER: ${{ inputs.live_suite_filter }} PACKAGE_ACCEPTANCE_PACKAGE_SPEC: ${{ inputs.package_acceptance_package_spec }} @@ -302,6 +320,7 @@ jobs: echo "- Cross-OS provider: \`${RELEASE_PROVIDER}\`" echo "- Cross-OS mode: \`${RELEASE_MODE}\`" echo "- Release profile: \`${RELEASE_PROFILE}\`" + echo "- Release soak lanes: \`${RUN_RELEASE_SOAK}\`" echo "- Rerun group: \`${RELEASE_RERUN_GROUP}\`" if [[ -n "${RELEASE_LIVE_SUITE_FILTER// }" ]]; then echo "- Live suite filter: \`${RELEASE_LIVE_SUITE_FILTER}\`" @@ -312,7 +331,11 @@ jobs: else echo "- Package Acceptance package spec: prepared release artifact" fi - echo "- This run will execute cross-OS release validation, install smoke, QA Lab parity, Matrix, Telegram, and Slack lanes, and the non-Parallels Docker/live/openwebui coverage from the CI migration plan." + if [[ "$RUN_RELEASE_SOAK" == "true" ]]; then + echo "- This run will execute blocking release validation plus exhaustive live/Docker soak coverage." + else + echo "- This run will execute blocking release validation. Exhaustive live/Docker soak lanes are skipped unless \`run_release_soak=true\`, \`release_profile=full\`, or \`rerun_group=live-e2e\` is selected." 
+ fi } >> "$GITHUB_STEP_SUMMARY" prepare_release_package: @@ -423,7 +446,7 @@ jobs: live_repo_e2e_release_checks: name: Run repo/live E2E validation needs: [resolve_target] - if: contains(fromJSON('["all","live-e2e"]'), needs.resolve_target.outputs.rerun_group) + if: needs.resolve_target.outputs.rerun_group == 'live-e2e' || (needs.resolve_target.outputs.rerun_group == 'all' && needs.resolve_target.outputs.run_release_soak == 'true') permissions: actions: read contents: read @@ -488,7 +511,7 @@ jobs: docker_e2e_release_checks: name: Run Docker release-path validation needs: [resolve_target, prepare_release_package] - if: contains(fromJSON('["all","live-e2e"]'), needs.resolve_target.outputs.rerun_group) && needs.resolve_target.outputs.live_suite_filter == '' + if: (needs.resolve_target.outputs.rerun_group == 'live-e2e' || (needs.resolve_target.outputs.rerun_group == 'all' && needs.resolve_target.outputs.run_release_soak == 'true')) && needs.resolve_target.outputs.live_suite_filter == '' permissions: actions: read contents: read @@ -523,8 +546,8 @@ jobs: package_sha256: ${{ needs.prepare_release_package.outputs.package_sha256 }} suite_profile: custom docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update - published_upgrade_survivor_baselines: all-since-2026.4.23 - published_upgrade_survivor_scenarios: reported-issues + published_upgrade_survivor_baselines: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'all-since-2026.4.23' || '' }} + published_upgrade_survivor_scenarios: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'reported-issues' || '' }} telegram_mode: mock-openai telegram_scenarios: telegram-help-command,telegram-commands-command,telegram-tools-compact-command,telegram-whoami-command,telegram-context-command,telegram-current-session-status-tool,telegram-mention-gating secrets: diff --git a/docs/ci.md b/docs/ci.md index 878d5deeb6e..6c67eb6e1e0 100644 --- 
a/docs/ci.md +++ b/docs/ci.md @@ -152,7 +152,7 @@ Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured ## Full Release Validation -`Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the manual `CI` workflow with that target, dispatches `Plugin Prerelease` for release-only plugin/package/static/Docker proof, and dispatches `OpenClaw Release Checks` for install smoke, package acceptance, Docker release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram lanes. With `rerun_group=all` and `release_profile=full`, it also runs `NPM Telegram Beta E2E` against the `release-package-under-test` artifact from release checks. After publishing, pass `npm_telegram_package_spec` to rerun the same Telegram package lane against the published npm package. +`Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the manual `CI` workflow with that target, dispatches `Plugin Prerelease` for release-only plugin/package/static/Docker proof, and dispatches `OpenClaw Release Checks` for install smoke, package acceptance, cross-OS package checks, QA Lab parity, Matrix, and Telegram lanes. Stable/default runs keep exhaustive live/E2E and Docker release-path coverage behind `run_release_soak=true`; `release_profile=full` forces that soak coverage on so broad advisory validation remains broad. With `rerun_group=all` and `release_profile=full`, it also runs `NPM Telegram Beta E2E` against the `release-package-under-test` artifact from release checks. After publishing, pass `npm_telegram_package_spec` to rerun the same Telegram package lane against the published npm package. See [Full release validation](/reference/full-release-validation) for the stage matrix, exact workflow job names, profile differences, artifacts, and @@ -189,7 +189,9 @@ different SHA. 
`release_profile` controls live/provider breadth passed into release checks. The manual release workflows default to `stable`; use `full` only when you -intentionally want the broad advisory provider/media matrix. +intentionally want the broad advisory provider/media matrix. `run_release_soak` +controls whether stable/default release checks run the exhaustive live/E2E and +Docker release-path soak; `full` forces soak on. - `minimum` keeps the fastest OpenAI/core release-critical lanes. - `stable` adds the stable provider/backend set. @@ -199,7 +201,7 @@ The umbrella records the dispatched child run ids, and the final `Verify full va For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `plugin-prerelease` for only the plugin prerelease child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. -`OpenClaw Release Checks` uses the trusted workflow ref to resolve the selected ref once into a `release-package-under-test` tarball, then passes that artifact to both the live/E2E release-path Docker workflow and the package acceptance shard. That keeps the package bytes consistent across release boxes and avoids repacking the same candidate in multiple child jobs. +`OpenClaw Release Checks` uses the trusted workflow ref to resolve the selected ref once into a `release-package-under-test` tarball, then passes that artifact to cross-OS checks and Package Acceptance, plus the live/E2E release-path Docker workflow when soak coverage runs. That keeps the package bytes consistent across release boxes and avoids repacking the same candidate in multiple child jobs. Duplicate `Full Release Validation` runs for `ref=main` and `rerun_group=all` supersede the older umbrella. 
The parent monitor cancels any child workflow it @@ -263,7 +265,7 @@ For the dedicated update and plugin testing policy, including local commands, Docker lanes, Package Acceptance inputs, release defaults, and failure triage, see [Testing updates and plugins](/help/testing-updates-plugins). -Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update'`, `published_upgrade_survivor_baselines=all-since-2026.4.23`, `published_upgrade_survivor_scenarios=reported-issues`, and `telegram_mode=mock-openai`. This keeps package migration, update, stale-plugin-dependency cleanup, configured-plugin install repair, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Set `package_acceptance_package_spec` on Full Release Validation or OpenClaw Release Checks to run that same matrix against a shipped npm package instead of the SHA-built artifact. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Set `published_upgrade_survivor_baselines=all-since-2026.4.23` to expand Full Release CI across every stable npm release from `2026.4.23` through `latest`; `release-history` remains available for manual wider sampling with the older pre-date anchor. 
Set `published_upgrade_survivor_scenarios=reported-issues` to expand the same baselines across issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, configured OpenClaw plugin installs, tilde log paths, and stale legacy plugin dependency roots. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults. +Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. This keeps package migration, update, stale-plugin-dependency cleanup, configured-plugin install repair, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Set `package_acceptance_package_spec` on Full Release Validation or OpenClaw Release Checks to run that same matrix against a shipped npm package instead of the SHA-built artifact. 
Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run in the blocking release path. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Full Release Validation with `run_release_soak=true` or `release_profile=full` sets `published_upgrade_survivor_baselines=all-since-2026.4.23` and `published_upgrade_survivor_scenarios=reported-issues` to expand across every stable npm release from `2026.4.23` through `latest` and issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, configured OpenClaw plugin installs, tilde log paths, and stale legacy plugin dependency roots. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. 
The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults. ### Legacy compatibility windows diff --git a/docs/help/testing.md b/docs/help/testing.md index 179d5b6fb4f..daaa48c0163 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -112,7 +112,9 @@ These commands sit beside the main test suites when you need QA-lab realism: CI runs QA Lab in dedicated workflows. Agentic parity is nested under `QA-Lab - All Lanes` and release validation, not a standalone PR workflow. Broad validation should use `Full Release Validation` with -`rerun_group=qa-parity` or the release-checks QA group. `QA-Lab - All Lanes` +`rerun_group=qa-parity` or the release-checks QA group. Stable/default release +checks keep exhaustive live/Docker soak behind `run_release_soak=true`; the +`full` profile forces soak on. `QA-Lab - All Lanes` runs nightly on `main` and from manual dispatch with the mock parity lane, live Matrix lane, Convex-managed live Telegram lane, and Convex-managed live Discord lane as parallel jobs. Scheduled QA and release checks pass Matrix @@ -641,7 +643,7 @@ The live-model Docker runners also bind-mount only the needed CLI auth homes (or - Npm tarball onboarding/channel/agent smoke: `pnpm test:docker:npm-onboard-channel-agent` installs the packed OpenClaw tarball globally in Docker, configures OpenAI via env-ref onboarding plus Telegram by default, runs doctor, and runs one mocked OpenAI agent turn. Reuse a prebuilt tarball with `OPENCLAW_CURRENT_PACKAGE_TGZ=/path/to/openclaw-*.tgz`, skip the host rebuild with `OPENCLAW_NPM_ONBOARD_HOST_BUILD=0`, or switch channel with `OPENCLAW_NPM_ONBOARD_CHANNEL=discord`. 
- Update channel switch smoke: `pnpm test:docker:update-channel-switch` installs the packed OpenClaw tarball globally in Docker, switches from package `stable` to git `dev`, verifies the persisted channel and plugin post-update work, then switches back to package `stable` and checks update status. - Upgrade survivor smoke: `pnpm test:docker:upgrade-survivor` installs the packed OpenClaw tarball over a dirty old-user fixture with agents, channel config, plugin allowlists, stale plugin dependency state, and existing workspace/session files. It runs package update plus non-interactive doctor without live provider or channel keys, then starts a loopback Gateway and checks config/state preservation plus startup/status budgets. -- Published upgrade survivor smoke: `pnpm test:docker:published-upgrade-survivor` installs `openclaw@latest` by default, seeds realistic existing-user files, configures that baseline with a baked command recipe, validates the resulting config, updates that published install to the candidate tarball, runs non-interactive doctor, writes `.artifacts/upgrade-survivor/summary.json`, then starts a loopback Gateway and checks configured intents, state preservation, startup, `/healthz`, `/readyz`, and RPC status budgets. Override one baseline with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC`, ask the aggregate scheduler to expand exact baselines with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS` such as `all-since-2026.4.23`, and expand issue-shaped fixtures with `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` such as `reported-issues`; the reported-issues set includes `configured-plugin-installs` for automatic external OpenClaw plugin install repair. Package Acceptance exposes those as `published_upgrade_survivor_baseline`, `published_upgrade_survivor_baselines`, and `published_upgrade_survivor_scenarios`. 
+- Published upgrade survivor smoke: `pnpm test:docker:published-upgrade-survivor` installs `openclaw@latest` by default, seeds realistic existing-user files, configures that baseline with a baked command recipe, validates the resulting config, updates that published install to the candidate tarball, runs non-interactive doctor, writes `.artifacts/upgrade-survivor/summary.json`, then starts a loopback Gateway and checks configured intents, state preservation, startup, `/healthz`, `/readyz`, and RPC status budgets. Override one baseline with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC`, ask the aggregate scheduler to expand exact baselines with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS` such as `all-since-2026.4.23`, and expand issue-shaped fixtures with `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` such as `reported-issues`; the reported-issues set includes `configured-plugin-installs` for automatic external OpenClaw plugin install repair. Package Acceptance exposes those as `published_upgrade_survivor_baseline`, `published_upgrade_survivor_baselines`, and `published_upgrade_survivor_scenarios`; Full Release Validation uses the default `openclaw@latest` baseline in the blocking path and expands to the `all-since-2026.4.23` baselines and `reported-issues` scenarios only for `run_release_soak=true` or `release_profile=full`. - Session runtime context smoke: `pnpm test:docker:session-runtime-context` verifies hidden runtime context transcript persistence plus doctor repair of affected duplicated prompt-rewrite branches. - Bun global install smoke: `bash scripts/e2e/bun-global-install-smoke.sh` packs the current tree, installs it with `bun install -g` in an isolated home, and verifies `openclaw infer image providers --json` returns bundled image providers instead of hanging.
Reuse a prebuilt tarball with `OPENCLAW_BUN_GLOBAL_SMOKE_PACKAGE_TGZ=/path/to/openclaw-*.tgz`, skip the host build with `OPENCLAW_BUN_GLOBAL_SMOKE_HOST_BUILD=0`, or copy `dist/` from a built Docker image with `OPENCLAW_BUN_GLOBAL_SMOKE_DIST_IMAGE=openclaw-dockerfile-smoke:local`. - Installer Docker smoke: `bash scripts/test-install-sh-docker.sh` shares one npm cache across its root, update, and direct-npm containers. Update smoke defaults to npm `latest` as the stable baseline before upgrading to the candidate tarball. Override with `OPENCLAW_INSTALL_SMOKE_UPDATE_BASELINE=2026.4.22` locally, or with the Install Smoke workflow's `update_baseline_version` input on GitHub. Non-root installer checks keep an isolated npm cache so root-owned cache entries do not mask user-local install behavior. Set `OPENCLAW_INSTALL_SMOKE_NPM_CACHE_DIR=/path/to/cache` to reuse the root/update/direct-npm cache across local reruns. diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index d5825ab98cd..3e972641ebe 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -114,11 +114,13 @@ the maintainer-only release runbook. - Run the manual `Full Release Validation` workflow before release approval to kick off all pre-release test boxes from one entrypoint. It accepts a branch, tag, or full commit SHA, dispatches manual `CI`, and dispatches - `OpenClaw Release Checks` for install smoke, package acceptance, Docker - release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram - lanes. With `release_profile=full` and `rerun_group=all`, it also runs package - Telegram E2E against the `release-package-under-test` artifact from release - checks. Provide `npm_telegram_package_spec` after publishing when the same + `OpenClaw Release Checks` for install smoke, package acceptance, cross-OS + package checks, QA Lab parity, Matrix, and Telegram lanes. 
Stable/default runs + keep exhaustive live/E2E and Docker release-path soak behind + `run_release_soak=true`; `release_profile=full` forces soak on. With + `release_profile=full` and `rerun_group=all`, it also runs package Telegram + E2E against the `release-package-under-test` artifact from release checks. + Provide `npm_telegram_package_spec` after publishing when the same Telegram E2E should prove the published npm package too. Provide `package_acceptance_package_spec` after publishing when Package Acceptance should run its package/update matrix against the shipped npm package instead @@ -293,8 +295,8 @@ parent `release-package-under-test` artifact for package-facing checks, and dispatches standalone package Telegram E2E when `release_profile=full` with `rerun_group=all` or when `npm_telegram_package_spec` is set. `OpenClaw Release Checks` then fans out install smoke, cross-OS release checks, live/E2E Docker -release-path coverage, Package Acceptance with Telegram package QA, QA Lab -parity, live Matrix, and live Telegram. A full run is only acceptable when the +release-path coverage when soak is enabled, Package Acceptance with Telegram +package QA, QA Lab parity, live Matrix, and live Telegram. A full run is only acceptable when the `Full Release Validation` summary shows `normal_ci` and `release_checks` as successful. In full/all mode, the `npm_telegram` child must also be successful; outside full/all it is skipped @@ -318,10 +320,15 @@ Use `release_profile` to select live/provider breadth: - `stable`: minimum plus stable provider/backend coverage for release approval - `full`: stable plus broad advisory provider/media coverage +Use `run_release_soak=true` with `stable` when the release-blocking lanes are +green and you want the exhaustive live/E2E, Docker release-path, and +all-since-2026.4.23 upgrade-survivor sweep before promotion. `full` implies +`run_release_soak=true`. 
+ `OpenClaw Release Checks` uses the trusted workflow ref to resolve the target -ref once as `release-package-under-test` and reuses that artifact in both -release-path Docker checks and Package Acceptance. This keeps all -package-facing boxes on the same bytes and avoids repeated package builds. +ref once as `release-package-under-test` and reuses that artifact in cross-OS, +Package Acceptance, and release-path Docker checks when soak runs. This keeps +all package-facing boxes on the same bytes and avoids repeated package builds. The cross-OS OpenAI install smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when the repo/org variable is set, otherwise `openai/gpt-5.4`, because this lane is proving package install, onboarding, gateway startup, and one live agent turn @@ -474,11 +481,12 @@ Supported candidate sources: `OpenClaw Release Checks` runs Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes=doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update`, -`published_upgrade_survivor_baselines=all-since-2026.4.23`, -`published_upgrade_survivor_scenarios=reported-issues`, and `telegram_mode=mock-openai`. Package Acceptance keeps migration, update, stale plugin dependency cleanup, offline plugin fixtures, plugin update, and Telegram -package QA against the same resolved tarball. The upgrade matrix covers every stable npm-published baseline from `2026.4.23` through `latest`; use +package QA against the same resolved tarball. Blocking release checks use the +default latest published package baseline; `run_release_soak=true` or +`release_profile=full` expands to every stable npm-published baseline from +`2026.4.23` through `latest` plus reported-issue fixtures. Use Package Acceptance with `source=npm` for an already shipped candidate, or `source=ref`/`source=artifact` for a SHA-backed local npm tarball before publish. 
It is the GitHub-native @@ -615,6 +623,9 @@ OpenClaw package must not be published. - `ref`: branch, tag, or full commit SHA to validate. Secret-bearing checks require the resolved commit to be reachable from an OpenClaw branch or release tag. +- `run_release_soak`: opt into exhaustive live/E2E, Docker release-path, and + all-since upgrade-survivor soak on stable/default release checks. It is forced + on by `release_profile=full`. Rules: diff --git a/docs/reference/full-release-validation.md b/docs/reference/full-release-validation.md index c44c0ee5cc9..6baeea4b1ee 100644 --- a/docs/reference/full-release-validation.md +++ b/docs/reference/full-release-validation.md @@ -27,6 +27,11 @@ Child workflows use the trusted workflow ref for the harness and the input `ref` for the candidate under test. That keeps new validation logic available when validating an older release branch or tag. +By default, `release_profile=stable` runs the release-blocking lanes and skips +the exhaustive live/Docker soak. Pass `run_release_soak=true` to include the +soak lanes on a stable run. `release_profile=full` always enables soak lanes so +the broad advisory profile never drops coverage silently. + Package Acceptance normally builds the candidate tarball from the resolved `ref`, including full-SHA runs dispatched with `pnpm ci:full-release`. After publish, pass `package_acceptance_package_spec=openclaw@YYYY.M.D` (or @@ -35,15 +40,15 @@ the shipped npm package instead. ## Top-level stages -| Stage | Details | -| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Target resolution | **Job:** `Resolve target ref`
**Child workflow:** none
**Proves:** resolves the release branch, tag, or full commit SHA and records selected inputs.
**Rerun:** rerun the umbrella if this fails. | -| Vitest and normal CI | **Job:** `Run normal full CI`
**Child workflow:** `CI`
**Proves:** manual full CI graph against the target ref, including Linux Node lanes, bundled plugin shards, channel contracts, Node 22 compatibility, `check`, `check-additional`, build smoke, docs checks, Python skills, Windows, macOS, Control UI i18n, and Android via the umbrella.
**Rerun:** `rerun_group=ci`. | -| Plugin prerelease | **Job:** `Run plugin prerelease validation`
**Child workflow:** `Plugin Prerelease`
**Proves:** release-only plugin static checks, agentic plugin coverage, full extension batch shards, and plugin prerelease Docker lanes.
**Rerun:** `rerun_group=plugin-prerelease`. | -| Release checks | **Job:** `Run release/live/Docker/QA validation`
**Child workflow:** `OpenClaw Release Checks`
**Proves:** install smoke, cross-OS package checks, live/E2E suites, Docker release-path chunks, Package Acceptance, QA Lab parity, live Matrix, and live Telegram.
**Rerun:** `rerun_group=release-checks` or a narrower release-checks handle. | -| Package artifact | **Job:** `Prepare release package artifact`
**Child workflow:** none
**Proves:** creates the parent `release-package-under-test` tarball early enough for package-facing checks that do not need to wait for `OpenClaw Release Checks`.
**Rerun:** rerun the umbrella or provide `npm_telegram_package_spec` for `rerun_group=npm-telegram`. | -| Package Telegram | **Job:** `Run package Telegram E2E`
**Child workflow:** `NPM Telegram Beta E2E`
**Proves:** parent-artifact-backed Telegram package proof for `rerun_group=all` with `release_profile=full`, or published-package Telegram proof when `npm_telegram_package_spec` is set.
**Rerun:** `rerun_group=npm-telegram` with `npm_telegram_package_spec`. | -| Umbrella verifier | **Job:** `Verify full validation`
**Child workflow:** none
**Proves:** re-checks recorded child run conclusions and appends slowest-job tables from child workflows.
**Rerun:** rerun only this job after rerunning a failed child to green. | +| Stage | Details | +| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Target resolution | **Job:** `Resolve target ref`
**Child workflow:** none
**Proves:** resolves the release branch, tag, or full commit SHA and records selected inputs.
**Rerun:** rerun the umbrella if this fails. | +| Vitest and normal CI | **Job:** `Run normal full CI`
**Child workflow:** `CI`
**Proves:** manual full CI graph against the target ref, including Linux Node lanes, bundled plugin shards, channel contracts, Node 22 compatibility, `check`, `check-additional`, build smoke, docs checks, Python skills, Windows, macOS, Control UI i18n, and Android via the umbrella.
**Rerun:** `rerun_group=ci`. | +| Plugin prerelease | **Job:** `Run plugin prerelease validation`
**Child workflow:** `Plugin Prerelease`
**Proves:** release-only plugin static checks, agentic plugin coverage, full extension batch shards, and plugin prerelease Docker lanes.
**Rerun:** `rerun_group=plugin-prerelease`. | +| Release checks | **Job:** `Run release/live/Docker/QA validation`
**Child workflow:** `OpenClaw Release Checks`
**Proves:** install smoke, cross-OS package checks, Package Acceptance, QA Lab parity, live Matrix, and live Telegram. With `run_release_soak=true` or `release_profile=full`, also proves exhaustive live/E2E suites and Docker release-path chunks.
**Rerun:** `rerun_group=release-checks` or a narrower release-checks handle. | +| Package artifact | **Job:** `Prepare release package artifact`
**Child workflow:** none
**Proves:** creates the parent `release-package-under-test` tarball early enough for package-facing checks that do not need to wait for `OpenClaw Release Checks`.
**Rerun:** rerun the umbrella or provide `npm_telegram_package_spec` for `rerun_group=npm-telegram`. | +| Package Telegram | **Job:** `Run package Telegram E2E`
**Child workflow:** `NPM Telegram Beta E2E`
**Proves:** parent-artifact-backed Telegram package proof for `rerun_group=all` with `release_profile=full`, or published-package Telegram proof when `npm_telegram_package_spec` is set.
**Rerun:** `rerun_group=npm-telegram` with `npm_telegram_package_spec`. | +| Umbrella verifier | **Job:** `Verify full validation`
**Child workflow:** none
**Proves:** re-checks recorded child run conclusions and appends slowest-job tables from child workflows.
**Rerun:** rerun only this job after rerunning a failed child to green. | For `ref=main` and `rerun_group=all`, a newer umbrella supersedes an older one. When the parent is cancelled, its monitor cancels any child workflow it already @@ -56,19 +61,19 @@ default. once and prepares a shared `release-package-under-test` artifact when package or Docker-facing stages need it. -| Stage | Details | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Release target | **Job:** `Resolve target ref`
**Backing workflow:** none
**Tests:** selected ref, optional expected SHA, profile, rerun group, and focused live suite filter.
**Rerun:** `rerun_group=release-checks`. | -| Package artifact | **Job:** `Prepare release package artifact`
**Backing workflow:** none
**Tests:** packs or resolves one candidate tarball and uploads `release-package-under-test` for downstream package-facing checks.
**Rerun:** the affected package, cross-OS, or live/E2E group. | -| Install smoke | **Job:** `Run install smoke`
**Backing workflow:** `Install Smoke`
**Tests:** full install path with root Dockerfile smoke image reuse, QR package install, root and gateway Docker smokes, installer Docker tests, Bun global install image-provider smoke, and fast bundled-plugin install/uninstall E2E.
**Rerun:** `rerun_group=install-smoke`. | -| Cross-OS | **Job:** `cross_os_release_checks`
**Backing workflow:** `OpenClaw Cross-OS Release Checks (Reusable)`
**Tests:** fresh and upgrade lanes on Linux, Windows, and macOS for the selected provider and mode, using the candidate tarball plus a baseline package.
**Rerun:** `rerun_group=cross-os`. | -| Repo and live E2E | **Job:** `Run repo/live E2E validation`
**Backing workflow:** `OpenClaw Live And E2E Checks (Reusable)`
**Tests:** repository E2E, live cache, OpenAI websocket streaming, native live provider and plugin shards, and Docker-backed live model/backend/gateway harnesses selected by `release_profile`.
**Rerun:** `rerun_group=live-e2e`, optionally with `live_suite_filter`. | -| Docker release path | **Job:** `Run Docker release-path validation`
**Backing workflow:** `OpenClaw Live And E2E Checks (Reusable)`
**Tests:** release-path Docker chunks against the shared package artifact.
**Rerun:** `rerun_group=live-e2e`. | -| Package Acceptance | **Job:** `Run package acceptance`
**Backing workflow:** `Package Acceptance`
**Tests:** offline plugin package fixtures, plugin update, mock-OpenAI Telegram package acceptance, and published-upgrade survivor checks from every stable npm release at or after `2026.4.23` against the same tarball.
**Rerun:** `rerun_group=package`. | -| QA parity | **Job:** `Run QA Lab parity lane` and `Run QA Lab parity report`
**Backing workflow:** direct jobs
**Tests:** candidate and baseline agentic parity packs, then the parity report.
**Rerun:** `rerun_group=qa-parity` or `rerun_group=qa`. | -| QA live Matrix | **Job:** `Run QA Lab live Matrix lane`
**Backing workflow:** direct job
**Tests:** fast live Matrix QA profile in the `qa-live-shared` environment.
**Rerun:** `rerun_group=qa-live` or `rerun_group=qa`. | -| QA live Telegram | **Job:** `Run QA Lab live Telegram lane`
**Backing workflow:** direct job
**Tests:** live Telegram QA with Convex CI credential leases.
**Rerun:** `rerun_group=qa-live` or `rerun_group=qa`. | -| Release verifier | **Job:** `Verify release checks`
**Backing workflow:** none
**Tests:** required release-check jobs for the selected rerun group.
**Rerun:** rerun after focused child jobs pass. | +| Stage | Details | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Release target | **Job:** `Resolve target ref`
**Backing workflow:** none
**Tests:** selected ref, optional expected SHA, profile, rerun group, and focused live suite filter.
**Rerun:** `rerun_group=release-checks`. | +| Package artifact | **Job:** `Prepare release package artifact`
**Backing workflow:** none
**Tests:** packs or resolves one candidate tarball and uploads `release-package-under-test` for downstream package-facing checks.
**Rerun:** the affected package, cross-OS, or live/E2E group. | +| Install smoke | **Job:** `Run install smoke`
**Backing workflow:** `Install Smoke`
**Tests:** full install path with root Dockerfile smoke image reuse, QR package install, root and gateway Docker smokes, installer Docker tests, Bun global install image-provider smoke, and fast bundled-plugin install/uninstall E2E.
**Rerun:** `rerun_group=install-smoke`. | +| Cross-OS | **Job:** `cross_os_release_checks`
**Backing workflow:** `OpenClaw Cross-OS Release Checks (Reusable)`
**Tests:** fresh and upgrade lanes on Linux, Windows, and macOS for the selected provider and mode, using the candidate tarball plus a baseline package.
**Rerun:** `rerun_group=cross-os`. | +| Repo and live E2E | **Job:** `Run repo/live E2E validation`
**Backing workflow:** `OpenClaw Live And E2E Checks (Reusable)`
**Tests:** repository E2E, live cache, OpenAI websocket streaming, native live provider and plugin shards, and Docker-backed live model/backend/gateway harnesses selected by `release_profile`.
**Runs when:** `run_release_soak=true`, `release_profile=full`, or focused `rerun_group=live-e2e`.
**Rerun:** `rerun_group=live-e2e`, optionally with `live_suite_filter`. | +| Docker release path | **Job:** `Run Docker release-path validation`
**Backing workflow:** `OpenClaw Live And E2E Checks (Reusable)`
**Tests:** release-path Docker chunks against the shared package artifact.
**Runs when:** `run_release_soak=true`, `release_profile=full`, or focused `rerun_group=live-e2e`.
**Rerun:** `rerun_group=live-e2e`. | +| Package Acceptance | **Job:** `Run package acceptance`
**Backing workflow:** `Package Acceptance`
**Tests:** offline plugin package fixtures, plugin update, mock-OpenAI Telegram package acceptance, and published-upgrade survivor checks against the same tarball. Blocking release checks use the default latest published baseline; soak checks expand to every stable npm release at or after `2026.4.23` plus reported-issue fixtures.
**Rerun:** `rerun_group=package`. | +| QA parity | **Job:** `Run QA Lab parity lane` and `Run QA Lab parity report`
**Backing workflow:** direct jobs
**Tests:** candidate and baseline agentic parity packs, then the parity report.
**Rerun:** `rerun_group=qa-parity` or `rerun_group=qa`. | +| QA live Matrix | **Job:** `Run QA Lab live Matrix lane`
**Backing workflow:** direct job
**Tests:** fast live Matrix QA profile in the `qa-live-shared` environment.
**Rerun:** `rerun_group=qa-live` or `rerun_group=qa`. | +| QA live Telegram | **Job:** `Run QA Lab live Telegram lane`
**Backing workflow:** direct job
**Tests:** live Telegram QA with Convex CI credential leases.
**Rerun:** `rerun_group=qa-live` or `rerun_group=qa`. | +| Release verifier | **Job:** `Verify release checks`
**Backing workflow:** none
**Tests:** required release-check jobs for the selected rerun group.
**Rerun:** rerun after focused child jobs pass. | ## Docker release-path chunks @@ -93,10 +98,11 @@ commands with package artifact and image reuse inputs when available. `release_profile` mostly controls live/provider breadth inside release checks. It does not remove normal full CI, Plugin Prerelease, install smoke, package -acceptance, QA Lab, or Docker release-path chunks. `full` also makes the -umbrella run package Telegram E2E against the parent release package artifact when -`rerun_group=all`, so a full pre-publish candidate does not silently skip that -Telegram package lane. +acceptance, or QA Lab. For `stable`, exhaustive repo/live E2E and Docker +release-path chunks are soak coverage and run when `run_release_soak=true`. +`full` forces soak coverage on and also makes the umbrella run package Telegram +E2E against the parent release package artifact when `rerun_group=all`, so a full +pre-publish candidate does not silently skip that Telegram package lane. | Profile | Intended use | Included live/provider coverage | | --------- | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |