diff --git a/.agents/skills/release-openclaw-ci/SKILL.md b/.agents/skills/release-openclaw-ci/SKILL.md index be0e83011f8..8b767d396a8 100644 --- a/.agents/skills/release-openclaw-ci/SKILL.md +++ b/.agents/skills/release-openclaw-ci/SKILL.md @@ -16,6 +16,10 @@ Use this with `$release-openclaw-maintainer` and `$openclaw-testing` when a rele - Watch one parent run plus compact child summaries. Avoid broad `gh run view` polling loops; REST quota is easy to burn. - Fetch logs only for failed or currently-blocking jobs. If quota is low, stop polling and wait for reset. - Treat live-provider flakes separately from code failures: prove key validity, provider HTTP status, retry evidence, and exact failing lane before editing code. +- Full Release Validation parent monitors fail fast: once a required child job + fails, the parent cancels the remaining child matrix and prints the failed + job summary. Inspect that first red job instead of waiting for unrelated + matrix tails. ## Preflight @@ -73,6 +77,9 @@ gh workflow run full-release-validation.yml \ ``` Use `release_profile=stable` unless the operator explicitly asks for the broad advisory provider/media matrix. Use narrow `rerun_group` after focused fixes. +Publish with `openclaw-release-publish.yml` using `release_profile=from-validation` +unless a maintainer intentionally wants to cross-check a specific profile; the +publish workflow reads the effective profile from the full-validation manifest. ## Watch diff --git a/.agents/skills/release-openclaw-maintainer/SKILL.md b/.agents/skills/release-openclaw-maintainer/SKILL.md index e081313f04f..6f549ea1fa8 100644 --- a/.agents/skills/release-openclaw-maintainer/SKILL.md +++ b/.agents/skills/release-openclaw-maintainer/SKILL.md @@ -49,17 +49,21 @@ Use this skill for release and publish-time workflow. Load `$release-private` if the next beta number until the matching npm package has actually published. If a published beta needs a fix, commit the fix on the release branch and increment to the next `-beta.N`. -- For a beta release train, run the fast local preflight first, publish the - beta to npm `beta`, then run the expensive published-package roster focused - on install/update/Docker/Parallels/NPM Telegram. If anything fails, fix it on - the release branch, commit/push/pull, increment beta number, and repeat. Run - the full expensive roster at least once before stable/latest promotion; for - later beta attempts, rerun only lanes whose evidence changed unless the fix - touches broad release, install/update, plugin, Docker, Parallels, or live QA - behavior. After each beta is published, scan current `main` once for critical - fixes that landed after the release branch cut and backport only important - low-risk fixes. Operators may authorize up to 4 autonomous beta attempts; - after 4 failed beta attempts, stop and report. +- For a beta release train, keep Full Release Validation as a pre-publish gate + unless the operator explicitly waives it. Run the fast local preflight, npm + preflight, full release validation, and performance in parallel where safe. + If anything fails before npm publish, fix it on the release branch, + forward-port the fix to `main`, move the unpublished beta tag/prerelease to + the fixed commit, and rerun the affected pre-publish gates. If anything fails + after npm publish, fix it, forward-port to `main`, increment beta number, and + repeat. After each beta publish, run the published-package roster focused on + install/update/Docker/Parallels/NPM Telegram. For later beta attempts, rerun + only lanes whose evidence changed unless the fix touches broad release, + install/update, plugin, Docker, Parallels, or live QA behavior. After each + beta is live, scan current `main` once for critical fixes that landed after + the release branch cut and backport only important low-risk fixes. Operators + may authorize up to 4 autonomous beta attempts; after 4 failed beta attempts, + stop and report. - As soon as the release candidate SHA exists, dispatch `OpenClaw Performance` with `target_ref=` in parallel with the other release work. Do not wait for full release validation to start the performance signal. @@ -468,8 +472,10 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts - The npm workflow and the private mac publish workflow accept `preflight_only=true` to run validation/build/package steps without uploading public release assets. -- Real npm publish requires a prior successful npm preflight run id so the - publish job promotes the prepared tarball instead of rebuilding it. +- Real npm publish requires a prior successful npm preflight run id and the + successful Full Release Validation run id for the same tag/SHA so the publish + job promotes the prepared tarball instead of rebuilding it and attaches the + correct release evidence. - Real private mac publish requires a prior successful private mac preflight run id so the publish job promotes the prepared artifacts instead of rebuilding or renotarizing them again. @@ -499,11 +505,12 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts instead of uploading public GitHub release assets. - Private smoke-test runs upload ad-hoc, non-notarized build artifacts as workflow artifacts and intentionally skip stable `appcast.xml` generation. -- For stable releases, npm preflight, public mac validation, private mac - validation, and private mac preflight must all pass before any real publish - run starts. For beta releases, npm preflight plus the selected Docker, - install/update, Parallels, and release-check lanes are sufficient unless mac - beta validation was explicitly requested. +- For stable releases, npm preflight, Full Release Validation, public mac + validation, private mac validation, and private mac preflight must all pass + before any real publish run starts. For beta releases, npm preflight and Full + Release Validation must pass before npm publish unless the operator explicitly + waives the full gate; mac beta validation is still only required when + requested. - Real publish runs may be dispatched from `main` or from a `release/YYYY.M.D` branch. For release-branch runs, the tag must be contained in that release branch, and the real publish must reuse a successful preflight diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml index 12874cfd0bd..dd8572b5120 100644 --- a/.github/workflows/full-release-validation.yml +++ b/.github/workflows/full-release-validation.yml @@ -380,6 +380,21 @@ jobs: gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]' } + fail_fast_failed_jobs() { + local failed_jobs_json + failed_jobs_json="$( + fetch_child_jobs | + jq -s '[.[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]' + )" + if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then + echo "::error::${workflow} has failed child jobs before the workflow completed; cancelling the remaining matrix." + jq '.[] | {name, conclusion, url: .html_url}' <<< "$failed_jobs_json" + cancel_child + trap - EXIT INT TERM + exit 1 + fi + } + cancel_child() { if [[ -n "${run_id:-}" ]]; then echo "Cancelling child workflow ${workflow}: ${run_id}" >&2 @@ -395,6 +410,9 @@ jobs: break fi poll_count=$((poll_count + 1)) + if (( poll_count % 2 == 0 )); then + fail_fast_failed_jobs + fi if (( poll_count % 10 == 0 )); then echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true @@ -510,6 +528,21 @@ jobs: gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]' } + fail_fast_failed_jobs() { + local failed_jobs_json + failed_jobs_json="$( + fetch_child_jobs | + jq -s '[.[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]' + )" + if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then + echo "::error::${workflow} has failed child jobs before the workflow completed; cancelling the remaining matrix." + jq '.[] | {name, conclusion, url: .html_url}' <<< "$failed_jobs_json" + cancel_child + trap - EXIT INT TERM + exit 1 + fi + } + cancel_child() { if [[ -n "${run_id:-}" ]]; then echo "Cancelling child workflow ${workflow}: ${run_id}" >&2 @@ -525,6 +558,9 @@ jobs: break fi poll_count=$((poll_count + 1)) + if (( poll_count % 2 == 0 )); then + fail_fast_failed_jobs + fi if (( poll_count % 10 == 0 )); then echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true @@ -690,6 +726,24 @@ jobs: [[ "$saw_advisory" == "1" && "$failed" == "0" ]] } + fail_fast_failed_jobs() { + local failed_jobs_json + if [[ "$workflow" == "openclaw-release-checks.yml" && "$CHILD_WORKFLOW_REF" =~ ^tideclaw/alpha/[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}Z$ ]]; then + return 0 + fi + failed_jobs_json="$( + fetch_child_jobs | + jq -s '[.[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]' + )" + if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then + echo "::error::${workflow} has failed child jobs before the workflow completed; cancelling the remaining matrix." + jq '.[] | {name, conclusion, url: .html_url}' <<< "$failed_jobs_json" + cancel_child + trap - EXIT INT TERM + exit 1 + fi + } + cancel_child() { if [[ -n "${run_id:-}" ]]; then echo "Cancelling child workflow ${workflow}: ${run_id}" >&2 @@ -705,6 +759,9 @@ jobs: break fi poll_count=$((poll_count + 1)) + if (( poll_count % 2 == 0 )); then + fail_fast_failed_jobs + fi if (( poll_count % 10 == 0 )); then echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true @@ -962,6 +1019,21 @@ jobs: } trap cancel_child EXIT INT TERM + fail_fast_failed_jobs() { + local failed_jobs_json + failed_jobs_json="$( + gh_with_retry run view "$run_id" --json jobs \ + --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]' + )" + if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then + echo "::error::npm-telegram-beta-e2e.yml has failed child jobs before the workflow completed; cancelling the remaining run." + jq '.[] | {name, conclusion, url}' <<< "$failed_jobs_json" + cancel_child + trap - EXIT INT TERM + exit 1 + fi + } + poll_count=0 while true; do status="$(gh_with_retry run view "$run_id" --json status --jq '.status')" @@ -969,6 +1041,9 @@ jobs: break fi poll_count=$((poll_count + 1)) + if (( poll_count % 2 == 0 )); then + fail_fast_failed_jobs + fi if (( poll_count % 10 == 0 )); then echo "Still waiting on npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" gh_with_retry run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true diff --git a/.github/workflows/openclaw-release-publish.yml b/.github/workflows/openclaw-release-publish.yml index d4e1dcdbfaa..b8fb1ec55c9 100644 --- a/.github/workflows/openclaw-release-publish.yml +++ b/.github/workflows/openclaw-release-publish.yml @@ -46,11 +46,12 @@ on: default: true type: boolean release_profile: - description: Release coverage profile used for release evidence summaries + description: Release coverage profile used for release evidence summaries; default reads it from the validation manifest required: false - default: beta + default: from-validation type: choice options: + - from-validation - beta - stable - full @@ -135,9 +136,9 @@ jobs: exit 1 fi case "$RELEASE_PROFILE" in - beta|stable|full) ;; + from-validation|beta|stable|full) ;; *) - echo "release_profile must be one of: beta, stable, full" >&2 + echo "release_profile must be one of: from-validation, beta, stable, full" >&2 exit 1 ;; esac @@ -259,6 +260,7 @@ jobs: echo "sha=$release_sha" >> "$GITHUB_OUTPUT" - name: Validate full release validation manifest + id: full_manifest if: ${{ inputs.publish_openclaw_npm }} env: GH_TOKEN: ${{ github.token }} @@ -289,7 +291,7 @@ jobs: echo "Full release validation target SHA mismatch: expected $EXPECTED_SHA, got $target_sha" >&2 exit 1 fi - if [[ "$release_profile" != "$EXPECTED_RELEASE_PROFILE" ]]; then + if [[ "$EXPECTED_RELEASE_PROFILE" != "from-validation" && "$release_profile" != "$EXPECTED_RELEASE_PROFILE" ]]; then echo "Full release validation profile mismatch: expected $EXPECTED_RELEASE_PROFILE, got $release_profile" >&2 exit 1 fi @@ -297,6 +299,7 @@ jobs: echo "Full release validation must run rerun_group=all before npm publish; got $rerun_group" >&2 exit 1 fi + echo "release_profile=$release_profile" >> "$GITHUB_OUTPUT" - name: Validate release tag is reachable from a trusted release branch env: @@ -332,7 +335,7 @@ jobs: env: RELEASE_TAG: ${{ inputs.tag }} TARGET_SHA: ${{ steps.manifest.outputs.sha || steps.ref.outputs.sha }} - RELEASE_PROFILE: ${{ inputs.release_profile }} + RELEASE_PROFILE: ${{ steps.full_manifest.outputs.release_profile || inputs.release_profile }} FULL_RELEASE_VALIDATION_RUN_ID: ${{ inputs.full_release_validation_run_id }} run: | { @@ -501,7 +504,7 @@ jobs: wait_for_run() { local workflow="$1" local run_id="$2" - local status conclusion url updated_at created_at duration_seconds duration_label last_state + local status conclusion url updated_at created_at duration_seconds duration_label last_state failed_json last_state="" while true; do @@ -510,6 +513,14 @@ jobs: if [[ "$status" == "completed" ]]; then break fi + failed_json="$(gh run view --repo "$GITHUB_REPOSITORY" "$run_id" --json jobs \ + --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]' || true)" + if [[ -n "${failed_json}" ]] && jq -e 'length > 0' <<< "$failed_json" >/dev/null; then + echo "${workflow} has failed jobs before the workflow completed: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" >&2 + jq '.[] | {name, conclusion, url}' <<< "$failed_json" >&2 || true + print_failed_run_summary "${run_id}" + return 1 + fi url="$(printf '%s' "$run_json" | jq -r '.url')" updated_at="$(printf '%s' "$run_json" | jq -r '.updatedAt')" state="${status}:${updated_at}" diff --git a/scripts/release-candidate-checklist.mjs b/scripts/release-candidate-checklist.mjs index 2b164c9be44..73f4505d565 100644 --- a/scripts/release-candidate-checklist.mjs +++ b/scripts/release-candidate-checklist.mjs @@ -477,7 +477,7 @@ export function buildPublishCommand(options) { ["npm_dist_tag", options.npmDistTag], ["plugin_publish_scope", options.pluginPublishScope], ["publish_openclaw_npm", "true"], - ["release_profile", options.releaseProfile], + ["release_profile", "from-validation"], ["wait_for_clawhub", "false"], ]; if (options.npmTelegramRunId) { diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 8fd612afc09..0af050ad1d2 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -179,7 +179,7 @@ describe("package acceptance workflow", () => { expect(hydrateWindowsPnpm.run).toContain('"--filter",'); expect(hydrateWindowsPnpm.run).toContain('"openclaw",'); expect(hydrateWindowsPnpm.run).toContain( - 'New-Item -ItemType Junction -Path $workspaceNodeModules -Target $env:PNPM_CONFIG_MODULES_DIR', + "New-Item -ItemType Junction -Path $workspaceNodeModules -Target $env:PNPM_CONFIG_MODULES_DIR", ); expect(hydrateWindowsPnpm.run).toContain(".pnpm-workspace-state-v1.json"); expect(hydrateWindowsPnpm.run).not.toContain("Remove-Item -Recurse -Force"); @@ -1464,6 +1464,16 @@ describe("package artifact reuse", () => { expect(pluginNpmWorkflow).toContain("environment: npm-release"); expect(clawHubWorkflow).toContain("environment: clawhub-plugin-release"); expect(openclawNpmWorkflow).toContain("environment: npm-release"); + expect(releaseWorkflow).toContain("default: from-validation"); + expect(releaseWorkflow).toContain( + 'if [[ "$EXPECTED_RELEASE_PROFILE" != "from-validation" && "$release_profile" != "$EXPECTED_RELEASE_PROFILE" ]]; then', + ); + expect(releaseWorkflow).toContain( + 'echo "release_profile=$release_profile" >> "$GITHUB_OUTPUT"', + ); + expect(releaseWorkflow).toContain( + "has failed jobs before the workflow completed: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}", + ); expect(releaseWorkflow.lastIndexOf("create_or_update_github_release")).toBeLessThan( releaseWorkflow.indexOf('if [[ -n "${clawhub_pid}" ]] && ! wait "${clawhub_pid}"'), ); diff --git a/test/scripts/plugin-prerelease-test-plan.test.ts b/test/scripts/plugin-prerelease-test-plan.test.ts index 0d09f84fb80..7eec14b12a8 100644 --- a/test/scripts/plugin-prerelease-test-plan.test.ts +++ b/test/scripts/plugin-prerelease-test-plan.test.ts @@ -561,6 +561,13 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { expect(fullReleaseWorkflow.jobs.release_checks["timeout-minutes"]).toBe( "${{ inputs.release_profile != 'minimum' && 240 || 60 }}", ); + const fullReleaseSource = readFileSync(".github/workflows/full-release-validation.yml", "utf8"); + expect( + fullReleaseSource.match(/has failed child jobs before the workflow completed/gu)?.length, + ).toBeGreaterThanOrEqual(3); + expect(fullReleaseSource).toContain( + "npm-telegram-beta-e2e.yml has failed child jobs before the workflow completed; cancelling the remaining run.", + ); }); it("keeps runtime tool coverage blocking in release checks", () => {