From e9986aa7872d82ca3c609ed095b4d9cdc47f2b94 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Mon, 27 Apr 2026 12:59:59 +0100
Subject: [PATCH] fix(ci): make full validation rerun-aware

---
 .agents/skills/openclaw-testing/SKILL.md      |  6 ++
 .github/workflows/full-release-validation.yml | 92 ++++++++++++++-----
 docs/ci.md                                    |  5 +-
 docs/reference/RELEASING.md                   |  4 +-
 4 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md
index 841c3320960..60fd4c6660a 100644
--- a/.agents/skills/openclaw-testing/SKILL.md
+++ b/.agents/skills/openclaw-testing/SKILL.md
@@ -131,6 +131,12 @@ If a full run is already active on a newer `origin/main`, prefer watching that
 run over dispatching a duplicate. If you accidentally dispatch a stale duplicate,
 cancel it and monitor the current run.
 
+The child-dispatch jobs record the child run ids. The final
+`Verify full validation` job re-queries those child runs and is the canonical
+parent gate. If a child workflow failed but was later rerun successfully, rerun
+only the failed parent verifier job; do not dispatch a new full umbrella unless
+the release evidence is stale.
+
 ### Release Evidence
 
 After release-candidate validation or before a release decision, record the
diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml
index 9a4b7b4a8db..c4329db67a3 100644
--- a/.github/workflows/full-release-validation.yml
+++ b/.github/workflows/full-release-validation.yml
@@ -109,8 +109,13 @@ jobs:
     needs: [resolve_target]
     runs-on: ubuntu-24.04
     timeout-minutes: 240
+    outputs:
+      run_id: ${{ steps.dispatch.outputs.run_id }}
+      url: ${{ steps.dispatch.outputs.url }}
+      conclusion: ${{ steps.dispatch.outputs.conclusion }}
     steps:
       - name: Dispatch and monitor CI
+        id: dispatch
         env:
           GH_TOKEN: ${{ github.token }}
           TARGET_REF: ${{ inputs.ref }}
@@ -146,6 +151,7 @@ jobs:
             fi
 
             echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+            echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
             while true; do
               status="$(gh run view "$run_id" --json status --jq '.status')"
@@ -158,9 +164,10 @@ jobs:
             conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
             url="$(gh run view "$run_id" --json url --jq '.url')"
             echo "${workflow} finished with ${conclusion}: ${url}"
+            echo "url=${url}" >> "$GITHUB_OUTPUT"
+            echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
             if [[ "$conclusion" != "success" ]]; then
-              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}'
-              exit 1
+              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
             fi
           }
 
@@ -178,8 +185,13 @@ jobs:
     needs: [resolve_target]
     runs-on: ubuntu-24.04
     timeout-minutes: 720
+    outputs:
+      run_id: ${{ steps.dispatch.outputs.run_id }}
+      url: ${{ steps.dispatch.outputs.url }}
+      conclusion: ${{ steps.dispatch.outputs.conclusion }}
     steps:
       - name: Dispatch and monitor release checks
+        id: dispatch
         env:
           GH_TOKEN: ${{ github.token }}
           TARGET_REF: ${{ inputs.ref }}
@@ -217,6 +229,7 @@ jobs:
             fi
 
             echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+            echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
             while true; do
               status="$(gh run view "$run_id" --json status --jq '.status')"
@@ -229,9 +242,10 @@ jobs:
             conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
             url="$(gh run view "$run_id" --json url --jq '.url')"
             echo "${workflow} finished with ${conclusion}: ${url}"
+            echo "url=${url}" >> "$GITHUB_OUTPUT"
+            echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
             if [[ "$conclusion" != "success" ]]; then
-              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}'
-              exit 1
+              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
             fi
           }
 
@@ -255,8 +269,13 @@ jobs:
     if: inputs.npm_telegram_package_spec != ''
     runs-on: ubuntu-24.04
     timeout-minutes: 120
+    outputs:
+      run_id: ${{ steps.dispatch.outputs.run_id }}
+      url: ${{ steps.dispatch.outputs.url }}
+      conclusion: ${{ steps.dispatch.outputs.conclusion }}
     steps:
       - name: Dispatch and monitor npm Telegram E2E
+        id: dispatch
         env:
           GH_TOKEN: ${{ github.token }}
           WORKFLOW_REF: ${{ inputs.workflow_ref }}
@@ -293,6 +312,7 @@ jobs:
           fi
 
           echo "Dispatched npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+          echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
           while true; do
             status="$(gh run view "$run_id" --json status --jq '.status')"
@@ -305,9 +325,10 @@ jobs:
           conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
           url="$(gh run view "$run_id" --json url --jq '.url')"
           echo "npm-telegram-beta-e2e.yml finished with ${conclusion}: ${url}"
+          echo "url=${url}" >> "$GITHUB_OUTPUT"
+          echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
           if [[ "$conclusion" != "success" ]]; then
-            gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}'
-            exit 1
+            gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
           fi
 
   summary:
@@ -347,7 +368,7 @@ jobs:
               --arg release_id "$release_id" \
               --arg release_ref "$TARGET_REF" \
               --arg package_spec "$PACKAGE_SPEC" \
-              --arg notes "Automatically requested by Full Release Validation ${GITHUB_RUN_ID_VALUE} after child workflows completed." \
+              --arg notes "Automatically requested by Full Release Validation ${GITHUB_RUN_ID_VALUE} after child workflows completed; the parent summary re-checks current child run conclusions." \
               '{
                 event_type: "openclaw_full_release_validation_completed",
                 client_payload: {
@@ -370,22 +391,51 @@ jobs:
 
       - name: Verify child workflow results
         env:
-          NORMAL_CI_RESULT: ${{ needs.normal_ci.result }}
-          RELEASE_CHECKS_RESULT: ${{ needs.release_checks.result }}
+          GH_TOKEN: ${{ github.token }}
+          NORMAL_CI_RUN_ID: ${{ needs.normal_ci.outputs.run_id }}
+          RELEASE_CHECKS_RUN_ID: ${{ needs.release_checks.outputs.run_id }}
+          NPM_TELEGRAM_RUN_ID: ${{ needs.npm_telegram.outputs.run_id }}
           NPM_TELEGRAM_RESULT: ${{ needs.npm_telegram.result }}
         run: |
           set -euo pipefail
-          failed=0
-          for item in \
-            "normal_ci=${NORMAL_CI_RESULT}" \
-            "release_checks=${RELEASE_CHECKS_RESULT}" \
-            "npm_telegram=${NPM_TELEGRAM_RESULT}"
-          do
-            name="${item%%=*}"
-            result="${item#*=}"
-            if [[ "$result" != "success" && "$result" != "skipped" ]]; then
-              echo "::error::${name} ended with ${result}"
-              failed=1
+
+          # check_child LABEL RUN_ID REQUIRED: re-query one recorded child workflow run.
+          # Returns 0 when the run is completed/success, or when RUN_ID is blank and
+          # REQUIRED is "0"; otherwise prints an ::error:: annotation and returns 1.
+          check_child() {
+            local label="$1" run_id="$2" required="$3" status conclusion url attempt
+            if [[ -z "${run_id// }" ]]; then
+              if [[ "$required" == "0" ]]; then
+                echo "${label}: skipped"
+                return 0
+              fi
+              echo "::error::${label} did not record a child run id."
+              return 1
             fi
-          done
+
+            # One API call, so all four fields describe the same snapshot of the run.
+            { read -r status; read -r conclusion; read -r url; read -r attempt; } < <(
+              gh run view "$run_id" --json status,conclusion,url,attempt \
+                --jq '.status, .conclusion, .url, .attempt'
+            ) || true
+            echo "${label}: ${status}/${conclusion} attempt ${attempt}: ${url}"
+
+            if [[ "$status" != "completed" || "$conclusion" != "success" ]]; then
+              echo "::error::${label} child run ended with ${status}/${conclusion}: ${url}"
+              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, status, conclusion, url}' || true
+              return 1
+            fi
+          }
+
+          # Check every child (no short-circuit) so all failing children are reported.
+          failed=0
+          check_child "normal_ci" "$NORMAL_CI_RUN_ID" 1 || failed=1
+          check_child "release_checks" "$RELEASE_CHECKS_RUN_ID" 1 || failed=1
+
+          # npm_telegram is optional only when its job was skipped and recorded no run id.
+          npm_telegram_required=1
+          if [[ "$NPM_TELEGRAM_RESULT" == "skipped" && -z "${NPM_TELEGRAM_RUN_ID// }" ]]; then
+            npm_telegram_required=0
+          fi
+          check_child "npm_telegram" "$NPM_TELEGRAM_RUN_ID" "$npm_telegram_required" || failed=1
           exit "$failed"
diff --git a/docs/ci.md b/docs/ci.md
index d11515643d0..8a4738d4d5f 100644
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -14,7 +14,10 @@ manual `CI` workflow with that target, and dispatches `OpenClaw Release Checks`
 for install smoke, package acceptance, Docker release-path suites, live/E2E,
 OpenWebUI, QA Lab parity, Matrix, and Telegram lanes. It can also run the
 post-publish `NPM Telegram Beta E2E` workflow when a published package spec is
-provided.
+provided. The umbrella records the dispatched child run ids, and the final
+`Verify full validation` job re-checks the current child run conclusions. If a
+child workflow is rerun and turns green, rerun only the parent verifier job to
+refresh the umbrella result.
 
 `Package Acceptance` is the side-run workflow for validating a package artifact
 without blocking the release workflow. It resolves one candidate from a
diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md
index c6f0cca2f63..e105f487e73 100644
--- a/docs/reference/RELEASING.md
+++ b/docs/reference/RELEASING.md
@@ -281,7 +281,9 @@ Do not use the full umbrella as the first rerun after a focused fix. If one box
 fails, use the failed child workflow, job, Docker lane, package profile, model
 provider, or QA lane for the next proof. Run the full umbrella again only when
 the fix changed shared release orchestration or made earlier all-box evidence
-stale.
+stale. The umbrella's final verifier re-checks the recorded child workflow run
+ids, so after a child workflow is rerun successfully, rerun only the failed
+`Verify full validation` parent job.
 
 ### Vitest