From 9f213354628fabd2507f5873da827e66dc97345b Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 29 Apr 2026 17:45:50 +0100
Subject: [PATCH] ci: reduce release runner queue pressure

---
 .agents/skills/openclaw-testing/SKILL.md      |  5 ++-
 .github/workflows/full-release-validation.yml | 22 ++++++++++
 .../openclaw-live-and-e2e-checks-reusable.yml |  4 +-
 .github/workflows/openclaw-release-checks.yml |  8 ++--
 .../workflows/qa-live-transports-convex.yml   | 10 ++---
 .../package-acceptance-workflow.test.ts       | 40 +++++++++++++++++++
 6 files changed, 77 insertions(+), 12 deletions(-)
diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md
index 1b2c0a720cb..5f479af673d 100644
--- a/.agents/skills/openclaw-testing/SKILL.md
+++ b/.agents/skills/openclaw-testing/SKILL.md
@@ -173,7 +173,10 @@ For bounded recovery after a focused fix, pass `-f rerun_group=<group>`.
 Supported umbrella groups are `all`, `ci`, `plugin-prerelease`,
 `release-checks`, `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`,
 `qa-parity`, `qa-live`, and `npm-telegram`. Use the narrowest group that covers
-the failed box.
+the failed box. After a targeted release-check fix, do not restart the full
+umbrella by habit: dispatch the matching `rerun_group`, cancel older duplicate
+runs for the same target/group, and rerun only the parent verifier/evidence step
+after the child is green unless the release evidence is stale.
 
 ### Release Evidence
 
diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml
index 73e7053fc2e..49b76948b5f 100644
--- a/.github/workflows/full-release-validation.yml
+++ b/.github/workflows/full-release-validation.yml
@@ -699,6 +699,28 @@ jobs:
                   | map("| `" + (.name | gsub("\\|"; "\\|")) + "` | `" + ((.conclusion // "") | tostring) + "` | " + (.durationMin | tostring) + " |")
                   | .[])
               ' || echo "_Unable to summarize jobs for run ${run_id}._"
+              echo
+              echo "### Longest queues: ${label}"
+              echo
+              gh run view "$run_id" --json createdAt,jobs --jq '
+                def ts: fromdateiso8601;
+                .createdAt as $createdAt |
+                "| Job | Result | Queue minutes | Run minutes |",
+                "| --- | --- | ---: | ---: |",
+                ([.jobs[]
+                  | select(.startedAt != "0001-01-01T00:00:00Z")
+                  | . + {
+                      queueMin: ((((.startedAt | ts) - ($createdAt | ts)) / 60) * 10 | round / 10),
+                      durationMin: (if .completedAt == "0001-01-01T00:00:00Z" then null else ((((.completedAt | ts) - (.startedAt | ts)) / 60) * 10 | round / 10) end)
+                    }
+                  | select(.queueMin > 0)
+                  | {name, conclusion, queueMin, durationMin}]
+                  | sort_by(.queueMin)
+                  | reverse
+                  | .[0:10]
+                  | map("| `" + (.name | gsub("\\|"; "\\|")) + "` | `" + ((.conclusion // "") | tostring) + "` | " + (.queueMin | tostring) + " | " + ((.durationMin // "") | tostring) + " |")
+                  | .[])
+              ' || echo "_Unable to summarize queue times for run ${run_id}._"
             } >> "$GITHUB_STEP_SUMMARY"
           }
 
diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
index ac51aa49b5e..bf85f14a580 100644
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -336,7 +336,7 @@ jobs:
   validate_repo_e2e:
     needs: validate_selected_ref
     if: inputs.include_repo_e2e
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 90
     env:
       OPENCLAW_VITEST_MAX_WORKERS: "2"
@@ -363,7 +363,7 @@ jobs:
   validate_special_e2e:
     needs: validate_selected_ref
     if: inputs.include_repo_e2e || (inputs.include_live_suites && !inputs.live_models_only)
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: ${{ matrix.timeout_minutes }}
     strategy:
       fail-fast: false
diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml
index af45fce7bef..1896c0e301e 100644
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -481,7 +481,7 @@ jobs:
     name: Run QA Lab parity lane (${{ matrix.lane }})
     needs: [resolve_target]
     if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 30
     permissions:
       contents: read
@@ -565,7 +565,7 @@ jobs:
     name: Run QA Lab parity report
     needs: [resolve_target, qa_lab_parity_lane_release_checks]
     if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 20
     permissions:
       contents: read
@@ -621,7 +621,7 @@ jobs:
     name: Run QA Lab live Matrix lane
     needs: [resolve_target]
     if: contains(fromJSON('["all","qa","qa-live"]'), needs.resolve_target.outputs.rerun_group)
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     permissions:
       contents: read
@@ -698,7 +698,7 @@ jobs:
     name: Run QA Lab live Telegram lane
     needs: [resolve_target]
     if: contains(fromJSON('["all","qa","qa-live"]'), needs.resolve_target.outputs.rerun_group)
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     permissions:
       contents: read
diff --git a/.github/workflows/qa-live-transports-convex.yml b/.github/workflows/qa-live-transports-convex.yml
index 7651055203d..4827954fe5a 100644
--- a/.github/workflows/qa-live-transports-convex.yml
+++ b/.github/workflows/qa-live-transports-convex.yml
@@ -143,7 +143,7 @@ jobs:
   run_mock_parity:
     name: Run QA Lab parity gate
     needs: [validate_selected_ref]
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 30
     env:
       QA_PARITY_CONCURRENCY: "1"
@@ -215,7 +215,7 @@ jobs:
     name: Run Matrix live QA lane
     needs: [authorize_actor, validate_selected_ref]
     if: ${{ !(github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all') }}
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     environment: qa-live-shared
     steps:
@@ -290,7 +290,7 @@ jobs:
     name: Run Matrix live QA lane (${{ matrix.profile }})
     needs: [authorize_actor, validate_selected_ref]
     if: ${{ github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all' }}
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     environment: qa-live-shared
     strategy:
@@ -372,7 +372,7 @@ jobs:
   run_live_telegram:
     name: Run Telegram live QA lane with Convex leases
     needs: [authorize_actor, validate_selected_ref]
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     environment: qa-live-shared
     steps:
@@ -465,7 +465,7 @@ jobs:
   run_live_discord:
     name: Run Discord live QA lane with Convex leases
     needs: [authorize_actor, validate_selected_ref]
-    runs-on: blacksmith-32vcpu-ubuntu-2404
+    runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     environment: qa-live-shared
     steps:
diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts
index 18129f9e601..99a51b12fa6 100644
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -6,6 +6,7 @@ const LIVE_E2E_WORKFLOW = ".github/workflows/openclaw-live-and-e2e-checks-reusab
 const NPM_TELEGRAM_WORKFLOW = ".github/workflows/npm-telegram-beta-e2e.yml";
 const RELEASE_CHECKS_WORKFLOW = ".github/workflows/openclaw-release-checks.yml";
 const FULL_RELEASE_VALIDATION_WORKFLOW = ".github/workflows/full-release-validation.yml";
+const QA_LIVE_TRANSPORTS_WORKFLOW = ".github/workflows/qa-live-transports-convex.yml";
 
 describe("package acceptance workflow", () => {
   it("resolves candidate package sources before reusing Docker E2E lanes", () => {
@@ -138,6 +139,8 @@ describe("package artifact reuse", () => {
       'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","--skip-git-repo-check"]',
     );
     expect(workflow).toContain("bash .release-harness/scripts/ci-live-command-retry.sh");
+    expect(workflow).toMatch(/validate_repo_e2e:[\s\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404/u);
+    expect(workflow).toMatch(/validate_special_e2e:[\s\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404/u);
     expect(workflow).toMatch(
       /validate_live_provider_suites:[\s\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404/u,
     );
@@ -349,4 +352,41 @@ describe("package artifact reuse", () => {
     expect(workflow).not.toContain("workflow_ref:");
     expect(workflow).not.toContain("inputs.workflow_ref");
   });
+
+  it("keeps release QA and repo E2E lanes off scarce 32-core runners", () => {
+    const releaseChecksWorkflow = readFileSync(RELEASE_CHECKS_WORKFLOW, "utf8");
+    const qaWorkflow = readFileSync(QA_LIVE_TRANSPORTS_WORKFLOW, "utf8");
+
+    // Scope every match to a single job: after the job key, only lines that are
+    // not another two-space job key may precede `runs-on`. An unscoped lazy
+    // wildcard would let a later job's 8-vCPU runner satisfy the assertion for a
+    // job that is still pinned to a 32-vCPU runner.
+    const jobBody = "[^\\n]*\\n(?:(?! {2}\\S)[^\\n]*\\n)*?";
+    const runsOn = " {4}runs-on: blacksmith-8vcpu-ubuntu-2404";
+
+    for (const [workflow, jobName] of [
+      // Lanes in openclaw-release-checks.yml.
+      [releaseChecksWorkflow, "qa_lab_parity_lane_release_checks"],
+      [releaseChecksWorkflow, "qa_lab_parity_report_release_checks"],
+      [releaseChecksWorkflow, "qa_live_matrix_release_checks"],
+      [releaseChecksWorkflow, "qa_live_telegram_release_checks"],
+      // Lanes in qa-live-transports-convex.yml.
+      [qaWorkflow, "run_mock_parity"],
+      [qaWorkflow, "run_live_matrix"],
+      [qaWorkflow, "run_live_matrix_sharded"],
+      [qaWorkflow, "run_live_telegram"],
+      [qaWorkflow, "run_live_discord"],
+    ] as const) {
+      expect(workflow).toMatch(new RegExp(`^ {2}${jobName}:${jobBody}${runsOn}`, "mu"));
+    }
+  });
+
+  it("summarizes queue time separately from execution time in full validation", () => {
+    const workflow = readFileSync(FULL_RELEASE_VALIDATION_WORKFLOW, "utf8");
+    // Pins both summary headings, the queue table header, and the createdAt query.
+    expect(workflow).toContain("### Slowest jobs: ${label}");
+    expect(workflow).toContain("### Longest queues: ${label}");
+    expect(workflow).toContain("| Job | Result | Queue minutes | Run minutes |");
+    expect(workflow).toContain('gh run view "$run_id" --json createdAt,jobs');
+  });
 });