ci: reduce release runner queue pressure

This commit is contained in:
Peter Steinberger
2026-04-29 17:45:50 +01:00
parent b53ec93ed9
commit 9f21335462
6 changed files with 77 additions and 12 deletions

View File

@@ -173,7 +173,10 @@ For bounded recovery after a focused fix, pass `-f rerun_group=<group>`.
Supported umbrella groups are `all`, `ci`, `plugin-prerelease`,
`release-checks`, `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`,
`qa-parity`, `qa-live`, and `npm-telegram`. Use the narrowest group that covers
the failed box.
the failed box. After a targeted release-check fix, do not restart the full
umbrella by habit: dispatch the matching `rerun_group`, cancel older duplicate
runs for the same target/group, and rerun only the parent verifier/evidence step
after the child is green unless the release evidence is stale.
### Release Evidence

View File

@@ -699,6 +699,28 @@ jobs:
| map("| `" + (.name | gsub("\\|"; "\\|")) + "` | `" + ((.conclusion // "") | tostring) + "` | " + (.durationMin | tostring) + " |")
| .[])
' || echo "_Unable to summarize jobs for run ${run_id}._"
echo
echo "### Longest queues: ${label}"
echo
gh run view "$run_id" --json createdAt,jobs --jq '
def ts: fromdateiso8601;
.createdAt as $createdAt |
"| Job | Result | Queue minutes | Run minutes |",
"| --- | --- | ---: | ---: |",
([.jobs[]
| select(.startedAt != "0001-01-01T00:00:00Z")
| . + {
queueMin: ((((.startedAt | ts) - ($createdAt | ts)) / 60) * 10 | round / 10),
durationMin: (if .completedAt == "0001-01-01T00:00:00Z" then null else ((((.completedAt | ts) - (.startedAt | ts)) / 60) * 10 | round / 10) end)
}
| select(.queueMin > 0)
| {name, conclusion, queueMin, durationMin}]
| sort_by(.queueMin)
| reverse
| .[0:10]
| map("| `" + (.name | gsub("\\|"; "\\|")) + "` | `" + ((.conclusion // "") | tostring) + "` | " + (.queueMin | tostring) + " | " + ((.durationMin // "") | tostring) + " |")
| .[])
' || echo "_Unable to summarize queue times for run ${run_id}._"
} >> "$GITHUB_STEP_SUMMARY"
}

View File

@@ -336,7 +336,7 @@ jobs:
validate_repo_e2e:
needs: validate_selected_ref
if: inputs.include_repo_e2e
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 90
env:
OPENCLAW_VITEST_MAX_WORKERS: "2"
@@ -363,7 +363,7 @@ jobs:
validate_special_e2e:
needs: validate_selected_ref
if: inputs.include_repo_e2e || (inputs.include_live_suites && !inputs.live_models_only)
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: ${{ matrix.timeout_minutes }}
strategy:
fail-fast: false

View File

@@ -481,7 +481,7 @@ jobs:
name: Run QA Lab parity lane (${{ matrix.lane }})
needs: [resolve_target]
if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 30
permissions:
contents: read
@@ -565,7 +565,7 @@ jobs:
name: Run QA Lab parity report
needs: [resolve_target, qa_lab_parity_lane_release_checks]
if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 20
permissions:
contents: read
@@ -621,7 +621,7 @@ jobs:
name: Run QA Lab live Matrix lane
needs: [resolve_target]
if: contains(fromJSON('["all","qa","qa-live"]'), needs.resolve_target.outputs.rerun_group)
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 60
permissions:
contents: read
@@ -698,7 +698,7 @@ jobs:
name: Run QA Lab live Telegram lane
needs: [resolve_target]
if: contains(fromJSON('["all","qa","qa-live"]'), needs.resolve_target.outputs.rerun_group)
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 60
permissions:
contents: read

View File

@@ -143,7 +143,7 @@ jobs:
run_mock_parity:
name: Run QA Lab parity gate
needs: [validate_selected_ref]
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 30
env:
QA_PARITY_CONCURRENCY: "1"
@@ -215,7 +215,7 @@ jobs:
name: Run Matrix live QA lane
needs: [authorize_actor, validate_selected_ref]
if: ${{ !(github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all') }}
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 60
environment: qa-live-shared
steps:
@@ -290,7 +290,7 @@ jobs:
name: Run Matrix live QA lane (${{ matrix.profile }})
needs: [authorize_actor, validate_selected_ref]
if: ${{ github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all' }}
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 60
environment: qa-live-shared
strategy:
@@ -372,7 +372,7 @@ jobs:
run_live_telegram:
name: Run Telegram live QA lane with Convex leases
needs: [authorize_actor, validate_selected_ref]
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 60
environment: qa-live-shared
steps:
@@ -465,7 +465,7 @@ jobs:
run_live_discord:
name: Run Discord live QA lane with Convex leases
needs: [authorize_actor, validate_selected_ref]
runs-on: blacksmith-32vcpu-ubuntu-2404
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 60
environment: qa-live-shared
steps:

View File

@@ -6,6 +6,7 @@ const LIVE_E2E_WORKFLOW = ".github/workflows/openclaw-live-and-e2e-checks-reusab
const NPM_TELEGRAM_WORKFLOW = ".github/workflows/npm-telegram-beta-e2e.yml";
const RELEASE_CHECKS_WORKFLOW = ".github/workflows/openclaw-release-checks.yml";
const FULL_RELEASE_VALIDATION_WORKFLOW = ".github/workflows/full-release-validation.yml";
const QA_LIVE_TRANSPORTS_WORKFLOW = ".github/workflows/qa-live-transports-convex.yml";
describe("package acceptance workflow", () => {
it("resolves candidate package sources before reusing Docker E2E lanes", () => {
@@ -138,6 +139,8 @@ describe("package artifact reuse", () => {
'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","--skip-git-repo-check"]',
);
expect(workflow).toContain("bash .release-harness/scripts/ci-live-command-retry.sh");
expect(workflow).toMatch(/validate_repo_e2e:[\s\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404/u);
expect(workflow).toMatch(/validate_special_e2e:[\s\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404/u);
expect(workflow).toMatch(
/validate_live_provider_suites:[\s\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404/u,
);
@@ -349,4 +352,41 @@ describe("package artifact reuse", () => {
expect(workflow).not.toContain("workflow_ref:");
expect(workflow).not.toContain("inputs.workflow_ref");
});
it("keeps release QA and repo E2E lanes off scarce 32-core runners", () => {
const releaseChecksWorkflow = readFileSync(RELEASE_CHECKS_WORKFLOW, "utf8");
const qaWorkflow = readFileSync(QA_LIVE_TRANSPORTS_WORKFLOW, "utf8");
for (const jobName of [
"qa_lab_parity_lane_release_checks",
"qa_lab_parity_report_release_checks",
"qa_live_matrix_release_checks",
"qa_live_telegram_release_checks",
]) {
expect(releaseChecksWorkflow).toMatch(
new RegExp(`${jobName}:[\\s\\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404`, "u"),
);
}
for (const jobName of [
"run_mock_parity",
"run_live_matrix",
"run_live_matrix_sharded",
"run_live_telegram",
"run_live_discord",
]) {
expect(qaWorkflow).toMatch(
new RegExp(`${jobName}:[\\s\\S]*?runs-on: blacksmith-8vcpu-ubuntu-2404`, "u"),
);
}
});
it("summarizes queue time separately from execution time in full validation", () => {
const workflow = readFileSync(FULL_RELEASE_VALIDATION_WORKFLOW, "utf8");
expect(workflow).toContain("### Slowest jobs: ${label}");
expect(workflow).toContain("### Longest queues: ${label}");
expect(workflow).toContain("| Job | Result | Queue minutes | Run minutes |");
expect(workflow).toContain('gh run view "$run_id" --json createdAt,jobs');
});
});