ci-timings-summary:
  # Terminal reporting job: once every lane listed in `needs` has finished, it
  # runs scripts/ci-run-timings.mjs against the current run and publishes the
  # report both as the job summary and as an artifact.
  permissions:
    # GH_TOKEN below is handed to the timing helper, which queries this run;
    # nothing in this job needs write access.
    actions: read
    contents: read
  name: ci-timings-summary
  needs:
    - preflight
    - security-fast
    - pnpm-store-warmup
    - build-artifacts
    - checks-fast-core
    - checks-fast-plugin-contracts-shard
    - checks-fast-channel-contracts-shard
    - checks-node-compat
    - checks-node-core-test-nondist-shard
    - check-shard
    - check-additional-shard
    - check-docs
    - skills-python
    - checks-windows
    - macos-node
    - macos-swift
    - android
  # Report even when upstream lanes failed or were skipped, but not when the
  # run was cancelled and not for draft pull requests.
  if: ${{ !cancelled() && always() && (github.event_name != 'pull_request' || !github.event.pull_request.draft) }}
  runs-on: ubuntu-24.04
  timeout-minutes: 5
  steps:
    - name: Checkout timing summary helper
      uses: actions/checkout@v6
      with:
        # On pull requests the helper is taken from the base revision, so the
        # script that receives GH_TOKEN is never branch-controlled code.
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || needs.preflight.outputs.checkout_revision || github.sha }}
        fetch-depth: 1
        fetch-tags: false
        persist-credentials: false
        submodules: false

    - name: Write CI timing summary
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        node scripts/ci-run-timings.mjs "$GITHUB_RUN_ID" --limit 25 > ci-timings-summary.txt
        # The step summary is rendered as Markdown; fence the plain-text report
        # so its line breaks and column alignment survive rendering.
        echo '```text' >> "$GITHUB_STEP_SUMMARY"
        cat ci-timings-summary.txt >> "$GITHUB_STEP_SUMMARY"
        echo '```' >> "$GITHUB_STEP_SUMMARY"

    - name: Upload CI timing summary
      uses: actions/upload-artifact@v7
      with:
        name: ci-timings-summary
        path: ci-timings-summary.txt
        retention-days: 14
Matrix jobs use `fail-fast: false`, and `build-artifacts` reports embedded channel, core-support-boundary, and gateway-watch failures directly instead of queuing tiny verifier jobs. The automatic CI concurrency key is versioned (`CI-v7-*`) so a GitHub-side zombie in an old queue group cannot indefinitely block newer main runs. Manual full-suite runs use `CI-manual-v1-*` and do not cancel in-progress runs. -Use `pnpm ci:timings`, `pnpm ci:timings:recent`, or `node scripts/ci-run-timings.mjs ` to summarize wall time, queue time, slowest jobs, and failures from GitHub Actions. For build timing, check the `build-artifacts` job's `Build dist` step: `pnpm build:ci-artifacts` prints `[build-all] phase timings:` and includes `ui:build`. The job also uploads the `startup-memory` artifact. +Use `pnpm ci:timings`, `pnpm ci:timings:recent`, or `node scripts/ci-run-timings.mjs <run-id>` to summarize wall time, queue time, slowest jobs, failures, and the `pnpm-store-warmup` fanout barrier from GitHub Actions. CI also uploads the same run summary as a `ci-timings-summary` artifact. For build timing, check the `build-artifacts` job's `Build dist` step: `pnpm build:ci-artifacts` prints `[build-all] phase timings:` and includes `ui:build`; the job also uploads the `startup-memory` artifact. + +For pull request runs, the terminal timing-summary job checks out the helper from the trusted base revision, so the code that receives `GH_TOKEN` and calls `gh run view` never comes from the pull request branch. That keeps the token-authenticated query out of branch-controlled code while still summarizing the pull request's current CI run. ## Real behavior proof diff --git a/scripts/ci-run-timings.mjs b/scripts/ci-run-timings.mjs index 74e9adbc34b..c16ddee26df 100644 --- a/scripts/ci-run-timings.mjs +++ b/scripts/ci-run-timings.mjs @@ -18,11 +18,35 @@ function formatSeconds(value) { return value === null ? 
"" : `${value}s`; } +function percentile(values, percentileValue) { + if (values.length === 0) { + return null; + } + const sorted = [...values].toSorted((left, right) => left - right); + const index = Math.min(sorted.length - 1, Math.ceil(sorted.length * percentileValue) - 1); + return sorted[index]; +} + function parseRunList(raw) { const parsed = JSON.parse(raw); return Array.isArray(parsed) ? parsed : []; } +function isPnpmStoreWarmupGatedJobName(name) { + return ( + name === "build-artifacts" || + name === "check-docs" || + name === "check-guards" || + name === "check-prod-types" || + name === "check-lint" || + name === "check-dependencies" || + name === "check-test-types" || + name.startsWith("check-additional-") || + name.startsWith("checks-fast-") || + (name.startsWith("checks-node-") && !name.startsWith("checks-node-compat-")) + ); +} + function collectRunTimingContext(run) { const created = parseTime(run.createdAt); const updated = parseTime(run.updatedAt); @@ -69,6 +93,46 @@ export function summarizeRunTimings(run, limit = 15) { }; } +export function summarizePnpmStoreWarmupBarrier(run, windowSeconds = 5) { + const { jobs } = collectRunTimingContext(run); + const preflight = jobs.find((job) => job.name === "preflight"); + const warmup = jobs.find((job) => job.name === "pnpm-store-warmup"); + if (!warmup?.started || !warmup?.completed) { + return null; + } + + const postWarmupJobs = jobs.filter( + (job) => + job.name !== "preflight" && + job.name !== "security-fast" && + job.name !== "pnpm-store-warmup" && + isPnpmStoreWarmupGatedJobName(job.name) && + job.status === "completed" && + job.conclusion !== "skipped" && + job.started !== null && + job.started >= warmup.completed && + (job.durationSeconds ?? 
0) > 5, + ); + const startDelays = postWarmupJobs + .map((job) => secondsBetween(warmup.completed, job.started)) + .filter((delay) => delay !== null); + + return { + activePostWarmupJobCount: postWarmupJobs.length, + firstPostWarmupStartDelaySeconds: startDelays.length === 0 ? null : Math.min(...startDelays), + postWarmupP95StartDelaySeconds: percentile(startDelays, 0.95), + postWarmupStartedWithinWindow: startDelays.filter((delay) => delay <= windowSeconds).length, + preflightToWarmupCompleteSeconds: secondsBetween( + preflight?.completed ?? null, + warmup.completed, + ), + preflightToWarmupStartSeconds: secondsBetween(preflight?.completed ?? null, warmup.started), + warmupDurationSeconds: secondsBetween(warmup.started, warmup.completed), + warmupResult: `${warmup.status}/${warmup.conclusion}`, + windowSeconds, + }; +} + export function selectLatestMainPushCiRun(runs, headSha = null) { const pushRuns = runs.filter((run) => run.event === "push"); if (headSha) { @@ -193,15 +257,6 @@ function summarizeJobs(run) { }; } -function percentile(values, percentileValue) { - if (values.length === 0) { - return null; - } - const sorted = [...values].toSorted((left, right) => left - right); - const index = Math.min(sorted.length - 1, Math.ceil(sorted.length * percentileValue) - 1); - return sorted[index]; -} - function printSection(title, jobs, metric) { console.log(title); for (const job of jobs) { @@ -265,11 +320,32 @@ async function main() { return; } const runId = explicitRunId ?? (useLatestMain ? 
getLatestMainPushCiRunId() : getLatestCiRunId()); - const summary = summarizeRunTimings(loadRun(runId), limit); + const run = loadRun(runId); + const summary = summarizeRunTimings(run, limit); + const warmupBarrier = summarizePnpmStoreWarmupBarrier(run); console.log( `CI run ${runId}: ${summary.status}/${summary.conclusion} wall=${formatSeconds(summary.wallSeconds)}`, ); + if (warmupBarrier) { + console.log("\npnpm-store-warmup barrier"); + console.log( + [ + `result=${warmupBarrier.warmupResult}`, + `preflight->start=${formatSeconds(warmupBarrier.preflightToWarmupStartSeconds)}`, + `duration=${formatSeconds(warmupBarrier.warmupDurationSeconds)}`, + `preflight->complete=${formatSeconds(warmupBarrier.preflightToWarmupCompleteSeconds)}`, + ].join(" "), + ); + console.log( + [ + `active-post-warmup-jobs=${warmupBarrier.activePostWarmupJobCount}`, + `first-start-delay=${formatSeconds(warmupBarrier.firstPostWarmupStartDelaySeconds)}`, + `p95-start-delay=${formatSeconds(warmupBarrier.postWarmupP95StartDelaySeconds)}`, + `started-within-${warmupBarrier.windowSeconds}s=${warmupBarrier.postWarmupStartedWithinWindow}`, + ].join(" "), + ); + } printSection("\nSlowest jobs", summary.byDuration, "durationSeconds"); printSection("\nLongest queues", summary.byQueue, "queueSeconds"); if (summary.badJobs.length > 0) { diff --git a/test/scripts/ci-run-timings.test.ts b/test/scripts/ci-run-timings.test.ts index 7480e179f24..34a7c86a5b1 100644 --- a/test/scripts/ci-run-timings.test.ts +++ b/test/scripts/ci-run-timings.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { parseRunTimingArgs, selectLatestMainPushCiRun, + summarizePnpmStoreWarmupBarrier, summarizeRunTimings, } from "../../scripts/ci-run-timings.mjs"; @@ -80,6 +81,71 @@ describe("scripts/ci-run-timings.mjs", () => { }); }); + it("summarizes the pnpm store warmup fanout barrier", () => { + expect( + summarizePnpmStoreWarmupBarrier({ + conclusion: "success", + createdAt: "2026-05-28T23:03:01Z", + 
jobs: [ + { + completedAt: "2026-05-28T23:04:05Z", + conclusion: "success", + name: "preflight", + startedAt: "2026-05-28T23:03:55Z", + status: "completed", + }, + { + completedAt: "2026-05-28T23:04:27Z", + conclusion: "success", + name: "pnpm-store-warmup", + startedAt: "2026-05-28T23:04:07Z", + status: "completed", + }, + { + completedAt: "2026-05-28T23:06:26Z", + conclusion: "success", + name: "checks-fast-bundled-protocol", + startedAt: "2026-05-28T23:04:29Z", + status: "completed", + }, + { + completedAt: "2026-05-28T23:04:28Z", + conclusion: "skipped", + name: "check-docs", + startedAt: "2026-05-28T23:04:28Z", + status: "completed", + }, + { + completedAt: "2026-05-28T23:04:35Z", + conclusion: "success", + name: "security-fast", + startedAt: "2026-05-28T23:03:55Z", + status: "completed", + }, + { + completedAt: "2026-05-28T23:05:30Z", + conclusion: "success", + name: "checks-node-compat-node22", + startedAt: "2026-05-28T23:04:30Z", + status: "completed", + }, + ], + status: "completed", + updatedAt: "2026-05-28T23:07:33Z", + }), + ).toEqual({ + activePostWarmupJobCount: 1, + firstPostWarmupStartDelaySeconds: 2, + postWarmupP95StartDelaySeconds: 2, + postWarmupStartedWithinWindow: 1, + preflightToWarmupCompleteSeconds: 22, + preflightToWarmupStartSeconds: 2, + warmupDurationSeconds: 20, + warmupResult: "completed/success", + windowSeconds: 5, + }); + }); + it("falls back to the newest push CI run when the exact SHA has not appeared yet", () => { expect( selectLatestMainPushCiRun( diff --git a/test/scripts/ci-workflow-guards.test.ts b/test/scripts/ci-workflow-guards.test.ts index c2de73b9a7c..10642d210b0 100644 --- a/test/scripts/ci-workflow-guards.test.ts +++ b/test/scripts/ci-workflow-guards.test.ts @@ -47,6 +47,58 @@ describe("ci workflow guards", () => { expect(buildArtifactSteps.some((step) => step.run === "pnpm ui:build")).toBe(false); }); + it("uploads a CI timing summary after the run lanes finish", () => { + const workflow = readCiWorkflow(); + const 
timingJob = workflow.jobs["ci-timings-summary"]; + + expect(timingJob.permissions).toMatchObject({ actions: "read", contents: "read" }); + expect(timingJob.needs).toEqual([ + "preflight", + "security-fast", + "pnpm-store-warmup", + "build-artifacts", + "checks-fast-core", + "checks-fast-plugin-contracts-shard", + "checks-fast-channel-contracts-shard", + "checks-node-compat", + "checks-node-core-test-nondist-shard", + "check-shard", + "check-additional-shard", + "check-docs", + "skills-python", + "checks-windows", + "macos-node", + "macos-swift", + "android", + ]); + expect(timingJob.if).toContain("always()"); + expect(timingJob.if).toContain("!cancelled()"); + + const checkoutStep = timingJob.steps.find( + (step) => step.name === "Checkout timing summary helper", + ); + expect(checkoutStep.uses).toBe("actions/checkout@v6"); + expect(checkoutStep.with.ref).toBe( + "${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || needs.preflight.outputs.checkout_revision || github.sha }}", + ); + expect(checkoutStep.with["persist-credentials"]).toBe(false); + + const writeStep = timingJob.steps.find((step) => step.name === "Write CI timing summary"); + expect(writeStep.env).toMatchObject({ GH_TOKEN: "${{ github.token }}" }); + expect(writeStep.run).toContain( + 'node scripts/ci-run-timings.mjs "$GITHUB_RUN_ID" --limit 25 > ci-timings-summary.txt', + ); + expect(writeStep.run).toContain('cat ci-timings-summary.txt >> "$GITHUB_STEP_SUMMARY"'); + + const uploadStep = timingJob.steps.find((step) => step.name === "Upload CI timing summary"); + expect(uploadStep.uses).toBe("actions/upload-artifact@v7"); + expect(uploadStep.with).toMatchObject({ + name: "ci-timings-summary", + path: "ci-timings-summary.txt", + "retention-days": 14, + }); + }); + it("keeps push docs validation ClawHub-backed", () => { const workflow = readFileSync(".github/workflows/docs.yml", "utf8");