ci: split slow CI shards

This commit is contained in:
Peter Steinberger
2026-05-03 13:43:30 +01:00
parent c02bf2f399
commit a4a4cac8e9
6 changed files with 204 additions and 47 deletions

View File

@@ -1467,8 +1467,18 @@ jobs:
fail-fast: false
matrix:
include:
- check_name: check-additional-boundaries
- check_name: check-additional-boundaries-a
group: boundaries
boundary_shard: 1/4
- check_name: check-additional-boundaries-b
group: boundaries
boundary_shard: 2/4
- check_name: check-additional-boundaries-c
group: boundaries
boundary_shard: 3/4
- check_name: check-additional-boundaries-d
group: boundaries
boundary_shard: 4/4
- check_name: check-additional-extension-channels
group: extension-channels
- check_name: check-additional-extension-bundled
@@ -1573,6 +1583,7 @@ jobs:
- name: Run additional check shard
env:
ADDITIONAL_CHECK_GROUP: ${{ matrix.group }}
OPENCLAW_ADDITIONAL_BOUNDARY_SHARD: ${{ matrix.boundary_shard || '' }}
RUN_CONTROL_UI_I18N: ${{ needs.preflight.outputs.run_control_ui_i18n }}
OPENCLAW_ADDITIONAL_BOUNDARY_CONCURRENCY: 4
OPENCLAW_EXTENSION_BOUNDARY_CONCURRENCY: 6

View File

@@ -12,30 +12,30 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`
## Pipeline overview
| Job | Purpose | When it runs |
| -------------------------------- | ------------------------------------------------------------------------------------------------------------------- | ---------------------------------- |
| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs |
| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs |
| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs |
| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs |
| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes |
| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes |
| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes |
| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes |
| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes |
| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes |
| `check-additional` | Architecture, boundary, prompt snapshot drift, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes |
| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes |
| `checks` | Verifier for built-artifact channel tests | Node-relevant changes |
| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases |
| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed |
| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes |
| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes |
| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes |
| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes |
| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes |
| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch |
| `openclaw-performance` | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch |
| Job | Purpose | When it runs |
| -------------------------------- | --------------------------------------------------------------------------------------------------------- | ---------------------------------- |
| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs |
| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs |
| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs |
| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs |
| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes |
| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes |
| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes |
| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes |
| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes |
| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes |
| `check-additional` | Architecture, sharded boundary/prompt drift, extension guards, package boundary, and gateway watch | Node-relevant changes |
| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes |
| `checks` | Verifier for built-artifact channel tests | Node-relevant changes |
| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases |
| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed |
| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes |
| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes |
| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes |
| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes |
| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes |
| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch |
| `openclaw-performance` | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch |
## Fail-fast order
@@ -54,7 +54,7 @@ Scope logic lives in `scripts/ci-changed-scope.mjs` and is covered by unit tests
- **CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract helper/test-routing edits** use a fast Node-only manifest path: `preflight`, security, and a single `checks-fast-core` task. That path skips build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the change is limited to the routing or helper surfaces the fast task exercises directly.
- **Windows Node checks** are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes.
The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, small core unit lanes are paired, auto-reply runs as four balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job, including `pnpm prompt:snapshots:check` so Codex runtime happy-path prompt drift is pinned to the PR that caused it. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built.
The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, core unit fast/support lanes run separately, core runtime infra is split between state and process/config shards, auto-reply runs as balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/server configs are split across chat/auth/model/http-plugin/runtime/startup lanes instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard list is striped across four matrix shards, each running selected independent guards concurrently and printing per-check timings, including `pnpm prompt:snapshots:check` so Codex runtime happy-path prompt drift is pinned to the PR that caused it. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built.
Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest` and then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles the flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push.

View File

@@ -187,6 +187,26 @@ function resolveGatewayServerShardName(file) {
) {
return "agentic-control-plane-auth-node";
}
if (
name.startsWith("server-startup") ||
name.startsWith("server-restart") ||
name.startsWith("server-runtime") ||
name.startsWith("server.lazy") ||
name.startsWith("server.health") ||
name.startsWith("server/health-state") ||
name.startsWith("server/readiness") ||
name === "server-close.test.ts"
) {
return "agentic-control-plane-startup-runtime";
}
if (
name.includes("plugin") ||
name.includes("hooks") ||
name.includes("http") ||
name.includes("ws-connection")
) {
return "agentic-control-plane-http-plugin-ws";
}
return "agentic-control-plane-runtime";
}
@@ -200,7 +220,9 @@ function createGatewayServerSplitShards() {
"agentic-control-plane-agent-chat",
"agentic-control-plane-auth-node",
"agentic-control-plane-http-models",
"agentic-control-plane-http-plugin-ws",
"agentic-control-plane-runtime",
"agentic-control-plane-startup-runtime",
]
.map((shardName) => ({
configs: ["test/vitest/vitest.gateway-server.config.ts"],
@@ -217,12 +239,8 @@ const SPLIT_NODE_SHARDS = new Map([
"core-unit-fast",
[
{
shardName: "core-unit-fast-support",
configs: [
"test/vitest/vitest.unit-fast.config.ts",
"test/vitest/vitest.unit-support.config.ts",
],
includeExternalConfigs: true,
shardName: "core-unit-fast",
configs: ["test/vitest/vitest.unit-fast.config.ts"],
requiresDist: false,
},
],
@@ -242,16 +260,32 @@ const SPLIT_NODE_SHARDS = new Map([
],
],
["core-unit-security", []],
["core-unit-support", []],
[
"core-unit-support",
[
{
shardName: "core-unit-support",
configs: ["test/vitest/vitest.unit-support.config.ts"],
requiresDist: false,
},
],
],
[
"core-runtime",
[
{
shardName: "core-runtime-infra",
shardName: "core-runtime-infra-state",
configs: [
"test/vitest/vitest.infra.config.ts",
"test/vitest/vitest.hooks.config.ts",
"test/vitest/vitest.secrets.config.ts",
],
requiresDist: false,
runner: "blacksmith-4vcpu-ubuntu-2404",
},
{
shardName: "core-runtime-infra-process",
configs: [
"test/vitest/vitest.logging.config.ts",
"test/vitest/vitest.process.config.ts",
"test/vitest/vitest.runtime-config.config.ts",

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env node
import { spawn } from "node:child_process";
import { performance } from "node:perf_hooks";
export const BOUNDARY_CHECKS = [
["prompt:snapshots:check", "pnpm", ["prompt:snapshots:check"]],
@@ -63,12 +64,43 @@ export function resolveConcurrency(value, fallback = 4) {
return parsed;
}
/**
 * Parse a shard selector written as `N/TOTAL`, where N is 1-based.
 *
 * @param {unknown} value - Raw spec; any falsy value means "no sharding".
 * @returns {{ count: number, index: number, label: string } | null} The shard
 *   count, the zero-based shard index, and a normalized `N/TOTAL` label, or
 *   `null` when `value` is falsy.
 * @throws {Error} When the text is not `N/TOTAL` or violates 1 <= N <= TOTAL.
 */
export function parseShardSpec(value) {
  if (!value) {
    return null;
  }
  const parts = /^(\d+)\/(\d+)$/u.exec(String(value));
  if (parts === null) {
    throw new Error(`Invalid shard spec '${value}' (expected N/TOTAL)`);
  }
  const [, rawOrdinal, rawTotal] = parts;
  const ordinal = Number.parseInt(rawOrdinal, 10);
  const total = Number.parseInt(rawTotal, 10);
  const inRange =
    Number.isInteger(ordinal) &&
    Number.isInteger(total) &&
    ordinal >= 1 &&
    total >= 1 &&
    ordinal <= total;
  if (!inRange) {
    throw new Error(`Invalid shard spec '${value}' (expected 1 <= N <= TOTAL)`);
  }
  // The index is returned zero-based; the label keeps the 1-based ordinal.
  return { count: total, index: ordinal - 1, label: `${ordinal}/${total}` };
}
/**
 * Pick the checks that belong to one shard by striping over list position.
 *
 * @param {Array} checks - Full list of checks.
 * @param {string | { count: number, index: number } | null | undefined} shardSpec -
 *   A spec string (parsed with `parseShardSpec`), an already-parsed shard
 *   object, or a falsy value for "no sharding".
 * @returns {Array} `checks` itself when no shard applies; otherwise a new
 *   array holding every element whose position modulo `count` equals `index`.
 */
export function selectChecksForShard(checks, shardSpec) {
  let shard = shardSpec;
  if (typeof shardSpec === "string") {
    shard = parseShardSpec(shardSpec);
  }
  if (!shard) {
    return checks;
  }
  const { count, index: shardPosition } = shard;
  return checks.filter((_entry, position) => position % count === shardPosition);
}
/**
 * Render a check's executable and its arguments as one space-separated string.
 *
 * @param {{ command: string, args: Iterable<string> }} check - Command and argument list.
 * @returns {string} `command` followed by each argument, joined with single spaces.
 */
export function formatCommand({ command, args }) {
  const argv = [command, ...args];
  return argv.join(" ");
}
function runSingleCheck(check, { cwd, env }) {
return new Promise((resolve) => {
const startedAt = performance.now();
const child = spawn(check.command, check.args, {
cwd,
env,
@@ -83,14 +115,36 @@ function runSingleCheck(check, { cwd, env }) {
child.stderr.on("data", (chunk) => chunks.push(chunk));
child.on("error", (error) => {
chunks.push(`${error.stack ?? error.message}\n`);
resolve({ check, code: 1, signal: null, output: chunks.join("") });
resolve({
check,
code: 1,
durationMs: Math.round(performance.now() - startedAt),
signal: null,
output: chunks.join(""),
});
});
child.on("close", (code, signal) => {
resolve({ check, code: code ?? 1, signal, output: chunks.join("") });
resolve({
check,
code: code ?? 1,
durationMs: Math.round(performance.now() - startedAt),
signal,
output: chunks.join(""),
});
});
});
}
/**
 * Format a millisecond duration for log output.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} `"<ms>ms"` below one second, seconds with one decimal
 *   (`"1.5s"`) otherwise, or an empty string when `ms` is not a finite number.
 */
function formatDuration(ms) {
  if (Number.isFinite(ms)) {
    return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
  }
  return "";
}
function writeGroupedResult(result, output) {
const success = result.code === 0;
output.write(`::group::${result.check.label}\n`);
@@ -99,16 +153,25 @@ function writeGroupedResult(result, output) {
output.write(result.output.endsWith("\n") ? result.output : `${result.output}\n`);
}
if (success) {
output.write(`[ok] ${result.check.label}\n`);
output.write(`[ok] ${result.check.label} in ${formatDuration(result.durationMs)}\n`);
} else {
const suffix = result.signal ? ` (signal ${result.signal})` : ` (exit ${result.code})`;
output.write(
`::error title=${result.check.label} failed::${result.check.label} failed${suffix}\n`,
`::error title=${result.check.label} failed::${result.check.label} failed${suffix} after ${formatDuration(result.durationMs)}\n`,
);
}
output.write("::endgroup::\n");
}
function writeTimingSummary(results, output) {
output.write("Additional boundary check timings:\n");
for (const result of [...results].toSorted((left, right) => right.durationMs - left.durationMs)) {
output.write(
`${result.check.label.padEnd(48)} ${formatDuration(result.durationMs).padStart(8)}\n`,
);
}
}
export async function runChecks(
checks = BOUNDARY_CHECKS,
{ concurrency = 4, cwd = process.cwd(), env = process.env, output = process.stdout } = {},
@@ -149,14 +212,34 @@ export async function runChecks(
failures += 1;
}
}
writeTimingSummary(results, output);
return failures;
}
/**
 * Resolve the raw shard spec for a CLI run.
 *
 * Lookup order: `--shard <spec>`, then `--shard=<spec>`, then the
 * `OPENCLAW_ADDITIONAL_BOUNDARY_SHARD` environment variable.
 *
 * @param {string[]} args - CLI arguments (without the node/script prefix).
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @returns {string} The raw spec, or `""` when no shard was requested.
 * @throws {Error} When `--shard` is the last argument and has no value.
 */
function resolveCliShardSpec(args, env) {
  const flagPosition = args.indexOf("--shard");
  if (flagPosition !== -1) {
    const value = args[flagPosition + 1];
    if (value === undefined) {
      // A dangling flag used to become "", which downstream means "no
      // sharding" and silently ran every check. Fail loudly instead.
      throw new Error("Missing value for --shard (expected N/TOTAL)");
    }
    return value;
  }
  const inlinePrefix = "--shard=";
  const inlineShard = args.find((arg) => arg.startsWith(inlinePrefix));
  if (inlineShard) {
    return inlineShard.slice(inlinePrefix.length);
  }
  return env.OPENCLAW_ADDITIONAL_BOUNDARY_SHARD ?? "";
}
if (import.meta.url === `file://${process.argv[1]}`) {
const concurrency = resolveConcurrency(
process.env.OPENCLAW_ADDITIONAL_BOUNDARY_CONCURRENCY ??
process.env.OPENCLAW_EXTENSION_BOUNDARY_CONCURRENCY,
);
const failures = await runChecks(BOUNDARY_CHECKS, { concurrency });
const shard = parseShardSpec(resolveCliShardSpec(process.argv.slice(2), process.env));
const checks = selectChecksForShard(BOUNDARY_CHECKS, shard);
if (shard) {
process.stdout.write(
`Running ${checks.length}/${BOUNDARY_CHECKS.length} additional boundary checks (shard ${shard.label})\n`,
);
}
const failures = await runChecks(checks, { concurrency });
process.exitCode = failures === 0 ? 0 : 1;
}

View File

@@ -78,7 +78,7 @@ function isGatewayServerTestFile(file: string): boolean {
}
describe("scripts/lib/ci-node-test-plan.mjs", () => {
it("combines the small core unit shards to reduce CI runner fanout", () => {
it("splits the slow core unit shards while keeping paired source/security coverage", () => {
const coreUnitShards = createNodeTestShards()
.filter((shard) => shard.shardName.startsWith("core-unit-"))
.map((shard) => ({
@@ -89,12 +89,9 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
expect(coreUnitShards).toEqual([
{
configs: [
"test/vitest/vitest.unit-fast.config.ts",
"test/vitest/vitest.unit-support.config.ts",
],
configs: ["test/vitest/vitest.unit-fast.config.ts"],
requiresDist: false,
shardName: "core-unit-fast-support",
shardName: "core-unit-fast",
},
{
configs: [
@@ -109,6 +106,11 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
requiresDist: false,
shardName: "core-unit-ui",
},
{
configs: ["test/vitest/vitest.unit-support.config.ts"],
requiresDist: false,
shardName: "core-unit-support",
},
]);
});
@@ -159,13 +161,20 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
"test/vitest/vitest.infra.config.ts",
"test/vitest/vitest.hooks.config.ts",
"test/vitest/vitest.secrets.config.ts",
],
requiresDist: false,
runner: "blacksmith-4vcpu-ubuntu-2404",
shardName: "core-runtime-infra-state",
},
{
configs: [
"test/vitest/vitest.logging.config.ts",
"test/vitest/vitest.process.config.ts",
"test/vitest/vitest.runtime-config.config.ts",
],
requiresDist: false,
runner: "blacksmith-4vcpu-ubuntu-2404",
shardName: "core-runtime-infra",
shardName: "core-runtime-infra-process",
},
{
configs: [
@@ -216,7 +225,9 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
"agentic-control-plane-agent-chat",
"agentic-control-plane-auth-node",
"agentic-control-plane-http-models",
"agentic-control-plane-http-plugin-ws",
"agentic-control-plane-runtime",
"agentic-control-plane-startup-runtime",
]);
expect(controlPlaneShards).toEqual(
controlPlaneShards.map((shard) => ({

View File

@@ -2,8 +2,10 @@ import { describe, expect, it } from "vitest";
import {
BOUNDARY_CHECKS,
formatCommand,
parseShardSpec,
resolveConcurrency,
runChecks,
selectChecksForShard,
} from "../../scripts/run-additional-boundary-checks.mjs";
function createOutputBuffer() {
@@ -40,6 +42,21 @@ describe("run-additional-boundary-checks", () => {
);
});
it("parses and applies CI shard specs", () => {
expect(parseShardSpec("2/4")).toEqual({ count: 4, index: 1, label: "2/4" });
expect(selectChecksForShard(BOUNDARY_CHECKS, "1/4")).toEqual(
BOUNDARY_CHECKS.filter((_check, index) => index % 4 === 0),
);
const shardedLabels = [1, 2, 3, 4].flatMap((index) =>
selectChecksForShard(BOUNDARY_CHECKS, `${index}/4`).map((check) => check.label),
);
expect(shardedLabels.toSorted()).toEqual(
BOUNDARY_CHECKS.map((check) => check.label).toSorted(),
);
expect(new Set(shardedLabels).size).toBe(BOUNDARY_CHECKS.length);
expect(() => parseShardSpec("5/4")).toThrow("Invalid shard spec");
});
it("buffers grouped output and reports aggregate failures", async () => {
const buffer = createOutputBuffer();
const failures = await runChecks(
@@ -62,9 +79,10 @@ describe("run-additional-boundary-checks", () => {
expect(failures).toBe(1);
expect(text).toContain("::group::passes");
expect(text).toContain("ok-out");
expect(text).toContain("[ok] passes");
expect(text).toContain("[ok] passes in ");
expect(text).toContain("::group::fails");
expect(text).toContain("bad-out");
expect(text).toContain("::error title=fails failed::fails failed (exit 7)");
expect(text).toContain("Additional boundary check timings:");
});
});