ci: split slow CI shards

2026-05-06 06:00:43 +00:00 · 2026-05-03 13:43:30 +01:00
parent c02bf2f399
commit a4a4cac8e9
6 changed files with 204 additions and 47 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1467,8 +1467,18 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - check_name: check-additional-boundaries
+          - check_name: check-additional-boundaries-a
            group: boundaries
+            boundary_shard: 1/4
+          - check_name: check-additional-boundaries-b
+            group: boundaries
+            boundary_shard: 2/4
+          - check_name: check-additional-boundaries-c
+            group: boundaries
+            boundary_shard: 3/4
+          - check_name: check-additional-boundaries-d
+            group: boundaries
+            boundary_shard: 4/4
          - check_name: check-additional-extension-channels
            group: extension-channels
          - check_name: check-additional-extension-bundled
@@ -1573,6 +1583,7 @@ jobs:
      - name: Run additional check shard
        env:
          ADDITIONAL_CHECK_GROUP: ${{ matrix.group }}
+          OPENCLAW_ADDITIONAL_BOUNDARY_SHARD: ${{ matrix.boundary_shard || '' }}
          RUN_CONTROL_UI_I18N: ${{ needs.preflight.outputs.run_control_ui_i18n }}
          OPENCLAW_ADDITIONAL_BOUNDARY_CONCURRENCY: 4
          OPENCLAW_EXTENSION_BOUNDARY_CONCURRENCY: 6
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -12,30 +12,30 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`

 ## Pipeline overview

-| Job                              | Purpose                                                                                                             | When it runs                       |
-| -------------------------------- | ------------------------------------------------------------------------------------------------------------------- | ---------------------------------- |
-| `preflight`                      | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest                             | Always on non-draft pushes and PRs |
-| `security-scm-fast`              | Private key detection and workflow audit via `zizmor`                                                               | Always on non-draft pushes and PRs |
-| `security-dependency-audit`      | Dependency-free production lockfile audit against npm advisories                                                    | Always on non-draft pushes and PRs |
-| `security-fast`                  | Required aggregate for the fast security jobs                                                                       | Always on non-draft pushes and PRs |
-| `check-dependencies`             | Production Knip dependency-only pass plus the unused-file allowlist guard                                           | Node-relevant changes              |
-| `build-artifacts`                | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts                                 | Node-relevant changes              |
-| `checks-fast-core`               | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks                                        | Node-relevant changes              |
-| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result                                                | Node-relevant changes              |
-| `checks-node-core-test`          | Core Node test shards, excluding channel, bundled, contract, and extension lanes                                    | Node-relevant changes              |
-| `check`                          | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke                          | Node-relevant changes              |
-| `check-additional`               | Architecture, boundary, prompt snapshot drift, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes              |
-| `build-smoke`                    | Built-CLI smoke tests and startup-memory smoke                                                                      | Node-relevant changes              |
-| `checks`                         | Verifier for built-artifact channel tests                                                                           | Node-relevant changes              |
-| `checks-node-compat-node22`      | Node 22 compatibility build and smoke lane                                                                          | Manual CI dispatch for releases    |
-| `check-docs`                     | Docs formatting, lint, and broken-link checks                                                                       | Docs changed                       |
-| `skills-python`                  | Ruff + pytest for Python-backed skills                                                                              | Python-skill-relevant changes      |
-| `checks-windows`                 | Windows-specific process/path tests plus shared runtime import specifier regressions                                | Windows-relevant changes           |
-| `macos-node`                     | macOS TypeScript test lane using the shared built artifacts                                                         | macOS-relevant changes             |
-| `macos-swift`                    | Swift lint, build, and tests for the macOS app                                                                      | macOS-relevant changes             |
-| `android`                        | Android unit tests for both flavors plus one debug APK build                                                        | Android-relevant changes           |
-| `test-performance-agent`         | Daily Codex slow-test optimization after trusted activity                                                           | Main CI success or manual dispatch |
-| `openclaw-performance`           | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes           | Scheduled and manual dispatch      |
+| Job                              | Purpose                                                                                                   | When it runs                       |
+| -------------------------------- | --------------------------------------------------------------------------------------------------------- | ---------------------------------- |
+| `preflight`                      | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest                   | Always on non-draft pushes and PRs |
+| `security-scm-fast`              | Private key detection and workflow audit via `zizmor`                                                     | Always on non-draft pushes and PRs |
+| `security-dependency-audit`      | Dependency-free production lockfile audit against npm advisories                                          | Always on non-draft pushes and PRs |
+| `security-fast`                  | Required aggregate for the fast security jobs                                                             | Always on non-draft pushes and PRs |
+| `check-dependencies`             | Production Knip dependency-only pass plus the unused-file allowlist guard                                 | Node-relevant changes              |
+| `build-artifacts`                | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts                       | Node-relevant changes              |
+| `checks-fast-core`               | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks                              | Node-relevant changes              |
+| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result                                      | Node-relevant changes              |
+| `checks-node-core-test`          | Core Node test shards, excluding channel, bundled, contract, and extension lanes                          | Node-relevant changes              |
+| `check`                          | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke                | Node-relevant changes              |
+| `check-additional`               | Architecture, sharded boundary/prompt drift, extension guards, package boundary, and gateway watch        | Node-relevant changes              |
+| `build-smoke`                    | Built-CLI smoke tests and startup-memory smoke                                                            | Node-relevant changes              |
+| `checks`                         | Verifier for built-artifact channel tests                                                                 | Node-relevant changes              |
+| `checks-node-compat-node22`      | Node 22 compatibility build and smoke lane                                                                | Manual CI dispatch for releases    |
+| `check-docs`                     | Docs formatting, lint, and broken-link checks                                                             | Docs changed                       |
+| `skills-python`                  | Ruff + pytest for Python-backed skills                                                                    | Python-skill-relevant changes      |
+| `checks-windows`                 | Windows-specific process/path tests plus shared runtime import specifier regressions                      | Windows-relevant changes           |
+| `macos-node`                     | macOS TypeScript test lane using the shared built artifacts                                               | macOS-relevant changes             |
+| `macos-swift`                    | Swift lint, build, and tests for the macOS app                                                            | macOS-relevant changes             |
+| `android`                        | Android unit tests for both flavors plus one debug APK build                                              | Android-relevant changes           |
+| `test-performance-agent`         | Daily Codex slow-test optimization after trusted activity                                                 | Main CI success or manual dispatch |
+| `openclaw-performance`           | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch      |

 ## Fail-fast order

@@ -54,7 +54,7 @@ Scope logic lives in `scripts/ci-changed-scope.mjs` and is covered by unit tests
 - **CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract helper/test-routing edits** use a fast Node-only manifest path: `preflight`, security, and a single `checks-fast-core` task. That path skips build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the change is limited to the routing or helper surfaces the fast task exercises directly.
 - **Windows Node checks** are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes.

-The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, small core unit lanes are paired, auto-reply runs as four balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job, including `pnpm prompt:snapshots:check` so Codex runtime happy-path prompt drift is pinned to the PR that caused it. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built.
+The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, core unit fast/support lanes run separately, core runtime infra is split between state and process/config shards, auto-reply runs as balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/server configs are split across chat/auth/model/http-plugin/runtime/startup lanes instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard list is striped across four matrix shards, each of which runs its selected independent guards concurrently and prints per-check timings. The guard list includes `pnpm prompt:snapshots:check` so Codex runtime happy-path prompt drift is pinned to the PR that caused it. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built.

 Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest` and then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles the flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push.

--- a/scripts/lib/ci-node-test-plan.mjs
+++ b/scripts/lib/ci-node-test-plan.mjs
@@ -187,6 +187,26 @@ function resolveGatewayServerShardName(file) {
  ) {
    return "agentic-control-plane-auth-node";
  }
+  if (
+    name.startsWith("server-startup") ||
+    name.startsWith("server-restart") ||
+    name.startsWith("server-runtime") ||
+    name.startsWith("server.lazy") ||
+    name.startsWith("server.health") ||
+    name.startsWith("server/health-state") ||
+    name.startsWith("server/readiness") ||
+    name === "server-close.test.ts"
+  ) {
+    return "agentic-control-plane-startup-runtime";
+  }
+  if (
+    name.includes("plugin") ||
+    name.includes("hooks") ||
+    name.includes("http") ||
+    name.includes("ws-connection")
+  ) {
+    return "agentic-control-plane-http-plugin-ws";
+  }
  return "agentic-control-plane-runtime";
 }

@@ -200,7 +220,9 @@ function createGatewayServerSplitShards() {
    "agentic-control-plane-agent-chat",
    "agentic-control-plane-auth-node",
    "agentic-control-plane-http-models",
+    "agentic-control-plane-http-plugin-ws",
    "agentic-control-plane-runtime",
+    "agentic-control-plane-startup-runtime",
  ]
    .map((shardName) => ({
      configs: ["test/vitest/vitest.gateway-server.config.ts"],
@@ -217,12 +239,8 @@ const SPLIT_NODE_SHARDS = new Map([
    "core-unit-fast",
    [
      {
-        shardName: "core-unit-fast-support",
-        configs: [
-          "test/vitest/vitest.unit-fast.config.ts",
-          "test/vitest/vitest.unit-support.config.ts",
-        ],
-        includeExternalConfigs: true,
+        shardName: "core-unit-fast",
+        configs: ["test/vitest/vitest.unit-fast.config.ts"],
        requiresDist: false,
      },
    ],
@@ -242,16 +260,32 @@ const SPLIT_NODE_SHARDS = new Map([
    ],
  ],
  ["core-unit-security", []],
-  ["core-unit-support", []],
+  [
+    "core-unit-support",
+    [
+      {
+        shardName: "core-unit-support",
+        configs: ["test/vitest/vitest.unit-support.config.ts"],
+        requiresDist: false,
+      },
+    ],
+  ],
  [
    "core-runtime",
    [
      {
-        shardName: "core-runtime-infra",
+        shardName: "core-runtime-infra-state",
        configs: [
          "test/vitest/vitest.infra.config.ts",
          "test/vitest/vitest.hooks.config.ts",
          "test/vitest/vitest.secrets.config.ts",
+        ],
+        requiresDist: false,
+        runner: "blacksmith-4vcpu-ubuntu-2404",
+      },
+      {
+        shardName: "core-runtime-infra-process",
+        configs: [
          "test/vitest/vitest.logging.config.ts",
          "test/vitest/vitest.process.config.ts",
          "test/vitest/vitest.runtime-config.config.ts",
--- a/scripts/run-additional-boundary-checks.mjs
+++ b/scripts/run-additional-boundary-checks.mjs
@@ -1,5 +1,6 @@
 #!/usr/bin/env node
 import { spawn } from "node:child_process";
+import { performance } from "node:perf_hooks";

 export const BOUNDARY_CHECKS = [
  ["prompt:snapshots:check", "pnpm", ["prompt:snapshots:check"]],
@@ -63,12 +64,43 @@ export function resolveConcurrency(value, fallback = 4) {
  return parsed;
 }

+export function parseShardSpec(value) {
+  if (!value) {
+    return null;
+  }
+  const match = String(value).match(/^(\d+)\/(\d+)$/u);
+  if (!match) {
+    throw new Error(`Invalid shard spec '${value}' (expected N/TOTAL)`);
+  }
+  const index = Number.parseInt(match[1], 10);
+  const count = Number.parseInt(match[2], 10);
+  if (
+    !Number.isInteger(index) ||
+    !Number.isInteger(count) ||
+    index < 1 ||
+    count < 1 ||
+    index > count
+  ) {
+    throw new Error(`Invalid shard spec '${value}' (expected 1 <= N <= TOTAL)`);
+  }
+  return { count, index: index - 1, label: `${index}/${count}` };
+}
+
+export function selectChecksForShard(checks, shardSpec) {
+  const shard = typeof shardSpec === "string" ? parseShardSpec(shardSpec) : shardSpec;
+  if (!shard) {
+    return checks;
+  }
+  return checks.filter((_check, index) => index % shard.count === shard.index);
+}
+
 export function formatCommand({ command, args }) {
  return [command, ...args].join(" ");
 }

 function runSingleCheck(check, { cwd, env }) {
  return new Promise((resolve) => {
+    const startedAt = performance.now();
    const child = spawn(check.command, check.args, {
      cwd,
      env,
@@ -83,14 +115,36 @@ function runSingleCheck(check, { cwd, env }) {
    child.stderr.on("data", (chunk) => chunks.push(chunk));
    child.on("error", (error) => {
      chunks.push(`${error.stack ?? error.message}\n`);
-      resolve({ check, code: 1, signal: null, output: chunks.join("") });
+      resolve({
+        check,
+        code: 1,
+        durationMs: Math.round(performance.now() - startedAt),
+        signal: null,
+        output: chunks.join(""),
+      });
    });
    child.on("close", (code, signal) => {
-      resolve({ check, code: code ?? 1, signal, output: chunks.join("") });
+      resolve({
+        check,
+        code: code ?? 1,
+        durationMs: Math.round(performance.now() - startedAt),
+        signal,
+        output: chunks.join(""),
+      });
    });
  });
 }

+function formatDuration(ms) {
+  if (!Number.isFinite(ms)) {
+    return "";
+  }
+  if (ms < 1000) {
+    return `${ms}ms`;
+  }
+  return `${(ms / 1000).toFixed(1)}s`;
+}
+
 function writeGroupedResult(result, output) {
  const success = result.code === 0;
  output.write(`::group::${result.check.label}\n`);
@@ -99,16 +153,25 @@ function writeGroupedResult(result, output) {
    output.write(result.output.endsWith("\n") ? result.output : `${result.output}\n`);
  }
  if (success) {
-    output.write(`[ok] ${result.check.label}\n`);
+    output.write(`[ok] ${result.check.label} in ${formatDuration(result.durationMs)}\n`);
  } else {
    const suffix = result.signal ? ` (signal ${result.signal})` : ` (exit ${result.code})`;
    output.write(
-      `::error title=${result.check.label} failed::${result.check.label} failed${suffix}\n`,
+      `::error title=${result.check.label} failed::${result.check.label} failed${suffix} after ${formatDuration(result.durationMs)}\n`,
    );
  }
  output.write("::endgroup::\n");
 }

+function writeTimingSummary(results, output) {
+  output.write("Additional boundary check timings:\n");
+  for (const result of [...results].toSorted((left, right) => right.durationMs - left.durationMs)) {
+    output.write(
+      `${result.check.label.padEnd(48)} ${formatDuration(result.durationMs).padStart(8)}\n`,
+    );
+  }
+}
+
 export async function runChecks(
  checks = BOUNDARY_CHECKS,
  { concurrency = 4, cwd = process.cwd(), env = process.env, output = process.stdout } = {},
@@ -149,14 +212,34 @@ export async function runChecks(
      failures += 1;
    }
  }
+  writeTimingSummary(results, output);
  return failures;
 }

+function resolveCliShardSpec(args, env) {
+  const shardIndex = args.indexOf("--shard");
+  if (shardIndex !== -1) {
+    return args[shardIndex + 1] ?? "";
+  }
+  const inlineShard = args.find((arg) => arg.startsWith("--shard="));
+  if (inlineShard) {
+    return inlineShard.slice("--shard=".length);
+  }
+  return env.OPENCLAW_ADDITIONAL_BOUNDARY_SHARD ?? "";
+}
+
 if (import.meta.url === `file://${process.argv[1]}`) {
  const concurrency = resolveConcurrency(
    process.env.OPENCLAW_ADDITIONAL_BOUNDARY_CONCURRENCY ??
      process.env.OPENCLAW_EXTENSION_BOUNDARY_CONCURRENCY,
  );
-  const failures = await runChecks(BOUNDARY_CHECKS, { concurrency });
+  const shard = parseShardSpec(resolveCliShardSpec(process.argv.slice(2), process.env));
+  const checks = selectChecksForShard(BOUNDARY_CHECKS, shard);
+  if (shard) {
+    process.stdout.write(
+      `Running ${checks.length}/${BOUNDARY_CHECKS.length} additional boundary checks (shard ${shard.label})\n`,
+    );
+  }
+  const failures = await runChecks(checks, { concurrency });
  process.exitCode = failures === 0 ? 0 : 1;
 }
--- a/test/scripts/ci-node-test-plan.test.ts
+++ b/test/scripts/ci-node-test-plan.test.ts
@@ -78,7 +78,7 @@ function isGatewayServerTestFile(file: string): boolean {
 }

 describe("scripts/lib/ci-node-test-plan.mjs", () => {
-  it("combines the small core unit shards to reduce CI runner fanout", () => {
+  it("splits the slow core unit shards while keeping paired source/security coverage", () => {
    const coreUnitShards = createNodeTestShards()
      .filter((shard) => shard.shardName.startsWith("core-unit-"))
      .map((shard) => ({
@@ -89,12 +89,9 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {

    expect(coreUnitShards).toEqual([
      {
-        configs: [
-          "test/vitest/vitest.unit-fast.config.ts",
-          "test/vitest/vitest.unit-support.config.ts",
-        ],
+        configs: ["test/vitest/vitest.unit-fast.config.ts"],
        requiresDist: false,
-        shardName: "core-unit-fast-support",
+        shardName: "core-unit-fast",
      },
      {
        configs: [
@@ -109,6 +106,11 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
        requiresDist: false,
        shardName: "core-unit-ui",
      },
+      {
+        configs: ["test/vitest/vitest.unit-support.config.ts"],
+        requiresDist: false,
+        shardName: "core-unit-support",
+      },
    ]);
  });

@@ -159,13 +161,20 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
          "test/vitest/vitest.infra.config.ts",
          "test/vitest/vitest.hooks.config.ts",
          "test/vitest/vitest.secrets.config.ts",
+        ],
+        requiresDist: false,
+        runner: "blacksmith-4vcpu-ubuntu-2404",
+        shardName: "core-runtime-infra-state",
+      },
+      {
+        configs: [
          "test/vitest/vitest.logging.config.ts",
          "test/vitest/vitest.process.config.ts",
          "test/vitest/vitest.runtime-config.config.ts",
        ],
        requiresDist: false,
        runner: "blacksmith-4vcpu-ubuntu-2404",
-        shardName: "core-runtime-infra",
+        shardName: "core-runtime-infra-process",
      },
      {
        configs: [
@@ -216,7 +225,9 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
      "agentic-control-plane-agent-chat",
      "agentic-control-plane-auth-node",
      "agentic-control-plane-http-models",
+      "agentic-control-plane-http-plugin-ws",
      "agentic-control-plane-runtime",
+      "agentic-control-plane-startup-runtime",
    ]);
    expect(controlPlaneShards).toEqual(
      controlPlaneShards.map((shard) => ({
--- a/test/scripts/run-additional-boundary-checks.test.ts
+++ b/test/scripts/run-additional-boundary-checks.test.ts
@@ -2,8 +2,10 @@ import { describe, expect, it } from "vitest";
 import {
  BOUNDARY_CHECKS,
  formatCommand,
+  parseShardSpec,
  resolveConcurrency,
  runChecks,
+  selectChecksForShard,
 } from "../../scripts/run-additional-boundary-checks.mjs";

 function createOutputBuffer() {
@@ -40,6 +42,21 @@ describe("run-additional-boundary-checks", () => {
    );
  });

+  it("parses and applies CI shard specs", () => {
+    expect(parseShardSpec("2/4")).toEqual({ count: 4, index: 1, label: "2/4" });
+    expect(selectChecksForShard(BOUNDARY_CHECKS, "1/4")).toEqual(
+      BOUNDARY_CHECKS.filter((_check, index) => index % 4 === 0),
+    );
+    const shardedLabels = [1, 2, 3, 4].flatMap((index) =>
+      selectChecksForShard(BOUNDARY_CHECKS, `${index}/4`).map((check) => check.label),
+    );
+    expect(shardedLabels.toSorted()).toEqual(
+      BOUNDARY_CHECKS.map((check) => check.label).toSorted(),
+    );
+    expect(new Set(shardedLabels).size).toBe(BOUNDARY_CHECKS.length);
+    expect(() => parseShardSpec("5/4")).toThrow("Invalid shard spec");
+  });
+
  it("buffers grouped output and reports aggregate failures", async () => {
    const buffer = createOutputBuffer();
    const failures = await runChecks(
@@ -62,9 +79,10 @@ describe("run-additional-boundary-checks", () => {
    expect(failures).toBe(1);
    expect(text).toContain("::group::passes");
    expect(text).toContain("ok-out");
-    expect(text).toContain("[ok] passes");
+    expect(text).toContain("[ok] passes in ");
    expect(text).toContain("::group::fails");
    expect(text).toContain("bad-out");
    expect(text).toContain("::error title=fails failed::fails failed (exit 7)");
+    expect(text).toContain("Additional boundary check timings:");
  });
 });