diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f7e7b82ec1..b7cfbf0af5f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1467,8 +1467,18 @@ jobs: fail-fast: false matrix: include: - - check_name: check-additional-boundaries + - check_name: check-additional-boundaries-a group: boundaries + boundary_shard: 1/4 + - check_name: check-additional-boundaries-b + group: boundaries + boundary_shard: 2/4 + - check_name: check-additional-boundaries-c + group: boundaries + boundary_shard: 3/4 + - check_name: check-additional-boundaries-d + group: boundaries + boundary_shard: 4/4 - check_name: check-additional-extension-channels group: extension-channels - check_name: check-additional-extension-bundled @@ -1573,6 +1583,7 @@ jobs: - name: Run additional check shard env: ADDITIONAL_CHECK_GROUP: ${{ matrix.group }} + OPENCLAW_ADDITIONAL_BOUNDARY_SHARD: ${{ matrix.boundary_shard || '' }} RUN_CONTROL_UI_I18N: ${{ needs.preflight.outputs.run_control_ui_i18n }} OPENCLAW_ADDITIONAL_BOUNDARY_CONCURRENCY: 4 OPENCLAW_EXTENSION_BOUNDARY_CONCURRENCY: 6 diff --git a/docs/ci.md b/docs/ci.md index 1fe95d86f9c..063fa4505fe 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -12,30 +12,30 @@ OpenClaw CI runs on every push to `main` and every pull request. 
The `preflight` ## Pipeline overview -| Job | Purpose | When it runs | -| -------------------------------- | ------------------------------------------------------------------------------------------------------------------- | ---------------------------------- | -| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs | -| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs | -| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs | -| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs | -| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes | -| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes | -| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes | -| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes | -| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes | -| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes | -| `check-additional` | Architecture, boundary, prompt snapshot drift, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes | -| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes | -| `checks` | Verifier for built-artifact channel tests | Node-relevant changes | -| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for 
releases | -| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed | -| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes | -| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes | -| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes | -| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes | -| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes | -| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch | -| `openclaw-performance` | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch | +| Job | Purpose | When it runs | +| -------------------------------- | --------------------------------------------------------------------------------------------------------- | ---------------------------------- | +| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs | +| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs | +| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs | +| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs | +| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes | +| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes | +| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol 
checks | Node-relevant changes | +| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes | +| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes | +| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes | +| `check-additional` | Architecture, sharded boundary/prompt drift, extension guards, package boundary, and gateway watch | Node-relevant changes | +| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes | +| `checks` | Verifier for built-artifact channel tests | Node-relevant changes | +| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases | +| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed | +| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes | +| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes | +| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes | +| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes | +| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes | +| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch | +| `openclaw-performance` | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch | ## Fail-fast order @@ -54,7 +54,7 @@ Scope logic lives in `scripts/ci-changed-scope.mjs` and is covered by unit tests - **CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract 
helper/test-routing edits** use a fast Node-only manifest path: `preflight`, security, and a single `checks-fast-core` task. That path skips build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the change is limited to the routing or helper surfaces the fast task exercises directly. - **Windows Node checks** are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes. -The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, small core unit lanes are paired, auto-reply runs as four balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job, including `pnpm prompt:snapshots:check` so Codex runtime happy-path prompt drift is pinned to the PR that caused it. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built. 
+The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, core unit fast/support lanes run separately, core runtime infra is split between state and process/config shards, auto-reply runs as balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/server configs are split across chat/auth/model/http-plugin/runtime/startup lanes instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard list is striped across four matrix shards, each running its selected independent guards concurrently (including `pnpm prompt:snapshots:check`, so Codex runtime happy-path prompt drift is pinned to the PR that caused it) and printing per-check timings. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built. Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest` and then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles the flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push. 
diff --git a/scripts/lib/ci-node-test-plan.mjs b/scripts/lib/ci-node-test-plan.mjs index 0f8f091108a..37dfa50b9b0 100644 --- a/scripts/lib/ci-node-test-plan.mjs +++ b/scripts/lib/ci-node-test-plan.mjs @@ -187,6 +187,26 @@ function resolveGatewayServerShardName(file) { ) { return "agentic-control-plane-auth-node"; } + if ( + name.startsWith("server-startup") || + name.startsWith("server-restart") || + name.startsWith("server-runtime") || + name.startsWith("server.lazy") || + name.startsWith("server.health") || + name.startsWith("server/health-state") || + name.startsWith("server/readiness") || + name === "server-close.test.ts" + ) { + return "agentic-control-plane-startup-runtime"; + } + if ( + name.includes("plugin") || + name.includes("hooks") || + name.includes("http") || + name.includes("ws-connection") + ) { + return "agentic-control-plane-http-plugin-ws"; + } return "agentic-control-plane-runtime"; } @@ -200,7 +220,9 @@ function createGatewayServerSplitShards() { "agentic-control-plane-agent-chat", "agentic-control-plane-auth-node", "agentic-control-plane-http-models", + "agentic-control-plane-http-plugin-ws", "agentic-control-plane-runtime", + "agentic-control-plane-startup-runtime", ] .map((shardName) => ({ configs: ["test/vitest/vitest.gateway-server.config.ts"], @@ -217,12 +239,8 @@ const SPLIT_NODE_SHARDS = new Map([ "core-unit-fast", [ { - shardName: "core-unit-fast-support", - configs: [ - "test/vitest/vitest.unit-fast.config.ts", - "test/vitest/vitest.unit-support.config.ts", - ], - includeExternalConfigs: true, + shardName: "core-unit-fast", + configs: ["test/vitest/vitest.unit-fast.config.ts"], requiresDist: false, }, ], @@ -242,16 +260,32 @@ const SPLIT_NODE_SHARDS = new Map([ ], ], ["core-unit-security", []], - ["core-unit-support", []], + [ + "core-unit-support", + [ + { + shardName: "core-unit-support", + configs: ["test/vitest/vitest.unit-support.config.ts"], + requiresDist: false, + }, + ], + ], [ "core-runtime", [ { - shardName: 
"core-runtime-infra", + shardName: "core-runtime-infra-state", configs: [ "test/vitest/vitest.infra.config.ts", "test/vitest/vitest.hooks.config.ts", "test/vitest/vitest.secrets.config.ts", + ], + requiresDist: false, + runner: "blacksmith-4vcpu-ubuntu-2404", + }, + { + shardName: "core-runtime-infra-process", + configs: [ "test/vitest/vitest.logging.config.ts", "test/vitest/vitest.process.config.ts", "test/vitest/vitest.runtime-config.config.ts", diff --git a/scripts/run-additional-boundary-checks.mjs b/scripts/run-additional-boundary-checks.mjs index 11182775bf9..8b656edf292 100644 --- a/scripts/run-additional-boundary-checks.mjs +++ b/scripts/run-additional-boundary-checks.mjs @@ -1,5 +1,6 @@ #!/usr/bin/env node import { spawn } from "node:child_process"; +import { performance } from "node:perf_hooks"; export const BOUNDARY_CHECKS = [ ["prompt:snapshots:check", "pnpm", ["prompt:snapshots:check"]], @@ -63,12 +64,43 @@ export function resolveConcurrency(value, fallback = 4) { return parsed; } +export function parseShardSpec(value) { + if (!value) { + return null; + } + const match = String(value).match(/^(\d+)\/(\d+)$/u); + if (!match) { + throw new Error(`Invalid shard spec '${value}' (expected N/TOTAL)`); + } + const index = Number.parseInt(match[1], 10); + const count = Number.parseInt(match[2], 10); + if ( + !Number.isInteger(index) || + !Number.isInteger(count) || + index < 1 || + count < 1 || + index > count + ) { + throw new Error(`Invalid shard spec '${value}' (expected 1 <= N <= TOTAL)`); + } + return { count, index: index - 1, label: `${index}/${count}` }; +} + +export function selectChecksForShard(checks, shardSpec) { + const shard = typeof shardSpec === "string" ? 
parseShardSpec(shardSpec) : shardSpec; + if (!shard) { + return checks; + } + return checks.filter((_check, index) => index % shard.count === shard.index); +} + export function formatCommand({ command, args }) { return [command, ...args].join(" "); } function runSingleCheck(check, { cwd, env }) { return new Promise((resolve) => { + const startedAt = performance.now(); const child = spawn(check.command, check.args, { cwd, env, @@ -83,14 +115,36 @@ function runSingleCheck(check, { cwd, env }) { child.stderr.on("data", (chunk) => chunks.push(chunk)); child.on("error", (error) => { chunks.push(`${error.stack ?? error.message}\n`); - resolve({ check, code: 1, signal: null, output: chunks.join("") }); + resolve({ + check, + code: 1, + durationMs: Math.round(performance.now() - startedAt), + signal: null, + output: chunks.join(""), + }); }); child.on("close", (code, signal) => { - resolve({ check, code: code ?? 1, signal, output: chunks.join("") }); + resolve({ + check, + code: code ?? 1, + durationMs: Math.round(performance.now() - startedAt), + signal, + output: chunks.join(""), + }); }); }); } +function formatDuration(ms) { + if (!Number.isFinite(ms)) { + return ""; + } + if (ms < 1000) { + return `${ms}ms`; + } + return `${(ms / 1000).toFixed(1)}s`; +} + function writeGroupedResult(result, output) { const success = result.code === 0; output.write(`::group::${result.check.label}\n`); @@ -99,16 +153,25 @@ function writeGroupedResult(result, output) { output.write(result.output.endsWith("\n") ? result.output : `${result.output}\n`); } if (success) { - output.write(`[ok] ${result.check.label}\n`); + output.write(`[ok] ${result.check.label} in ${formatDuration(result.durationMs)}\n`); } else { const suffix = result.signal ? 
` (signal ${result.signal})` : ` (exit ${result.code})`; output.write( - `::error title=${result.check.label} failed::${result.check.label} failed${suffix}\n`, + `::error title=${result.check.label} failed::${result.check.label} failed${suffix} after ${formatDuration(result.durationMs)}\n`, ); } output.write("::endgroup::\n"); } +function writeTimingSummary(results, output) { + output.write("Additional boundary check timings:\n"); + for (const result of [...results].toSorted((left, right) => right.durationMs - left.durationMs)) { + output.write( + `${result.check.label.padEnd(48)} ${formatDuration(result.durationMs).padStart(8)}\n`, + ); + } +} + export async function runChecks( checks = BOUNDARY_CHECKS, { concurrency = 4, cwd = process.cwd(), env = process.env, output = process.stdout } = {}, @@ -149,14 +212,34 @@ export async function runChecks( failures += 1; } } + writeTimingSummary(results, output); return failures; } +function resolveCliShardSpec(args, env) { + const shardIndex = args.indexOf("--shard"); + if (shardIndex !== -1) { + return args[shardIndex + 1] ?? ""; + } + const inlineShard = args.find((arg) => arg.startsWith("--shard=")); + if (inlineShard) { + return inlineShard.slice("--shard=".length); + } + return env.OPENCLAW_ADDITIONAL_BOUNDARY_SHARD ?? ""; +} + if (import.meta.url === `file://${process.argv[1]}`) { const concurrency = resolveConcurrency( process.env.OPENCLAW_ADDITIONAL_BOUNDARY_CONCURRENCY ?? process.env.OPENCLAW_EXTENSION_BOUNDARY_CONCURRENCY, ); - const failures = await runChecks(BOUNDARY_CHECKS, { concurrency }); + const shard = parseShardSpec(resolveCliShardSpec(process.argv.slice(2), process.env)); + const checks = selectChecksForShard(BOUNDARY_CHECKS, shard); + if (shard) { + process.stdout.write( + `Running ${checks.length}/${BOUNDARY_CHECKS.length} additional boundary checks (shard ${shard.label})\n`, + ); + } + const failures = await runChecks(checks, { concurrency }); process.exitCode = failures === 0 ? 
0 : 1; } diff --git a/test/scripts/ci-node-test-plan.test.ts b/test/scripts/ci-node-test-plan.test.ts index bcf0a00577c..05aca60bd14 100644 --- a/test/scripts/ci-node-test-plan.test.ts +++ b/test/scripts/ci-node-test-plan.test.ts @@ -78,7 +78,7 @@ function isGatewayServerTestFile(file: string): boolean { } describe("scripts/lib/ci-node-test-plan.mjs", () => { - it("combines the small core unit shards to reduce CI runner fanout", () => { + it("splits the slow core unit shards while keeping paired source/security coverage", () => { const coreUnitShards = createNodeTestShards() .filter((shard) => shard.shardName.startsWith("core-unit-")) .map((shard) => ({ @@ -89,12 +89,9 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => { expect(coreUnitShards).toEqual([ { - configs: [ - "test/vitest/vitest.unit-fast.config.ts", - "test/vitest/vitest.unit-support.config.ts", - ], + configs: ["test/vitest/vitest.unit-fast.config.ts"], requiresDist: false, - shardName: "core-unit-fast-support", + shardName: "core-unit-fast", }, { configs: [ @@ -109,6 +106,11 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => { requiresDist: false, shardName: "core-unit-ui", }, + { + configs: ["test/vitest/vitest.unit-support.config.ts"], + requiresDist: false, + shardName: "core-unit-support", + }, ]); }); @@ -159,13 +161,20 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => { "test/vitest/vitest.infra.config.ts", "test/vitest/vitest.hooks.config.ts", "test/vitest/vitest.secrets.config.ts", + ], + requiresDist: false, + runner: "blacksmith-4vcpu-ubuntu-2404", + shardName: "core-runtime-infra-state", + }, + { + configs: [ "test/vitest/vitest.logging.config.ts", "test/vitest/vitest.process.config.ts", "test/vitest/vitest.runtime-config.config.ts", ], requiresDist: false, runner: "blacksmith-4vcpu-ubuntu-2404", - shardName: "core-runtime-infra", + shardName: "core-runtime-infra-process", }, { configs: [ @@ -216,7 +225,9 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => { 
"agentic-control-plane-agent-chat", "agentic-control-plane-auth-node", "agentic-control-plane-http-models", + "agentic-control-plane-http-plugin-ws", "agentic-control-plane-runtime", + "agentic-control-plane-startup-runtime", ]); expect(controlPlaneShards).toEqual( controlPlaneShards.map((shard) => ({ diff --git a/test/scripts/run-additional-boundary-checks.test.ts b/test/scripts/run-additional-boundary-checks.test.ts index 05179bdf655..b6c986be781 100644 --- a/test/scripts/run-additional-boundary-checks.test.ts +++ b/test/scripts/run-additional-boundary-checks.test.ts @@ -2,8 +2,10 @@ import { describe, expect, it } from "vitest"; import { BOUNDARY_CHECKS, formatCommand, + parseShardSpec, resolveConcurrency, runChecks, + selectChecksForShard, } from "../../scripts/run-additional-boundary-checks.mjs"; function createOutputBuffer() { @@ -40,6 +42,21 @@ describe("run-additional-boundary-checks", () => { ); }); + it("parses and applies CI shard specs", () => { + expect(parseShardSpec("2/4")).toEqual({ count: 4, index: 1, label: "2/4" }); + expect(selectChecksForShard(BOUNDARY_CHECKS, "1/4")).toEqual( + BOUNDARY_CHECKS.filter((_check, index) => index % 4 === 0), + ); + const shardedLabels = [1, 2, 3, 4].flatMap((index) => + selectChecksForShard(BOUNDARY_CHECKS, `${index}/4`).map((check) => check.label), + ); + expect(shardedLabels.toSorted()).toEqual( + BOUNDARY_CHECKS.map((check) => check.label).toSorted(), + ); + expect(new Set(shardedLabels).size).toBe(BOUNDARY_CHECKS.length); + expect(() => parseShardSpec("5/4")).toThrow("Invalid shard spec"); + }); + it("buffers grouped output and reports aggregate failures", async () => { const buffer = createOutputBuffer(); const failures = await runChecks( @@ -62,9 +79,10 @@ describe("run-additional-boundary-checks", () => { expect(failures).toBe(1); expect(text).toContain("::group::passes"); expect(text).toContain("ok-out"); - expect(text).toContain("[ok] passes"); + expect(text).toContain("[ok] passes in "); 
expect(text).toContain("::group::fails"); expect(text).toContain("bad-out"); expect(text).toContain("::error title=fails failed::fails failed (exit 7)"); + expect(text).toContain("Additional boundary check timings:"); }); });