From 36c89ecaf6dff7bcf7ea480556328c81dd1de7f3 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 19:10:43 +0100 Subject: [PATCH] ci: cap extension shard vitest workers --- .github/workflows/ci.yml | 1 + docs/ci.md | 2 +- test/vitest-projects-config.test.ts | 38 +++++++++++++++++++++++++- test/vitest/vitest.shared.config.ts | 42 ++++++++++++++++++++++++++--- 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dca637ee80a..8233c1fd144 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -975,6 +975,7 @@ jobs: - name: Run extension shard env: OPENCLAW_EXTENSION_BATCH_PARALLEL: 2 + OPENCLAW_VITEST_MAX_WORKERS: 1 OPENCLAW_EXTENSION_BATCH: ${{ matrix.extensions_csv }} run: pnpm test:extensions:batch -- "$OPENCLAW_EXTENSION_BATCH" diff --git a/docs/ci.md b/docs/ci.md index 997fde96171..35b9a5e788b 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -75,7 +75,7 @@ Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by On pushes, the `checks` matrix adds the push-only `compat-node22` lane. On pull requests, that lane is skipped and the matrix stays focused on the normal test/channel lanes. -The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as three balanced workers instead of six tiny workers, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. 
`runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue. +The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as three balanced workers instead of six tiny workers, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs can run two plugin config groups concurrently, but cap each Vitest config to one worker so import-heavy plugin batches do not overcommit small CI runners. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. 
Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue. Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest`, then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles that flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push. `extension-fast` is PR-only because push runs already execute the full bundled plugin shards. That keeps changed-plugin feedback for reviews without reserving an extra Blacksmith worker on `main` for coverage already present in `checks-node-extensions`. diff --git a/test/vitest-projects-config.test.ts b/test/vitest-projects-config.test.ts index d584fdbc476..360d0498fea 100644 --- a/test/vitest-projects-config.test.ts +++ b/test/vitest-projects-config.test.ts @@ -12,7 +12,10 @@ import { } from "./vitest/vitest.contracts-shared.ts"; import { createGatewayVitestConfig } from "./vitest/vitest.gateway.config.ts"; import { createPluginSdkLightVitestConfig } from "./vitest/vitest.plugin-sdk-light.config.ts"; -import { sharedVitestConfig } from "./vitest/vitest.shared.config.ts"; +import { + resolveSharedVitestWorkerConfig, + sharedVitestConfig, +} from "./vitest/vitest.shared.config.ts"; import { createUiVitestConfig } from "./vitest/vitest.ui.config.ts"; import { createUnitFastVitestConfig } from "./vitest/vitest.unit-fast.config.ts"; import unitUiConfig from "./vitest/vitest.unit-ui.config.ts"; @@ -44,6 +47,39 @@ describe("projects vitest config", () => { expect(createContractsVitestConfig(pluginContractPatterns).test.pool).toBe("forks"); }); + it("honors explicit worker caps in CI vitest lanes", () => { + expect( + 
resolveSharedVitestWorkerConfig({ + env: { CI: "true", OPENCLAW_VITEST_MAX_WORKERS: "1" }, + isCI: true, + isWindows: false, + localScheduling: { + fileParallelism: false, + maxWorkers: 1, + throttledBySystem: false, + }, + }), + ).toEqual({ + fileParallelism: false, + maxWorkers: 1, + }); + expect( + resolveSharedVitestWorkerConfig({ + env: { CI: "true" }, + isCI: true, + isWindows: false, + localScheduling: { + fileParallelism: false, + maxWorkers: 1, + throttledBySystem: false, + }, + }), + ).toEqual({ + fileParallelism: true, + maxWorkers: 3, + }); + }); + it("keeps contract shards on the non-isolated fork runner by default", () => { const config = createContractsVitestConfig(pluginContractPatterns); expect(config.test.pool).toBe("forks"); diff --git a/test/vitest/vitest.shared.config.ts b/test/vitest/vitest.shared.config.ts index 613aa65326c..da298c03a3d 100644 --- a/test/vitest/vitest.shared.config.ts +++ b/test/vitest/vitest.shared.config.ts @@ -76,7 +76,43 @@ const localScheduling = resolveLocalVitestScheduling( detectVitestHostInfo(), defaultPool, ); -const ciWorkers = isWindows ? 2 : 3; + +function hasWorkerOverride(env: Record<string, string | undefined>): boolean { + return Boolean((env.OPENCLAW_VITEST_MAX_WORKERS ?? env.OPENCLAW_TEST_WORKERS)?.trim()); +} + +export function resolveSharedVitestWorkerConfig(params: { + env?: Record<string, string | undefined>; + isCI?: boolean; + isWindows?: boolean; + localScheduling?: LocalVitestScheduling; +}): Pick<LocalVitestScheduling, "fileParallelism" | "maxWorkers"> { + const env = params.env ?? process.env; + const local = params.localScheduling ?? localScheduling; + if (hasWorkerOverride(env)) { + return { + fileParallelism: local.fileParallelism, + maxWorkers: local.maxWorkers, + }; + } + if (params.isCI ?? isCI) { + return { + fileParallelism: true, + maxWorkers: (params.isWindows ?? isWindows) ?
2 : 3, + }; + } + return { + fileParallelism: local.fileParallelism, + maxWorkers: local.maxWorkers, + }; +} + +const workerConfig = resolveSharedVitestWorkerConfig({ + env: process.env, + isCI, + isWindows, + localScheduling, +}); if (!isCI && localScheduling.throttledBySystem && shouldPrintVitestThrottle(process.env)) { console.error( @@ -118,8 +154,8 @@ export const sharedVitestConfig = { isolate: false, pool: defaultPool, runner: nonIsolatedRunnerPath, - maxWorkers: isCI ? ciWorkers : localScheduling.maxWorkers, - fileParallelism: isCI ? true : localScheduling.fileParallelism, + maxWorkers: workerConfig.maxWorkers, + fileParallelism: workerConfig.fileParallelism, forceRerunTriggers: [ "package.json", "pnpm-lock.yaml",