diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 334253daa94..c7cd5af05ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1231,6 +1231,7 @@ jobs: NODE_OPTIONS: --max-old-space-size=6144 OPENCLAW_NODE_TEST_CONFIGS_JSON: ${{ toJson(matrix.configs) }} OPENCLAW_NODE_TEST_INCLUDE_PATTERNS_JSON: ${{ toJson(matrix.includePatterns) }} + OPENCLAW_VITEST_SHARD_NAME: ${{ matrix.shard_name }} OPENCLAW_TEST_PROJECTS_PARALLEL: "2" shell: bash run: | diff --git a/docs/ci.md b/docs/ci.md index 6e013c47cdd..d458b7a1ec0 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -98,7 +98,7 @@ Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by On pushes, the `checks` matrix adds the push-only `compat-node22` lane. On pull requests, that lane is skipped and the matrix stays focused on the normal test/channel lanes. -The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as three balanced workers instead of six tiny workers, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. 
`check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue. +The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as four balanced workers with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. 
`check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue. Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest`, then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles that flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push. `extension-fast` is PR-only because push runs already execute the full bundled plugin shards. That keeps changed-plugin feedback for reviews without reserving an extra Blacksmith worker on `main` for coverage already present in `checks-node-extensions`. 
@@ -132,7 +132,10 @@ pnpm test:channels pnpm test:contracts:channels pnpm check:docs # docs format + lint + broken links pnpm build # build dist when CI artifact/build-smoke lanes matter +pnpm ci:timings # summarize the latest origin/main push CI run +pnpm ci:timings:recent # compare recent successful main CI runs node scripts/ci-run-timings.mjs # summarize wall time, queue time, and slowest jobs +node scripts/ci-run-timings.mjs --latest-main # ignore issue/comment noise and choose origin/main push CI node scripts/ci-run-timings.mjs --recent 10 # compare recent successful main CI runs pnpm test:perf:groups --full-suite --allow-failures --output .artifacts/test-perf/baseline-before.json pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifacts/test-perf/after-agent.json diff --git a/docs/help/testing.md b/docs/help/testing.md index 0f075885975..ccf076c019f 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -396,7 +396,7 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost): - `pnpm check:changed` is the normal smart local gate for narrow work. It classifies the diff into core, core tests, extensions, extension tests, apps, docs, release metadata, and tooling, then runs the matching typecheck/lint/test lanes. Public Plugin SDK and plugin-contract changes include one extension validation pass because extensions depend on those core contracts. Release metadata-only version bumps run targeted version/config/root-dependency checks instead of the full suite, with a guard that rejects package changes outside the top-level version field. - Import-light unit tests from agents, commands, plugins, auto-reply helpers, `plugin-sdk`, and similar pure utility areas route through the `unit-fast` lane, which skips `test/setup-openclaw-runtime.ts`; stateful/runtime-heavy files stay on the existing lanes. 
- Selected `plugin-sdk` and `commands` helper source files also map changed-mode runs to explicit sibling tests in those light lanes, so helper edits avoid rerunning the full heavy suite for that directory. - - `auto-reply` has three dedicated buckets: top-level core helpers, top-level `reply.*` integration tests, and the `src/auto-reply/reply/**` subtree. This keeps the heaviest reply harness work off the cheap status/chunk/token tests. + - `auto-reply` has dedicated buckets for top-level core helpers, top-level `reply.*` integration tests, and the `src/auto-reply/reply/**` subtree. CI further splits the reply subtree into agent-runner, dispatch, and commands/state-routing shards so one import-heavy bucket does not own the full Node tail. @@ -462,6 +462,10 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost): import-breakdown output. - `pnpm test:perf:imports:changed` scopes the same profiling view to files changed since `origin/main`. + - Shard timing data is written to `.artifacts/vitest-shard-timings.json`. + Whole-config runs use the config path as the key; include-pattern CI + shards append the shard name so filtered shards can be tracked + separately. - When one hot test still spends most of its time in startup imports, keep heavy dependencies behind a narrow local `*.runtime.ts` seam and mock that seam directly instead of deep-importing runtime helpers just diff --git a/docs/reference/test.md b/docs/reference/test.md index 227c7ca5252..8b44337f0d1 100644 --- a/docs/reference/test.md +++ b/docs/reference/test.md @@ -14,7 +14,7 @@ title: "Tests" - `pnpm changed:lanes`: shows the architectural lanes triggered by the diff against `origin/main`. - `pnpm check:changed`: runs the smart changed gate for the diff against `origin/main`. 
It runs core work with core test lanes, extension work with extension test lanes, test-only work with test typecheck/tests only, expands public Plugin SDK or plugin-contract changes to one extension validation pass, and keeps release metadata-only version bumps on targeted version/config/root-dependency checks. - `pnpm test`: routes explicit file/directory targets through scoped Vitest lanes. Untargeted runs use fixed shard groups and expand to leaf configs for local parallel execution; the extension group always expands to the per-extension shard configs instead of one giant root-project process. -- Full and extension shard runs update local timing data in `.artifacts/vitest-shard-timings.json`; later runs use those timings to balance slow and fast shards. Set `OPENCLAW_TEST_PROJECTS_TIMINGS=0` to ignore the local timing artifact. +- Full, extension, and include-pattern shard runs update local timing data in `.artifacts/vitest-shard-timings.json`; later whole-config runs use those timings to balance slow and fast shards. Include-pattern CI shards append the shard name to the timing key, which keeps filtered shard timings visible without replacing whole-config timing data. Set `OPENCLAW_TEST_PROJECTS_TIMINGS=0` to ignore the local timing artifact. - Selected `plugin-sdk` and `commands` test files now route through dedicated light lanes that keep only `test/setup.ts`, leaving runtime-heavy cases on their existing lanes. - Selected `plugin-sdk` and `commands` helper source files also map `pnpm test:changed` to explicit sibling tests in those light lanes, so small helper edits avoid rerunning the heavy runtime-backed suites. - `auto-reply` now also splits into three dedicated configs (`core`, `top-level`, `reply`) so the reply harness does not dominate the lighter top-level status/token/helper tests. 
/**
 * Parse the JSON text printed by `gh run list --json ...`.
 *
 * @param {string} raw - JSON text.
 * @returns {object[]} The parsed array, or `[]` when the payload is valid JSON
 *   but not an array.
 * @throws {SyntaxError} When `raw` is not valid JSON.
 */
function parseRunList(raw) {
  const parsed = JSON.parse(raw);
  return Array.isArray(parsed) ? parsed : [];
}

/**
 * Pick the push-triggered run to report on.
 *
 * Runs with any other `event` (for example `issue_comment`) are ignored. When
 * `headSha` is given and a push run with that SHA exists, that run wins;
 * otherwise the first push run in `runs` is returned.
 * NOTE(review): "first" is only "newest" if the caller passes runs newest-first,
 * which is presumably what `gh run list` emits — confirm.
 *
 * @param {object[]} runs - Run records with `event` and `headSha` fields.
 * @param {string | null} [headSha] - Commit SHA to prefer.
 * @returns {object | null} The selected run, or `null` when there is no push run.
 */
export function selectLatestMainPushCiRun(runs, headSha = null) {
  // Tolerate a non-array payload or null entries instead of throwing.
  const pushRuns = (Array.isArray(runs) ? runs : []).filter((run) => run?.event === "push");
  const matchingRun = headSha ? pushRuns.find((run) => run.headSha === headSha) : undefined;
  return matchingRun ?? pushRuns[0] ?? null;
}

/**
 * Resolve the commit SHA that `main` currently points at on the `origin` remote.
 *
 * @returns {string} The SHA printed in the first column by `git ls-remote`.
 * @throws {Error} When `git ls-remote` prints nothing for the ref.
 */
function getRemoteMainSha() {
  // A bare `main` pattern matches every ref whose tail is `main`
  // (refs/heads/feature/main, refs/tags/main, ...), and the first row wins.
  // Ask for the branch ref explicitly so only origin's main branch is listed.
  const raw = execFileSync("git", ["ls-remote", "origin", "refs/heads/main"], {
    encoding: "utf8",
  }).trim();
  const [sha] = raw.split(/\s+/u);
  if (!sha) {
    throw new Error("Could not resolve origin/main");
  }
  return sha;
}

/**
 * Find the database id of the push CI run for the current origin/main commit.
 *
 * Lists the 20 most recent `CI` workflow runs on `main` through `gh` and
 * delegates the choice to `selectLatestMainPushCiRun`.
 *
 * @returns {string} The run's `databaseId` as a string.
 * @throws {Error} When no push run is present in the listed runs.
 */
function getLatestMainPushCiRunId() {
  const headSha = getRemoteMainSha();
  const raw = execFileSync(
    "gh",
    [
      "run",
      "list",
      "--branch",
      "main",
      "--workflow",
      "CI",
      "--limit",
      "20",
      "--json",
      "databaseId,headSha,event,status,conclusion",
    ],
    { encoding: "utf8" },
  );
  const run = selectLatestMainPushCiRun(parseRunList(raw), headSha);
  if (!run?.databaseId) {
    throw new Error(`No push CI run found for origin/main ${headSha.slice(0, 10)}`);
  }
  return String(run.databaseId);
}

/**
 * Parse a count-style flag value.
 *
 * @param {string | undefined} value - Raw argument following the flag.
 * @param {number} fallback - Value used when `value` is missing or not numeric.
 * @returns {number} An integer of at least 1.
 */
function parseCountArg(value, fallback) {
  const parsed = Number.parseInt(value ?? "", 10);
  // Check NaN explicitly: `parsed || fallback` would turn an explicit 0 into
  // the fallback while negative values were clamped to 1.
  return Number.isNaN(parsed) ? fallback : Math.max(1, parsed);
}

/**
 * Parse the CLI arguments of the CI run timing script.
 *
 * Recognized tokens: `--limit <n>`, `--recent <n>`, `--latest-main`, and the
 * `--` passthrough sentinel. The first argument that is none of those (nor the
 * value slot after `--limit` / `--recent`) is treated as an explicit run id.
 *
 * @param {string[]} args - Arguments after the script name.
 * @returns {{ explicitRunId: string | undefined, limit: number, recentLimit: number | null, useLatestMain: boolean }}
 *   `limit` defaults to 15; `recentLimit` is `null` without `--recent` and
 *   defaults to 10 when the flag has no numeric value.
 */
export function parseRunTimingArgs(args) {
  const recentIndex = args.indexOf("--recent");
  const limitIndex = args.indexOf("--limit");
  const ignoredArgIndexes = new Set();
  for (const [index, arg] of args.entries()) {
    if (arg === "--" || arg === "--latest-main") {
      ignoredArgIndexes.add(index);
    }
  }
  // Each value flag consumes its own slot and the slot after it.
  for (const flagIndex of [limitIndex, recentIndex]) {
    if (flagIndex !== -1) {
      ignoredArgIndexes.add(flagIndex);
      ignoredArgIndexes.add(flagIndex + 1);
    }
  }
  return {
    explicitRunId: args.find((_arg, index) => !ignoredArgIndexes.has(index)),
    limit: limitIndex === -1 ? 15 : parseCountArg(args[limitIndex + 1], 15),
    recentLimit: recentIndex === -1 ? null : parseCountArg(args[recentIndex + 1], 10),
    useLatestMain: args.includes("--latest-main"),
  };
}
// Environment variables read by this module.
const TIMINGS_FILE_ENV_KEY = "OPENCLAW_TEST_PROJECTS_TIMINGS_PATH";
const TIMINGS_DISABLE_ENV_KEY = "OPENCLAW_TEST_PROJECTS_TIMINGS";
const SHARD_NAME_ENV_KEY = "OPENCLAW_VITEST_SHARD_NAME";

/** True for non-null, non-array objects (the only shape accepted as a JSON record here). */
function isPlainRecord(value) {
  return value !== null && typeof value === "object" && !Array.isArray(value);
}

/**
 * Turn an arbitrary label into a key-safe token: trims it, collapses every run
 * of characters outside `[a-zA-Z0-9_.-]` into one `-`, and strips leading and
 * trailing dashes.
 */
function sanitizeTimingLabel(value) {
  return String(value)
    .trim()
    .replace(/[^a-zA-Z0-9_.-]+/g, "-")
    .replace(/^-+|-+$/g, "");
}

/** First 12 hex characters of the SHA-1 of the JSON-serialized pattern list (order-sensitive). */
function hashIncludePatterns(includePatterns) {
  return createHash("sha1").update(JSON.stringify(includePatterns)).digest("hex").slice(0, 12);
}

/**
 * Whether shard timings should be read and written.
 *
 * @param {Record<string, string | undefined>} [env]
 * @returns {boolean} `false` only when the disable variable is exactly `"0"`.
 */
export function shouldUseShardTimings(env = process.env) {
  return env[TIMINGS_DISABLE_ENV_KEY] !== "0";
}

/**
 * Location of the timings artifact.
 *
 * @param {string} [cwd] - Base directory for the default location.
 * @param {Record<string, string | undefined>} [env]
 * @returns {string} The path from the environment override when set and
 *   non-empty, otherwise `<cwd>/.artifacts/vitest-shard-timings.json`.
 */
export function resolveShardTimingsPath(cwd = process.cwd(), env = process.env) {
  return env[TIMINGS_FILE_ENV_KEY] || path.join(cwd, ".artifacts", "vitest-shard-timings.json");
}

/**
 * Compute the key a run is recorded under.
 *
 * - No include patterns: the config path itself.
 * - Include patterns plus a shard name in `spec.env`: `<config>#<sanitized name>`.
 * - Include patterns without a usable shard name:
 *   `<config>#include-<pattern count>-<hash of the patterns>`.
 *
 * @param {{ config: string, env?: Record<string, string | undefined>, includePatterns?: string[] | null }} spec
 * @returns {string}
 */
export function resolveShardTimingKey(spec) {
  if (!Array.isArray(spec.includePatterns) || spec.includePatterns.length === 0) {
    return spec.config;
  }

  const shardName = sanitizeTimingLabel(spec.env?.[SHARD_NAME_ENV_KEY] ?? "");
  if (shardName) {
    return `${spec.config}#${shardName}`;
  }

  const patternHash = hashIncludePatterns(spec.includePatterns);
  return `${spec.config}#include-${spec.includePatterns.length}-${patternHash}`;
}

/**
 * Build the sample recorded for one finished run.
 *
 * @param {{ config: string, watchMode?: boolean, env?: object, includePatterns?: string[] | null }} spec
 * @param {number} durationMs - Measured duration.
 * @returns {{ baseConfig: string, config: string, durationMs: number, includePatternCount: number } | null}
 *   `null` for watch-mode runs and for non-finite or non-positive durations.
 */
export function createShardTimingSample(spec, durationMs) {
  if (spec.watchMode || !Number.isFinite(durationMs) || durationMs <= 0) {
    return null;
  }

  const includePatternCount = Array.isArray(spec.includePatterns) ? spec.includePatterns.length : 0;
  return {
    baseConfig: spec.config,
    config: resolveShardTimingKey(spec),
    durationMs,
    includePatternCount,
  };
}

/**
 * Load recorded timings as a `Map` of timing key to duration in milliseconds.
 *
 * Best effort: a disabled feature, a missing or unparsable file, or an
 * unexpected shape all yield an empty map. Each entry uses `averageMs`, falling
 * back to `durationMs`; entries without a finite positive value are dropped.
 *
 * @param {string} [cwd]
 * @param {Record<string, string | undefined>} [env]
 * @returns {Map<string, number>}
 */
export function readShardTimings(cwd = process.cwd(), env = process.env) {
  if (!shouldUseShardTimings(env)) {
    return new Map();
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(resolveShardTimingsPath(cwd, env), "utf8"));
    const configs = isPlainRecord(parsed) ? parsed.configs : null;
    // Reject arrays too: their index keys are not timing keys.
    if (!isPlainRecord(configs)) {
      return new Map();
    }
    const timings = new Map();
    for (const [config, value] of Object.entries(configs)) {
      const durationMs = Number(value?.averageMs ?? value?.durationMs);
      if (Number.isFinite(durationMs) && durationMs > 0) {
        timings.set(config, durationMs);
      }
    }
    return timings;
  } catch {
    // Deliberate best effort: timings are an optimization, never a requirement.
    return new Map();
  }
}

/**
 * Merge samples into the timings artifact.
 *
 * For each valid sample the stored `averageMs` becomes
 * `round(previous * 0.7 + sample * 0.3)`, or the rounded sample when there is
 * no usable previous average. `lastMs`, `sampleCount` and `updatedAt` are
 * refreshed; `baseConfig` is stored only when it differs from the key, and
 * `includePatternCount` only when non-zero. Entries for other keys are kept.
 * Does nothing when timings are disabled or `samples` is empty.
 *
 * The file is written to a sibling temp path and renamed into place; if either
 * step throws, the temp file is removed and the error is rethrown.
 *
 * @param {Array<{ config: string, durationMs: number, baseConfig?: string, includePatternCount?: number }>} samples
 * @param {string} [cwd]
 * @param {Record<string, string | undefined>} [env]
 * @returns {void}
 */
export function writeShardTimings(samples, cwd = process.cwd(), env = process.env) {
  if (!shouldUseShardTimings(env) || samples.length === 0) {
    return;
  }

  const outputPath = resolveShardTimingsPath(cwd, env);
  let current = { version: 1, configs: {} };
  try {
    current = JSON.parse(fs.readFileSync(outputPath, "utf8"));
  } catch {
    // First run, or a corrupt local artifact. Rewrite below.
  }

  const configs =
    isPlainRecord(current) && isPlainRecord(current.configs) ? { ...current.configs } : {};
  const updatedAt = new Date().toISOString();
  for (const sample of samples) {
    if (!sample.config || !Number.isFinite(sample.durationMs) || sample.durationMs <= 0) {
      continue;
    }
    const previous = configs[sample.config];
    const previousAverage = Number(previous?.averageMs ?? previous?.durationMs);
    const sampleCount = Math.max(0, Number(previous?.sampleCount) || 0) + 1;
    const averageMs =
      Number.isFinite(previousAverage) && previousAverage > 0
        ? Math.round(previousAverage * 0.7 + sample.durationMs * 0.3)
        : Math.round(sample.durationMs);
    configs[sample.config] = {
      averageMs,
      lastMs: Math.round(sample.durationMs),
      sampleCount,
      updatedAt,
      ...(sample.baseConfig && sample.baseConfig !== sample.config
        ? { baseConfig: sample.baseConfig }
        : {}),
      ...(sample.includePatternCount ? { includePatternCount: sample.includePatternCount } : {}),
    };
  }

  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  const tempPath = `${outputPath}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tempPath, `${JSON.stringify({ version: 1, configs }, null, 2)}\n`, "utf8");
    fs.renameSync(tempPath, outputPath);
  } catch (error) {
    // Do not leave a stray temp file next to the artifact when the write fails.
    fs.rmSync(tempPath, { force: true });
    throw error;
  }
}
parsed && typeof parsed === "object" ? parsed.configs : null; - if (!configs || typeof configs !== "object") { - return new Map(); - } - return new Map( - Object.entries(configs) - .map(([config, value]) => { - const durationMs = Number(value?.averageMs ?? value?.durationMs); - return Number.isFinite(durationMs) && durationMs > 0 ? [config, durationMs] : null; - }) - .filter(Boolean), - ); - } catch { - return new Map(); - } -} - -function writeShardTimings(samples, cwd = process.cwd(), env = process.env) { - if (!shouldUseShardTimings(env) || samples.length === 0) { - return; - } - - const outputPath = resolveShardTimingsPath(cwd, env); - let current = { version: 1, configs: {} }; - try { - current = JSON.parse(fs.readFileSync(outputPath, "utf8")); - } catch { - // First run, or a corrupt local artifact. Rewrite below. - } - - const configs = - current && typeof current === "object" && current.configs && typeof current.configs === "object" - ? { ...current.configs } - : {}; - const updatedAt = new Date().toISOString(); - for (const sample of samples) { - if (!sample.config || !Number.isFinite(sample.durationMs) || sample.durationMs <= 0) { - continue; - } - const previous = configs[sample.config]; - const previousAverage = Number(previous?.averageMs ?? previous?.durationMs); - const sampleCount = Math.max(0, Number(previous?.sampleCount) || 0) + 1; - const averageMs = - Number.isFinite(previousAverage) && previousAverage > 0 - ? 
Math.round(previousAverage * 0.7 + sample.durationMs * 0.3) - : Math.round(sample.durationMs); - configs[sample.config] = { - averageMs, - lastMs: Math.round(sample.durationMs), - sampleCount, - updatedAt, - }; - } - - fs.mkdirSync(path.dirname(outputPath), { recursive: true }); - const tempPath = `${outputPath}.${process.pid}.tmp`; - fs.writeFileSync(tempPath, `${JSON.stringify({ version: 1, configs }, null, 2)}\n`, "utf8"); - fs.renameSync(tempPath, outputPath); -} - function cleanupVitestRunSpec(spec) { if (!spec.includeFilePath) { return; @@ -263,8 +190,7 @@ async function runLoggedVitestSpec(spec) { } return { ...result, - timing: - !spec.watchMode && spec.includePatterns === null ? { config: spec.config, durationMs } : null, + timing: createShardTimingSample(spec, durationMs), }; } @@ -288,6 +214,7 @@ function interleaveSlowAndFastSpecs(sortedSpecs) { } function orderFullSuiteSpecsForParallelRun(specs, shardTimings = new Map()) { + const hasMatchingShardTiming = specs.some((spec) => shardTimings.has(spec.config)); const sortedSpecs = specs.toSorted((a, b) => { const weightDelta = resolveConfigSortWeight(b.config, shardTimings) - @@ -297,7 +224,7 @@ function orderFullSuiteSpecsForParallelRun(specs, shardTimings = new Map()) { } return a.config.localeCompare(b.config); }); - return shardTimings.size > 0 ? interleaveSlowAndFastSpecs(sortedSpecs) : sortedSpecs; + return hasMatchingShardTiming ? 
interleaveSlowAndFastSpecs(sortedSpecs) : sortedSpecs; } function isFullExtensionsProjectRun(specs) { diff --git a/scripts/test-projects.test-support.mjs b/scripts/test-projects.test-support.mjs index 4f3b4faf20f..8e646445def 100644 --- a/scripts/test-projects.test-support.mjs +++ b/scripts/test-projects.test-support.mjs @@ -237,9 +237,12 @@ const TOOLING_SOURCE_TEST_TARGETS = new Map([ ], ], ["scripts/run-oxlint.mjs", ["test/scripts/run-oxlint.test.ts"]], + ["scripts/ci-run-timings.mjs", ["test/scripts/ci-run-timings.test.ts"]], ["scripts/test-extension-batch.mjs", ["test/scripts/test-extension.test.ts"]], ["scripts/lib/extension-test-plan.mjs", ["test/scripts/test-extension.test.ts"]], ["scripts/lib/vitest-batch-runner.mjs", ["test/scripts/test-extension.test.ts"]], + ["scripts/lib/ci-node-test-plan.mjs", ["test/scripts/ci-node-test-plan.test.ts"]], + ["scripts/lib/vitest-shard-timings.mjs", ["test/scripts/vitest-shard-timings.test.ts"]], ["scripts/test-projects.mjs", ["test/scripts/test-projects.test.ts"]], ["scripts/test-projects.test-support.d.mts", ["test/scripts/test-projects.test.ts"]], ["scripts/test-projects.test-support.mjs", ["test/scripts/test-projects.test.ts"]], diff --git a/test/scripts/ci-node-test-plan.test.ts b/test/scripts/ci-node-test-plan.test.ts index 6965a18ea10..61124be6790 100644 --- a/test/scripts/ci-node-test-plan.test.ts +++ b/test/scripts/ci-node-test-plan.test.ts @@ -216,10 +216,16 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => { shardName: "auto-reply-core-top-level", }, { - checkName: "checks-node-auto-reply-reply-agent-dispatch", + checkName: "checks-node-auto-reply-reply-agent-runner", configs: ["test/vitest/vitest.auto-reply-reply.config.ts"], requiresDist: false, - shardName: "auto-reply-reply-agent-dispatch", + shardName: "auto-reply-reply-agent-runner", + }, + { + checkName: "checks-node-auto-reply-reply-dispatch", + configs: ["test/vitest/vitest.auto-reply-reply.config.ts"], + requiresDist: false, + shardName: 
"auto-reply-reply-dispatch", }, { checkName: "checks-node-auto-reply-reply-commands-state-routing", diff --git a/test/scripts/ci-run-timings.test.ts b/test/scripts/ci-run-timings.test.ts index 108f7369276..07e4995669b 100644 --- a/test/scripts/ci-run-timings.test.ts +++ b/test/scripts/ci-run-timings.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from "vitest"; -import { summarizeRunTimings } from "../../scripts/ci-run-timings.mjs"; +import { + parseRunTimingArgs, + selectLatestMainPushCiRun, + summarizeRunTimings, +} from "../../scripts/ci-run-timings.mjs"; describe("scripts/ci-run-timings.mjs", () => { it("separates queue time from job duration", () => { @@ -46,4 +50,58 @@ describe("scripts/ci-run-timings.mjs", () => { ["slow", 20], ]); }); + + it("selects the push CI run for the current main SHA", () => { + expect( + selectLatestMainPushCiRun( + [ + { + databaseId: 3, + event: "issue_comment", + headSha: "current", + }, + { + databaseId: 2, + event: "push", + headSha: "older", + }, + { + databaseId: 1, + event: "push", + headSha: "current", + }, + ], + "current", + ), + ).toMatchObject({ databaseId: 1 }); + }); + + it("falls back to the newest push CI run when the exact SHA has not appeared yet", () => { + expect( + selectLatestMainPushCiRun( + [ + { + databaseId: 4, + event: "issue_comment", + headSha: "current", + }, + { + databaseId: 3, + event: "push", + headSha: "previous", + }, + ], + "current", + ), + ).toMatchObject({ databaseId: 3 }); + }); + + it("ignores pnpm passthrough sentinels when parsing monitor args", () => { + expect(parseRunTimingArgs(["--latest-main", "--", "--limit", "3"])).toEqual({ + explicitRunId: undefined, + limit: 3, + recentLimit: null, + useLatestMain: true, + }); + }); }); diff --git a/test/scripts/vitest-shard-timings.test.ts b/test/scripts/vitest-shard-timings.test.ts new file mode 100644 index 00000000000..0029bba3044 --- /dev/null +++ b/test/scripts/vitest-shard-timings.test.ts @@ -0,0 +1,91 @@ +import fs from 
// Covers timing-key resolution and artifact persistence in
// scripts/lib/vitest-shard-timings.mjs.
describe("scripts/lib/vitest-shard-timings.mjs", () => {
  it("uses the config path as the timing key for whole-config runs", () => {
    expect(
      resolveShardTimingKey({
        config: "test/vitest/vitest.unit-fast.config.ts",
        env: {},
        includePatterns: null,
      }),
    ).toBe("test/vitest/vitest.unit-fast.config.ts");
  });

  it("uses the CI shard name for include-pattern timing keys", () => {
    // The shard name contains "/" and spaces on purpose: both must be
    // collapsed to "-" in the resulting key.
    expect(
      resolveShardTimingKey({
        config: "test/vitest/vitest.auto-reply-reply.config.ts",
        env: { OPENCLAW_VITEST_SHARD_NAME: "auto-reply/reply agent dispatch" },
        includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
      }),
    ).toBe("test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-dispatch");
  });

  it("falls back to a stable include-pattern hash outside CI", () => {
    const first = resolveShardTimingKey({
      config: "test/vitest/vitest.auto-reply-reply.config.ts",
      env: {},
      includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
    });
    const second = resolveShardTimingKey({
      config: "test/vitest/vitest.auto-reply-reply.config.ts",
      env: {},
      includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
    });

    expect(first).toBe(second);
    expect(first).toMatch(/^test\/vitest\/vitest\.auto-reply-reply\.config\.ts#include-1-/u);
  });

  it("persists include-pattern timing metadata", () => {
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-shard-timings-"));
    // Remove the temp directory even when an expectation fails, so repeated
    // runs do not accumulate directories under the OS temp dir.
    try {
      const env = {
        OPENCLAW_TEST_PROJECTS_TIMINGS_PATH: path.join(tempDir, "timings.json"),
        OPENCLAW_VITEST_SHARD_NAME: "auto-reply-reply-agent-runner",
      };
      const sample = createShardTimingSample(
        {
          config: "test/vitest/vitest.auto-reply-reply.config.ts",
          env,
          includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
          watchMode: false,
        },
        1234,
      );

      expect(sample).toEqual({
        baseConfig: "test/vitest/vitest.auto-reply-reply.config.ts",
        config: "test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-runner",
        durationMs: 1234,
        includePatternCount: 1,
      });

      writeShardTimings([sample], tempDir, env);

      expect(readShardTimings(tempDir, env)).toEqual(
        new Map([
          ["test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-runner", 1234],
        ]),
      );
      expect(
        JSON.parse(fs.readFileSync(env.OPENCLAW_TEST_PROJECTS_TIMINGS_PATH, "utf8")).configs[
          "test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-runner"
        ],
      ).toMatchObject({
        averageMs: 1234,
        baseConfig: "test/vitest/vitest.auto-reply-reply.config.ts",
        includePatternCount: 1,
        lastMs: 1234,
        sampleCount: 1,
      });
    } finally {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  });
});