ci: split auto-reply shard timing

This commit is contained in:
Peter Steinberger
2026-04-25 23:46:56 +01:00
parent 1531123d35
commit 496d90c3b5
13 changed files with 382 additions and 96 deletions

View File

@@ -1231,6 +1231,7 @@ jobs:
NODE_OPTIONS: --max-old-space-size=6144
OPENCLAW_NODE_TEST_CONFIGS_JSON: ${{ toJson(matrix.configs) }}
OPENCLAW_NODE_TEST_INCLUDE_PATTERNS_JSON: ${{ toJson(matrix.includePatterns) }}
OPENCLAW_VITEST_SHARD_NAME: ${{ matrix.shard_name }}
OPENCLAW_TEST_PROJECTS_PARALLEL: "2"
shell: bash
run: |

View File

@@ -98,7 +98,7 @@ Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by
On pushes, the `checks` matrix adds the push-only `compat-node22` lane. On pull requests, that lane is skipped and the matrix stays focused on the normal test/channel lanes.
The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as three balanced workers instead of six tiny workers, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue.
The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as four balanced workers with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue.
Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest`, then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles that flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push.
`extension-fast` is PR-only because push runs already execute the full bundled plugin shards. That keeps changed-plugin feedback for reviews without reserving an extra Blacksmith worker on `main` for coverage already present in `checks-node-extensions`.
@@ -132,7 +132,10 @@ pnpm test:channels
pnpm test:contracts:channels
pnpm check:docs # docs format + lint + broken links
pnpm build # build dist when CI artifact/build-smoke lanes matter
pnpm ci:timings # summarize the latest origin/main push CI run
pnpm ci:timings:recent # compare recent successful main CI runs
node scripts/ci-run-timings.mjs <run-id> # summarize wall time, queue time, and slowest jobs
node scripts/ci-run-timings.mjs --latest-main # ignore issue/comment noise and choose origin/main push CI
node scripts/ci-run-timings.mjs --recent 10 # compare recent successful main CI runs
pnpm test:perf:groups --full-suite --allow-failures --output .artifacts/test-perf/baseline-before.json
pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifacts/test-perf/after-agent.json

View File

@@ -396,7 +396,7 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
- `pnpm check:changed` is the normal smart local gate for narrow work. It classifies the diff into core, core tests, extensions, extension tests, apps, docs, release metadata, and tooling, then runs the matching typecheck/lint/test lanes. Public Plugin SDK and plugin-contract changes include one extension validation pass because extensions depend on those core contracts. Release metadata-only version bumps run targeted version/config/root-dependency checks instead of the full suite, with a guard that rejects package changes outside the top-level version field.
- Import-light unit tests from agents, commands, plugins, auto-reply helpers, `plugin-sdk`, and similar pure utility areas route through the `unit-fast` lane, which skips `test/setup-openclaw-runtime.ts`; stateful/runtime-heavy files stay on the existing lanes.
- Selected `plugin-sdk` and `commands` helper source files also map changed-mode runs to explicit sibling tests in those light lanes, so helper edits avoid rerunning the full heavy suite for that directory.
- `auto-reply` has three dedicated buckets: top-level core helpers, top-level `reply.*` integration tests, and the `src/auto-reply/reply/**` subtree. This keeps the heaviest reply harness work off the cheap status/chunk/token tests.
- `auto-reply` has dedicated buckets for top-level core helpers, top-level `reply.*` integration tests, and the `src/auto-reply/reply/**` subtree. CI further splits the reply subtree into agent-runner, dispatch, and commands/state-routing shards so one import-heavy bucket does not own the full Node tail.
</Accordion>
@@ -462,6 +462,10 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
import-breakdown output.
- `pnpm test:perf:imports:changed` scopes the same profiling view to
files changed since `origin/main`.
- Shard timing data is written to `.artifacts/vitest-shard-timings.json`.
Whole-config runs use the config path as the key; include-pattern CI
shards append the shard name so filtered shards can be tracked
separately.
- When one hot test still spends most of its time in startup imports,
keep heavy dependencies behind a narrow local `*.runtime.ts` seam and
mock that seam directly instead of deep-importing runtime helpers just

View File

@@ -14,7 +14,7 @@ title: "Tests"
- `pnpm changed:lanes`: shows the architectural lanes triggered by the diff against `origin/main`.
- `pnpm check:changed`: runs the smart changed gate for the diff against `origin/main`. It runs core work with core test lanes, extension work with extension test lanes, test-only work with test typecheck/tests only, expands public Plugin SDK or plugin-contract changes to one extension validation pass, and keeps release metadata-only version bumps on targeted version/config/root-dependency checks.
- `pnpm test`: routes explicit file/directory targets through scoped Vitest lanes. Untargeted runs use fixed shard groups and expand to leaf configs for local parallel execution; the extension group always expands to the per-extension shard configs instead of one giant root-project process.
- Full and extension shard runs update local timing data in `.artifacts/vitest-shard-timings.json`; later runs use those timings to balance slow and fast shards. Set `OPENCLAW_TEST_PROJECTS_TIMINGS=0` to ignore the local timing artifact.
- Full, extension, and include-pattern shard runs update local timing data in `.artifacts/vitest-shard-timings.json`; later whole-config runs use those timings to balance slow and fast shards. Include-pattern CI shards append the shard name to the timing key, which keeps filtered shard timings visible without replacing whole-config timing data. Set `OPENCLAW_TEST_PROJECTS_TIMINGS=0` to ignore the local timing artifact.
- Selected `plugin-sdk` and `commands` test files now route through dedicated light lanes that keep only `test/setup.ts`, leaving runtime-heavy cases on their existing lanes.
- Selected `plugin-sdk` and `commands` helper source files also map `pnpm test:changed` to explicit sibling tests in those light lanes, so small helper edits avoid rerunning the heavy runtime-backed suites.
- `auto-reply` now also splits into three dedicated configs (`core`, `top-level`, `reply`) so the reply harness does not dominate the lighter top-level status/token/helper tests.

View File

@@ -1329,6 +1329,8 @@
"check:timed": "node scripts/check-timed.mjs",
"check:timed:all-types": "node scripts/check-timed.mjs --include-test-types",
"check:timed:architecture": "node scripts/check-timed.mjs --include-architecture",
"ci:timings": "node scripts/ci-run-timings.mjs --latest-main",
"ci:timings:recent": "node scripts/ci-run-timings.mjs --recent 10",
"codex-app-server:protocol:check": "node --import tsx scripts/check-codex-app-server-protocol.ts",
"codex-app-server:protocol:sync": "node --import tsx scripts/sync-codex-app-server-protocol.ts",
"config:channels:check": "node --import tsx scripts/generate-bundled-channel-config-metadata.ts --check",

View File

@@ -18,6 +18,11 @@ function formatSeconds(value) {
return value === null ? "" : `${value}s`;
}
// Decode `gh run list --json` output into an array of run records.
// Any non-array payload (object, null, scalar) normalizes to an empty list
// so callers can iterate unconditionally.
function parseRunList(raw) {
  const decoded = JSON.parse(raw);
  if (Array.isArray(decoded)) {
    return decoded;
  }
  return [];
}
function collectRunTimingContext(run) {
const created = parseTime(run.createdAt);
const updated = parseTime(run.updatedAt);
@@ -64,6 +69,17 @@ export function summarizeRunTimings(run, limit = 15) {
};
}
// Pick the push-triggered CI run to summarize from a newest-first `gh run
// list` result. When `headSha` is given, prefer the run for that exact SHA
// (so issue/comment-triggered runs on main cannot shadow it); otherwise, or
// when that SHA has not produced a run yet, fall back to the newest push run.
// Returns null when no push runs exist at all.
export function selectLatestMainPushCiRun(runs, headSha = null) {
  const pushRuns = runs.filter((candidate) => candidate.event === "push");
  if (headSha) {
    for (const candidate of pushRuns) {
      if (candidate.headSha === headSha) {
        return candidate;
      }
    }
  }
  return pushRuns.length > 0 ? pushRuns[0] : null;
}
function getLatestCiRunId() {
const raw = execFileSync(
"gh",
@@ -78,6 +94,40 @@ function getLatestCiRunId() {
return String(runId);
}
// Resolve the commit SHA at the tip of origin/main by querying the remote
// directly, so a stale local tracking ref cannot mislead the run lookup.
// Throws when `git ls-remote` yields no SHA for the ref.
function getRemoteMainSha() {
  const output = execFileSync("git", ["ls-remote", "origin", "main"], {
    encoding: "utf8",
  });
  const sha = output.trim().split(/\s+/u)[0];
  if (!sha) {
    throw new Error("Could not resolve origin/main");
  }
  return sha;
}
// Find the database ID of the push CI run for the current origin/main tip.
// Resolves the remote SHA first, then scans the 20 most recent CI runs on
// main, preferring the run whose headSha matches exactly (falling back to
// the newest push run via selectLatestMainPushCiRun). Throws when no push
// run is visible for the branch.
function getLatestMainPushCiRunId() {
  const headSha = getRemoteMainSha();
  const listArgs = [
    "run",
    "list",
    "--branch",
    "main",
    "--workflow",
    "CI",
    "--limit",
    "20",
    "--json",
    "databaseId,headSha,event,status,conclusion",
  ];
  const raw = execFileSync("gh", listArgs, { encoding: "utf8" });
  const run = selectLatestMainPushCiRun(parseRunList(raw), headSha);
  if (!run?.databaseId) {
    throw new Error(`No push CI run found for origin/main ${headSha.slice(0, 10)}`);
  }
  return String(run.databaseId);
}
function listRecentSuccessfulCiRuns(limit) {
const raw = execFileSync(
"gh",
@@ -161,11 +211,15 @@ function printSection(title, jobs, metric) {
}
}
async function main() {
const args = process.argv.slice(2);
export function parseRunTimingArgs(args) {
const recentIndex = args.indexOf("--recent");
const limitIndex = args.indexOf("--limit");
const ignoredArgIndexes = new Set();
for (const [index, arg] of args.entries()) {
if (arg === "--" || arg === "--latest-main") {
ignoredArgIndexes.add(index);
}
}
if (limitIndex !== -1) {
ignoredArgIndexes.add(limitIndex);
ignoredArgIndexes.add(limitIndex + 1);
@@ -176,8 +230,21 @@ async function main() {
}
const limit =
limitIndex === -1 ? 15 : Math.max(1, Number.parseInt(args[limitIndex + 1] ?? "", 10) || 15);
if (recentIndex !== -1) {
const recentLimit = Math.max(1, Number.parseInt(args[recentIndex + 1] ?? "", 10) || 10);
const recentLimit =
recentIndex === -1 ? null : Math.max(1, Number.parseInt(args[recentIndex + 1] ?? "", 10) || 10);
return {
explicitRunId: args.find((_arg, index) => !ignoredArgIndexes.has(index)),
limit,
recentLimit,
useLatestMain: args.includes("--latest-main"),
};
}
async function main() {
const { explicitRunId, limit, recentLimit, useLatestMain } = parseRunTimingArgs(
process.argv.slice(2),
);
if (recentLimit !== null) {
for (const run of listRecentSuccessfulCiRuns(recentLimit)) {
const summary = summarizeJobs(loadRun(run.databaseId));
console.log(
@@ -197,7 +264,7 @@ async function main() {
}
return;
}
const runId = args.find((_arg, index) => !ignoredArgIndexes.has(index)) ?? getLatestCiRunId();
const runId = explicitRunId ?? (useLatestMain ? getLatestMainPushCiRunId() : getLatestCiRunId());
const summary = summarizeRunTimings(loadRun(runId), limit);
console.log(

View File

@@ -66,10 +66,8 @@ function createAutoReplyReplySplitShards() {
}
const mergedGroups = {
"auto-reply-reply-agent-dispatch": [
...groups["auto-reply-reply-agent-runner"],
...groups["auto-reply-reply-dispatch"],
],
"auto-reply-reply-agent-runner": groups["auto-reply-reply-agent-runner"],
"auto-reply-reply-dispatch": groups["auto-reply-reply-dispatch"],
"auto-reply-reply-commands-state-routing": [
...groups["auto-reply-reply-commands"],
...groups["auto-reply-reply-state-routing"],

View File

@@ -0,0 +1,126 @@
import { createHash } from "node:crypto";
import fs from "node:fs";
import path from "node:path";
// Environment keys controlling where timing data lives, whether it is
// collected at all, and which CI shard name labels include-pattern runs.
const TIMINGS_FILE_ENV_KEY = "OPENCLAW_TEST_PROJECTS_TIMINGS_PATH";
const TIMINGS_DISABLE_ENV_KEY = "OPENCLAW_TEST_PROJECTS_TIMINGS";
const SHARD_NAME_ENV_KEY = "OPENCLAW_VITEST_SHARD_NAME";

// Collapse a shard label into a key-safe slug: runs of characters outside
// [a-zA-Z0-9_.-] become "-", and leading/trailing dashes are stripped.
function sanitizeTimingLabel(value) {
  const trimmed = String(value).trim();
  const dashed = trimmed.replace(/[^a-zA-Z0-9_.-]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}

// Short, deterministic fingerprint of an include-pattern list so ad-hoc
// filtered runs outside CI still get a stable timing key.
function hashIncludePatterns(includePatterns) {
  const serialized = JSON.stringify(includePatterns);
  return createHash("sha1").update(serialized).digest("hex").slice(0, 12);
}

// Timing persistence is opt-out: only the explicit string "0" disables it.
export function shouldUseShardTimings(env = process.env) {
  return env[TIMINGS_DISABLE_ENV_KEY] !== "0";
}

// Honor an explicit override path; otherwise use the repo-local artifact
// under .artifacts/.
export function resolveShardTimingsPath(cwd = process.cwd(), env = process.env) {
  const override = env[TIMINGS_FILE_ENV_KEY];
  return override || path.join(cwd, ".artifacts", "vitest-shard-timings.json");
}

// Timing key for a run spec. Whole-config runs key on the config path
// alone. Include-pattern shards append "#<shard-name>" when CI provides
// one, or "#include-<count>-<hash>" as a stable local fallback, so filtered
// shard timings never overwrite whole-config timings.
export function resolveShardTimingKey(spec) {
  const patterns = spec.includePatterns;
  if (!Array.isArray(patterns) || patterns.length === 0) {
    return spec.config;
  }
  const shardName = sanitizeTimingLabel(spec.env?.[SHARD_NAME_ENV_KEY] ?? "");
  if (shardName) {
    return `${spec.config}#${shardName}`;
  }
  return `${spec.config}#include-${patterns.length}-${hashIncludePatterns(patterns)}`;
}

// Build a persistable timing sample for a finished run, or null for
// watch-mode runs and non-positive/invalid durations.
export function createShardTimingSample(spec, durationMs) {
  if (spec.watchMode || !Number.isFinite(durationMs) || durationMs <= 0) {
    return null;
  }
  const patterns = spec.includePatterns;
  return {
    baseConfig: spec.config,
    config: resolveShardTimingKey(spec),
    durationMs,
    includePatternCount: Array.isArray(patterns) ? patterns.length : 0,
  };
}

// Load persisted timings as Map<timingKey, averageMs>. Disabled, missing,
// or corrupt timing data all degrade to an empty map rather than failing
// the run.
export function readShardTimings(cwd = process.cwd(), env = process.env) {
  if (!shouldUseShardTimings(env)) {
    return new Map();
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(resolveShardTimingsPath(cwd, env), "utf8"));
    const configs = parsed && typeof parsed === "object" ? parsed.configs : null;
    if (!configs || typeof configs !== "object") {
      return new Map();
    }
    const timings = new Map();
    for (const [config, value] of Object.entries(configs)) {
      // Prefer the smoothed average; fall back to a legacy durationMs field.
      const durationMs = Number(value?.averageMs ?? value?.durationMs);
      if (Number.isFinite(durationMs) && durationMs > 0) {
        timings.set(config, durationMs);
      }
    }
    return timings;
  } catch {
    return new Map();
  }
}

// Merge new samples into the timing artifact on disk. Existing entries are
// blended with a 70/30 exponential moving average so one slow run cannot
// swing scheduling. The write goes through a pid-suffixed temp file plus
// rename so concurrent readers never observe a partial JSON document.
export function writeShardTimings(samples, cwd = process.cwd(), env = process.env) {
  if (!shouldUseShardTimings(env) || samples.length === 0) {
    return;
  }
  const outputPath = resolveShardTimingsPath(cwd, env);
  let current = { version: 1, configs: {} };
  try {
    current = JSON.parse(fs.readFileSync(outputPath, "utf8"));
  } catch {
    // First run, or a corrupt local artifact. Rewrite below.
  }
  const hasConfigs =
    current && typeof current === "object" && current.configs && typeof current.configs === "object";
  const configs = hasConfigs ? { ...current.configs } : {};
  const updatedAt = new Date().toISOString();
  for (const sample of samples) {
    if (!sample.config || !Number.isFinite(sample.durationMs) || sample.durationMs <= 0) {
      continue;
    }
    const previous = configs[sample.config];
    const previousAverage = Number(previous?.averageMs ?? previous?.durationMs);
    const sampleCount = Math.max(0, Number(previous?.sampleCount) || 0) + 1;
    const hasHistory = Number.isFinite(previousAverage) && previousAverage > 0;
    const entry = {
      averageMs: hasHistory
        ? Math.round(previousAverage * 0.7 + sample.durationMs * 0.3)
        : Math.round(sample.durationMs),
      lastMs: Math.round(sample.durationMs),
      sampleCount,
      updatedAt,
    };
    // Only filtered shards record metadata linking back to the base config.
    if (sample.baseConfig && sample.baseConfig !== sample.config) {
      entry.baseConfig = sample.baseConfig;
    }
    if (sample.includePatternCount) {
      entry.includePatternCount = sample.includePatternCount;
    }
    configs[sample.config] = entry;
  }
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  const tempPath = `${outputPath}.${process.pid}.tmp`;
  fs.writeFileSync(tempPath, `${JSON.stringify({ version: 1, configs }, null, 2)}\n`, "utf8");
  fs.renameSync(tempPath, outputPath);
}

View File

@@ -1,5 +1,4 @@
import fs from "node:fs";
import path from "node:path";
import { performance } from "node:perf_hooks";
import { acquireLocalHeavyCheckLockSync } from "./lib/local-heavy-check-runtime.mjs";
import {
@@ -7,6 +6,11 @@ import {
resolveLocalFullSuiteProfile,
resolveLocalVitestEnv,
} from "./lib/vitest-local-scheduling.mjs";
import {
createShardTimingSample,
readShardTimings,
writeShardTimings,
} from "./lib/vitest-shard-timings.mjs";
import {
resolveVitestCliEntry,
resolveVitestNodeArgs,
@@ -94,8 +98,6 @@ const FULL_SUITE_CONFIG_WEIGHT = new Map([
["test/vitest/vitest.extension-memory.config.ts", 6],
["test/vitest/vitest.extension-msteams.config.ts", 4],
]);
const TIMINGS_FILE_ENV_KEY = "OPENCLAW_TEST_PROJECTS_TIMINGS_PATH";
const TIMINGS_DISABLE_ENV_KEY = "OPENCLAW_TEST_PROJECTS_TIMINGS";
const releaseLockOnce = () => {
if (lockReleased) {
return;
@@ -104,81 +106,6 @@ const releaseLockOnce = () => {
releaseLock();
};
function shouldUseShardTimings(env = process.env) {
return env[TIMINGS_DISABLE_ENV_KEY] !== "0";
}
function resolveShardTimingsPath(cwd = process.cwd(), env = process.env) {
return env[TIMINGS_FILE_ENV_KEY] || path.join(cwd, ".artifacts", "vitest-shard-timings.json");
}
function readShardTimings(cwd = process.cwd(), env = process.env) {
if (!shouldUseShardTimings(env)) {
return new Map();
}
try {
const raw = fs.readFileSync(resolveShardTimingsPath(cwd, env), "utf8");
const parsed = JSON.parse(raw);
const configs = parsed && typeof parsed === "object" ? parsed.configs : null;
if (!configs || typeof configs !== "object") {
return new Map();
}
return new Map(
Object.entries(configs)
.map(([config, value]) => {
const durationMs = Number(value?.averageMs ?? value?.durationMs);
return Number.isFinite(durationMs) && durationMs > 0 ? [config, durationMs] : null;
})
.filter(Boolean),
);
} catch {
return new Map();
}
}
function writeShardTimings(samples, cwd = process.cwd(), env = process.env) {
if (!shouldUseShardTimings(env) || samples.length === 0) {
return;
}
const outputPath = resolveShardTimingsPath(cwd, env);
let current = { version: 1, configs: {} };
try {
current = JSON.parse(fs.readFileSync(outputPath, "utf8"));
} catch {
// First run, or a corrupt local artifact. Rewrite below.
}
const configs =
current && typeof current === "object" && current.configs && typeof current.configs === "object"
? { ...current.configs }
: {};
const updatedAt = new Date().toISOString();
for (const sample of samples) {
if (!sample.config || !Number.isFinite(sample.durationMs) || sample.durationMs <= 0) {
continue;
}
const previous = configs[sample.config];
const previousAverage = Number(previous?.averageMs ?? previous?.durationMs);
const sampleCount = Math.max(0, Number(previous?.sampleCount) || 0) + 1;
const averageMs =
Number.isFinite(previousAverage) && previousAverage > 0
? Math.round(previousAverage * 0.7 + sample.durationMs * 0.3)
: Math.round(sample.durationMs);
configs[sample.config] = {
averageMs,
lastMs: Math.round(sample.durationMs),
sampleCount,
updatedAt,
};
}
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
const tempPath = `${outputPath}.${process.pid}.tmp`;
fs.writeFileSync(tempPath, `${JSON.stringify({ version: 1, configs }, null, 2)}\n`, "utf8");
fs.renameSync(tempPath, outputPath);
}
function cleanupVitestRunSpec(spec) {
if (!spec.includeFilePath) {
return;
@@ -263,8 +190,7 @@ async function runLoggedVitestSpec(spec) {
}
return {
...result,
timing:
!spec.watchMode && spec.includePatterns === null ? { config: spec.config, durationMs } : null,
timing: createShardTimingSample(spec, durationMs),
};
}
@@ -288,6 +214,7 @@ function interleaveSlowAndFastSpecs(sortedSpecs) {
}
function orderFullSuiteSpecsForParallelRun(specs, shardTimings = new Map()) {
const hasMatchingShardTiming = specs.some((spec) => shardTimings.has(spec.config));
const sortedSpecs = specs.toSorted((a, b) => {
const weightDelta =
resolveConfigSortWeight(b.config, shardTimings) -
@@ -297,7 +224,7 @@ function orderFullSuiteSpecsForParallelRun(specs, shardTimings = new Map()) {
}
return a.config.localeCompare(b.config);
});
return shardTimings.size > 0 ? interleaveSlowAndFastSpecs(sortedSpecs) : sortedSpecs;
return hasMatchingShardTiming ? interleaveSlowAndFastSpecs(sortedSpecs) : sortedSpecs;
}
function isFullExtensionsProjectRun(specs) {

View File

@@ -237,9 +237,12 @@ const TOOLING_SOURCE_TEST_TARGETS = new Map([
],
],
["scripts/run-oxlint.mjs", ["test/scripts/run-oxlint.test.ts"]],
["scripts/ci-run-timings.mjs", ["test/scripts/ci-run-timings.test.ts"]],
["scripts/test-extension-batch.mjs", ["test/scripts/test-extension.test.ts"]],
["scripts/lib/extension-test-plan.mjs", ["test/scripts/test-extension.test.ts"]],
["scripts/lib/vitest-batch-runner.mjs", ["test/scripts/test-extension.test.ts"]],
["scripts/lib/ci-node-test-plan.mjs", ["test/scripts/ci-node-test-plan.test.ts"]],
["scripts/lib/vitest-shard-timings.mjs", ["test/scripts/vitest-shard-timings.test.ts"]],
["scripts/test-projects.mjs", ["test/scripts/test-projects.test.ts"]],
["scripts/test-projects.test-support.d.mts", ["test/scripts/test-projects.test.ts"]],
["scripts/test-projects.test-support.mjs", ["test/scripts/test-projects.test.ts"]],

View File

@@ -216,10 +216,16 @@ describe("scripts/lib/ci-node-test-plan.mjs", () => {
shardName: "auto-reply-core-top-level",
},
{
checkName: "checks-node-auto-reply-reply-agent-dispatch",
checkName: "checks-node-auto-reply-reply-agent-runner",
configs: ["test/vitest/vitest.auto-reply-reply.config.ts"],
requiresDist: false,
shardName: "auto-reply-reply-agent-dispatch",
shardName: "auto-reply-reply-agent-runner",
},
{
checkName: "checks-node-auto-reply-reply-dispatch",
configs: ["test/vitest/vitest.auto-reply-reply.config.ts"],
requiresDist: false,
shardName: "auto-reply-reply-dispatch",
},
{
checkName: "checks-node-auto-reply-reply-commands-state-routing",

View File

@@ -1,5 +1,9 @@
import { describe, expect, it } from "vitest";
import { summarizeRunTimings } from "../../scripts/ci-run-timings.mjs";
import {
parseRunTimingArgs,
selectLatestMainPushCiRun,
summarizeRunTimings,
} from "../../scripts/ci-run-timings.mjs";
describe("scripts/ci-run-timings.mjs", () => {
it("separates queue time from job duration", () => {
@@ -46,4 +50,58 @@ describe("scripts/ci-run-timings.mjs", () => {
["slow", 20],
]);
});
it("selects the push CI run for the current main SHA", () => {
expect(
selectLatestMainPushCiRun(
[
{
databaseId: 3,
event: "issue_comment",
headSha: "current",
},
{
databaseId: 2,
event: "push",
headSha: "older",
},
{
databaseId: 1,
event: "push",
headSha: "current",
},
],
"current",
),
).toMatchObject({ databaseId: 1 });
});
it("falls back to the newest push CI run when the exact SHA has not appeared yet", () => {
expect(
selectLatestMainPushCiRun(
[
{
databaseId: 4,
event: "issue_comment",
headSha: "current",
},
{
databaseId: 3,
event: "push",
headSha: "previous",
},
],
"current",
),
).toMatchObject({ databaseId: 3 });
});
it("ignores pnpm passthrough sentinels when parsing monitor args", () => {
expect(parseRunTimingArgs(["--latest-main", "--", "--limit", "3"])).toEqual({
explicitRunId: undefined,
limit: 3,
recentLimit: null,
useLatestMain: true,
});
});
});

View File

@@ -0,0 +1,91 @@
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import {
createShardTimingSample,
readShardTimings,
resolveShardTimingKey,
writeShardTimings,
} from "../../scripts/lib/vitest-shard-timings.mjs";
// Unit coverage for the shared shard-timing helpers used by both the local
// Vitest scheduler and the CI include-pattern shards.
describe("scripts/lib/vitest-shard-timings.mjs", () => {
  // Runs without include patterns must keep the bare config path as the
  // timing key, so historical whole-config timing data stays addressable.
  it("uses the config path as the timing key for whole-config runs", () => {
    expect(
      resolveShardTimingKey({
        config: "test/vitest/vitest.unit-fast.config.ts",
        env: {},
        includePatterns: null,
      }),
    ).toBe("test/vitest/vitest.unit-fast.config.ts");
  });
  // In CI, the shard name env var labels filtered runs; the raw label is
  // sanitized (slashes/spaces become dashes) before being appended.
  it("uses the CI shard name for include-pattern timing keys", () => {
    expect(
      resolveShardTimingKey({
        config: "test/vitest/vitest.auto-reply-reply.config.ts",
        env: { OPENCLAW_VITEST_SHARD_NAME: "auto-reply/reply agent dispatch" },
        includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
      }),
    ).toBe("test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-dispatch");
  });
  // Without a CI shard name, the key falls back to a pattern-count + hash
  // suffix; identical pattern lists must hash to the identical key.
  it("falls back to a stable include-pattern hash outside CI", () => {
    const first = resolveShardTimingKey({
      config: "test/vitest/vitest.auto-reply-reply.config.ts",
      env: {},
      includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
    });
    const second = resolveShardTimingKey({
      config: "test/vitest/vitest.auto-reply-reply.config.ts",
      env: {},
      includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
    });
    expect(first).toBe(second);
    expect(first).toMatch(/^test\/vitest\/vitest\.auto-reply-reply\.config\.ts#include-1-/u);
  });
  // End-to-end round trip: sample creation -> write -> read, against a temp
  // artifact path, asserting the persisted metadata (baseConfig,
  // includePatternCount, EWMA fields) for a first sample.
  it("persists include-pattern timing metadata", () => {
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-shard-timings-"));
    const env = {
      OPENCLAW_TEST_PROJECTS_TIMINGS_PATH: path.join(tempDir, "timings.json"),
      OPENCLAW_VITEST_SHARD_NAME: "auto-reply-reply-agent-runner",
    };
    const sample = createShardTimingSample(
      {
        config: "test/vitest/vitest.auto-reply-reply.config.ts",
        env,
        includePatterns: ["src/auto-reply/reply/agent-runner.test.ts"],
        watchMode: false,
      },
      1234,
    );
    expect(sample).toEqual({
      baseConfig: "test/vitest/vitest.auto-reply-reply.config.ts",
      config: "test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-runner",
      durationMs: 1234,
      includePatternCount: 1,
    });
    writeShardTimings([sample], tempDir, env);
    expect(readShardTimings(tempDir, env)).toEqual(
      new Map([
        ["test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-runner", 1234],
      ]),
    );
    expect(
      JSON.parse(fs.readFileSync(env.OPENCLAW_TEST_PROJECTS_TIMINGS_PATH, "utf8")).configs[
        "test/vitest/vitest.auto-reply-reply.config.ts#auto-reply-reply-agent-runner"
      ],
    ).toMatchObject({
      averageMs: 1234,
      baseConfig: "test/vitest/vitest.auto-reply-reply.config.ts",
      includePatternCount: 1,
      lastMs: 1234,
      sampleCount: 1,
    });
  });
});