ci: speed up release validation shards

This commit is contained in:
Peter Steinberger
2026-04-28 06:14:13 +01:00
parent 3d53b39917
commit 017b8db616
7 changed files with 223 additions and 44 deletions

View File

@@ -122,10 +122,13 @@ export function laneResources(poolLane) {
/**
 * Build the one-line, human-readable summary of a pool lane.
 *
 * Shape: `name(w=<weight> r=<res,res>[ timeout=Ns][ no-output=Ns][ retries=N][ cache=KEY][ image=KIND])`.
 * Each bracketed segment is emitted only when the matching lane field is set
 * (`retries` only when greater than 0). Millisecond values are rounded to
 * whole seconds.
 *
 * @param {object} poolLane - Lane descriptor. Reads `name`, `timeoutMs`,
 *   `noOutputTimeoutMs`, `retries`, `cacheKey` and `e2eImageKind`; weight and
 *   resources come from `laneWeight` / `laneResources`.
 * @returns {string} The formatted summary.
 */
export function laneSummary(poolLane) {
  const resources = laneResources(poolLane).join(",");
  const timeout = poolLane.timeoutMs ? ` timeout=${Math.round(poolLane.timeoutMs / 1000)}s` : "";
  const noOutputTimeout = poolLane.noOutputTimeoutMs
    ? ` no-output=${Math.round(poolLane.noOutputTimeoutMs / 1000)}s`
    : "";
  const retries = poolLane.retries > 0 ? ` retries=${poolLane.retries}` : "";
  const cache = poolLane.cacheKey ? ` cache=${poolLane.cacheKey}` : "";
  const image = poolLane.e2eImageKind ? ` image=${poolLane.e2eImageKind}` : "";
  // Single return: the no-output segment sits between timeout and retries.
  return `${poolLane.name}(w=${laneWeight(poolLane)} r=${resources}${timeout}${noOutputTimeout}${retries}${cache}${image})`;
}
export function lanesNeedE2eImageKind(poolLanes, kind) {
@@ -179,6 +182,7 @@ export function buildPlanJson(params) {
imageKind: poolLane.e2eImageKind,
live: poolLane.live,
name: poolLane.name,
noOutputTimeoutMs: poolLane.noOutputTimeoutMs,
resources: laneResources(poolLane),
timeoutMs: poolLane.timeoutMs,
weight: laneWeight(poolLane),

View File

@@ -2,7 +2,8 @@
// Keep lane names, commands, image kind, timeout, resources, and release chunks
// here. Planning and execution live in separate modules.
// Bundled channel update lanes: abort when the lane prints nothing for 4
// minutes, and cap the whole lane at 6 minutes.
const BUNDLED_UPDATE_NO_OUTPUT_TIMEOUT_MS = 4 * 60 * 1000;
const BUNDLED_UPDATE_TIMEOUT_MS = 6 * 60 * 1000;
export const DEFAULT_LIVE_RETRIES = 1;
const LIVE_ACP_TIMEOUT_MS = 20 * 60 * 1000;
const LIVE_CLI_TIMEOUT_MS = 20 * 60 * 1000;
@@ -37,6 +38,7 @@ function lane(name, command, options = {}) {
: (options.e2eImageKind ?? (options.live ? undefined : "functional")),
estimateSeconds: options.estimateSeconds,
live: options.live === true,
noOutputTimeoutMs: options.noOutputTimeoutMs,
name,
retryPatterns: options.retryPatterns ?? [],
retries: options.retries ?? 0,
@@ -131,7 +133,12 @@ const bundledChannelUpdateLanes = [
bundledChannelScenarioLane(
`bundled-channel-update-${target}`,
`OPENCLAW_BUNDLED_CHANNEL_SCENARIOS=0 OPENCLAW_BUNDLED_CHANNEL_UPDATE_SCENARIO=1 OPENCLAW_BUNDLED_CHANNEL_UPDATE_TARGETS=${target} OPENCLAW_BUNDLED_CHANNEL_ROOT_OWNED_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_SETUP_ENTRY_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_LOAD_FAILURE_SCENARIO=0 OPENCLAW_BUNDLED_CHANNEL_DISABLED_CONFIG_SCENARIO=0`,
{ retryPatterns: LIVE_RETRY_PATTERNS, retries: 1, timeoutMs: BUNDLED_UPDATE_TIMEOUT_MS },
{
noOutputTimeoutMs: BUNDLED_UPDATE_NO_OUTPUT_TIMEOUT_MS,
retryPatterns: LIVE_RETRY_PATTERNS,
retries: 1,
timeoutMs: BUNDLED_UPDATE_TIMEOUT_MS,
},
),
);
@@ -505,11 +512,8 @@ const primaryReleasePathChunks = {
"plugins-runtime-install-a": bundledPluginInstallUninstallLanes.slice(0, 4),
"plugins-runtime-install-b": bundledPluginInstallUninstallLanes.slice(4),
"bundled-channels-core": [releasePathBundledChannelLanes[0], ...bundledChannelSmokeLanes],
"bundled-channels-update-a": [
bundledChannelUpdateLanes[0],
bundledChannelUpdateLanes[1],
bundledChannelUpdateLanes[4],
],
"bundled-channels-update-a": [bundledChannelUpdateLanes[0], bundledChannelUpdateLanes[4]],
"bundled-channels-update-discord": [bundledChannelUpdateLanes[1]],
"bundled-channels-update-b": [
bundledChannelUpdateLanes[2],
bundledChannelUpdateLanes[3],
@@ -528,6 +532,11 @@ const legacyReleasePathChunks = {
"plugins-runtime": releasePathPluginRuntimeLanes,
"plugins-integrations": [...releasePathPluginRuntimeLanes, ...releasePathBundledChannelLanes],
"bundled-channels": releasePathBundledChannelLanes,
"bundled-channels-update-a-legacy": [
bundledChannelUpdateLanes[0],
bundledChannelUpdateLanes[1],
bundledChannelUpdateLanes[4],
],
};
function openWebUILane() {

View File

@@ -367,6 +367,7 @@ async function writeFailureIndex(logDir, summary) {
lane: failure.name,
logFile: failure.logFile,
name: failure.name,
noOutputTimedOut: failure.noOutputTimedOut,
rerunCommand: failure.rerunCommand,
status: failure.status,
timedOut: failure.timedOut,
@@ -441,54 +442,96 @@ function dockerPreflightContainerNames(raw) {
);
}
/**
 * Run `command` through `bash -c` and resolve once the child process closes.
 *
 * Output routing:
 * - with `logFile`, stdout and stderr are appended to that file, framed by
 *   "command"/"started"/"finished" marker lines;
 * - without `logFile` but with a positive `noOutputTimeoutMs`, output is still
 *   piped (so it can be observed) and forwarded to this process's
 *   stdout/stderr;
 * - otherwise the child inherits this process's stdio.
 *
 * Two watchdogs can stop the child: the overall `timeoutMs`, and
 * `noOutputTimeoutMs`, which restarts on every output chunk. Whichever fires
 * first sends SIGTERM, followed by SIGKILL 10 seconds later.
 *
 * @param {object} params
 * @param {string} params.command - Shell command line passed to `bash -c`.
 * @param {object} params.env - Environment for the child process.
 * @param {string} params.label - Tag used in log and console marker lines.
 * @param {string} [params.logFile] - File to append output to.
 * @param {number} [params.timeoutMs] - Overall limit; only applied when > 0.
 * @param {number} [params.noOutputTimeoutMs] - Silence limit; only applied when > 0.
 * @returns {Promise<{signal: ?string, status: number, timedOut: boolean, noOutputTimedOut: boolean}>}
 *   `status` is the exit code, 128 when the child ended on a signal, else 1.
 *   `timedOut` is true for either watchdog; `noOutputTimedOut` only for the
 *   silence watchdog.
 */
function runShellCommand({ command, env, label, logFile, timeoutMs, noOutputTimeoutMs }) {
  return new Promise((resolve) => {
    // Output must be piped whenever we need to see it: to log it or to detect silence.
    const pipeOutput = Boolean(logFile || noOutputTimeoutMs > 0);
    const child = spawn("bash", ["-c", command], {
      cwd: ROOT_DIR,
      detached: process.platform !== "win32",
      env,
      stdio: pipeOutput ? ["ignore", "pipe", "pipe"] : "inherit",
    });
    activeChildren.add(child);

    let timedOut = false;
    let noOutputTimedOut = false;
    let killTimer;
    let stream;
    let noOutputTimer;

    // Shared by both watchdogs; the first one to fire wins and later calls are ignored.
    const terminateForTimeout = (message, options = {}) => {
      if (timedOut) {
        return;
      }
      timedOut = true;
      noOutputTimedOut = options.noOutput === true;
      if (stream) {
        stream.write(`\n==> [${label}] ${message}; sending SIGTERM\n`);
      } else {
        console.error(`==> [${label}] ${message}; sending SIGTERM`);
      }
      terminateChild(child, "SIGTERM");
      // Escalate if the child has not closed 10 seconds after SIGTERM.
      killTimer = setTimeout(() => terminateChild(child, "SIGKILL"), 10_000);
      killTimer.unref?.();
    };

    // (Re)start the silence watchdog; a no-op when disabled or already timing out.
    const resetNoOutputTimer = () => {
      if (!noOutputTimeoutMs || noOutputTimeoutMs <= 0 || timedOut) {
        return;
      }
      if (noOutputTimer) {
        clearTimeout(noOutputTimer);
      }
      noOutputTimer = setTimeout(() => {
        terminateForTimeout(`no output for ${noOutputTimeoutMs}ms`, { noOutput: true });
      }, noOutputTimeoutMs);
      noOutputTimer.unref?.();
    };

    const timeoutTimer =
      timeoutMs > 0
        ? setTimeout(() => {
            terminateForTimeout(`timeout after ${timeoutMs}ms`);
          }, timeoutMs)
        : undefined;
    timeoutTimer?.unref?.();

    if (logFile) {
      stream = fs.createWriteStream(logFile, { flags: "a" });
      stream.write(`==> [${label}] command: ${command}\n`);
      stream.write(`==> [${label}] started: ${utcStamp()}\n`);
    }

    if (pipeOutput) {
      // Every chunk counts as activity; it goes to the log when there is one,
      // otherwise to the matching stream of this process.
      const writeOutput = (target, chunk) => {
        resetNoOutputTimer();
        if (stream) {
          stream.write(chunk);
        } else {
          target.write(chunk);
        }
      };
      child.stdout.on("data", (chunk) => writeOutput(process.stdout, chunk));
      child.stderr.on("data", (chunk) => writeOutput(process.stderr, chunk));
      // Arm the watchdog up front so a child that never prints is still caught.
      resetNoOutputTimer();
    }

    child.on("close", (status, signal) => {
      if (timeoutTimer) {
        clearTimeout(timeoutTimer);
      }
      if (noOutputTimer) {
        clearTimeout(noOutputTimer);
      }
      if (killTimer) {
        clearTimeout(killTimer);
      }
      activeChildren.delete(child);
      const exitCode = typeof status === "number" ? status : signal ? 128 : 1;
      if (stream) {
        stream.write(
          `\n==> [${label}] finished: ${utcStamp()} status=${exitCode}${
            noOutputTimedOut ? " noOutputTimedOut=true" : ""
          }\n`,
        );
        stream.end();
      }
      resolve({ signal, status: exitCode, timedOut, noOutputTimedOut });
    });
  });
}
@@ -692,6 +735,7 @@ function laneEnv(poolLane, baseEnv, logDir, cacheKey) {
async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
const { name } = lane;
const timeoutMs = lane.timeoutMs ?? fallbackTimeoutMs;
const noOutputTimeoutMs = lane.noOutputTimeoutMs;
const logFile = path.join(logDir, `${name}.log`);
const env = laneEnv(lane, baseEnv, logDir, lane.cacheKey);
const command = withResolvedPnpmCommand(lane.command, env);
@@ -703,6 +747,7 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
`==> [${name}] cli tools dir: ${env.OPENCLAW_DOCKER_CLI_TOOLS_DIR}`,
`==> [${name}] cache dir: ${env.OPENCLAW_DOCKER_CACHE_HOME_DIR}`,
`==> [${name}] timeout: ${timeoutMs}ms`,
`==> [${name}] no output timeout: ${noOutputTimeoutMs ?? 0}ms`,
`==> [${name}] retries: ${lane.retries ?? 0}`,
`==> [${name}] e2e image kind: ${lane.e2eImageKind ?? "none"}`,
`==> [${name}] e2e image: ${env.OPENCLAW_DOCKER_E2E_IMAGE ?? ""}`,
@@ -721,11 +766,19 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
await fs.promises.appendFile(logFile, `\n==> [${name}] retry attempt ${attempt}\n`);
console.log(`==> [${name}] retry ${attempt}/${maxAttempts}`);
}
result = await runShellCommand({ command, env, label: name, logFile, timeoutMs });
result = await runShellCommand({
command,
env,
label: name,
logFile,
timeoutMs,
noOutputTimeoutMs,
});
attempts.push({
attempt,
elapsedSeconds: phaseElapsedSeconds(attemptStartedAt),
finishedAt: new Date().toISOString(),
noOutputTimedOut: result.noOutputTimedOut,
startedAt: new Date(attemptStartedAt).toISOString(),
status: result.status,
timedOut: result.timedOut,
@@ -760,6 +813,7 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
rerunCommand: buildLaneRerunCommand(name, baseEnv),
startedAt: startedAtIso,
status: result.status,
noOutputTimedOut: result.noOutputTimedOut,
timedOut: result.timedOut,
};
}