ci: speed up release validation shards

This commit is contained in:
Peter Steinberger
2026-04-28 06:14:13 +01:00
parent 3d53b39917
commit 017b8db616
7 changed files with 223 additions and 44 deletions

View File

@@ -367,6 +367,7 @@ async function writeFailureIndex(logDir, summary) {
lane: failure.name,
logFile: failure.logFile,
name: failure.name,
noOutputTimedOut: failure.noOutputTimedOut,
rerunCommand: failure.rerunCommand,
status: failure.status,
timedOut: failure.timedOut,
@@ -441,54 +442,96 @@ function dockerPreflightContainerNames(raw) {
);
}
function runShellCommand({ command, env, label, logFile, timeoutMs }) {
function runShellCommand({ command, env, label, logFile, timeoutMs, noOutputTimeoutMs }) {
return new Promise((resolve) => {
const pipeOutput = Boolean(logFile || noOutputTimeoutMs > 0);
const child = spawn("bash", ["-c", command], {
cwd: ROOT_DIR,
detached: process.platform !== "win32",
env,
stdio: logFile ? ["ignore", "pipe", "pipe"] : "inherit",
stdio: pipeOutput ? ["ignore", "pipe", "pipe"] : "inherit",
});
activeChildren.add(child);
let timedOut = false;
let noOutputTimedOut = false;
let killTimer;
let stream;
let noOutputTimer;
const terminateForTimeout = (message, options = {}) => {
if (timedOut) {
return;
}
timedOut = true;
noOutputTimedOut = options.noOutput === true;
if (stream) {
stream.write(`\n==> [${label}] ${message}; sending SIGTERM\n`);
} else {
console.error(`==> [${label}] ${message}; sending SIGTERM`);
}
terminateChild(child, "SIGTERM");
killTimer = setTimeout(() => terminateChild(child, "SIGKILL"), 10_000);
killTimer.unref?.();
};
const resetNoOutputTimer = () => {
if (!noOutputTimeoutMs || noOutputTimeoutMs <= 0 || timedOut) {
return;
}
if (noOutputTimer) {
clearTimeout(noOutputTimer);
}
noOutputTimer = setTimeout(() => {
terminateForTimeout(`no output for ${noOutputTimeoutMs}ms`, { noOutput: true });
}, noOutputTimeoutMs);
noOutputTimer.unref?.();
};
const timeoutTimer =
timeoutMs > 0
? setTimeout(() => {
timedOut = true;
if (stream) {
stream.write(`\n==> [${label}] timeout after ${timeoutMs}ms; sending SIGTERM\n`);
}
terminateChild(child, "SIGTERM");
killTimer = setTimeout(() => terminateChild(child, "SIGKILL"), 10_000);
killTimer.unref?.();
terminateForTimeout(`timeout after ${timeoutMs}ms`);
}, timeoutMs)
: undefined;
timeoutTimer?.unref?.();
let stream;
if (logFile) {
stream = fs.createWriteStream(logFile, { flags: "a" });
stream.write(`==> [${label}] command: ${command}\n`);
stream.write(`==> [${label}] started: ${utcStamp()}\n`);
child.stdout.pipe(stream, { end: false });
child.stderr.pipe(stream, { end: false });
}
if (pipeOutput) {
const writeOutput = (target, chunk) => {
resetNoOutputTimer();
if (stream) {
stream.write(chunk);
} else {
target.write(chunk);
}
};
child.stdout.on("data", (chunk) => writeOutput(process.stdout, chunk));
child.stderr.on("data", (chunk) => writeOutput(process.stderr, chunk));
resetNoOutputTimer();
}
child.on("close", (status, signal) => {
if (timeoutTimer) {
clearTimeout(timeoutTimer);
}
if (noOutputTimer) {
clearTimeout(noOutputTimer);
}
if (killTimer) {
clearTimeout(killTimer);
}
activeChildren.delete(child);
const exitCode = typeof status === "number" ? status : signal ? 128 : 1;
if (stream) {
stream.write(`\n==> [${label}] finished: ${utcStamp()} status=${exitCode}\n`);
stream.write(
`\n==> [${label}] finished: ${utcStamp()} status=${exitCode}${
noOutputTimedOut ? " noOutputTimedOut=true" : ""
}\n`,
);
stream.end();
}
resolve({ signal, status: exitCode, timedOut });
resolve({ signal, status: exitCode, timedOut, noOutputTimedOut });
});
});
}
@@ -692,6 +735,7 @@ function laneEnv(poolLane, baseEnv, logDir, cacheKey) {
async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
const { name } = lane;
const timeoutMs = lane.timeoutMs ?? fallbackTimeoutMs;
const noOutputTimeoutMs = lane.noOutputTimeoutMs;
const logFile = path.join(logDir, `${name}.log`);
const env = laneEnv(lane, baseEnv, logDir, lane.cacheKey);
const command = withResolvedPnpmCommand(lane.command, env);
@@ -703,6 +747,7 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
`==> [${name}] cli tools dir: ${env.OPENCLAW_DOCKER_CLI_TOOLS_DIR}`,
`==> [${name}] cache dir: ${env.OPENCLAW_DOCKER_CACHE_HOME_DIR}`,
`==> [${name}] timeout: ${timeoutMs}ms`,
`==> [${name}] no output timeout: ${noOutputTimeoutMs ?? 0}ms`,
`==> [${name}] retries: ${lane.retries ?? 0}`,
`==> [${name}] e2e image kind: ${lane.e2eImageKind ?? "none"}`,
`==> [${name}] e2e image: ${env.OPENCLAW_DOCKER_E2E_IMAGE ?? ""}`,
@@ -721,11 +766,19 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
await fs.promises.appendFile(logFile, `\n==> [${name}] retry attempt ${attempt}\n`);
console.log(`==> [${name}] retry ${attempt}/${maxAttempts}`);
}
result = await runShellCommand({ command, env, label: name, logFile, timeoutMs });
result = await runShellCommand({
command,
env,
label: name,
logFile,
timeoutMs,
noOutputTimeoutMs,
});
attempts.push({
attempt,
elapsedSeconds: phaseElapsedSeconds(attemptStartedAt),
finishedAt: new Date().toISOString(),
noOutputTimedOut: result.noOutputTimedOut,
startedAt: new Date(attemptStartedAt).toISOString(),
status: result.status,
timedOut: result.timedOut,
@@ -760,6 +813,7 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) {
rerunCommand: buildLaneRerunCommand(name, baseEnv),
startedAt: startedAtIso,
status: result.status,
noOutputTimedOut: result.noOutputTimedOut,
timedOut: result.timedOut,
};
}