fix: serialize run-node artifact writes

This commit is contained in:
Peter Steinberger
2026-04-21 03:53:02 +01:00
parent 11e6575c69
commit d1f7f69cd4
2 changed files with 203 additions and 29 deletions

View File

@@ -273,6 +273,23 @@ const isSignalKey = (signal) => Object.hasOwn(SIGNAL_EXIT_CODES, signal);
// Looks up the exit code for a signal name; any signal that is not an own key
// of SIGNAL_EXIT_CODES yields 1.
const getSignalExitCode = (signal) => {
  if (!isSignalKey(signal)) {
    return 1;
  }
  return SIGNAL_EXIT_CODES[signal];
};
// Environment variable read (and trimmed) when resolving the run-node output log path.
const RUN_NODE_OUTPUT_LOG_ENV = "OPENCLAW_RUN_NODE_OUTPUT_LOG";
// Environment overrides for the build lock. Each is parsed as a positive integer
// number of milliseconds; unset or invalid values fall back to the defaults below.
const RUN_NODE_BUILD_LOCK_TIMEOUT_ENV = "OPENCLAW_RUN_NODE_BUILD_LOCK_TIMEOUT_MS";
const RUN_NODE_BUILD_LOCK_POLL_ENV = "OPENCLAW_RUN_NODE_BUILD_LOCK_POLL_MS";
const RUN_NODE_BUILD_LOCK_STALE_ENV = "OPENCLAW_RUN_NODE_BUILD_LOCK_STALE_MS";
// Default maximum time to keep trying to acquire the lock: 5 minutes.
const DEFAULT_BUILD_LOCK_TIMEOUT_MS = 5 * 60 * 1000;
// Default delay between acquisition attempts while the lock is held: 100 ms.
const DEFAULT_BUILD_LOCK_POLL_MS = 100;
// Default lock-directory age (by mtime) at which it is treated as stale: 10 minutes.
const DEFAULT_BUILD_LOCK_STALE_MS = 10 * 60 * 1000;
/**
 * Reads `env[name]` as a strictly positive integer.
 *
 * Returns `fallback` when the variable is undefined, the empty string, does not
 * convert (via `Number`) to an integer, or converts to a value that is not
 * greater than zero.
 */
const parsePositiveIntegerEnv = (env, name, fallback) => {
  const rawValue = env[name];
  const isMissing = rawValue === undefined || rawValue === "";
  if (isMissing) {
    return fallback;
  }
  const numeric = Number(rawValue);
  if (!Number.isInteger(numeric) || numeric <= 0) {
    return fallback;
  }
  return numeric;
};
// Returns a promise that resolves (with no value) once a `ms`-millisecond timer fires.
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
const resolveRunNodeOutputLogPath = (deps) => {
const outputLog = deps.env[RUN_NODE_OUTPUT_LOG_ENV]?.trim();
@@ -429,9 +446,94 @@ const closeRunNodeOutputTee = async (deps, exitCode) => {
return exitCode;
};
const syncRuntimeArtifacts = (deps) => {
// Removes `lockDir` when its mtime is at least `staleMs` milliseconds old.
// Returns true only when the directory was removed. Returns false when the lock
// is still fresh, or when any call inside the try block throws (for example
// because the directory disappeared between the caller's mkdir and this stat).
const removeStaleBuildLock = (deps, lockDir, staleMs) => {
try {
// NOTE(review): this call does not look related to stale-lock detection. It may
// be a removed line from the old syncRuntimeArtifacts body that the diff
// rendering interleaved here — confirm against the committed file.
deps.runRuntimePostBuild({ cwd: deps.cwd });
const stats = deps.fs.statSync(lockDir);
// A lock younger than the stale threshold is left alone.
if (Date.now() - stats.mtimeMs < staleMs) {
return false;
}
deps.fs.rmSync(lockDir, { recursive: true, force: true });
return true;
} catch {
// Any failure is reported as "not removed"; the caller then waits and retries.
return false;
}
};
/**
 * Acquires the run-node build lock by creating the directory
 * `<cwd>/.artifacts/run-node-build.lock`.
 *
 * While the directory already exists (mkdir fails with EEXIST) the function
 * first tries to clear it as stale, otherwise sleeps for the poll interval and
 * retries until the timeout elapses. Timeout, poll interval and stale age are
 * read from the environment with defaults.
 *
 * Returns a function that removes the lock directory.
 * Throws any mkdir error other than EEXIST, or an Error once the timeout is hit.
 */
const acquireRunNodeBuildLock = async (deps) => {
  const lockRoot = path.join(deps.cwd, ".artifacts");
  const lockDir = path.join(lockRoot, "run-node-build.lock");

  const readMs = (envName, fallbackMs) => parsePositiveIntegerEnv(deps.env, envName, fallbackMs);
  const timeoutMs = readMs(RUN_NODE_BUILD_LOCK_TIMEOUT_ENV, DEFAULT_BUILD_LOCK_TIMEOUT_MS);
  const pollMs = readMs(RUN_NODE_BUILD_LOCK_POLL_ENV, DEFAULT_BUILD_LOCK_POLL_MS);
  const staleMs = readMs(RUN_NODE_BUILD_LOCK_STALE_ENV, DEFAULT_BUILD_LOCK_STALE_MS);

  // Best-effort owner record inside the lock directory.
  const recordOwner = () => {
    try {
      const ownerFile = path.join(lockDir, "owner.json");
      const owner = {
        pid: deps.process.pid,
        startedAt: new Date().toISOString(),
        args: deps.args,
      };
      deps.fs.writeFileSync(ownerFile, `${JSON.stringify(owner, null, 2)}\n`, "utf8");
    } catch {
      // Owner metadata is diagnostic only; the directory itself is the lock.
    }
  };

  const release = () => {
    deps.fs.rmSync(lockDir, { recursive: true, force: true });
  };

  const deadline = Date.now() + timeoutMs;
  let announcedWait = false;

  while (Date.now() < deadline) {
    let acquired = false;
    try {
      deps.fs.mkdirSync(lockRoot, { recursive: true });
      // Non-recursive mkdir: fails with EEXIST when another holder has the lock.
      deps.fs.mkdirSync(lockDir);
      acquired = true;
    } catch (error) {
      if (error?.code !== "EEXIST") {
        throw error;
      }
    }

    if (acquired) {
      recordOwner();
      return release;
    }

    // Lock is held: retry immediately if it was stale and has been cleared.
    if (removeStaleBuildLock(deps, lockDir, staleMs)) {
      continue;
    }

    // Announce the wait once, not on every poll.
    if (!announcedWait) {
      logRunner("Waiting for TypeScript/runtime artifact lock.", deps);
      announcedWait = true;
    }
    await sleep(pollMs);
  }

  throw new Error(`timed out waiting for ${path.relative(deps.cwd, lockDir)}`);
};
/**
 * Runs `callback` while holding the run-node build lock and returns its result.
 *
 * The lock is released whether the callback resolves or rejects. If the
 * callback rejects and releasing the lock also fails, the callback's error is
 * the one that propagates (the release failure is only logged); a `finally`
 * block would instead let the release error replace it. If the callback
 * succeeds and the release fails, the release error propagates.
 */
const withRunNodeBuildLock = async (deps, callback) => {
  const release = await acquireRunNodeBuildLock(deps);
  let result;
  try {
    result = await callback();
  } catch (error) {
    try {
      release();
    } catch (releaseError) {
      // Keep the original failure visible; report the cleanup problem separately.
      logRunner(
        `Failed to release build lock: ${releaseError?.message ?? "unknown error"}`,
        deps,
      );
    }
    throw error;
  }
  release();
  return result;
};
const syncRuntimeArtifacts = async (deps) => {
try {
await deps.runRuntimePostBuild({ cwd: deps.cwd });
} catch (error) {
logRunner(
`Failed to write runtime build artifacts: ${error?.message ?? "unknown error"}`,
@@ -491,38 +593,55 @@ export async function runNodeMain(params = {}) {
let exitCode = 1;
const buildRequirement = resolveBuildRequirement(deps);
if (!buildRequirement.shouldBuild) {
if (!shouldSkipCleanWatchRuntimeSync(deps) && !syncRuntimeArtifacts(deps)) {
return await closeRunNodeOutputTee(deps, 1);
if (!shouldSkipCleanWatchRuntimeSync(deps)) {
const synced = await withRunNodeBuildLock(
deps,
async () => await syncRuntimeArtifacts(deps),
);
if (!synced) {
return await closeRunNodeOutputTee(deps, 1);
}
}
exitCode = await runOpenClaw(deps);
return await closeRunNodeOutputTee(deps, exitCode);
}
logRunner(
`Building TypeScript (dist is stale: ${buildRequirement.reason} - ${formatBuildReason(buildRequirement.reason)}).`,
deps,
);
const buildCmd = deps.execPath;
const buildArgs = compilerArgs;
const build = deps.spawn(buildCmd, buildArgs, {
cwd: deps.cwd,
env: deps.env,
stdio: deps.outputTee ? ["inherit", "pipe", "pipe"] : "inherit",
});
pipeSpawnedOutput(build, deps);
const buildExitCode = await withRunNodeBuildLock(deps, async () => {
const lockedBuildRequirement = resolveBuildRequirement(deps);
if (!lockedBuildRequirement.shouldBuild) {
return (await syncRuntimeArtifacts(deps)) ? 0 : 1;
}
const buildRes = await waitForSpawnedProcess(build, deps);
const interruptedExitCode = getInterruptedSpawnExitCode(buildRes);
if (interruptedExitCode !== null) {
return await closeRunNodeOutputTee(deps, interruptedExitCode);
logRunner(
`Building TypeScript (dist is stale: ${lockedBuildRequirement.reason} - ${formatBuildReason(lockedBuildRequirement.reason)}).`,
deps,
);
const buildCmd = deps.execPath;
const buildArgs = compilerArgs;
const build = deps.spawn(buildCmd, buildArgs, {
cwd: deps.cwd,
env: deps.env,
stdio: deps.outputTee ? ["inherit", "pipe", "pipe"] : "inherit",
});
pipeSpawnedOutput(build, deps);
const buildRes = await waitForSpawnedProcess(build, deps);
const interruptedExitCode = getInterruptedSpawnExitCode(buildRes);
if (interruptedExitCode !== null) {
return interruptedExitCode;
}
if (buildRes.exitCode !== 0 && buildRes.exitCode !== null) {
return buildRes.exitCode;
}
if (!(await syncRuntimeArtifacts(deps))) {
return 1;
}
writeBuildStamp(deps);
return 0;
});
if (buildExitCode !== 0) {
return await closeRunNodeOutputTee(deps, buildExitCode);
}
if (buildRes.exitCode !== 0 && buildRes.exitCode !== null) {
return await closeRunNodeOutputTee(deps, buildRes.exitCode);
}
if (!syncRuntimeArtifacts(deps)) {
return await closeRunNodeOutputTee(deps, 1);
}
writeBuildStamp(deps);
exitCode = await runOpenClaw(deps);
return await closeRunNodeOutputTee(deps, exitCode);
} catch (error) {

View File

@@ -202,7 +202,7 @@ async function runStatusCommand(params: {
spawn: (cmd: string, args: string[]) => ReturnType<typeof createExitedProcess>;
spawnSync?: (cmd: string, args: string[]) => { status: number; stdout: string };
env?: Record<string, string>;
runRuntimePostBuild?: (params?: { cwd?: string }) => void;
runRuntimePostBuild?: (params?: { cwd?: string }) => void | Promise<void>;
}) {
return await runNodeMain({
cwd: params.tmp,
@@ -225,7 +225,7 @@ async function runQaCommand(params: {
spawn: (cmd: string, args: string[]) => ReturnType<typeof createExitedProcess>;
spawnSync?: (cmd: string, args: string[]) => { status: number; stdout: string };
env?: Record<string, string>;
runRuntimePostBuild?: (params?: { cwd?: string }) => void;
runRuntimePostBuild?: (params?: { cwd?: string }) => void | Promise<void>;
}) {
return await runNodeMain({
cwd: params.tmp,
@@ -619,6 +619,58 @@ describe("run-node script", () => {
});
});
it("serializes runtime postbuild restaging across concurrent clean launchers", async () => {
  await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
    await setupTrackedProject(tmp, {
      files: {
        [ROOT_SRC]: "export const value = 1;\n",
      },
      oldPaths: [ROOT_SRC, ROOT_TSCONFIG, ROOT_PACKAGE],
      buildPaths: [DIST_ENTRY, BUILD_STAMP],
    });

    // Count overlapping postbuild invocations; the peak reveals any concurrency.
    let inFlight = 0;
    let peakInFlight = 0;
    const runRuntimePostBuild = vi.fn(async () => {
      inFlight += 1;
      peakInFlight = Math.max(peakInFlight, inFlight);
      // Hold the postbuild open long enough for the second launcher to contend.
      await new Promise((resolve) => setTimeout(resolve, 25));
      inFlight -= 1;
    });

    const { spawn, spawnSync } = createSpawnRecorder({
      gitHead: "abc123\n",
      gitStatus: "",
    });

    // Both launchers share the project dir, spawn recorder and postbuild mock.
    const launch = () =>
      runStatusCommand({
        tmp,
        spawn,
        spawnSync,
        env: {
          OPENCLAW_RUN_NODE_BUILD_LOCK_POLL_MS: "1",
        },
        runRuntimePostBuild,
      });

    await expect(Promise.all([launch(), launch()])).resolves.toEqual([0, 0]);

    expect(runRuntimePostBuild).toHaveBeenCalledTimes(2);
    // A peak of 1 means the two postbuilds never ran at the same time.
    expect(peakInFlight).toBe(1);
    // The lock directory must be gone once both launchers have finished.
    expect(fsSync.existsSync(path.join(tmp, ".artifacts", "run-node-build.lock"))).toBe(false);
  });
});
it("returns the build exit code when the compiler step fails", async () => {
await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
const spawn = (cmd: string, args: string[] = []) => {
@@ -693,6 +745,9 @@ describe("run-node script", () => {
execPath: process.execPath,
});
while (spawn.mock.calls.length === 0) {
await new Promise((resolve) => setTimeout(resolve, 0));
}
fakeProcess.emit("SIGTERM");
const exitCode = await exitCodePromise;