diff --git a/scripts/run-node.mjs b/scripts/run-node.mjs index 1e8bd4b90af..d6543311735 100644 --- a/scripts/run-node.mjs +++ b/scripts/run-node.mjs @@ -273,6 +273,23 @@ const isSignalKey = (signal) => Object.hasOwn(SIGNAL_EXIT_CODES, signal); const getSignalExitCode = (signal) => (isSignalKey(signal) ? SIGNAL_EXIT_CODES[signal] : 1); const RUN_NODE_OUTPUT_LOG_ENV = "OPENCLAW_RUN_NODE_OUTPUT_LOG"; +const RUN_NODE_BUILD_LOCK_TIMEOUT_ENV = "OPENCLAW_RUN_NODE_BUILD_LOCK_TIMEOUT_MS"; +const RUN_NODE_BUILD_LOCK_POLL_ENV = "OPENCLAW_RUN_NODE_BUILD_LOCK_POLL_MS"; +const RUN_NODE_BUILD_LOCK_STALE_ENV = "OPENCLAW_RUN_NODE_BUILD_LOCK_STALE_MS"; +const DEFAULT_BUILD_LOCK_TIMEOUT_MS = 5 * 60 * 1000; +const DEFAULT_BUILD_LOCK_POLL_MS = 100; +const DEFAULT_BUILD_LOCK_STALE_MS = 10 * 60 * 1000; + +const parsePositiveIntegerEnv = (env, name, fallback) => { + const raw = env[name]; + if (raw === undefined || raw === "") { + return fallback; + } + const parsed = Number(raw); + return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback; +}; + +const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); const resolveRunNodeOutputLogPath = (deps) => { const outputLog = deps.env[RUN_NODE_OUTPUT_LOG_ENV]?.trim(); @@ -429,9 +446,94 @@ const closeRunNodeOutputTee = async (deps, exitCode) => { return exitCode; }; -const syncRuntimeArtifacts = (deps) => { +const removeStaleBuildLock = (deps, lockDir, staleMs) => { try { - deps.runRuntimePostBuild({ cwd: deps.cwd }); + const stats = deps.fs.statSync(lockDir); + if (Date.now() - stats.mtimeMs < staleMs) { + return false; + } + deps.fs.rmSync(lockDir, { recursive: true, force: true }); + return true; + } catch { + return false; + } +}; + +const acquireRunNodeBuildLock = async (deps) => { + const lockRoot = path.join(deps.cwd, ".artifacts"); + const lockDir = path.join(lockRoot, "run-node-build.lock"); + const timeoutMs = parsePositiveIntegerEnv( + deps.env, + RUN_NODE_BUILD_LOCK_TIMEOUT_ENV, + DEFAULT_BUILD_LOCK_TIMEOUT_MS, + ); + const pollMs = parsePositiveIntegerEnv( + deps.env, + RUN_NODE_BUILD_LOCK_POLL_ENV, + DEFAULT_BUILD_LOCK_POLL_MS, + ); + const staleMs = parsePositiveIntegerEnv( + deps.env, + RUN_NODE_BUILD_LOCK_STALE_ENV, + DEFAULT_BUILD_LOCK_STALE_MS, + ); + const startedAt = Date.now(); + let loggedWait = false; + + while (Date.now() - startedAt < timeoutMs) { + try { + deps.fs.mkdirSync(lockRoot, { recursive: true }); + deps.fs.mkdirSync(lockDir); + try { + deps.fs.writeFileSync( + path.join(lockDir, "owner.json"), + `${JSON.stringify( + { + pid: deps.process.pid, + startedAt: new Date().toISOString(), + args: deps.args, + }, + null, + 2, + )}\n`, + "utf8", + ); + } catch { + // Owner metadata is diagnostic only; the directory itself is the lock. + } + return () => { + deps.fs.rmSync(lockDir, { recursive: true, force: true }); + }; + } catch (error) { + if (error?.code !== "EEXIST") { + throw error; + } + if (removeStaleBuildLock(deps, lockDir, staleMs)) { + continue; + } + if (!loggedWait) { + logRunner("Waiting for TypeScript/runtime artifact lock.", deps); + loggedWait = true; + } + await sleep(pollMs); + } + } + + throw new Error(`timed out waiting for ${path.relative(deps.cwd, lockDir)}`); +}; + +const withRunNodeBuildLock = async (deps, callback) => { + const release = await acquireRunNodeBuildLock(deps); + try { + return await callback(); + } finally { + release(); + } +}; + +const syncRuntimeArtifacts = async (deps) => { + try { + await deps.runRuntimePostBuild({ cwd: deps.cwd }); } catch (error) { logRunner( `Failed to write runtime build artifacts: ${error?.message ?? "unknown error"}`, @@ -491,38 +593,55 @@ export async function runNodeMain(params = {}) { let exitCode = 1; const buildRequirement = resolveBuildRequirement(deps); if (!buildRequirement.shouldBuild) { - if (!shouldSkipCleanWatchRuntimeSync(deps) && !syncRuntimeArtifacts(deps)) { - return await closeRunNodeOutputTee(deps, 1); + if (!shouldSkipCleanWatchRuntimeSync(deps)) { + const synced = await withRunNodeBuildLock( + deps, + async () => await syncRuntimeArtifacts(deps), + ); + if (!synced) { + return await closeRunNodeOutputTee(deps, 1); + } } exitCode = await runOpenClaw(deps); return await closeRunNodeOutputTee(deps, exitCode); } - logRunner( - `Building TypeScript (dist is stale: ${buildRequirement.reason} - ${formatBuildReason(buildRequirement.reason)}).`, - deps, - ); - const buildCmd = deps.execPath; - const buildArgs = compilerArgs; - const build = deps.spawn(buildCmd, buildArgs, { - cwd: deps.cwd, - env: deps.env, - stdio: deps.outputTee ? ["inherit", "pipe", "pipe"] : "inherit", - }); - pipeSpawnedOutput(build, deps); + const buildExitCode = await withRunNodeBuildLock(deps, async () => { + const lockedBuildRequirement = resolveBuildRequirement(deps); + if (!lockedBuildRequirement.shouldBuild) { + return (await syncRuntimeArtifacts(deps)) ? 0 : 1; + } - const buildRes = await waitForSpawnedProcess(build, deps); - const interruptedExitCode = getInterruptedSpawnExitCode(buildRes); - if (interruptedExitCode !== null) { - return await closeRunNodeOutputTee(deps, interruptedExitCode); + logRunner( + `Building TypeScript (dist is stale: ${lockedBuildRequirement.reason} - ${formatBuildReason(lockedBuildRequirement.reason)}).`, + deps, + ); + const buildCmd = deps.execPath; + const buildArgs = compilerArgs; + const build = deps.spawn(buildCmd, buildArgs, { + cwd: deps.cwd, + env: deps.env, + stdio: deps.outputTee ? ["inherit", "pipe", "pipe"] : "inherit", + }); + pipeSpawnedOutput(build, deps); + + const buildRes = await waitForSpawnedProcess(build, deps); + const interruptedExitCode = getInterruptedSpawnExitCode(buildRes); + if (interruptedExitCode !== null) { + return interruptedExitCode; + } + if (buildRes.exitCode !== 0 && buildRes.exitCode !== null) { + return buildRes.exitCode; + } + if (!(await syncRuntimeArtifacts(deps))) { + return 1; + } + writeBuildStamp(deps); + return 0; + }); + if (buildExitCode !== 0) { + return await closeRunNodeOutputTee(deps, buildExitCode); } - if (buildRes.exitCode !== 0 && buildRes.exitCode !== null) { - return await closeRunNodeOutputTee(deps, buildRes.exitCode); - } - if (!syncRuntimeArtifacts(deps)) { - return await closeRunNodeOutputTee(deps, 1); - } - writeBuildStamp(deps); exitCode = await runOpenClaw(deps); return await closeRunNodeOutputTee(deps, exitCode); } catch (error) { diff --git a/src/infra/run-node.test.ts b/src/infra/run-node.test.ts index e8b37288ea6..0af519bda88 100644 --- a/src/infra/run-node.test.ts +++ b/src/infra/run-node.test.ts @@ -202,7 +202,7 @@ async function runStatusCommand(params: { spawn: (cmd: string, args: string[]) => ReturnType; spawnSync?: (cmd: string, args: string[]) => { status: number; stdout: string }; env?: Record; - runRuntimePostBuild?: (params?: { cwd?: string }) => void; + runRuntimePostBuild?: (params?: { cwd?: string }) => void | Promise; }) { return await runNodeMain({ cwd: params.tmp, @@ -225,7 +225,7 @@ async function runQaCommand(params: { spawn: (cmd: string, args: string[]) => ReturnType; spawnSync?: (cmd: string, args: string[]) => { status: number; stdout: string }; env?: Record; - runRuntimePostBuild?: (params?: { cwd?: string }) => void; + runRuntimePostBuild?: (params?: { cwd?: string }) => void | Promise; }) { return await runNodeMain({ cwd: params.tmp, @@ -619,6 +619,58 @@ describe("run-node script", () => { }); }); + it("serializes runtime postbuild restaging across concurrent clean launchers", async () => { + await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => { + await setupTrackedProject(tmp, { + files: { + [ROOT_SRC]: "export const value = 1;\n", + }, + oldPaths: [ROOT_SRC, ROOT_TSCONFIG, ROOT_PACKAGE], + buildPaths: [DIST_ENTRY, BUILD_STAMP], + }); + + let activePostbuilds = 0; + let maxActivePostbuilds = 0; + const runRuntimePostBuild = vi.fn(async () => { + activePostbuilds += 1; + maxActivePostbuilds = Math.max(maxActivePostbuilds, activePostbuilds); + await new Promise((resolve) => setTimeout(resolve, 25)); + activePostbuilds -= 1; + }); + const { spawn, spawnSync } = createSpawnRecorder({ + gitHead: "abc123\n", + gitStatus: "", + }); + + await expect( + Promise.all([ + runStatusCommand({ + tmp, + spawn, + spawnSync, + env: { + OPENCLAW_RUN_NODE_BUILD_LOCK_POLL_MS: "1", + }, + runRuntimePostBuild, + }), + runStatusCommand({ + tmp, + spawn, + spawnSync, + env: { + OPENCLAW_RUN_NODE_BUILD_LOCK_POLL_MS: "1", + }, + runRuntimePostBuild, + }), + ]), + ).resolves.toEqual([0, 0]); + + expect(runRuntimePostBuild).toHaveBeenCalledTimes(2); + expect(maxActivePostbuilds).toBe(1); + expect(fsSync.existsSync(path.join(tmp, ".artifacts", "run-node-build.lock"))).toBe(false); + }); + }); + it("returns the build exit code when the compiler step fails", async () => { await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => { const spawn = (cmd: string, args: string[] = []) => { @@ -693,6 +745,9 @@ describe("run-node script", () => { execPath: process.execPath, }); + while (spawn.mock.calls.length === 0) { + await new Promise((resolve) => setTimeout(resolve, 0)); + } fakeProcess.emit("SIGTERM"); const exitCode = await exitCodePromise;