mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
fix: recover stale runtime deps locks
This commit is contained in:
@@ -38,6 +38,9 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Plugins/startup: remove ownerless bundled runtime-dependency install locks
|
||||
after a short grace window and include lock owner details when startup times
|
||||
out waiting for a plugin runtime-deps lock. Thanks @steipete.
|
||||
- Agents/replies: forward sanitized underlying agent failure details on external
|
||||
channels instead of replacing unknown failures with a generic retry message.
|
||||
Thanks @steipete.
|
||||
|
||||
@@ -766,6 +766,72 @@ describe("ensureBundledPluginRuntimeDeps", () => {
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not expire fresh ownerless runtime-deps install locks", () => {
|
||||
expect(
|
||||
bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock(
|
||||
{ lockDirMtimeMs: 1_000 },
|
||||
31_000,
|
||||
() => true,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not expire ownerless runtime-deps install locks when the owner file changed recently", () => {
|
||||
expect(
|
||||
bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock(
|
||||
{ lockDirMtimeMs: 1_000, ownerFileMtimeMs: 31_000 },
|
||||
61_000,
|
||||
() => true,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("expires ownerless runtime-deps install locks after the owner write grace window", () => {
|
||||
expect(
|
||||
bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock(
|
||||
{ lockDirMtimeMs: 1_000 },
|
||||
31_001,
|
||||
() => true,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("expires ownerless runtime-deps install locks when lock and owner file are stale", () => {
|
||||
expect(
|
||||
bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock(
|
||||
{ lockDirMtimeMs: 1_000, ownerFileMtimeMs: 2_000 },
|
||||
32_001,
|
||||
() => true,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("includes runtime-deps lock owner details in timeout messages", () => {
|
||||
const message = bundledRuntimeDepsTesting.formatRuntimeDepsLockTimeoutMessage({
|
||||
lockDir: "/tmp/openclaw-plugin/.openclaw-runtime-deps.lock",
|
||||
owner: {
|
||||
pid: 0,
|
||||
createdAtMs: 1_000,
|
||||
ownerFileState: "invalid",
|
||||
ownerFilePath: "/tmp/openclaw-plugin/.openclaw-runtime-deps.lock/owner.json",
|
||||
ownerFileMtimeMs: 2_500,
|
||||
ownerFileIsSymlink: true,
|
||||
lockDirMtimeMs: 2_000,
|
||||
},
|
||||
waitedMs: 300_123,
|
||||
nowMs: 303_000,
|
||||
});
|
||||
|
||||
expect(message).toContain("waited=300123ms");
|
||||
expect(message).toContain("ownerFile=invalid");
|
||||
expect(message).toContain("ownerFileSymlink=true");
|
||||
expect(message).toContain("pid=0 alive=false");
|
||||
expect(message).toContain("ownerAge=302000ms");
|
||||
expect(message).toContain("ownerFileAge=300500ms");
|
||||
expect(message).toContain("lockAge=301000ms");
|
||||
expect(message).toContain(".openclaw-runtime-deps.lock/owner.json");
|
||||
});
|
||||
|
||||
it("removes stale runtime-deps install locks before repairing deps", () => {
|
||||
const packageRoot = makeTempDir();
|
||||
const pluginRoot = path.join(packageRoot, "dist", "extensions", "openai");
|
||||
@@ -808,6 +874,100 @@ describe("ensureBundledPluginRuntimeDeps", () => {
|
||||
expect(fs.existsSync(lockDir)).toBe(false);
|
||||
});
|
||||
|
||||
it("removes stale malformed runtime-deps install locks before repairing deps", () => {
|
||||
const packageRoot = makeTempDir();
|
||||
const pluginRoot = path.join(packageRoot, "dist", "extensions", "browser");
|
||||
fs.mkdirSync(pluginRoot, { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(pluginRoot, "package.json"),
|
||||
JSON.stringify({
|
||||
dependencies: {
|
||||
"browser-runtime": "1.0.0",
|
||||
},
|
||||
}),
|
||||
);
|
||||
const installRoot = resolveBundledRuntimeDependencyInstallRoot(pluginRoot, { env: {} });
|
||||
const lockDir = path.join(installRoot, ".openclaw-runtime-deps.lock");
|
||||
fs.mkdirSync(lockDir, { recursive: true });
|
||||
const ownerPath = path.join(lockDir, "owner.json");
|
||||
fs.writeFileSync(ownerPath, "{", "utf8");
|
||||
fs.utimesSync(ownerPath, new Date(0), new Date(0));
|
||||
fs.utimesSync(lockDir, new Date(0), new Date(0));
|
||||
|
||||
const calls: BundledRuntimeDepsInstallParams[] = [];
|
||||
const result = ensureBundledPluginRuntimeDeps({
|
||||
env: {},
|
||||
installDeps: (params) => {
|
||||
calls.push(params);
|
||||
fs.mkdirSync(path.join(params.installRoot, "node_modules", "browser-runtime"), {
|
||||
recursive: true,
|
||||
});
|
||||
fs.writeFileSync(
|
||||
path.join(params.installRoot, "node_modules", "browser-runtime", "package.json"),
|
||||
JSON.stringify({ name: "browser-runtime", version: "1.0.0" }),
|
||||
);
|
||||
},
|
||||
pluginId: "browser",
|
||||
pluginRoot,
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
installedSpecs: ["browser-runtime@1.0.0"],
|
||||
retainSpecs: ["browser-runtime@1.0.0"],
|
||||
});
|
||||
expect(calls).toHaveLength(1);
|
||||
expect(fs.existsSync(lockDir)).toBe(false);
|
||||
});
|
||||
|
||||
const itSupportsSymlinks = process.platform === "win32" ? it.skip : it;
|
||||
itSupportsSymlinks(
|
||||
"removes stale runtime-deps install locks with broken owner symlinks before repairing deps",
|
||||
() => {
|
||||
const packageRoot = makeTempDir();
|
||||
const pluginRoot = path.join(packageRoot, "dist-runtime", "extensions", "browser");
|
||||
fs.mkdirSync(pluginRoot, { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(pluginRoot, "package.json"),
|
||||
JSON.stringify({
|
||||
dependencies: {
|
||||
"browser-runtime": "1.0.0",
|
||||
},
|
||||
}),
|
||||
);
|
||||
const installRoot = resolveBundledRuntimeDependencyInstallRoot(pluginRoot, { env: {} });
|
||||
const lockDir = path.join(installRoot, ".openclaw-runtime-deps.lock");
|
||||
fs.mkdirSync(lockDir, { recursive: true });
|
||||
const ownerPath = path.join(lockDir, "owner.json");
|
||||
fs.symlinkSync("../missing-owner.json", ownerPath);
|
||||
fs.lutimesSync(ownerPath, new Date(0), new Date(0));
|
||||
fs.utimesSync(lockDir, new Date(0), new Date(0));
|
||||
|
||||
const calls: BundledRuntimeDepsInstallParams[] = [];
|
||||
const result = ensureBundledPluginRuntimeDeps({
|
||||
env: {},
|
||||
installDeps: (params) => {
|
||||
calls.push(params);
|
||||
fs.mkdirSync(path.join(params.installRoot, "node_modules", "browser-runtime"), {
|
||||
recursive: true,
|
||||
});
|
||||
fs.writeFileSync(
|
||||
path.join(params.installRoot, "node_modules", "browser-runtime", "package.json"),
|
||||
JSON.stringify({ name: "browser-runtime", version: "1.0.0" }),
|
||||
);
|
||||
},
|
||||
pluginId: "browser",
|
||||
pluginRoot,
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
installedSpecs: ["browser-runtime@1.0.0"],
|
||||
retainSpecs: ["browser-runtime@1.0.0"],
|
||||
});
|
||||
expect(calls).toHaveLength(1);
|
||||
expect(fs.existsSync(lockDir)).toBe(false);
|
||||
},
|
||||
);
|
||||
|
||||
it("does not install when runtime deps are only workspace links", () => {
|
||||
const packageRoot = makeTempDir();
|
||||
const extensionsRoot = path.join(packageRoot, "dist", "extensions");
|
||||
|
||||
@@ -54,6 +54,7 @@ const BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE = "owner.json";
|
||||
const BUNDLED_RUNTIME_DEPS_LOCK_WAIT_MS = 100;
|
||||
const BUNDLED_RUNTIME_DEPS_LOCK_TIMEOUT_MS = 5 * 60_000;
|
||||
const BUNDLED_RUNTIME_DEPS_LOCK_STALE_MS = 10 * 60_000;
|
||||
const BUNDLED_RUNTIME_DEPS_OWNERLESS_LOCK_STALE_MS = 30_000;
|
||||
|
||||
export type BundledRuntimeDepsNpmRunner = {
|
||||
command: string;
|
||||
@@ -186,21 +187,80 @@ function isProcessAlive(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
} catch (error) {
|
||||
return (error as NodeJS.ErrnoException).code === "EPERM";
|
||||
}
|
||||
}
|
||||
|
||||
function readRuntimeDepsLockOwner(lockDir: string): { pid?: number; createdAtMs?: number } {
|
||||
const owner = readJsonObject(path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE));
|
||||
type RuntimeDepsLockOwner = {
|
||||
pid?: number;
|
||||
createdAtMs?: number;
|
||||
ownerFileState: "ok" | "missing" | "invalid";
|
||||
ownerFilePath: string;
|
||||
ownerFileMtimeMs?: number;
|
||||
ownerFileIsSymlink?: boolean;
|
||||
lockDirMtimeMs?: number;
|
||||
};
|
||||
|
||||
function readRuntimeDepsLockOwner(lockDir: string): RuntimeDepsLockOwner {
|
||||
const ownerFilePath = path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE);
|
||||
let owner: JsonObject | null = null;
|
||||
let ownerFileState: RuntimeDepsLockOwner["ownerFileState"] = "missing";
|
||||
let ownerFileMtimeMs: number | undefined;
|
||||
let ownerFileIsSymlink: boolean | undefined;
|
||||
try {
|
||||
const ownerFileStat = fs.lstatSync(ownerFilePath);
|
||||
ownerFileMtimeMs = ownerFileStat.mtimeMs;
|
||||
ownerFileIsSymlink = ownerFileStat.isSymbolicLink();
|
||||
} catch {
|
||||
// The owner file may not exist yet, or may have been removed by the lock owner.
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(fs.readFileSync(ownerFilePath, "utf8")) as unknown;
|
||||
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
||||
owner = parsed as JsonObject;
|
||||
ownerFileState = "ok";
|
||||
} else {
|
||||
ownerFileState = "invalid";
|
||||
}
|
||||
} catch (error) {
|
||||
ownerFileState =
|
||||
(error as NodeJS.ErrnoException).code === "ENOENT" && ownerFileMtimeMs === undefined
|
||||
? "missing"
|
||||
: "invalid";
|
||||
}
|
||||
let lockDirMtimeMs: number | undefined;
|
||||
try {
|
||||
lockDirMtimeMs = fs.statSync(lockDir).mtimeMs;
|
||||
} catch {
|
||||
// The lock may have disappeared between the mkdir failure and diagnostics.
|
||||
}
|
||||
return {
|
||||
pid: typeof owner?.pid === "number" ? owner.pid : undefined,
|
||||
createdAtMs: typeof owner?.createdAtMs === "number" ? owner.createdAtMs : undefined,
|
||||
ownerFileState,
|
||||
ownerFilePath,
|
||||
ownerFileMtimeMs,
|
||||
ownerFileIsSymlink,
|
||||
lockDirMtimeMs,
|
||||
};
|
||||
}
|
||||
|
||||
function latestFiniteMs(values: readonly (number | undefined)[]): number | undefined {
|
||||
let latest: number | undefined;
|
||||
for (const value of values) {
|
||||
if (typeof value !== "number" || !Number.isFinite(value)) {
|
||||
continue;
|
||||
}
|
||||
if (latest === undefined || value > latest) {
|
||||
latest = value;
|
||||
}
|
||||
}
|
||||
return latest;
|
||||
}
|
||||
|
||||
function shouldRemoveRuntimeDepsLock(
|
||||
owner: { pid?: number; createdAtMs?: number },
|
||||
owner: Pick<RuntimeDepsLockOwner, "pid" | "createdAtMs" | "lockDirMtimeMs" | "ownerFileMtimeMs">,
|
||||
nowMs: number,
|
||||
isAlive: (pid: number) => boolean = isProcessAlive,
|
||||
): boolean {
|
||||
@@ -208,13 +268,55 @@ function shouldRemoveRuntimeDepsLock(
|
||||
return !isAlive(owner.pid);
|
||||
}
|
||||
|
||||
if (typeof owner.createdAtMs === "number") {
|
||||
return nowMs - owner.createdAtMs > BUNDLED_RUNTIME_DEPS_LOCK_STALE_MS;
|
||||
}
|
||||
|
||||
const ownerlessObservedAtMs = latestFiniteMs([owner.lockDirMtimeMs, owner.ownerFileMtimeMs]);
|
||||
return (
|
||||
typeof owner.createdAtMs === "number" &&
|
||||
nowMs - owner.createdAtMs > BUNDLED_RUNTIME_DEPS_LOCK_STALE_MS
|
||||
typeof ownerlessObservedAtMs === "number" &&
|
||||
nowMs - ownerlessObservedAtMs > BUNDLED_RUNTIME_DEPS_OWNERLESS_LOCK_STALE_MS
|
||||
);
|
||||
}
|
||||
|
||||
function formatDurationMs(ms: number | undefined): string {
|
||||
return typeof ms === "number" && Number.isFinite(ms) ? `${Math.max(0, Math.round(ms))}ms` : "n/a";
|
||||
}
|
||||
|
||||
function formatRuntimeDepsLockTimeoutMessage(params: {
|
||||
lockDir: string;
|
||||
owner: RuntimeDepsLockOwner;
|
||||
waitedMs: number;
|
||||
nowMs: number;
|
||||
}): string {
|
||||
const ownerAgeMs =
|
||||
typeof params.owner.createdAtMs === "number"
|
||||
? params.nowMs - params.owner.createdAtMs
|
||||
: undefined;
|
||||
const lockAgeMs =
|
||||
typeof params.owner.lockDirMtimeMs === "number"
|
||||
? params.nowMs - params.owner.lockDirMtimeMs
|
||||
: undefined;
|
||||
const ownerFileAgeMs =
|
||||
typeof params.owner.ownerFileMtimeMs === "number"
|
||||
? params.nowMs - params.owner.ownerFileMtimeMs
|
||||
: undefined;
|
||||
const pidDetail =
|
||||
typeof params.owner.pid === "number"
|
||||
? `pid=${params.owner.pid} alive=${isProcessAlive(params.owner.pid)}`
|
||||
: "pid=missing";
|
||||
const ownerFileSymlink =
|
||||
typeof params.owner.ownerFileIsSymlink === "boolean" ? params.owner.ownerFileIsSymlink : "n/a";
|
||||
return (
|
||||
`Timed out waiting for bundled runtime deps lock at ${params.lockDir} ` +
|
||||
`(waited=${formatDurationMs(params.waitedMs)}, ownerFile=${params.owner.ownerFileState}, ownerFileSymlink=${ownerFileSymlink}, ` +
|
||||
`${pidDetail}, ownerAge=${formatDurationMs(ownerAgeMs)}, ownerFileAge=${formatDurationMs(ownerFileAgeMs)}, lockAge=${formatDurationMs(lockAgeMs)}, ` +
|
||||
`ownerFilePath=${params.owner.ownerFilePath}). If no OpenClaw/npm install is running, remove the lock directory and retry.`
|
||||
);
|
||||
}
|
||||
|
||||
export const __testing = {
|
||||
formatRuntimeDepsLockTimeoutMessage,
|
||||
shouldRemoveRuntimeDepsLock,
|
||||
};
|
||||
|
||||
@@ -240,11 +342,16 @@ function withBundledRuntimeDepsInstallRootLock<T>(installRoot: string, run: () =
|
||||
while (!locked) {
|
||||
try {
|
||||
fs.mkdirSync(lockDir);
|
||||
fs.writeFileSync(
|
||||
path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE),
|
||||
`${JSON.stringify({ pid: process.pid, createdAtMs: Date.now() }, null, 2)}\n`,
|
||||
"utf8",
|
||||
);
|
||||
try {
|
||||
fs.writeFileSync(
|
||||
path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE),
|
||||
`${JSON.stringify({ pid: process.pid, createdAtMs: Date.now() }, null, 2)}\n`,
|
||||
"utf8",
|
||||
);
|
||||
} catch (ownerWriteError) {
|
||||
fs.rmSync(lockDir, { recursive: true, force: true });
|
||||
throw ownerWriteError;
|
||||
}
|
||||
locked = true;
|
||||
} catch (error) {
|
||||
const code = (error as NodeJS.ErrnoException).code;
|
||||
@@ -252,10 +359,19 @@ function withBundledRuntimeDepsInstallRootLock<T>(installRoot: string, run: () =
|
||||
throw error;
|
||||
}
|
||||
removeRuntimeDepsLockIfStale(lockDir, Date.now());
|
||||
if (Date.now() - startedAt > BUNDLED_RUNTIME_DEPS_LOCK_TIMEOUT_MS) {
|
||||
throw new Error(`Timed out waiting for bundled runtime deps lock at ${lockDir}`, {
|
||||
cause: error,
|
||||
});
|
||||
const nowMs = Date.now();
|
||||
if (nowMs - startedAt > BUNDLED_RUNTIME_DEPS_LOCK_TIMEOUT_MS) {
|
||||
throw new Error(
|
||||
formatRuntimeDepsLockTimeoutMessage({
|
||||
lockDir,
|
||||
owner: readRuntimeDepsLockOwner(lockDir),
|
||||
waitedMs: nowMs - startedAt,
|
||||
nowMs,
|
||||
}),
|
||||
{
|
||||
cause: error,
|
||||
},
|
||||
);
|
||||
}
|
||||
sleepSync(BUNDLED_RUNTIME_DEPS_LOCK_WAIT_MS);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user