diff --git a/CHANGELOG.md b/CHANGELOG.md index 3354d012954..2f51386c364 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,13 @@ Docs: https://docs.openclaw.ai - Providers/Azure OpenAI: give deployment-scoped image generation requests a longer 600s default timeout so slow `gpt-image-2` generations can complete without a per-call `timeoutMs`. Fixes #71705. Thanks @voytas75. +- Gateway/plugins: link source-checkout bundled runtime dependency caches instead + of recursively copying `node_modules` on the gateway main thread, preventing + local status, node, and skill probes from timing out during startup cache + restores. Thanks @steipete. +- Skills/remote nodes: only expose remote macOS skill bins for connected nodes, + clear stale bin matches when node probes fail, and include probe command, + timeout, bin count, and connection state in timeout logs. Thanks @steipete. - CLI/gateway: keep diagnostic probes from creating first-time read-only device pairings, while still reusing cached device tokens for detailed read probes. Fixes #71766. Thanks @SunboZ. diff --git a/docs/tools/skills.md b/docs/tools/skills.md index c9a03108074..44adae0a556 100644 --- a/docs/tools/skills.md +++ b/docs/tools/skills.md @@ -349,7 +349,7 @@ agent. If the Gateway is running on Linux but a **macOS node** is connected **with `system.run` allowed** (Exec approvals security not set to `deny`), OpenClaw can treat macOS-only skills as eligible when the required binaries are present on that node. The agent should execute those skills via the `exec` tool with `host=node`. -This relies on the node reporting its command support and on a bin probe via `system.run`. If the macOS node goes offline later, the skills remain visible; invocations may fail until the node reconnects. +This relies on the node reporting its command support and on a bin probe via `system.which` or `system.run`. Offline nodes do not make remote-only skills visible. If a connected node stops answering bin probes, OpenClaw clears its cached bin matches so agents no longer see skills that cannot currently run there. ## Skills watcher (auto-refresh) diff --git a/src/infra/skills-remote.test.ts b/src/infra/skills-remote.test.ts index 90ce1efd09e..60aa80e415c 100644 --- a/src/infra/skills-remote.test.ts +++ b/src/infra/skills-remote.test.ts @@ -1,14 +1,25 @@ import { randomUUID } from "node:crypto"; -import { describe, expect, it } from "vitest"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; import { getSkillsSnapshotVersion, resetSkillsRefreshForTest } from "../agents/skills/refresh.js"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import type { NodeRegistry } from "../gateway/node-registry.js"; import { getRemoteSkillEligibility, recordRemoteNodeBins, recordRemoteNodeInfo, removeRemoteNodeInfo, + refreshRemoteNodeBins, + setSkillsRemoteRegistry, } from "./skills-remote.js"; describe("skills-remote", () => { + afterEach(() => { + setSkillsRemoteRegistry(null); + }); + it("removes disconnected nodes from remote skill eligibility", () => { const nodeId = `node-${randomUUID()}`; const bin = `bin-${randomUUID()}`; @@ -134,4 +145,156 @@ describe("skills-remote", () => { removeRemoteNodeInfo(nodeId); } }); + + it("does not expose bins for nodes that only have cached paired metadata", () => { + const nodeId = `node-${randomUUID()}`; + const bin = `bin-${randomUUID()}`; + try { + recordRemoteNodeBins(nodeId, [bin]); + + expect(getRemoteSkillEligibility()?.hasBin(bin) ?? false).toBe(false); + } finally { + removeRemoteNodeInfo(nodeId); + } + }); + + it("clears stale bins when a connected node probe times out", async () => { + await resetSkillsRefreshForTest(); + const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-remote-skills-")); + const nodeId = `node-${randomUUID()}`; + const bin = `bin-${randomUUID()}`; + try { + fs.mkdirSync(path.join(workspaceDir, "remote-skill"), { recursive: true }); + fs.writeFileSync( + path.join(workspaceDir, "remote-skill", "SKILL.md"), + [ + "---", + "name: remote-skill", + "description: Needs a remote bin", + `metadata: { "openclaw": { "os": ["darwin"], "requires": { "bins": ["${bin}"] } } }`, + "---", + "# Remote Skill", + "", + ].join("\n"), + ); + const cfg = { + agents: { + defaults: { + workspace: workspaceDir, + }, + }, + } satisfies OpenClawConfig; + const invokeCalls: string[] = []; + setSkillsRemoteRegistry({ + listConnected: () => [], + get: () => undefined, + invoke: async (params: { command: string }) => { + invokeCalls.push(params.command); + return { + ok: false, + error: { code: "TIMEOUT", message: "node invoke timed out" }, + }; + }, + } as unknown as NodeRegistry); + recordRemoteNodeInfo({ + nodeId, + displayName: "Remote Mac", + platform: "darwin", + commands: ["system.run", "system.which"], + }); + recordRemoteNodeBins(nodeId, [bin]); + const before = getSkillsSnapshotVersion(workspaceDir); + + await refreshRemoteNodeBins({ + nodeId, + platform: "darwin", + commands: ["system.run", "system.which"], + cfg, + timeoutMs: 10, + }); + + expect(invokeCalls).toEqual(["system.which"]); + expect(getRemoteSkillEligibility()?.hasBin(bin) ?? false).toBe(false); + expect(getSkillsSnapshotVersion(workspaceDir)).toBeGreaterThan(before); + } finally { + removeRemoteNodeInfo(nodeId); + fs.rmSync(workspaceDir, { recursive: true, force: true }); + } + }); + + it("coalesces overlapping bin probes for the same node", async () => { + const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-remote-skills-")); + const nodeId = `node-${randomUUID()}`; + const bin = `bin-${randomUUID()}`; + let invokeCount = 0; + let releaseProbe!: () => void; + const probeStarted = new Promise((resolve) => { + setSkillsRemoteRegistry({ + listConnected: () => [], + get: () => undefined, + invoke: async () => { + invokeCount += 1; + resolve(); + await new Promise((release) => { + releaseProbe = release; + }); + return { + ok: false, + error: { code: "TIMEOUT", message: "node invoke timed out" }, + }; + }, + } as unknown as NodeRegistry); + }); + try { + fs.mkdirSync(path.join(workspaceDir, "remote-skill"), { recursive: true }); + fs.writeFileSync( + path.join(workspaceDir, "remote-skill", "SKILL.md"), + [ + "---", + "name: remote-skill", + "description: Needs a remote bin", + `metadata: { "openclaw": { "os": ["darwin"], "requires": { "bins": ["${bin}"] } } }`, + "---", + "# Remote Skill", + "", + ].join("\n"), + ); + const cfg = { + agents: { + defaults: { + workspace: workspaceDir, + }, + }, + } satisfies OpenClawConfig; + recordRemoteNodeInfo({ + nodeId, + displayName: "Remote Mac", + platform: "darwin", + commands: ["system.run", "system.which"], + }); + + const first = refreshRemoteNodeBins({ + nodeId, + platform: "darwin", + commands: ["system.run", "system.which"], + cfg, + timeoutMs: 10, + }); + await probeStarted; + const second = refreshRemoteNodeBins({ + nodeId, + platform: "darwin", + commands: ["system.run", "system.which"], + cfg, + timeoutMs: 10, + }); + releaseProbe(); + + await Promise.all([first, second]); + expect(invokeCount).toBe(1); + } finally { + removeRemoteNodeInfo(nodeId); + fs.rmSync(workspaceDir, { recursive: true, force: true }); + } + }); }); diff --git a/src/infra/skills-remote.ts b/src/infra/skills-remote.ts index bb8a17c20d1..f67c28e1bd4 100644 --- a/src/infra/skills-remote.ts +++ b/src/infra/skills-remote.ts @@ -18,11 +18,13 @@ type RemoteNodeRecord = { deviceFamily?: string; commands?: string[]; bins: Set; + connected: boolean; remoteIp?: string; }; const log = createSubsystemLogger("gateway/skills-remote"); const remoteNodes = new Map(); +const remoteBinProbeInflight = new Map>(); let remoteRegistry: NodeRegistry | null = null; function describeNode(nodeId: string): string { @@ -62,20 +64,36 @@ function extractErrorMessage(err: unknown): string | undefined { return undefined; } -function logRemoteBinProbeFailure(nodeId: string, err: unknown) { +function logRemoteBinProbeFailure( + nodeId: string, + err: unknown, + context?: { command?: string; timeoutMs?: number; requiredBinCount?: number }, +) { const message = extractErrorMessage(err); const label = describeNode(nodeId); + const details = [ + context?.command ? `command=${context.command}` : undefined, + typeof context?.timeoutMs === "number" ? `timeoutMs=${context.timeoutMs}` : undefined, + typeof context?.requiredBinCount === "number" + ? `requiredBins=${context.requiredBinCount}` + : undefined, + `connected=${remoteNodes.get(nodeId)?.connected === true ? "yes" : "no"}`, + ] + .filter(Boolean) + .join(" "); // Node unavailable errors (not connected or disconnected mid-operation) are expected // when nodes have transient connections - log at info level instead of warn if (message?.includes("node not connected") || message?.includes("node disconnected")) { - log.info(`remote bin probe skipped: node unavailable (${label})`); + log.info(`remote bin probe skipped: node unavailable (${label}; ${details})`); return; } if (message?.includes("invoke timed out") || message?.includes("timeout")) { - log.warn(`remote bin probe timed out (${label}); check node connectivity for ${label}`); + log.warn( + `remote bin probe timed out (${label}; ${details}); check node connectivity for ${label}`, + ); return; } - log.warn(`remote bin probe error (${label}): ${message ?? "unknown"}`); + log.warn(`remote bin probe error (${label}; ${details}): ${message ?? "unknown"}`); } function isMacPlatform(platform?: string, deviceFamily?: string): boolean { @@ -109,6 +127,7 @@ function upsertNode(record: { commands?: string[]; remoteIp?: string; bins?: string[]; + connected?: boolean; }) { const existing = remoteNodes.get(record.nodeId); const bins = new Set(record.bins ?? existing?.bins ?? []); @@ -120,9 +139,19 @@ function upsertNode(record: { commands: record.commands ?? existing?.commands, remoteIp: record.remoteIp ?? existing?.remoteIp, bins, + connected: record.connected ?? existing?.connected ?? false, }); } +function clearRemoteNodeBins(nodeId: string): boolean { + const existing = remoteNodes.get(nodeId); + if (!existing || existing.bins.size === 0) { + return false; + } + existing.bins = new Set(); + return true; +} + export function setSkillsRemoteRegistry(registry: NodeRegistry | null) { remoteRegistry = registry; } @@ -140,8 +169,14 @@ export async function primeRemoteSkillsCache() { commands: node.commands, remoteIp: node.remoteIp, bins: node.bins, + connected: false, }); - if (isMacPlatform(node.platform, node.deviceFamily) && supportsSystemRun(node.commands)) { + if ( + node.bins && + node.bins.length > 0 && + isMacPlatform(node.platform, node.deviceFamily) && + supportsSystemRun(node.commands) + ) { sawMac = true; } } @@ -161,7 +196,7 @@ export function recordRemoteNodeInfo(node: { commands?: string[]; remoteIp?: string; }) { - upsertNode(node); + upsertNode({ ...node, connected: true }); } export function recordRemoteNodeBins(nodeId: string, bins: string[]) { @@ -253,6 +288,28 @@ export async function refreshRemoteNodeBins(params: { commands?: string[]; cfg: OpenClawConfig; timeoutMs?: number; +}) { + const existing = remoteBinProbeInflight.get(params.nodeId); + if (existing) { + await existing; + return; + } + const run = refreshRemoteNodeBinsUncoalesced(params).finally(() => { + if (remoteBinProbeInflight.get(params.nodeId) === run) { + remoteBinProbeInflight.delete(params.nodeId); + } + }); + remoteBinProbeInflight.set(params.nodeId, run); + await run; +} + +async function refreshRemoteNodeBinsUncoalesced(params: { + nodeId: string; + platform?: string; + deviceFamily?: string; + commands?: string[]; + cfg: OpenClawConfig; + timeoutMs?: number; }) { if (!remoteRegistry) { return; @@ -278,27 +335,34 @@ export async function refreshRemoteNodeBins(params: { return; } + const binsList = [...requiredBins]; + const timeoutMs = params.timeoutMs ?? 15_000; + const command = canWhich ? "system.which" : "system.run"; + const logContext = { command, timeoutMs, requiredBinCount: binsList.length }; try { - const binsList = [...requiredBins]; const res = await remoteRegistry.invoke( canWhich ? { nodeId: params.nodeId, - command: "system.which", + command, params: { bins: binsList }, - timeoutMs: params.timeoutMs ?? 15_000, + timeoutMs, } : { nodeId: params.nodeId, - command: "system.run", + command, params: { command: ["/bin/sh", "-lc", buildBinProbeScript(binsList)], }, - timeoutMs: params.timeoutMs ?? 15_000, + timeoutMs, }, ); if (!res.ok) { - logRemoteBinProbeFailure(params.nodeId, res.error?.message ?? "unknown"); + const cleared = clearRemoteNodeBins(params.nodeId); + logRemoteBinProbeFailure(params.nodeId, res.error?.message ?? "unknown", logContext); + if (cleared) { + bumpSkillsSnapshotVersion({ reason: "remote-node" }); + } return; } const bins = parseBinProbePayload(res.payloadJSON, res.payload); @@ -312,7 +376,11 @@ export async function refreshRemoteNodeBins(params: { await updatePairedNodeMetadata(params.nodeId, { bins }); bumpSkillsSnapshotVersion({ reason: "remote-node" }); } catch (err) { - logRemoteBinProbeFailure(params.nodeId, err); + const cleared = clearRemoteNodeBins(params.nodeId); + logRemoteBinProbeFailure(params.nodeId, err, logContext); + if (cleared) { + bumpSkillsSnapshotVersion({ reason: "remote-node" }); + } } } @@ -320,7 +388,10 @@ export function getRemoteSkillEligibility(options?: { advertiseExecNode?: boolean; }): SkillEligibilityContext["remote"] | undefined { const macNodes = [...remoteNodes.values()].filter( - (node) => isMacPlatform(node.platform, node.deviceFamily) && supportsSystemRun(node.commands), + (node) => + node.connected && + isMacPlatform(node.platform, node.deviceFamily) && + supportsSystemRun(node.commands), ); if (macNodes.length === 0) { return undefined; diff --git a/src/plugins/bundled-runtime-deps.test.ts b/src/plugins/bundled-runtime-deps.test.ts index f75d4778878..812a3269da6 100644 --- a/src/plugins/bundled-runtime-deps.test.ts +++ b/src/plugins/bundled-runtime-deps.test.ts @@ -817,6 +817,60 @@ describe("ensureBundledPluginRuntimeDeps", () => { ).toEqual({ installedSpecs: [], retainSpecs: [] }); }); + it("links source-checkout runtime deps from the cache instead of copying them", () => { + const packageRoot = makeTempDir(); + fs.writeFileSync( + path.join(packageRoot, "package.json"), + JSON.stringify({ name: "openclaw", version: "2026.4.25" }), + ); + fs.writeFileSync(path.join(packageRoot, "pnpm-workspace.yaml"), "packages: []\n"); + fs.mkdirSync(path.join(packageRoot, "src"), { recursive: true }); + fs.mkdirSync(path.join(packageRoot, "extensions"), { recursive: true }); + const pluginRoot = path.join(packageRoot, "dist", "extensions", "voice-call"); + fs.mkdirSync(pluginRoot, { recursive: true }); + fs.writeFileSync( + path.join(pluginRoot, "package.json"), + JSON.stringify({ dependencies: { "voice-runtime": "1.0.0" } }), + ); + spawnSyncMock.mockImplementation((_command, _args, options) => { + const cwd = String(options?.cwd); + expect(cwd).toContain(path.join(".local", "bundled-plugin-runtime-deps")); + const depRoot = path.join(cwd, "node_modules", "voice-runtime"); + fs.mkdirSync(depRoot, { recursive: true }); + fs.writeFileSync( + path.join(depRoot, "package.json"), + JSON.stringify({ name: "voice-runtime", version: "1.0.0" }), + ); + return { status: 0, stdout: "", stderr: "" } as ReturnType; + }); + + expect( + ensureBundledPluginRuntimeDeps({ + env: {}, + pluginId: "voice-call", + pluginRoot, + }), + ).toEqual({ + installedSpecs: ["voice-runtime@1.0.0"], + retainSpecs: ["voice-runtime@1.0.0"], + }); + expect(spawnSyncMock).toHaveBeenCalledTimes(1); + expect(fs.lstatSync(path.join(pluginRoot, "node_modules")).isSymbolicLink()).toBe(true); + + fs.rmSync(path.join(pluginRoot, "node_modules"), { recursive: true, force: true }); + expect( + ensureBundledPluginRuntimeDeps({ + env: {}, + installDeps: () => { + throw new Error("cache restore should not reinstall"); + }, + pluginId: "voice-call", + pluginRoot, + }), + ).toEqual({ installedSpecs: [], retainSpecs: [] }); + expect(fs.lstatSync(path.join(pluginRoot, "node_modules")).isSymbolicLink()).toBe(true); + }); + it("retains existing staged deps without a retained manifest before shared installs", () => { const packageRoot = makeTempDir(); const stageDir = makeTempDir(); @@ -1194,6 +1248,7 @@ describe("ensureBundledPluginRuntimeDeps", () => { installExecutionRoot: expect.stringContaining( path.join(".local", "bundled-plugin-runtime-deps"), ), + linkNodeModulesFromExecutionRoot: true, missingSpecs: ["tokenjuice@0.6.1"], installSpecs: ["tokenjuice@0.6.1"], }, @@ -1276,6 +1331,7 @@ describe("ensureBundledPluginRuntimeDeps", () => { installExecutionRoot: expect.stringContaining( path.join(".local", "bundled-plugin-runtime-deps"), ), + linkNodeModulesFromExecutionRoot: true, missingSpecs: ["acpx@0.5.3"], installSpecs: ["acpx@0.5.3"], }, diff --git a/src/plugins/bundled-runtime-deps.ts b/src/plugins/bundled-runtime-deps.ts index 6f044e12162..9600b9dced0 100644 --- a/src/plugins/bundled-runtime-deps.ts +++ b/src/plugins/bundled-runtime-deps.ts @@ -27,6 +27,7 @@ export type RuntimeDepConflict = { export type BundledRuntimeDepsInstallParams = { installRoot: string; installExecutionRoot?: string; + linkNodeModulesFromExecutionRoot?: boolean; missingSpecs: string[]; installSpecs?: string[]; }; @@ -701,6 +702,31 @@ function replaceNodeModulesDir(targetDir: string, sourceDir: string): void { } } +function linkNodeModulesDir(targetDir: string, sourceDir: string): boolean { + const parentDir = path.dirname(targetDir); + const tempLink = path.join(parentDir, `.openclaw-runtime-deps-link-${process.pid}-${Date.now()}`); + try { + fs.symlinkSync(sourceDir, tempLink, process.platform === "win32" ? "junction" : "dir"); + fs.rmSync(targetDir, { recursive: true, force: true }); + fs.renameSync(tempLink, targetDir); + return true; + } catch { + try { + fs.rmSync(tempLink, { recursive: true, force: true }); + } catch { + // Best-effort cleanup; caller falls back to copying. + } + return false; + } +} + +function replaceNodeModulesDirFromCache(targetDir: string, sourceDir: string): void { + if (linkNodeModulesDir(targetDir, sourceDir)) { + return; + } + replaceNodeModulesDir(targetDir, sourceDir); +} + function restoreSourceCheckoutRuntimeDepsFromCache(params: { cacheDir: string | null; deps: readonly { name: string }[]; @@ -714,7 +740,10 @@ function restoreSourceCheckoutRuntimeDepsFromCache(params: { return false; } try { - replaceNodeModulesDir(path.join(params.installRoot, "node_modules"), cachedNodeModulesDir); + replaceNodeModulesDirFromCache( + path.join(params.installRoot, "node_modules"), + cachedNodeModulesDir, + ); return true; } catch { return false; @@ -1153,6 +1182,7 @@ function ensureNpmInstallExecutionManifest(installExecutionRoot: string): void { export function installBundledRuntimeDeps(params: { installRoot: string; installExecutionRoot?: string; + linkNodeModulesFromExecutionRoot?: boolean; missingSpecs: string[]; env: NodeJS.ProcessEnv; }): void { @@ -1198,7 +1228,12 @@ export function installBundledRuntimeDeps(params: { if (!fs.existsSync(stagedNodeModulesDir)) { throw new Error("npm install did not produce node_modules"); } - replaceNodeModulesDir(path.join(params.installRoot, "node_modules"), stagedNodeModulesDir); + const targetNodeModulesDir = path.join(params.installRoot, "node_modules"); + if (params.linkNodeModulesFromExecutionRoot) { + replaceNodeModulesDirFromCache(targetNodeModulesDir, stagedNodeModulesDir); + } else { + replaceNodeModulesDir(targetNodeModulesDir, stagedNodeModulesDir); + } } } finally { if (cleanInstallExecutionRoot) { @@ -1314,9 +1349,7 @@ export function ensureBundledPluginRuntimeDeps(params: { }); const isPluginRootInstall = path.resolve(installRoot) === path.resolve(params.pluginRoot); const sourceCheckoutCacheStage = - cacheDir && - isPluginRootInstall && - resolveSourceCheckoutBundledPluginPackageRoot(params.pluginRoot) + cacheDir && isPluginRootInstall && resolveSourceCheckoutPackageRoot(params.pluginRoot) ? cacheDir : undefined; const installExecutionRoot = @@ -1338,14 +1371,26 @@ export function ensureBundledPluginRuntimeDeps(params: { installBundledRuntimeDeps({ installRoot: installParams.installRoot, installExecutionRoot: installParams.installExecutionRoot, + linkNodeModulesFromExecutionRoot: installParams.linkNodeModulesFromExecutionRoot, missingSpecs: installParams.installSpecs ?? installParams.missingSpecs, env: params.env, })); - install({ installRoot, installExecutionRoot, missingSpecs, installSpecs }); + install({ + installRoot, + installExecutionRoot, + ...(sourceCheckoutCacheStage ? { linkNodeModulesFromExecutionRoot: true } : {}), + missingSpecs, + installSpecs, + }); + const cacheAlreadyPopulated = Boolean( + sourceCheckoutCacheStage && hasAllDependencySentinels(sourceCheckoutCacheStage, deps), + ); if (persistRetainedManifest) { writeRetainedRuntimeDepsManifest(installRoot, installSpecs); } - storeSourceCheckoutRuntimeDepsCache({ cacheDir, installRoot }); + if (!cacheAlreadyPopulated) { + storeSourceCheckoutRuntimeDepsCache({ cacheDir, installRoot }); + } return { installedSpecs: missingSpecs, retainSpecs: installSpecs }; }); }