diff --git a/src/agents/sandbox/constants.ts b/src/agents/sandbox/constants.ts index 173acf28d35..551991da9f1 100644 --- a/src/agents/sandbox/constants.ts +++ b/src/agents/sandbox/constants.ts @@ -53,3 +53,10 @@ export const SANDBOX_AGENT_WORKSPACE_MOUNT = "/agent"; export const SANDBOX_STATE_DIR = path.join(STATE_DIR, "sandbox"); export const SANDBOX_REGISTRY_PATH = path.join(SANDBOX_STATE_DIR, "containers.json"); export const SANDBOX_BROWSER_REGISTRY_PATH = path.join(SANDBOX_STATE_DIR, "browsers.json"); + +// Per-entry sharded directories — each container/browser gets its own JSON +// file under these dirs, eliminating cross-session lock contention on the +// monolithic registry files. The legacy *.json files above are migrated +// into these dirs on first read and then removed. +export const SANDBOX_CONTAINERS_DIR = path.join(SANDBOX_STATE_DIR, "containers"); +export const SANDBOX_BROWSERS_DIR = path.join(SANDBOX_STATE_DIR, "browsers"); diff --git a/src/agents/sandbox/docker.config-hash-recreate.test.ts b/src/agents/sandbox/docker.config-hash-recreate.test.ts index 4242ab8c570..290b0334667 100644 --- a/src/agents/sandbox/docker.config-hash-recreate.test.ts +++ b/src/agents/sandbox/docker.config-hash-recreate.test.ts @@ -25,12 +25,12 @@ const spawnState = vi.hoisted(() => ({ })); const registryMocks = vi.hoisted(() => ({ - readRegistry: vi.fn(), + readRegistryEntry: vi.fn(), updateRegistry: vi.fn(), })); vi.mock("./registry.js", () => ({ - readRegistry: registryMocks.readRegistry, + readRegistryEntry: registryMocks.readRegistryEntry, updateRegistry: registryMocks.updateRegistry, })); @@ -100,7 +100,7 @@ let ensureSandboxContainer: typeof import("./docker.js").ensureSandboxContainer; async function loadFreshDockerModuleForTest() { vi.resetModules(); vi.doMock("./registry.js", () => ({ - readRegistry: registryMocks.readRegistry, + readRegistryEntry: registryMocks.readRegistryEntry, updateRegistry: registryMocks.updateRegistry, })); vi.doMock("node:child_process", async () => createChildProcessMock()); @@ -185,7 +185,7 @@ describe("ensureSandboxContainer config-hash recreation", () => { spawnState.calls.length = 0; spawnState.inspectRunning = true; spawnState.labelHash = ""; - registryMocks.readRegistry.mockClear(); + registryMocks.readRegistryEntry.mockClear(); registryMocks.updateRegistry.mockClear(); registryMocks.updateRegistry.mockResolvedValue(undefined); await loadFreshDockerModuleForTest(); @@ -213,17 +213,13 @@ describe("ensureSandboxContainer config-hash recreation", () => { expect(newHash).not.toBe(oldHash); spawnState.labelHash = oldHash; - registryMocks.readRegistry.mockResolvedValue({ - entries: [ - { - containerName: "oc-test-shared", - sessionKey: "shared", - createdAtMs: 1, - lastUsedAtMs: 0, - image: newCfg.docker.image, - configHash: oldHash, - }, - ], + registryMocks.readRegistryEntry.mockResolvedValue({ + containerName: "oc-test-shared", + sessionKey: "shared", + createdAtMs: 1, + lastUsedAtMs: 0, + image: newCfg.docker.image, + configHash: oldHash, }); const containerName = await ensureSandboxContainer({ @@ -269,17 +265,13 @@ describe("ensureSandboxContainer config-hash recreation", () => { spawnState.inspectRunning = false; spawnState.labelHash = "stale-hash"; - registryMocks.readRegistry.mockResolvedValue({ - entries: [ - { - containerName: "oc-test-shared", - sessionKey: "shared", - createdAtMs: 1, - lastUsedAtMs: 0, - image: cfg.docker.image, - configHash: "stale-hash", - }, - ], + registryMocks.readRegistryEntry.mockResolvedValue({ + containerName: "oc-test-shared", + sessionKey: "shared", + createdAtMs: 1, + lastUsedAtMs: 0, + image: cfg.docker.image, + configHash: "stale-hash", }); const createCall = await ensureSandboxCreateCallForTest({ cfg, workspaceDir }); @@ -304,7 +296,7 @@ describe("ensureSandboxContainer config-hash recreation", () => { spawnState.inspectRunning = false; spawnState.labelHash = ""; - registryMocks.readRegistry.mockResolvedValue({ entries: [] }); + registryMocks.readRegistryEntry.mockResolvedValue(null); registryMocks.updateRegistry.mockResolvedValue(undefined); const createCall = await ensureSandboxCreateCallForTest({ cfg, workspaceDir }); @@ -320,7 +312,7 @@ describe("ensureSandboxContainer config-hash recreation", () => { spawnState.inspectRunning = false; spawnState.labelHash = ""; - registryMocks.readRegistry.mockResolvedValue({ entries: [] }); + registryMocks.readRegistryEntry.mockResolvedValue(null); const createCall = await ensureSandboxCreateCallForTest({ cfg, workspaceDir }); expect(createCall.args).toContain( diff --git a/src/agents/sandbox/docker.ts b/src/agents/sandbox/docker.ts index 24ed82b520a..597e749965e 100644 --- a/src/agents/sandbox/docker.ts +++ b/src/agents/sandbox/docker.ts @@ -167,7 +167,7 @@ import { markOpenClawExecEnv } from "../../infra/openclaw-exec-env.js"; import { defaultRuntime } from "../../runtime.js"; import { computeSandboxConfigHash } from "./config-hash.js"; import { DEFAULT_SANDBOX_IMAGE } from "./constants.js"; -import { readRegistry, updateRegistry } from "./registry.js"; +import { readRegistryEntry, updateRegistry } from "./registry.js"; import { resolveSandboxAgentId, resolveSandboxScopeKey, slugifySessionKey } from "./shared.js"; import type { SandboxConfig, SandboxDockerConfig, SandboxWorkspaceAccess } from "./types.js"; import { validateSandboxSecurity } from "./validate-sandbox-security.js"; @@ -580,8 +580,7 @@ export async function ensureSandboxContainer(params: { } | undefined; if (hasContainer) { - const registry = await readRegistry(); - registryEntry = registry.entries.find((entry) => entry.containerName === containerName); + registryEntry = (await readRegistryEntry(containerName)) ?? undefined; currentHash = await readContainerConfigHash(containerName); if (!currentHash) { currentHash = registryEntry?.configHash ?? null; diff --git a/src/agents/sandbox/registry.test.ts b/src/agents/sandbox/registry.test.ts index fd0707849b1..f04b2175e9e 100644 --- a/src/agents/sandbox/registry.test.ts +++ b/src/agents/sandbox/registry.test.ts @@ -1,67 +1,38 @@ import fs from "node:fs/promises"; -import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { afterAll, afterEach, describe, expect, it, vi } from "vitest"; -type WriteDelayConfig = { - targetFile: "containers.json" | "browsers.json"; - containerName: string; - started: boolean; - markStarted: () => void; - waitForRelease: Promise; -}; - -const { TEST_STATE_DIR, SANDBOX_REGISTRY_PATH, SANDBOX_BROWSER_REGISTRY_PATH, writeGateState } = - vi.hoisted(() => { - const path = require("node:path"); - const { mkdtempSync } = require("node:fs"); - const { tmpdir } = require("node:os"); - const baseDir = mkdtempSync(path.join(tmpdir(), "openclaw-sandbox-registry-")); - - return { - TEST_STATE_DIR: baseDir, - SANDBOX_REGISTRY_PATH: path.join(baseDir, "containers.json"), - SANDBOX_BROWSER_REGISTRY_PATH: path.join(baseDir, "browsers.json"), - writeGateState: { active: null as WriteDelayConfig | null }, - }; - }); +const { + TEST_STATE_DIR, + SANDBOX_REGISTRY_PATH, + SANDBOX_BROWSER_REGISTRY_PATH, + SANDBOX_CONTAINERS_DIR, + SANDBOX_BROWSERS_DIR, +} = vi.hoisted(() => { + const p = require("node:path"); + const { mkdtempSync } = require("node:fs"); + const { tmpdir } = require("node:os"); + const baseDir = mkdtempSync(p.join(tmpdir(), "openclaw-sandbox-registry-")); + return { + TEST_STATE_DIR: baseDir, + SANDBOX_REGISTRY_PATH: p.join(baseDir, "containers.json"), + SANDBOX_BROWSER_REGISTRY_PATH: p.join(baseDir, "browsers.json"), + SANDBOX_CONTAINERS_DIR: p.join(baseDir, "containers"), + SANDBOX_BROWSERS_DIR: p.join(baseDir, "browsers"), + }; +}); vi.mock("./constants.js", () => ({ SANDBOX_STATE_DIR: TEST_STATE_DIR, SANDBOX_REGISTRY_PATH, SANDBOX_BROWSER_REGISTRY_PATH, + SANDBOX_CONTAINERS_DIR, + SANDBOX_BROWSERS_DIR, })); -vi.mock("../../infra/json-files.js", async () => { - const actual = await vi.importActual( - "../../infra/json-files.js", - ); - return { - ...actual, - writeJsonAtomic: async ( - filePath: string, - value: unknown, - options?: Parameters[2], - ) => { - const payload = JSON.stringify(value); - const gate = writeGateState.active; - if ( - gate && - filePath.includes(gate.targetFile) && - payloadMentionsContainer(payload, gate.containerName) - ) { - if (!gate.started) { - gate.started = true; - gate.markStarted(); - } - await gate.waitForRelease; - } - await actual.writeJsonAtomic(filePath, value, options); - }, - }; -}); - import { readBrowserRegistry, readRegistry, + readRegistryEntry, removeBrowserRegistryEntry, removeRegistryEntry, updateBrowserRegistry, @@ -71,54 +42,9 @@ import { type SandboxBrowserRegistryEntry = import("./registry.js").SandboxBrowserRegistryEntry; type SandboxRegistryEntry = import("./registry.js").SandboxRegistryEntry; -function payloadMentionsContainer(payload: string, containerName: string): boolean { - return ( - payload.includes(`"containerName":"${containerName}"`) || - payload.includes(`"containerName": "${containerName}"`) - ); -} - -async function seedMalformedContainerRegistry(payload: string) { - await fs.writeFile(SANDBOX_REGISTRY_PATH, payload, "utf-8"); -} - -async function seedMalformedBrowserRegistry(payload: string) { - await fs.writeFile(SANDBOX_BROWSER_REGISTRY_PATH, payload, "utf-8"); -} - -function installWriteGate( - targetFile: "containers.json" | "browsers.json", - containerName: string, -): { waitForStart: Promise; release: () => void } { - let markStarted = () => {}; - const waitForStart = new Promise((resolve) => { - markStarted = resolve; - }); - let resolveRelease = () => {}; - const waitForRelease = new Promise((resolve) => { - resolveRelease = resolve; - }); - writeGateState.active = { - targetFile, - containerName, - started: false, - markStarted, - waitForRelease, - }; - return { - waitForStart, - release: () => { - resolveRelease(); - writeGateState.active = null; - }, - }; -} - -beforeEach(() => { - writeGateState.active = null; -}); - afterEach(async () => { + await fs.rm(SANDBOX_CONTAINERS_DIR, { recursive: true, force: true }); + await fs.rm(SANDBOX_BROWSERS_DIR, { recursive: true, force: true }); await fs.rm(SANDBOX_REGISTRY_PATH, { force: true }); await fs.rm(SANDBOX_BROWSER_REGISTRY_PATH, { force: true }); await fs.rm(`${SANDBOX_REGISTRY_PATH}.lock`, { force: true }); @@ -154,11 +80,11 @@ function containerEntry(overrides: Partial = {}): SandboxR }; } -async function seedContainerRegistry(entries: SandboxRegistryEntry[]) { +async function seedMonolithicContainerRegistry(entries: SandboxRegistryEntry[]) { await fs.writeFile(SANDBOX_REGISTRY_PATH, `${JSON.stringify({ entries }, null, 2)}\n`, "utf-8"); } -async function seedBrowserRegistry(entries: SandboxBrowserRegistryEntry[]) { +async function seedMonolithicBrowserRegistry(entries: SandboxBrowserRegistryEntry[]) { await fs.writeFile( SANDBOX_BROWSER_REGISTRY_PATH, `${JSON.stringify({ entries }, null, 2)}\n`, @@ -166,9 +92,90 @@ async function seedBrowserRegistry(entries: SandboxBrowserRegistryEntry[]) { ); } -describe("registry race safety", () => { - it("normalizes legacy registry entries on read", async () => { - await seedContainerRegistry([ +describe("per-file sharded container registry", () => { + it("writes and reads a single container entry", async () => { + await updateRegistry(containerEntry({ containerName: "container-a" })); + + const registry = await readRegistry(); + expect(registry.entries).toHaveLength(1); + expect(registry.entries[0].containerName).toBe("container-a"); + }); + + it("keeps both container updates under concurrent writes", async () => { + // The old monolithic-file model serialized every writer through a + // single lock. Per-entry files must handle concurrent upserts of + // different containers without any ordering or contention. + await Promise.all([ + updateRegistry(containerEntry({ containerName: "container-a" })), + updateRegistry(containerEntry({ containerName: "container-b" })), + ]); + + const registry = await readRegistry(); + expect( + registry.entries + .map((entry) => entry.containerName) + .slice() + .toSorted(), + ).toEqual(["container-a", "container-b"]); + }); + + it("readRegistryEntry returns a single entry without scanning the whole dir", async () => { + await updateRegistry(containerEntry({ containerName: "container-x", sessionKey: "sess:x" })); + await updateRegistry(containerEntry({ containerName: "container-y", sessionKey: "sess:y" })); + + const entry = await readRegistryEntry("container-x"); + expect(entry).not.toBeNull(); + expect(entry?.containerName).toBe("container-x"); + expect(entry?.sessionKey).toBe("sess:x"); + }); + + it("readRegistryEntry returns null when the container has no entry file", async () => { + const missing = await readRegistryEntry("nonexistent-container"); + expect(missing).toBeNull(); + }); + + it("removeRegistryEntry deletes only the target container file", async () => { + await updateRegistry(containerEntry({ containerName: "container-a" })); + await updateRegistry(containerEntry({ containerName: "container-b" })); + await removeRegistryEntry("container-a"); + + const registry = await readRegistry(); + expect(registry.entries).toHaveLength(1); + expect(registry.entries[0].containerName).toBe("container-b"); + }); + + it("removeRegistryEntry is a no-op for a container that never existed", async () => { + // force:true on rm swallows ENOENT — callers should not have to + // check existence first. + await expect(removeRegistryEntry("never-created")).resolves.toBeUndefined(); + }); + + it("updateRegistry preserves createdAtMs and image from an existing entry", async () => { + await updateRegistry( + containerEntry({ + containerName: "c", + createdAtMs: 100, + lastUsedAtMs: 100, + image: "original:tag", + }), + ); + await updateRegistry( + containerEntry({ + containerName: "c", + createdAtMs: 999, + lastUsedAtMs: 200, + image: "ignored:tag", + }), + ); + + const entry = await readRegistryEntry("c"); + expect(entry?.createdAtMs).toBe(100); + expect(entry?.lastUsedAtMs).toBe(200); + expect(entry?.image).toBe("original:tag"); + }); + + it("readRegistry normalizes legacy entries that lack backendId/runtimeLabel/configLabelKind", async () => { + await seedMonolithicContainerRegistry([ { containerName: "legacy-container", sessionKey: "agent:main", @@ -189,46 +196,31 @@ describe("registry race safety", () => { ]); }); - it("keeps both container updates under concurrent writes", async () => { - await Promise.all([ - updateRegistry(containerEntry({ containerName: "container-a" })), - updateRegistry(containerEntry({ containerName: "container-b" })), - ]); + it("skips a single corrupt per-entry file without hiding the other entries", async () => { + // A crashed writer that left behind a partial JSON file must not + // prevent other containers from being enumerated. + await updateRegistry(containerEntry({ containerName: "good-a" })); + await updateRegistry(containerEntry({ containerName: "good-b" })); + await fs.writeFile(`${SANDBOX_CONTAINERS_DIR}/corrupt.json`, "{not json", "utf-8"); const registry = await readRegistry(); - expect(registry.entries).toHaveLength(2); expect( registry.entries .map((entry) => entry.containerName) .slice() .toSorted(), - ).toEqual(["container-a", "container-b"]); + ).toEqual(["good-a", "good-b"]); }); +}); - it("prevents concurrent container remove/update from resurrecting deleted entries", async () => { - await seedContainerRegistry([containerEntry({ containerName: "container-x" })]); - const writeGate = installWriteGate("containers.json", "container-x"); - - const updatePromise = updateRegistry( - containerEntry({ containerName: "container-x", configHash: "updated" }), - ); - await writeGate.waitForStart; - const removePromise = removeRegistryEntry("container-x"); - writeGate.release(); - await Promise.all([updatePromise, removePromise]); - - const registry = await readRegistry(); - expect(registry.entries).toHaveLength(0); - }); - - it("keeps both browser updates under concurrent writes", async () => { +describe("per-file sharded browser registry", () => { + it("writes and reads both browser entries under concurrent writes", async () => { await Promise.all([ updateBrowserRegistry(browserEntry({ containerName: "browser-a" })), updateBrowserRegistry(browserEntry({ containerName: "browser-b", cdpPort: 9223 })), ]); const registry = await readBrowserRegistry(); - expect(registry.entries).toHaveLength(2); expect( registry.entries .map((entry) => entry.containerName) @@ -237,38 +229,73 @@ describe("registry race safety", () => { ).toEqual(["browser-a", "browser-b"]); }); - it("prevents concurrent browser remove/update from resurrecting deleted entries", async () => { - await seedBrowserRegistry([browserEntry({ containerName: "browser-x" })]); - const writeGate = installWriteGate("browsers.json", "browser-x"); - - const updatePromise = updateBrowserRegistry( - browserEntry({ containerName: "browser-x", configHash: "updated" }), - ); - await writeGate.waitForStart; - const removePromise = removeBrowserRegistryEntry("browser-x"); - writeGate.release(); - await Promise.all([updatePromise, removePromise]); + it("removes a single browser entry", async () => { + await updateBrowserRegistry(browserEntry({ containerName: "browser-a" })); + await removeBrowserRegistryEntry("browser-a"); const registry = await readBrowserRegistry(); expect(registry.entries).toHaveLength(0); }); +}); - it("fails fast when registry files are malformed during update", async () => { - await seedMalformedContainerRegistry("{bad json"); - await seedMalformedBrowserRegistry("{bad json"); - await expect(updateRegistry(containerEntry())).rejects.toThrow(); - await expect(updateBrowserRegistry(browserEntry())).rejects.toThrow(); +describe("monolithic → per-file migration", () => { + it("migrates container entries from the legacy containers.json on first read", async () => { + await seedMonolithicContainerRegistry([ + containerEntry({ containerName: "old-a", sessionKey: "sess:a" }), + containerEntry({ containerName: "old-b", sessionKey: "sess:b" }), + ]); + + const registry = await readRegistry(); + expect( + registry.entries + .map((entry) => entry.containerName) + .slice() + .toSorted(), + ).toEqual(["old-a", "old-b"]); + + // Legacy file is removed once migration succeeds, so subsequent reads + // go straight to the sharded dir. + await expect(fs.access(SANDBOX_REGISTRY_PATH)).rejects.toThrow(); }); - it("fails fast when registry entries are invalid during update", async () => { - const invalidEntries = `{"entries":[{"sessionKey":"agent:main"}]}`; - await seedMalformedContainerRegistry(invalidEntries); - await seedMalformedBrowserRegistry(invalidEntries); - await expect(updateRegistry(containerEntry())).rejects.toThrow( - /Invalid sandbox registry format/, - ); - await expect(updateBrowserRegistry(browserEntry())).rejects.toThrow( - /Invalid sandbox registry format/, - ); + it("cleans up the stale .lock file alongside the migrated registry", async () => { + await seedMonolithicContainerRegistry([containerEntry({ containerName: "old-a" })]); + await fs.writeFile(`${SANDBOX_REGISTRY_PATH}.lock`, "stale", "utf-8"); + + await readRegistry(); + await expect(fs.access(`${SANDBOX_REGISTRY_PATH}.lock`)).rejects.toThrow(); + }); + + it("migrates browser entries from the legacy browsers.json", async () => { + await seedMonolithicBrowserRegistry([ + browserEntry({ containerName: "old-br-a" }), + browserEntry({ containerName: "old-br-b", cdpPort: 9223 }), + ]); + + const registry = await readBrowserRegistry(); + expect(registry.entries).toHaveLength(2); + await expect(fs.access(SANDBOX_BROWSER_REGISTRY_PATH)).rejects.toThrow(); + }); + + it("drops a malformed legacy containers.json rather than throwing forever", async () => { + // A corrupt legacy file would otherwise block every sandbox operation + // on every boot — dropping it lets the operator recover just by + // creating a new container. + await fs.writeFile(SANDBOX_REGISTRY_PATH, "{bad json", "utf-8"); + + const registry = await readRegistry(); + expect(registry.entries).toHaveLength(0); + await expect(fs.access(SANDBOX_REGISTRY_PATH)).rejects.toThrow(); + }); + + it("drops a legacy containers.json whose entries fail schema validation", async () => { + // entries missing containerName cannot be migrated safely. Rather + // than preserving the bad file, we remove it — the lost data was + // unusable anyway, and keeping it around would block future boots. + await fs.writeFile(SANDBOX_REGISTRY_PATH, `{"entries":[{"sessionKey":"agent:main"}]}`, "utf-8"); + + const registry = await readRegistry(); + expect(registry.entries).toHaveLength(0); + await expect(fs.access(SANDBOX_REGISTRY_PATH)).rejects.toThrow(); }); }); diff --git a/src/agents/sandbox/registry.ts b/src/agents/sandbox/registry.ts index be88017c978..7b2998d1d04 100644 --- a/src/agents/sandbox/registry.ts +++ b/src/agents/sandbox/registry.ts @@ -1,9 +1,14 @@ import fs from "node:fs/promises"; +import path from "node:path"; import { z } from "zod"; import { writeJsonAtomic } from "../../infra/json-files.js"; import { safeParseJsonWithSchema } from "../../utils/zod-parse.js"; -import { acquireSessionWriteLock } from "../session-write-lock.js"; -import { SANDBOX_BROWSER_REGISTRY_PATH, SANDBOX_REGISTRY_PATH } from "./constants.js"; +import { + SANDBOX_BROWSER_REGISTRY_PATH, + SANDBOX_BROWSERS_DIR, + SANDBOX_CONTAINERS_DIR, + SANDBOX_REGISTRY_PATH, +} from "./constants.js"; export type SandboxRegistryEntry = { containerName: string; @@ -36,8 +41,6 @@ type SandboxBrowserRegistry = { entries: SandboxBrowserRegistryEntry[]; }; -type RegistryReadMode = "strict" | "fallback"; - type RegistryEntry = { containerName: string; }; @@ -46,15 +49,9 @@ type RegistryFile = { entries: T[]; }; -type UpsertEntry = RegistryEntry & { - backendId?: string; - runtimeLabel?: string; - createdAtMs: number; - image: string; - configLabelKind?: string; - configHash?: string; -}; - +// Schemas are shared between the per-entry files (live writes) and the +// legacy monolithic files (one-shot migration). Both shapes must validate +// containerName; per-entry files are just the RegistryEntrySchema directly. const RegistryEntrySchema = z .object({ containerName: z.string(), @@ -74,69 +71,155 @@ function normalizeSandboxRegistryEntry(entry: SandboxRegistryEntry): SandboxRegi }; } -async function withRegistryLock(registryPath: string, fn: () => Promise): Promise { - const lock = await acquireSessionWriteLock({ - sessionFile: registryPath, - allowReentrant: false, - timeoutMs: 60_000, - }); - try { - return await fn(); - } finally { - await lock.release(); - } +// ── Per-entry file primitives ────────────────────────────────────────── +// +// Each container gets its own JSON file under the sharded directory. +// Writes use writeJsonAtomic (tmp + rename) for crash-safety. No file +// locks are needed — each concurrent writer only touches its own file, +// so there is zero cross-session contention on the monolithic lock that +// previously serialized every sandbox ensure/remove in the process tree. + +function entryFilePath(dir: string, containerName: string): string { + return path.join(dir, `${containerName}.json`); } -async function readRegistryFromFile( - registryPath: string, - mode: RegistryReadMode, -): Promise> { +async function readEntryFile( + dir: string, + containerName: string, +): Promise { + let raw: string; try { - const raw = await fs.readFile(registryPath, "utf-8"); - const parsed = safeParseJsonWithSchema(RegistryFileSchema, raw) as RegistryFile | null; - if (parsed) { - return parsed; - } - if (mode === "fallback") { - return { entries: [] }; - } - throw new Error(`Invalid sandbox registry format: ${registryPath}`); + raw = await fs.readFile(entryFilePath(dir, containerName), "utf-8"); } catch (error) { const code = (error as { code?: string } | null)?.code; if (code === "ENOENT") { - return { entries: [] }; + return null; } - if (mode === "fallback") { - return { entries: [] }; - } - if (error instanceof Error) { - throw error; - } - throw new Error(`Failed to read sandbox registry file: ${registryPath}`, { cause: error }); + throw error; + } + const parsed = safeParseJsonWithSchema(RegistryEntrySchema, raw) as T | null; + return parsed ?? null; +} + +async function writeEntryFile(dir: string, entry: T): Promise { + await fs.mkdir(dir, { recursive: true }); + await writeJsonAtomic(entryFilePath(dir, entry.containerName), entry, { trailingNewline: true }); +} + +async function removeEntryFile(dir: string, containerName: string): Promise { + try { + await fs.rm(entryFilePath(dir, containerName), { force: true }); + } catch { + // A concurrent remove or a missing file is fine — force:true already + // swallows ENOENT; any other error (e.g. permission) is non-fatal here + // because the caller's intent was "make sure it's gone". } } -async function writeRegistryFile( - registryPath: string, - registry: RegistryFile, -): Promise { - await writeJsonAtomic(registryPath, registry, { trailingNewline: true }); +/** Scan every per-entry JSON file in a sharded directory. */ +async function readAllEntries(dir: string): Promise { + let files: string[]; + try { + files = await fs.readdir(dir); + } catch { + return []; + } + const entries: T[] = []; + await Promise.all( + files + .filter((name) => name.endsWith(".json")) + .map(async (name) => { + try { + const raw = await fs.readFile(path.join(dir, name), "utf-8"); + const parsed = safeParseJsonWithSchema(RegistryEntrySchema, raw) as T | null; + if (parsed) { + entries.push(parsed); + } + // Corrupt / partially-written files are skipped rather than + // aborting the whole read: one bad entry should not hide every + // other container the operator has running. + } catch { + // ignore unreadable files for the same reason + } + }), + ); + return entries; } +// ── One-shot migration from monolithic file → per-entry files ────────── + +async function migrateMonolithicIfNeeded( + oldPath: string, + newDir: string, +): Promise { + let raw: string; + try { + raw = await fs.readFile(oldPath, "utf-8"); + } catch { + // Old file does not exist (already migrated on a previous boot, or + // fresh install). Nothing to do. + return; + } + const parsed = safeParseJsonWithSchema(RegistryFileSchema, raw) as RegistryFile | null; + if (!parsed || parsed.entries.length === 0) { + // Corrupt or empty — drop it (and its stale lock) so we don't re-attempt + // migration every read. + await fs.rm(oldPath, { force: true }).catch(() => {}); + await fs.rm(`${oldPath}.lock`, { force: true }).catch(() => {}); + return; + } + await fs.mkdir(newDir, { recursive: true }); + await Promise.all( + parsed.entries.map((entry) => + writeJsonAtomic(entryFilePath(newDir, entry.containerName), entry, { + trailingNewline: true, + }), + ), + ); + // Migration succeeded: remove the monolithic file and any leftover lock + // file from the previous single-writer scheme. + await fs.rm(oldPath, { force: true }).catch(() => {}); + await fs.rm(`${oldPath}.lock`, { force: true }).catch(() => {}); +} + +// ── Public API: Container Registry ───────────────────────────────────── + export async function readRegistry(): Promise { - const registry = await readRegistryFromFile( + await migrateMonolithicIfNeeded( SANDBOX_REGISTRY_PATH, - "fallback", + SANDBOX_CONTAINERS_DIR, ); - return { - entries: registry.entries.map((entry) => normalizeSandboxRegistryEntry(entry)), - }; + const entries = await readAllEntries(SANDBOX_CONTAINERS_DIR); + return { entries: entries.map(normalizeSandboxRegistryEntry) }; } -function upsertEntry(entries: T[], entry: T): T[] { - const existing = entries.find((item) => item.containerName === entry.containerName); - const next = entries.filter((item) => item.containerName !== entry.containerName); - next.push({ +/** + * Read a single container entry by name. + * + * O(1) file read — avoids scanning the entire sharded directory just to + * look up one container, which is the hot path for `ensureSandboxContainer`. + */ +export async function readRegistryEntry( + containerName: string, +): Promise { + await migrateMonolithicIfNeeded( + SANDBOX_REGISTRY_PATH, + SANDBOX_CONTAINERS_DIR, + ); + const entry = await readEntryFile(SANDBOX_CONTAINERS_DIR, containerName); + return entry ? normalizeSandboxRegistryEntry(entry) : null; +} + +export async function updateRegistry(entry: SandboxRegistryEntry): Promise { + await migrateMonolithicIfNeeded( + SANDBOX_REGISTRY_PATH, + SANDBOX_CONTAINERS_DIR, + ); + const existing = await readEntryFile( + SANDBOX_CONTAINERS_DIR, + entry.containerName, + ); + const merged: SandboxRegistryEntry = { ...entry, backendId: entry.backendId ?? existing?.backendId, runtimeLabel: entry.runtimeLabel ?? existing?.runtimeLabel, @@ -144,67 +227,42 @@ function upsertEntry(entries: T[], entry: T): T[] { image: existing?.image ?? entry.image, configLabelKind: entry.configLabelKind ?? existing?.configLabelKind, configHash: entry.configHash ?? existing?.configHash, - }); - return next; + }; + await writeEntryFile(SANDBOX_CONTAINERS_DIR, merged); } -function removeEntry(entries: T[], containerName: string): T[] { - return entries.filter((entry) => entry.containerName !== containerName); +export async function removeRegistryEntry(containerName: string): Promise { + await removeEntryFile(SANDBOX_CONTAINERS_DIR, containerName); } -async function withRegistryMutation( - registryPath: string, - mutate: (entries: T[]) => T[] | null, -): Promise { - await withRegistryLock(registryPath, async () => { - const registry = await readRegistryFromFile(registryPath, "strict"); - const next = mutate(registry.entries); - if (next === null) { - return; - } - await writeRegistryFile(registryPath, { entries: next }); - }); -} - -export async function updateRegistry(entry: SandboxRegistryEntry) { - await withRegistryMutation(SANDBOX_REGISTRY_PATH, (entries) => - upsertEntry(entries, entry), - ); -} - -export async function removeRegistryEntry(containerName: string) { - await withRegistryMutation(SANDBOX_REGISTRY_PATH, (entries) => { - const next = removeEntry(entries, containerName); - if (next.length === entries.length) { - return null; - } - return next; - }); -} +// ── Public API: Browser Registry ─────────────────────────────────────── export async function readBrowserRegistry(): Promise { - return await readRegistryFromFile( + await migrateMonolithicIfNeeded( SANDBOX_BROWSER_REGISTRY_PATH, - "fallback", + SANDBOX_BROWSERS_DIR, ); + return { entries: await readAllEntries(SANDBOX_BROWSERS_DIR) }; } -export async function updateBrowserRegistry(entry: SandboxBrowserRegistryEntry) { - await withRegistryMutation( +export async function updateBrowserRegistry(entry: SandboxBrowserRegistryEntry): Promise { + await migrateMonolithicIfNeeded( SANDBOX_BROWSER_REGISTRY_PATH, - (entries) => upsertEntry(entries, entry), + SANDBOX_BROWSERS_DIR, ); + const existing = await readEntryFile( + SANDBOX_BROWSERS_DIR, + entry.containerName, + ); + const merged: SandboxBrowserRegistryEntry = { + ...entry, + createdAtMs: existing?.createdAtMs ?? entry.createdAtMs, + image: existing?.image ?? entry.image, + configHash: entry.configHash ?? existing?.configHash, + }; + await writeEntryFile(SANDBOX_BROWSERS_DIR, merged); } -export async function removeBrowserRegistryEntry(containerName: string) { - await withRegistryMutation( - SANDBOX_BROWSER_REGISTRY_PATH, - (entries) => { - const next = removeEntry(entries, containerName); - if (next.length === entries.length) { - return null; - } - return next; - }, - ); +export async function removeBrowserRegistryEntry(containerName: string): Promise { + await removeEntryFile(SANDBOX_BROWSERS_DIR, containerName); }