From d8a2cd5204052122a42f8b3be38cecc5d27f124a Mon Sep 17 00:00:00 2001 From: Jason O'Neal Date: Mon, 18 May 2026 22:50:55 -0400 Subject: [PATCH] fix(backup): dereference archive hardlinks --- CHANGELOG.md | 1 + src/commands/backup-verify.test.ts | 59 ++++++++++++++++++++++++++++++ src/commands/backup-verify.ts | 45 ++++++++++++++++++++--- src/infra/backup-create.test.ts | 58 +++++++++++++++++++++++++++++ src/infra/backup-create.ts | 13 +++++++ 5 files changed, 171 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d923eedbc1e..ce14b2c91c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -436,6 +436,7 @@ Docs: https://docs.openclaw.ai - CLI: reject explicit port numbers above 65535 before they reach Gateway or Node bind paths. Fixes #83900. (#84008) Thanks @hclsys. - Codex app-server: preserve plugin tool auth profiles when Codex owns model transport so OpenClaw dynamic tools can resolve their provider credentials. (#83603) Thanks @rubencu. - Memory/search: scan the JS-side fallback vector path (used when the sqlite-vec index is unavailable or has a mismatched dimension) in bounded rowid batches and yield to the event loop between batches so large chunk tables can no longer pin the Node.js main thread for multi-second windows. Also keeps the SQL prepared statement rooted in a local so node:sqlite cannot finalize it mid-scan under heap pressure. Fixes #81172. Thanks @dev23xyz-oss. +- Backup: dereference hardlinks during archive creation and reject unsafe hardlink targets during verification so archives that pass `backup verify` do not fail broad extraction on macOS tar. Fixes #54242. - Memory Wiki: preserve fs-safe diagnostics when bridge source page writes fail for non-symlink filesystem safety reasons, so directory collisions are reported with the underlying error code. (#83776) Thanks @TurboTheTurtle. - Telegram: keep forum topics from blocking sibling topic traffic by routing inbound serialization, media/text buffers, and account API queues on topic-aware lanes. (#83829) - Telegram: keep queued forum-topic follow-up messages from inheriting superseded source abort signals, so later same-topic user turns can still run and reply after an active turn is replaced. (#83827) Thanks @VACInc. diff --git a/src/commands/backup-verify.test.ts b/src/commands/backup-verify.test.ts index 61c6451ae12..41a8bfd3bf0 100644 --- a/src/commands/backup-verify.test.ts +++ b/src/commands/backup-verify.test.ts @@ -1,6 +1,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; +import { gzipSync } from "node:zlib"; import * as tar from "tar"; import { afterEach, describe, expect, it, vi } from "vitest"; import { buildBackupArchiveRoot } from "./backup-shared.js"; @@ -32,6 +33,32 @@ function createBackupManifest(assetArchivePath: string, archiveRoot = TEST_ARCHI }; } +function encodeTarEntry(params: { + path: string; + contents?: string; + type?: "File" | "Link"; + linkpath?: string; +}): Buffer { + const body = Buffer.from(params.contents ?? "", "utf8"); + const header = new tar.Header({ + path: params.path, + type: params.type ?? "File", + size: params.type === "Link" ? 0 : body.length, + mode: 0o600, + uid: 0, + gid: 0, + mtime: new Date(0), + ...(params.linkpath ? { linkpath: params.linkpath } : {}), + }); + const headerBlock = Buffer.alloc(512); + header.encode(headerBlock); + if (params.type === "Link") { + return headerBlock; + } + const padding = Buffer.alloc((512 - (body.length % 512)) % 512); + return Buffer.concat([headerBlock, body, padding]); +} + async function createArchiveWithManifestContent( options: { tempPrefix: string; @@ -284,6 +311,38 @@ describe("backupVerifyCommand", () => { } }); + it("rejects unsafe hardlink targets", async () => { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-backup-linkpath-")); + const archivePath = path.join(tempDir, "broken.tar.gz"); + const payloadArchivePath = `${TEST_ARCHIVE_ROOT}/payload/posix/tmp/.openclaw/target.txt`; + const hardlinkArchivePath = `${TEST_ARCHIVE_ROOT}/payload/posix/tmp/.openclaw/hardlink.txt`; + try { + const archive = gzipSync( + Buffer.concat([ + encodeTarEntry({ + path: `${TEST_ARCHIVE_ROOT}/manifest.json`, + contents: `${JSON.stringify(createBackupManifest(payloadArchivePath), null, 2)}\n`, + }), + encodeTarEntry({ path: payloadArchivePath, contents: "payload\n" }), + encodeTarEntry({ + path: hardlinkArchivePath, + type: "Link", + linkpath: `${TEST_ARCHIVE_ROOT}/payload/../escaped.txt`, + }), + Buffer.alloc(1024), + ]), + ); + await fs.writeFile(archivePath, archive); + + const runtime = createBackupVerifyRuntime(); + await expect(backupVerifyCommand(runtime, { archive: archivePath })).rejects.toThrow( + /hardlink target.*path traversal segments/i, + ); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + it("ignores payload manifest.json files when locating the backup manifest", async () => { const archiveDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-backup-verify-out-")); try { diff --git a/src/commands/backup-verify.ts b/src/commands/backup-verify.ts index f9e78804af4..fad88003162 100644 --- a/src/commands/backup-verify.ts +++ b/src/commands/backup-verify.ts @@ -52,6 +52,12 @@ export type BackupVerifyResult = { entryCount: number; }; +type ArchiveEntry = { + path: string; + linkpath?: string; + type?: string; +}; + function stripTrailingSlashes(value: string): string { return value.replace(/\/+$/u, ""); } @@ -166,13 +172,18 @@ function parseManifest(raw: string): BackupManifest { }; } -async function listArchiveEntries(archivePath: string): Promise { - const entries: string[] = []; +async function listArchiveEntries(archivePath: string): Promise { + const entries: ArchiveEntry[] = []; await tar.t({ file: archivePath, gzip: true, onentry: (entry) => { - entries.push(entry.path); + entries.push({ + path: entry.path, + ...(entry.linkpath ? { linkpath: entry.linkpath } : {}), + ...(entry.type ? { type: entry.type } : {}), + }); + entry.resume(); }, }); return entries; @@ -248,6 +259,20 @@ function verifyManifestAgainstEntries(manifest: BackupManifest, entries: Set, + archiveRoot: string, +): void { + const normalizedRoot = normalizeArchiveRoot(archiveRoot); + for (const target of hardlinkTargets) { + if (!isArchivePathWithin(target.normalized, normalizedRoot)) { + throw new Error( + `Archive hardlink target is outside the declared archive root: ${target.entryPath} -> ${target.normalized}`, + ); + } + } +} + function formatResult(result: BackupVerifyResult): string { return [ `Backup archive OK: ${result.archivePath}`, @@ -283,9 +308,18 @@ export async function backupVerifyCommand( } const entries = rawEntries.map((entry) => ({ - raw: entry, - normalized: normalizeArchivePath(entry, "Archive entry"), + raw: entry.path, + normalized: normalizeArchivePath(entry.path, "Archive entry"), })); + const hardlinkTargets = rawEntries + .filter((entry) => entry.type === "Link" && entry.linkpath) + .map((entry) => ({ + entryPath: entry.path, + normalized: normalizeArchivePath( + entry.linkpath ?? "", + `Archive hardlink target for ${entry.path}`, + ), + })); const normalizedEntrySet = new Set(entries.map((entry) => entry.normalized)); const manifestMatches = entries.filter((entry) => isRootManifestEntry(entry.normalized)); @@ -304,6 +338,7 @@ export async function backupVerifyCommand( const manifestRaw = await extractManifest({ archivePath, manifestEntryPath }); const manifest = parseManifest(manifestRaw); verifyManifestAgainstEntries(manifest, normalizedEntrySet); + verifyHardlinkTargetsAgainstArchiveRoot(hardlinkTargets, manifest.archiveRoot); const result: BackupVerifyResult = { ok: true, diff --git a/src/infra/backup-create.test.ts b/src/infra/backup-create.test.ts index 7a8dca3ff35..3aafb335e56 100644 --- a/src/infra/backup-create.test.ts +++ b/src/infra/backup-create.test.ts @@ -43,6 +43,25 @@ async function listArchiveEntries(archivePath: string): Promise { return entries; } +async function listArchiveEntryDetails( + archivePath: string, +): Promise> { + const entries: Array<{ path: string; linkpath?: string; type?: string }> = []; + await tar.t({ + file: archivePath, + gzip: true, + onentry: (entry) => { + entries.push({ + path: entry.path, + ...(entry.linkpath ? { linkpath: entry.linkpath } : {}), + ...(entry.type ? { type: entry.type } : {}), + }); + entry.resume(); + }, + }); + return entries; +} + describe("formatBackupCreateSummary", () => { const backupArchiveLine = "Backup archive: /tmp/openclaw-backup.tar.gz"; @@ -406,6 +425,45 @@ describe("createBackupArchive", () => { ); }); + it("dereferences hardlinks instead of emitting restore-hostile Link entries", async () => { + await withOpenClawTestState( + { + layout: "state-only", + prefix: "openclaw-backup-hardlink-", + scenario: "minimal", + }, + async (state) => { + const stateDir = state.stateDir; + const outputDir = state.path("backups"); + const sourcePath = path.join(stateDir, "workspace-adx", "openclaw-src", "node_modules"); + const targetPath = path.join(sourcePath, "esbuild", "bin", "esbuild"); + const hardlinkPath = path.join(sourcePath, "@esbuild", "darwin-arm64", "bin", "esbuild"); + await fs.mkdir(path.dirname(targetPath), { recursive: true }); + await fs.mkdir(path.dirname(hardlinkPath), { recursive: true }); + await fs.writeFile(targetPath, "binary fixture\n", "utf8"); + await fs.link(targetPath, hardlinkPath); + await fs.mkdir(outputDir, { recursive: true }); + + const result = await createBackupArchive({ + output: outputDir, + includeWorkspace: false, + nowMs: Date.UTC(2026, 3, 29, 12, 0, 0), + }); + const entries = await listArchiveEntryDetails(result.archivePath); + + expect(entries.filter((entry) => entry.type === "Link")).toStrictEqual([]); + expect(entries.some((entry) => entry.path.endsWith("/esbuild/bin/esbuild"))).toBe(true); + expect( + entries.some((entry) => entry.path.endsWith("/@esbuild/darwin-arm64/bin/esbuild")), + ).toBe(true); + + const runtime: RuntimeEnv = { log: vi.fn(), error: vi.fn(), exit: vi.fn() }; + const verification = await backupVerifyCommand(runtime, { archive: result.archivePath }); + expect(verification.ok).toBe(true); + }, + ); + }); + it("does not duplicate the root manifest when the system tempdir lives inside the state dir", async () => { await withOpenClawTestState( { diff --git a/src/infra/backup-create.ts b/src/infra/backup-create.ts index 6ea353cd572..7b2b743c581 100644 --- a/src/infra/backup-create.ts +++ b/src/infra/backup-create.ts @@ -25,6 +25,18 @@ function loadTarRuntime(): Promise { return tarRuntimePromise; } +type BackupLinkCacheKey = `${number}:${number}`; + +class BackupLinkCache extends Map { + override get(_key: BackupLinkCacheKey): undefined { + return undefined; + } + + override set(_key: BackupLinkCacheKey, _value: string): this { + return this; + } +} + export type BackupCreateOptions = { output?: string; dryRun?: boolean; @@ -543,6 +555,7 @@ export async function createBackupArchive( gzip: true, portable: true, preservePaths: true, + linkCache: new BackupLinkCache(), filter: tarFilter, onWriteEntry: (entry) => { entry.path = remapArchiveEntryPath({