fix(backup): dereference archive hardlinks

This commit is contained in:
Jason O'Neal
2026-05-18 22:50:55 -04:00
committed by Peter Steinberger
parent d73f3ac85d
commit d8a2cd5204
5 changed files with 171 additions and 5 deletions

View File

@@ -436,6 +436,7 @@ Docs: https://docs.openclaw.ai
- CLI: reject explicit port numbers above 65535 before they reach Gateway or Node bind paths. Fixes #83900. (#84008) Thanks @hclsys.
- Codex app-server: preserve plugin tool auth profiles when Codex owns model transport so OpenClaw dynamic tools can resolve their provider credentials. (#83603) Thanks @rubencu.
- Memory/search: scan the JS-side fallback vector path (used when the sqlite-vec index is unavailable or has a mismatched dimension) in bounded rowid batches and yield to the event loop between batches so large chunk tables can no longer pin the Node.js main thread for multi-second windows. Also keeps the SQL prepared statement rooted in a local so node:sqlite cannot finalize it mid-scan under heap pressure. Fixes #81172. Thanks @dev23xyz-oss.
- Backup: dereference hardlinks during archive creation and reject unsafe hardlink targets during verification so archives that pass `backup verify` do not fail broad extraction on macOS tar. Fixes #54242.
- Memory Wiki: preserve fs-safe diagnostics when bridge source page writes fail for non-symlink filesystem safety reasons, so directory collisions are reported with the underlying error code. (#83776) Thanks @TurboTheTurtle.
- Telegram: keep forum topics from blocking sibling topic traffic by routing inbound serialization, media/text buffers, and account API queues on topic-aware lanes. (#83829)
- Telegram: keep queued forum-topic follow-up messages from inheriting superseded source abort signals, so later same-topic user turns can still run and reply after an active turn is replaced. (#83827) Thanks @VACInc.

View File

@@ -1,6 +1,7 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { gzipSync } from "node:zlib";
import * as tar from "tar";
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildBackupArchiveRoot } from "./backup-shared.js";
@@ -32,6 +33,32 @@ function createBackupManifest(assetArchivePath: string, archiveRoot = TEST_ARCHI
};
}
function encodeTarEntry(params: {
path: string;
contents?: string;
type?: "File" | "Link";
linkpath?: string;
}): Buffer {
const body = Buffer.from(params.contents ?? "", "utf8");
const header = new tar.Header({
path: params.path,
type: params.type ?? "File",
size: params.type === "Link" ? 0 : body.length,
mode: 0o600,
uid: 0,
gid: 0,
mtime: new Date(0),
...(params.linkpath ? { linkpath: params.linkpath } : {}),
});
const headerBlock = Buffer.alloc(512);
header.encode(headerBlock);
if (params.type === "Link") {
return headerBlock;
}
const padding = Buffer.alloc((512 - (body.length % 512)) % 512);
return Buffer.concat([headerBlock, body, padding]);
}
async function createArchiveWithManifestContent(
options: {
tempPrefix: string;
@@ -284,6 +311,38 @@ describe("backupVerifyCommand", () => {
}
});
it("rejects unsafe hardlink targets", async () => {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-backup-linkpath-"));
const archivePath = path.join(tempDir, "broken.tar.gz");
const payloadArchivePath = `${TEST_ARCHIVE_ROOT}/payload/posix/tmp/.openclaw/target.txt`;
const hardlinkArchivePath = `${TEST_ARCHIVE_ROOT}/payload/posix/tmp/.openclaw/hardlink.txt`;
try {
const archive = gzipSync(
Buffer.concat([
encodeTarEntry({
path: `${TEST_ARCHIVE_ROOT}/manifest.json`,
contents: `${JSON.stringify(createBackupManifest(payloadArchivePath), null, 2)}\n`,
}),
encodeTarEntry({ path: payloadArchivePath, contents: "payload\n" }),
encodeTarEntry({
path: hardlinkArchivePath,
type: "Link",
linkpath: `${TEST_ARCHIVE_ROOT}/payload/../escaped.txt`,
}),
Buffer.alloc(1024),
]),
);
await fs.writeFile(archivePath, archive);
const runtime = createBackupVerifyRuntime();
await expect(backupVerifyCommand(runtime, { archive: archivePath })).rejects.toThrow(
/hardlink target.*path traversal segments/i,
);
} finally {
await fs.rm(tempDir, { recursive: true, force: true });
}
});
it("ignores payload manifest.json files when locating the backup manifest", async () => {
const archiveDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-backup-verify-out-"));
try {

View File

@@ -52,6 +52,12 @@ export type BackupVerifyResult = {
entryCount: number;
};
type ArchiveEntry = {
path: string;
linkpath?: string;
type?: string;
};
function stripTrailingSlashes(value: string): string {
return value.replace(/\/+$/u, "");
}
@@ -166,13 +172,18 @@ function parseManifest(raw: string): BackupManifest {
};
}
async function listArchiveEntries(archivePath: string): Promise<string[]> {
const entries: string[] = [];
async function listArchiveEntries(archivePath: string): Promise<ArchiveEntry[]> {
const entries: ArchiveEntry[] = [];
await tar.t({
file: archivePath,
gzip: true,
onentry: (entry) => {
entries.push(entry.path);
entries.push({
path: entry.path,
...(entry.linkpath ? { linkpath: entry.linkpath } : {}),
...(entry.type ? { type: entry.type } : {}),
});
entry.resume();
},
});
return entries;
@@ -248,6 +259,20 @@ function verifyManifestAgainstEntries(manifest: BackupManifest, entries: Set<str
}
}
function verifyHardlinkTargetsAgainstArchiveRoot(
hardlinkTargets: Array<{ entryPath: string; normalized: string }>,
archiveRoot: string,
): void {
const normalizedRoot = normalizeArchiveRoot(archiveRoot);
for (const target of hardlinkTargets) {
if (!isArchivePathWithin(target.normalized, normalizedRoot)) {
throw new Error(
`Archive hardlink target is outside the declared archive root: ${target.entryPath} -> ${target.normalized}`,
);
}
}
}
function formatResult(result: BackupVerifyResult): string {
return [
`Backup archive OK: ${result.archivePath}`,
@@ -283,9 +308,18 @@ export async function backupVerifyCommand(
}
const entries = rawEntries.map((entry) => ({
raw: entry,
normalized: normalizeArchivePath(entry, "Archive entry"),
raw: entry.path,
normalized: normalizeArchivePath(entry.path, "Archive entry"),
}));
const hardlinkTargets = rawEntries
.filter((entry) => entry.type === "Link" && entry.linkpath)
.map((entry) => ({
entryPath: entry.path,
normalized: normalizeArchivePath(
entry.linkpath ?? "",
`Archive hardlink target for ${entry.path}`,
),
}));
const normalizedEntrySet = new Set(entries.map((entry) => entry.normalized));
const manifestMatches = entries.filter((entry) => isRootManifestEntry(entry.normalized));
@@ -304,6 +338,7 @@ export async function backupVerifyCommand(
const manifestRaw = await extractManifest({ archivePath, manifestEntryPath });
const manifest = parseManifest(manifestRaw);
verifyManifestAgainstEntries(manifest, normalizedEntrySet);
verifyHardlinkTargetsAgainstArchiveRoot(hardlinkTargets, manifest.archiveRoot);
const result: BackupVerifyResult = {
ok: true,

View File

@@ -43,6 +43,25 @@ async function listArchiveEntries(archivePath: string): Promise<string[]> {
return entries;
}
async function listArchiveEntryDetails(
archivePath: string,
): Promise<Array<{ path: string; linkpath?: string; type?: string }>> {
const entries: Array<{ path: string; linkpath?: string; type?: string }> = [];
await tar.t({
file: archivePath,
gzip: true,
onentry: (entry) => {
entries.push({
path: entry.path,
...(entry.linkpath ? { linkpath: entry.linkpath } : {}),
...(entry.type ? { type: entry.type } : {}),
});
entry.resume();
},
});
return entries;
}
describe("formatBackupCreateSummary", () => {
const backupArchiveLine = "Backup archive: /tmp/openclaw-backup.tar.gz";
@@ -406,6 +425,45 @@ describe("createBackupArchive", () => {
);
});
it("dereferences hardlinks instead of emitting restore-hostile Link entries", async () => {
await withOpenClawTestState(
{
layout: "state-only",
prefix: "openclaw-backup-hardlink-",
scenario: "minimal",
},
async (state) => {
const stateDir = state.stateDir;
const outputDir = state.path("backups");
const sourcePath = path.join(stateDir, "workspace-adx", "openclaw-src", "node_modules");
const targetPath = path.join(sourcePath, "esbuild", "bin", "esbuild");
const hardlinkPath = path.join(sourcePath, "@esbuild", "darwin-arm64", "bin", "esbuild");
await fs.mkdir(path.dirname(targetPath), { recursive: true });
await fs.mkdir(path.dirname(hardlinkPath), { recursive: true });
await fs.writeFile(targetPath, "binary fixture\n", "utf8");
await fs.link(targetPath, hardlinkPath);
await fs.mkdir(outputDir, { recursive: true });
const result = await createBackupArchive({
output: outputDir,
includeWorkspace: false,
nowMs: Date.UTC(2026, 3, 29, 12, 0, 0),
});
const entries = await listArchiveEntryDetails(result.archivePath);
expect(entries.filter((entry) => entry.type === "Link")).toStrictEqual([]);
expect(entries.some((entry) => entry.path.endsWith("/esbuild/bin/esbuild"))).toBe(true);
expect(
entries.some((entry) => entry.path.endsWith("/@esbuild/darwin-arm64/bin/esbuild")),
).toBe(true);
const runtime: RuntimeEnv = { log: vi.fn(), error: vi.fn(), exit: vi.fn() };
const verification = await backupVerifyCommand(runtime, { archive: result.archivePath });
expect(verification.ok).toBe(true);
},
);
});
it("does not duplicate the root manifest when the system tempdir lives inside the state dir", async () => {
await withOpenClawTestState(
{

View File

@@ -25,6 +25,18 @@ function loadTarRuntime(): Promise<TarRuntime> {
return tarRuntimePromise;
}
type BackupLinkCacheKey = `${number}:${number}`;
class BackupLinkCache extends Map<BackupLinkCacheKey, string> {
override get(_key: BackupLinkCacheKey): undefined {
return undefined;
}
override set(_key: BackupLinkCacheKey, _value: string): this {
return this;
}
}
export type BackupCreateOptions = {
output?: string;
dryRun?: boolean;
@@ -543,6 +555,7 @@ export async function createBackupArchive(
gzip: true,
portable: true,
preservePaths: true,
linkCache: new BackupLinkCache(),
filter: tarFilter,
onWriteEntry: (entry) => {
entry.path = remapArchiveEntryPath({