From 6565ae1857b1db906b9c429ec651b9d5f1eb0ea8 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 10 Mar 2026 23:52:31 +0000 Subject: [PATCH] refactor: extract archive staging helpers --- src/infra/archive-staging.ts | 218 +++++++++++++++++++++++++++++++++ src/infra/archive.ts | 231 +++-------------------------------- 2 files changed, 236 insertions(+), 213 deletions(-) create mode 100644 src/infra/archive-staging.ts diff --git a/src/infra/archive-staging.ts b/src/infra/archive-staging.ts new file mode 100644 index 00000000000..443e28e062e --- /dev/null +++ b/src/infra/archive-staging.ts @@ -0,0 +1,218 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { copyFileWithinRoot } from "./fs-safe.js"; +import { isNotFoundPathError, isPathInside } from "./path-guards.js"; + +const ERROR_ARCHIVE_ENTRY_TRAVERSES_SYMLINK = "archive entry traverses symlink in destination"; + +export type ArchiveSecurityErrorCode = + | "destination-not-directory" + | "destination-symlink" + | "destination-symlink-traversal"; + +export class ArchiveSecurityError extends Error { + code: ArchiveSecurityErrorCode; + + constructor(code: ArchiveSecurityErrorCode, message: string, options?: ErrorOptions) { + super(message, options); + this.code = code; + this.name = "ArchiveSecurityError"; + } +} + +function symlinkTraversalError(originalPath: string): ArchiveSecurityError { + return new ArchiveSecurityError( + "destination-symlink-traversal", + `${ERROR_ARCHIVE_ENTRY_TRAVERSES_SYMLINK}: ${originalPath}`, + ); +} + +export async function prepareArchiveDestinationDir(destDir: string): Promise { + const stat = await fs.lstat(destDir); + if (stat.isSymbolicLink()) { + throw new ArchiveSecurityError("destination-symlink", "archive destination is a symlink"); + } + if (!stat.isDirectory()) { + throw new ArchiveSecurityError( + "destination-not-directory", + "archive destination is not a directory", + ); + } + return await fs.realpath(destDir); +} + +async function assertNoSymlinkTraversal(params: { + rootDir: string; + relPath: string; + originalPath: string; +}): Promise { + const parts = params.relPath.split(/[\\/]+/).filter(Boolean); + let current = path.resolve(params.rootDir); + for (const part of parts) { + current = path.join(current, part); + let stat: Awaited>; + try { + stat = await fs.lstat(current); + } catch (err) { + if (isNotFoundPathError(err)) { + continue; + } + throw err; + } + if (stat.isSymbolicLink()) { + throw symlinkTraversalError(params.originalPath); + } + } +} + +async function assertResolvedInsideDestination(params: { + destinationRealDir: string; + targetPath: string; + originalPath: string; +}): Promise { + let resolved: string; + try { + resolved = await fs.realpath(params.targetPath); + } catch (err) { + if (isNotFoundPathError(err)) { + return; + } + throw err; + } + if (!isPathInside(params.destinationRealDir, resolved)) { + throw symlinkTraversalError(params.originalPath); + } +} + +export async function prepareArchiveOutputPath(params: { + destinationDir: string; + destinationRealDir: string; + relPath: string; + outPath: string; + originalPath: string; + isDirectory: boolean; +}): Promise { + await assertNoSymlinkTraversal({ + rootDir: params.destinationDir, + relPath: params.relPath, + originalPath: params.originalPath, + }); + + if (params.isDirectory) { + await fs.mkdir(params.outPath, { recursive: true }); + await assertResolvedInsideDestination({ + destinationRealDir: params.destinationRealDir, + targetPath: params.outPath, + originalPath: params.originalPath, + }); + return; + } + + const parentDir = path.dirname(params.outPath); + await fs.mkdir(parentDir, { recursive: true }); + await assertResolvedInsideDestination({ + destinationRealDir: params.destinationRealDir, + targetPath: parentDir, + originalPath: params.originalPath, + }); +} + +async function applyStagedEntryMode(params: { + destinationRealDir: string; + relPath: string; + mode: number; + originalPath: string; +}): Promise { + const destinationPath = path.join(params.destinationRealDir, params.relPath); + await assertResolvedInsideDestination({ + destinationRealDir: params.destinationRealDir, + targetPath: destinationPath, + originalPath: params.originalPath, + }); + if (params.mode !== 0) { + await fs.chmod(destinationPath, params.mode).catch(() => undefined); + } +} + +export async function withStagedArchiveDestination(params: { + destinationRealDir: string; + run: (stagingDir: string) => Promise; +}): Promise { + const stagingDir = await fs.mkdtemp(path.join(params.destinationRealDir, ".openclaw-archive-")); + try { + return await params.run(stagingDir); + } finally { + await fs.rm(stagingDir, { recursive: true, force: true }).catch(() => undefined); + } +} + +export async function mergeExtractedTreeIntoDestination(params: { + sourceDir: string; + destinationDir: string; + destinationRealDir: string; +}): Promise { + const walk = async (currentSourceDir: string): Promise => { + const entries = await fs.readdir(currentSourceDir, { withFileTypes: true }); + for (const entry of entries) { + const sourcePath = path.join(currentSourceDir, entry.name); + const relPath = path.relative(params.sourceDir, sourcePath); + const originalPath = relPath.split(path.sep).join("/"); + const destinationPath = path.join(params.destinationDir, relPath); + const sourceStat = await fs.lstat(sourcePath); + + if (sourceStat.isSymbolicLink()) { + throw symlinkTraversalError(originalPath); + } + + if (sourceStat.isDirectory()) { + await prepareArchiveOutputPath({ + destinationDir: params.destinationDir, + destinationRealDir: params.destinationRealDir, + relPath, + outPath: destinationPath, + originalPath, + isDirectory: true, + }); + await walk(sourcePath); + await applyStagedEntryMode({ + destinationRealDir: params.destinationRealDir, + relPath, + mode: sourceStat.mode & 0o777, + originalPath, + }); + continue; + } + + if (!sourceStat.isFile()) { + throw new Error(`archive staging contains unsupported entry: ${originalPath}`); + } + + await prepareArchiveOutputPath({ + destinationDir: params.destinationDir, + destinationRealDir: params.destinationRealDir, + relPath, + outPath: destinationPath, + originalPath, + isDirectory: false, + }); + await copyFileWithinRoot({ + sourcePath, + rootDir: params.destinationRealDir, + relativePath: relPath, + mkdir: true, + }); + await applyStagedEntryMode({ + destinationRealDir: params.destinationRealDir, + relPath, + mode: sourceStat.mode & 0o777, + originalPath, + }); + } + }; + + await walk(params.sourceDir); +} + +export function createArchiveSymlinkTraversalError(originalPath: string): ArchiveSecurityError { + return symlinkTraversalError(originalPath); +} diff --git a/src/infra/archive.ts b/src/infra/archive.ts index 313cdbab439..cb808a88b9d 100644 --- a/src/infra/archive.ts +++ b/src/infra/archive.ts @@ -13,14 +13,16 @@ import { stripArchivePath, validateArchiveEntryPath, } from "./archive-path.js"; -import { sameFileIdentity } from "./file-identity.js"; import { - copyFileWithinRoot, - openFileWithinRoot, - openWritableFileWithinRoot, - SafeOpenError, -} from "./fs-safe.js"; -import { isNotFoundPathError, isPathInside } from "./path-guards.js"; + createArchiveSymlinkTraversalError, + mergeExtractedTreeIntoDestination, + prepareArchiveDestinationDir, + prepareArchiveOutputPath, + withStagedArchiveDestination, +} from "./archive-staging.js"; +import { sameFileIdentity } from "./file-identity.js"; +import { openFileWithinRoot, openWritableFileWithinRoot, SafeOpenError } from "./fs-safe.js"; +import { isNotFoundPathError } from "./path-guards.js"; export type ArchiveKind = "tar" | "zip"; @@ -42,20 +44,13 @@ export type ArchiveExtractLimits = { maxEntryBytes?: number; }; -export type ArchiveSecurityErrorCode = - | "destination-not-directory" - | "destination-symlink" - | "destination-symlink-traversal"; - -export class ArchiveSecurityError extends Error { - code: ArchiveSecurityErrorCode; - - constructor(code: ArchiveSecurityErrorCode, message: string, options?: ErrorOptions) { - super(message, options); - this.code = code; - this.name = "ArchiveSecurityError"; - } -} +export { ArchiveSecurityError, type ArchiveSecurityErrorCode } from "./archive-staging.js"; +export { + mergeExtractedTreeIntoDestination, + prepareArchiveDestinationDir, + prepareArchiveOutputPath, + withStagedArchiveDestination, +} from "./archive-staging.js"; /** @internal */ export const DEFAULT_MAX_ARCHIVE_BYTES_ZIP = 256 * 1024 * 1024; @@ -71,7 +66,6 @@ const ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT = "archive entry count exceeds lim const ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT = "archive entry extracted size exceeds limit"; const ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT = "archive extracted size exceeds limit"; -const ERROR_ARCHIVE_ENTRY_TRAVERSES_SYMLINK = "archive entry traverses symlink in destination"; const SUPPORTS_NOFOLLOW = process.platform !== "win32" && "O_NOFOLLOW" in fsConstants; const OPEN_WRITE_CREATE_FLAGS = fsConstants.O_WRONLY | @@ -222,197 +216,8 @@ function createExtractBudgetTransform(params: { }); } -function symlinkTraversalError(originalPath: string): ArchiveSecurityError { - return new ArchiveSecurityError( - "destination-symlink-traversal", - `${ERROR_ARCHIVE_ENTRY_TRAVERSES_SYMLINK}: ${originalPath}`, - ); -} - -export async function prepareArchiveDestinationDir(destDir: string): Promise { - const stat = await fs.lstat(destDir); - if (stat.isSymbolicLink()) { - throw new ArchiveSecurityError("destination-symlink", "archive destination is a symlink"); - } - if (!stat.isDirectory()) { - throw new ArchiveSecurityError( - "destination-not-directory", - "archive destination is not a directory", - ); - } - return await fs.realpath(destDir); -} - -async function assertNoSymlinkTraversal(params: { - rootDir: string; - relPath: string; - originalPath: string; -}): Promise { - const parts = params.relPath.split(/[\\/]+/).filter(Boolean); - let current = path.resolve(params.rootDir); - for (const part of parts) { - current = path.join(current, part); - let stat: Awaited>; - try { - stat = await fs.lstat(current); - } catch (err) { - if (isNotFoundPathError(err)) { - continue; - } - throw err; - } - if (stat.isSymbolicLink()) { - throw symlinkTraversalError(params.originalPath); - } - } -} - -async function assertResolvedInsideDestination(params: { - destinationRealDir: string; - targetPath: string; - originalPath: string; -}): Promise { - let resolved: string; - try { - resolved = await fs.realpath(params.targetPath); - } catch (err) { - if (isNotFoundPathError(err)) { - return; - } - throw err; - } - if (!isPathInside(params.destinationRealDir, resolved)) { - throw symlinkTraversalError(params.originalPath); - } -} - -async function prepareArchiveOutputPath(params: { - destinationDir: string; - destinationRealDir: string; - relPath: string; - outPath: string; - originalPath: string; - isDirectory: boolean; -}): Promise { - await assertNoSymlinkTraversal({ - rootDir: params.destinationDir, - relPath: params.relPath, - originalPath: params.originalPath, - }); - - if (params.isDirectory) { - await fs.mkdir(params.outPath, { recursive: true }); - await assertResolvedInsideDestination({ - destinationRealDir: params.destinationRealDir, - targetPath: params.outPath, - originalPath: params.originalPath, - }); - return; - } - - const parentDir = path.dirname(params.outPath); - await fs.mkdir(parentDir, { recursive: true }); - await assertResolvedInsideDestination({ - destinationRealDir: params.destinationRealDir, - targetPath: parentDir, - originalPath: params.originalPath, - }); -} - -async function applyStagedEntryMode(params: { - destinationRealDir: string; - relPath: string; - mode: number; - originalPath: string; -}): Promise { - const destinationPath = path.join(params.destinationRealDir, params.relPath); - await assertResolvedInsideDestination({ - destinationRealDir: params.destinationRealDir, - targetPath: destinationPath, - originalPath: params.originalPath, - }); - if (params.mode !== 0) { - await fs.chmod(destinationPath, params.mode).catch(() => undefined); - } -} - -export async function withStagedArchiveDestination(params: { - destinationRealDir: string; - run: (stagingDir: string) => Promise; -}): Promise { - const stagingDir = await fs.mkdtemp(path.join(params.destinationRealDir, ".openclaw-archive-")); - try { - return await params.run(stagingDir); - } finally { - await fs.rm(stagingDir, { recursive: true, force: true }).catch(() => undefined); - } -} - -export async function mergeExtractedTreeIntoDestination(params: { - sourceDir: string; - destinationDir: string; - destinationRealDir: string; -}): Promise { - const walk = async (currentSourceDir: string): Promise => { - const entries = await fs.readdir(currentSourceDir, { withFileTypes: true }); - for (const entry of entries) { - const sourcePath = path.join(currentSourceDir, entry.name); - const relPath = path.relative(params.sourceDir, sourcePath); - const originalPath = relPath.split(path.sep).join("/"); - const destinationPath = path.join(params.destinationDir, relPath); - const sourceStat = await fs.lstat(sourcePath); - - if (sourceStat.isSymbolicLink()) { - throw symlinkTraversalError(originalPath); - } - - if (sourceStat.isDirectory()) { - await prepareArchiveOutputPath({ - destinationDir: params.destinationDir, - destinationRealDir: params.destinationRealDir, - relPath, - outPath: destinationPath, - originalPath, - isDirectory: true, - }); - await walk(sourcePath); - await applyStagedEntryMode({ - destinationRealDir: params.destinationRealDir, - relPath, - mode: sourceStat.mode & 0o777, - originalPath, - }); - continue; - } - - if (!sourceStat.isFile()) { - throw new Error(`archive staging contains unsupported entry: ${originalPath}`); - } - - await prepareArchiveOutputPath({ - destinationDir: params.destinationDir, - destinationRealDir: params.destinationRealDir, - relPath, - outPath: destinationPath, - originalPath, - isDirectory: false, - }); - await copyFileWithinRoot({ - sourcePath, - rootDir: params.destinationRealDir, - relativePath: relPath, - mkdir: true, - }); - await applyStagedEntryMode({ - destinationRealDir: params.destinationRealDir, - relPath, - mode: sourceStat.mode & 0o777, - originalPath, - }); - } - }; - - await walk(params.sourceDir); +function symlinkTraversalError(originalPath: string) { + return createArchiveSymlinkTraversalError(originalPath); } type OpenZipOutputFileResult = {