From 0bac47de515c1da423cd50745cb94789923b6fb9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 10 Mar 2026 23:53:32 +0000 Subject: [PATCH] refactor: split tar.bz2 extraction helpers --- src/agents/skills-install-extract.ts | 183 +++++++++++++++++++-------- 1 file changed, 127 insertions(+), 56 deletions(-) diff --git a/src/agents/skills-install-extract.ts b/src/agents/skills-install-extract.ts index 05fc348166a..02a5b22c3d5 100644 --- a/src/agents/skills-install-extract.ts +++ b/src/agents/skills-install-extract.ts @@ -12,6 +12,10 @@ import { parseTarVerboseMetadata } from "./skills-install-tar-verbose.js"; import { hasBinary } from "./skills.js"; export type ArchiveExtractResult = { stdout: string; stderr: string; code: number | null }; +type TarPreflightResult = { + entries: string[]; + metadata: ReturnType; +}; async function hashFileSha256(filePath: string): Promise { const hash = createHash("sha256"); @@ -27,6 +31,112 @@ async function hashFileSha256(filePath: string): Promise { }); } +function commandFailureResult( + result: { stdout: string; stderr: string; code: number | null }, + fallbackStderr: string, +): ArchiveExtractResult { + return { + stdout: result.stdout, + stderr: result.stderr || fallbackStderr, + code: result.code, + }; +} + +function buildTarExtractArgv(params: { + archivePath: string; + targetDir: string; + stripComponents: number; +}): string[] { + const argv = ["tar", "xf", params.archivePath, "-C", params.targetDir]; + if (params.stripComponents > 0) { + argv.push("--strip-components", String(params.stripComponents)); + } + return argv; +} + +async function readTarPreflight(params: { + archivePath: string; + timeoutMs: number; +}): Promise { + const listResult = await runCommandWithTimeout(["tar", "tf", params.archivePath], { + timeoutMs: params.timeoutMs, + }); + if (listResult.code !== 0) { + return commandFailureResult(listResult, "tar list failed"); + } + const entries = listResult.stdout + .split("\n") + .map((line) => line.trim()) + .filter(Boolean); + + const verboseResult = await runCommandWithTimeout(["tar", "tvf", params.archivePath], { + timeoutMs: params.timeoutMs, + }); + if (verboseResult.code !== 0) { + return commandFailureResult(verboseResult, "tar verbose list failed"); + } + const metadata = parseTarVerboseMetadata(verboseResult.stdout); + if (metadata.length !== entries.length) { + return { + stdout: verboseResult.stdout, + stderr: `tar verbose/list entry count mismatch (${metadata.length} vs ${entries.length})`, + code: 1, + }; + } + return { entries, metadata }; +} + +function isArchiveExtractFailure( + value: TarPreflightResult | ArchiveExtractResult, +): value is ArchiveExtractResult { + return "code" in value; +} + +async function verifyArchiveHashStable(params: { + archivePath: string; + expectedHash: string; +}): Promise { + const postPreflightHash = await hashFileSha256(params.archivePath); + if (postPreflightHash === params.expectedHash) { + return null; + } + return { + stdout: "", + stderr: "tar archive changed during safety preflight; refusing to extract", + code: 1, + }; +} + +async function extractTarBz2WithStaging(params: { + archivePath: string; + destinationRealDir: string; + stripComponents: number; + timeoutMs: number; +}): Promise { + return await withStagedArchiveDestination({ + destinationRealDir: params.destinationRealDir, + run: async (stagingDir) => { + const extractResult = await runCommandWithTimeout( + buildTarExtractArgv({ + archivePath: params.archivePath, + targetDir: stagingDir, + stripComponents: params.stripComponents, + }), + { timeoutMs: params.timeoutMs }, + ); + if (extractResult.code !== 0) { + return extractResult; + } + await mergeExtractedTreeIntoDestination({ + sourceDir: stagingDir, + destinationDir: params.destinationRealDir, + destinationRealDir: params.destinationRealDir, + }); + return extractResult; + }, + }); +} + export async function extractArchive(params: { archivePath: string; archiveType: string; @@ -73,46 +183,21 @@ export async function extractArchive(params: { const preflightHash = await hashFileSha256(archivePath); // Preflight list to prevent zip-slip style traversal before extraction. - const listResult = await runCommandWithTimeout(["tar", "tf", archivePath], { timeoutMs }); - if (listResult.code !== 0) { - return { - stdout: listResult.stdout, - stderr: listResult.stderr || "tar list failed", - code: listResult.code, - }; - } - const entries = listResult.stdout - .split("\n") - .map((line) => line.trim()) - .filter(Boolean); - - const verboseResult = await runCommandWithTimeout(["tar", "tvf", archivePath], { timeoutMs }); - if (verboseResult.code !== 0) { - return { - stdout: verboseResult.stdout, - stderr: verboseResult.stderr || "tar verbose list failed", - code: verboseResult.code, - }; - } - const metadata = parseTarVerboseMetadata(verboseResult.stdout); - if (metadata.length !== entries.length) { - return { - stdout: verboseResult.stdout, - stderr: `tar verbose/list entry count mismatch (${metadata.length} vs ${entries.length})`, - code: 1, - }; + const preflight = await readTarPreflight({ archivePath, timeoutMs }); + if (isArchiveExtractFailure(preflight)) { + return preflight; } const checkTarEntrySafety = createTarEntryPreflightChecker({ rootDir: destinationRealDir, stripComponents: strip, escapeLabel: "targetDir", }); - for (let i = 0; i < entries.length; i += 1) { - const entryPath = entries[i]; - const entryMeta = metadata[i]; + for (let i = 0; i < preflight.entries.length; i += 1) { + const entryPath = preflight.entries[i]; + const entryMeta = preflight.metadata[i]; if (!entryPath || !entryMeta) { return { - stdout: verboseResult.stdout, + stdout: "", stderr: "tar metadata parse failure", code: 1, }; @@ -124,33 +209,19 @@ export async function extractArchive(params: { }); } - const postPreflightHash = await hashFileSha256(archivePath); - if (postPreflightHash !== preflightHash) { - return { - stdout: "", - stderr: "tar archive changed during safety preflight; refusing to extract", - code: 1, - }; + const hashFailure = await verifyArchiveHashStable({ + archivePath, + expectedHash: preflightHash, + }); + if (hashFailure) { + return hashFailure; } - return await withStagedArchiveDestination({ + return await extractTarBz2WithStaging({ + archivePath, destinationRealDir, - run: async (stagingDir) => { - const argv = ["tar", "xf", archivePath, "-C", stagingDir]; - if (strip > 0) { - argv.push("--strip-components", String(strip)); - } - const extractResult = await runCommandWithTimeout(argv, { timeoutMs }); - if (extractResult.code !== 0) { - return extractResult; - } - await mergeExtractedTreeIntoDestination({ - sourceDir: stagingDir, - destinationDir: destinationRealDir, - destinationRealDir, - }); - return extractResult; - }, + stripComponents: strip, + timeoutMs, }); }