fix(archive): enforce extraction resource limits

This commit is contained in:
Peter Steinberger
2026-02-14 15:30:05 +01:00
parent c8424bf29a
commit d3ee5deb87
3 changed files with 200 additions and 39 deletions

View File

@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
- Security: fix Chutes manual OAuth login state validation (thanks @aether-ai-agent). (#16058)
- macOS: hard-limit unkeyed `openclaw://agent` deep links and ignore `deliver` / `to` / `channel` unless a valid unattended key is provided. Thanks @Cillian-Collins.
- Security/Google Chat: deprecate `users/<email>` allowlists (treat `users/...` as immutable user id only); keep raw email allowlists for usability. Thanks @vincentkoc.
- Security/Archive: enforce archive extraction entry/size limits to prevent resource exhaustion from high-expansion ZIP/TAR archives. Thanks @vincentkoc.
## 2026.2.14

View File

@@ -96,4 +96,45 @@ describe("archive utils", () => {
extractArchive({ archivePath, destDir: extractDir, timeoutMs: 5_000 }),
).rejects.toThrow(/escapes destination/i);
});
it("rejects zip archives that exceed extracted size budget", async () => {
const workDir = await makeTempDir();
const archivePath = path.join(workDir, "bundle.zip");
const extractDir = path.join(workDir, "extract");
const zip = new JSZip();
zip.file("package/big.txt", "x".repeat(64));
await fs.writeFile(archivePath, await zip.generateAsync({ type: "nodebuffer" }));
await fs.mkdir(extractDir, { recursive: true });
await expect(
extractArchive({
archivePath,
destDir: extractDir,
timeoutMs: 5_000,
limits: { maxExtractedBytes: 32 },
}),
).rejects.toThrow("archive extracted size exceeds limit");
});
it("rejects tar archives that exceed extracted size budget", async () => {
  // Lay a 64-byte file on disk, tar it, then extract under a 32-byte budget
  // so the cumulative-size check must trip.
  const tmpDir = await makeTempDir();
  const tarPath = path.join(tmpDir, "bundle.tar");
  const destination = path.join(tmpDir, "extract");
  const sourceDir = path.join(tmpDir, "package");
  await fs.mkdir(sourceDir, { recursive: true });
  await fs.writeFile(path.join(sourceDir, "big.txt"), "x".repeat(64));
  await tar.c({ cwd: tmpDir, file: tarPath }, ["package"]);
  await fs.mkdir(destination, { recursive: true });
  await expect(
    extractArchive({
      archivePath: tarPath,
      destDir: destination,
      timeoutMs: 5_000,
      limits: { maxExtractedBytes: 32 },
    }),
  ).rejects.toThrow("archive extracted size exceeds limit");
});
});

View File

@@ -1,6 +1,9 @@
import JSZip from "jszip";
import { createWriteStream } from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { Transform } from "node:stream";
import { pipeline } from "node:stream/promises";
import * as tar from "tar";
// Archive container formats the extractor knows how to handle.
export type ArchiveKind = "tar" | "zip";
@@ -10,6 +13,20 @@ export type ArchiveLogger = {
warn?: (message: string) => void;
};
/**
 * Optional resource budgets enforced during archive extraction to guard
 * against resource exhaustion from high-expansion (zip/tar bomb) archives.
 * Omitted or non-positive fields fall back to built-in defaults.
 */
export type ArchiveExtractLimits = {
/**
 * Max archive file bytes (compressed). Primarily protects zip extraction
 * because we currently read the whole archive into memory for parsing.
 */
maxArchiveBytes?: number;
/** Max number of extracted entries (files + dirs). */
maxEntries?: number;
/** Max extracted bytes (sum of all files). */
maxExtractedBytes?: number;
/** Max extracted bytes for a single file entry. */
maxEntryBytes?: number;
};
// File name suffixes treated as tar-family archives.
const TAR_SUFFIXES = [".tgz", ".tar.gz", ".tar"];
export function resolveArchiveKind(filePath: string): ArchiveKind | null {
@@ -100,14 +117,14 @@ function validateArchiveEntryPath(entryPath: string): void {
}
function stripArchivePath(entryPath: string, stripComponents: number): string | null {
const normalized = path.posix.normalize(normalizeArchivePath(entryPath));
if (!normalized || normalized === "." || normalized === "./") {
const raw = normalizeArchivePath(entryPath);
if (!raw || raw === "." || raw === "./") {
return null;
}
// Keep the validation separate so callers can reject traversal in the original
// path even if stripping could make it "look" safe.
const parts = normalized.split("/").filter((part) => part.length > 0 && part !== ".");
// Important: mimic tar --strip-components semantics (raw segments before
// normalization) so strip-induced escapes like "a/../b" are not hidden.
const parts = raw.split("/").filter((part) => part.length > 0 && part !== ".");
const strip = Math.max(0, Math.floor(stripComponents));
const stripped = strip === 0 ? parts.join("/") : parts.slice(strip).join("/");
const result = path.posix.normalize(stripped);
@@ -126,16 +143,63 @@ function resolveCheckedOutPath(destDir: string, relPath: string, original: strin
return outPath;
}
/**
 * Coerce a caller-supplied limit into a positive integer.
 *
 * Returns `undefined` for missing, non-numeric, non-finite, zero, or
 * negative values so callers can `??` in a default instead.
 */
function clampLimit(value: number | undefined): number | undefined {
  if (typeof value === "number" && Number.isFinite(value)) {
    const floored = Math.floor(value);
    if (floored > 0) {
      return floored;
    }
  }
  return undefined;
}
/**
 * Merge caller-provided extraction limits with conservative defaults.
 *
 * Every field of the result is a positive integer: invalid or missing
 * values are discarded by clampLimit and replaced by defaults sized to be
 * defensive without breaking normal installs.
 */
function resolveExtractLimits(limits?: ArchiveExtractLimits): Required<ArchiveExtractLimits> {
  const MiB = 1024 * 1024;
  return {
    maxArchiveBytes: clampLimit(limits?.maxArchiveBytes) ?? 256 * MiB,
    maxEntries: clampLimit(limits?.maxEntries) ?? 50_000,
    maxExtractedBytes: clampLimit(limits?.maxExtractedBytes) ?? 512 * MiB,
    maxEntryBytes: clampLimit(limits?.maxEntryBytes) ?? 256 * MiB,
  };
}
/**
 * Build a pass-through Transform that reports each chunk's byte length to
 * `onChunkBytes` before forwarding the chunk unchanged.
 *
 * `onChunkBytes` may throw to abort the pipeline (used to enforce the
 * extraction byte budgets); the thrown value is surfaced to the stream as
 * an Error via the transform callback.
 */
function createExtractBudgetTransform(params: {
  onChunkBytes: (bytes: number) => void;
}): Transform {
  return new Transform({
    transform(chunk, _encoding, callback) {
      try {
        // Normalize to a Buffer so byteLength is well-defined for any input.
        const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as Uint8Array);
        params.onChunkBytes(data.byteLength);
        callback(null, data);
      } catch (err) {
        callback(err instanceof Error ? err : new Error(String(err)));
      }
    },
  });
}
async function extractZip(params: {
archivePath: string;
destDir: string;
stripComponents?: number;
limits?: ArchiveExtractLimits;
}): Promise<void> {
const limits = resolveExtractLimits(params.limits);
const stat = await fs.stat(params.archivePath);
if (stat.size > limits.maxArchiveBytes) {
throw new Error("archive size exceeds limit");
}
const buffer = await fs.readFile(params.archivePath);
const zip = await JSZip.loadAsync(buffer);
const entries = Object.values(zip.files);
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
if (entries.length > limits.maxEntries) {
throw new Error("archive entry count exceeds limit");
}
let extractedBytes = 0;
for (const entry of entries) {
validateArchiveEntryPath(entry.name);
@@ -152,8 +216,38 @@ async function extractZip(params: {
}
await fs.mkdir(path.dirname(outPath), { recursive: true });
const data = await entry.async("nodebuffer");
await fs.writeFile(outPath, data);
let entryBytes = 0;
const onChunkBytes = (bytes: number) => {
entryBytes += bytes;
if (entryBytes > limits.maxEntryBytes) {
throw new Error("archive entry extracted size exceeds limit");
}
extractedBytes += bytes;
if (extractedBytes > limits.maxExtractedBytes) {
throw new Error("archive extracted size exceeds limit");
}
};
const readable =
typeof entry.nodeStream === "function"
? (entry.nodeStream() as unknown)
: await entry.async("nodebuffer");
try {
if (readable instanceof Buffer) {
onChunkBytes(readable.byteLength);
await fs.writeFile(outPath, readable);
} else {
await pipeline(
readable as NodeJS.ReadableStream,
createExtractBudgetTransform({ onChunkBytes }),
createWriteStream(outPath),
);
}
} catch (err) {
await fs.unlink(outPath).catch(() => undefined);
throw err;
}
// Best-effort permission restore for zip entries created on unix.
if (typeof entry.unixPermissions === "number") {
@@ -172,6 +266,7 @@ export async function extractArchive(params: {
kind?: ArchiveKind;
stripComponents?: number;
tarGzip?: boolean;
limits?: ArchiveExtractLimits;
logger?: ArchiveLogger;
}): Promise<void> {
const kind = params.kind ?? resolveArchiveKind(params.archivePath);
@@ -182,7 +277,9 @@ export async function extractArchive(params: {
const label = kind === "zip" ? "extract zip" : "extract tar";
if (kind === "tar") {
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
let firstError: Error | undefined;
const limits = resolveExtractLimits(params.limits);
let entryCount = 0;
let extractedBytes = 0;
await withTimeout(
tar.x({
file: params.archivePath,
@@ -191,48 +288,69 @@ export async function extractArchive(params: {
gzip: params.tarGzip,
preservePaths: false,
strict: true,
filter: (entryPath, entry) => {
onReadEntry(entry) {
const archiveEntryPath =
typeof entry === "object" && entry !== null && "path" in entry
? String((entry as { path: unknown }).path)
: "";
const archiveEntryType =
typeof entry === "object" && entry !== null && "type" in entry
? String((entry as { type: unknown }).type)
: "";
const archiveEntrySize =
typeof entry === "object" &&
entry !== null &&
"size" in entry &&
typeof (entry as { size?: unknown }).size === "number" &&
Number.isFinite((entry as { size: number }).size)
? Math.max(0, Math.floor((entry as { size: number }).size))
: 0;
try {
validateArchiveEntryPath(entryPath);
// `tar`'s filter callback can pass either a ReadEntry or a Stats-ish object;
// fail closed for any link-like entries.
const entryType =
typeof entry === "object" &&
entry !== null &&
"type" in entry &&
typeof (entry as { type?: unknown }).type === "string"
? (entry as { type: string }).type
: undefined;
const isSymlink =
typeof entry === "object" &&
entry !== null &&
"isSymbolicLink" in entry &&
typeof (entry as { isSymbolicLink?: unknown }).isSymbolicLink === "function" &&
Boolean((entry as { isSymbolicLink: () => boolean }).isSymbolicLink());
if (entryType === "SymbolicLink" || entryType === "Link" || isSymlink) {
throw new Error(`tar entry is a link: ${entryPath}`);
}
const relPath = stripArchivePath(entryPath, strip);
validateArchiveEntryPath(archiveEntryPath);
const relPath = stripArchivePath(archiveEntryPath, strip);
if (!relPath) {
return false;
return;
}
validateArchiveEntryPath(relPath);
resolveCheckedOutPath(params.destDir, relPath, entryPath);
return true;
} catch (err) {
if (!firstError) {
firstError = err instanceof Error ? err : new Error(String(err));
resolveCheckedOutPath(params.destDir, relPath, archiveEntryPath);
if (
archiveEntryType === "SymbolicLink" ||
archiveEntryType === "Link" ||
archiveEntryType === "BlockDevice" ||
archiveEntryType === "CharacterDevice" ||
archiveEntryType === "FIFO" ||
archiveEntryType === "Socket"
) {
throw new Error(`tar entry is a link: ${archiveEntryPath}`);
}
return false;
entryCount += 1;
if (entryCount > limits.maxEntries) {
throw new Error("archive entry count exceeds limit");
}
if (archiveEntrySize > limits.maxEntryBytes) {
throw new Error("archive entry extracted size exceeds limit");
}
extractedBytes += archiveEntrySize;
if (extractedBytes > limits.maxExtractedBytes) {
throw new Error("archive extracted size exceeds limit");
}
} catch (err) {
const error = err instanceof Error ? err : new Error(String(err));
// Node's EventEmitter calls listeners with `this` bound to the
// emitter (tar.Unpack), which exposes Parser.abort().
const emitter = this as unknown as { abort?: (error: Error) => void };
emitter.abort?.(error);
}
},
}),
params.timeoutMs,
label,
);
if (firstError) {
throw firstError;
}
return;
}
@@ -241,6 +359,7 @@ export async function extractArchive(params: {
archivePath: params.archivePath,
destDir: params.destDir,
stripComponents: params.stripComponents,
limits: params.limits,
}),
params.timeoutMs,
label,