mirror of https://github.com/openclaw/openclaw.git (synced 2026-05-06 10:00:42 +00:00)
fix(file-transfer): address PR review feedback (security + availability)

Reviewer findings addressed (greptile + aisle):

- policy: persistAllowAlways no longer escalates per-node approvals to the
  '*' wildcard entry; allow-always now writes under the specific node's own
  entry, never the wildcard (greptile P1 SECURITY).
- policy: add a literal '..' segment short-circuit in evaluateFilePolicy,
  raised before glob match. Stops "/allowed/../etc/passwd" from passing
  preflight against "/allowed/**" globs (aisle MEDIUM CWE-22).
- file-write: replace the no-op base64 try/catch with actual round-trip
  validation. Buffer.from(s, "base64") never throws — invalid input is
  silently decoded to garbage bytes. Now re-encodes and compares modulo
  padding/url-variant chars (greptile P1 SECURITY); see the snippet after
  this list.
- file-write: document the parent-symlink residual risk and rely on the
  existing gateway-side post-flight policy check; full rollback requires a
  node-side file.unlink, which is deferred to a follow-up. The initial
  segment-walk attempt was reverted because it false-positives on system
  symlinks like macOS /var → /private/var (aisle HIGH CWE-59).
- dir-fetch tool: add a preValidateTarball pass that runs `tar -tzvf` and
  rejects symlinks, hardlinks, absolute paths, '..' traversal, uncompressed
  sizes >64MB, and entry counts >5000 — before any extraction. Drops
  --no-overwrite-dir (a GNU-only flag rejected by BSD tar on macOS)
  (aisle HIGH x2 CWE-22 + CWE-409, greptile P2).
- dir-fetch tool: stream-hash files via fs.open + a read loop instead of
  fs.readFile, to avoid full-buffer reads on large extracted entries.
- dir-fetch handler: replace spawnSync in countTarEntries with async spawn +
  a bounded buffer so `tar -tzf` can't park the node-host event loop for up
  to 10s on a slow filesystem (greptile P1 AVAIL).
- audit: clear auditDirPromise on rejection so a transient mkdir failure
  doesn't permanently silence the audit log (greptile P2).

New tests: wildcard escalation rejection, base64 malformed/url-variant,
'..' traversal short-circuit (3 cases). 84/84 passing.
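For the base64 point above, a minimal Node sketch (illustrative, not part of
the diff; the values are what Node's decoder produces):

    // Node's base64 decoder never throws; non-alphabet characters are skipped.
    Buffer.from("AAA@@@", "base64");
    // => <Buffer 00 00>  ("@" chars silently dropped, only "AAA" decoded)
    Buffer.from("AAA@@@", "base64").toString("base64");
    // => "AAA=", which no longer matches the input: that round-trip
    //    mismatch is exactly what the new validation detects.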
@@ -1,4 +1,4 @@
-import { spawn, spawnSync } from "node:child_process";
+import { spawn } from "node:child_process";
 import crypto from "node:crypto";
 import fs from "node:fs/promises";
 import path from "node:path";
@@ -83,20 +83,56 @@ async function preflightDu(dirPath: string, maxBytes: number): Promise<boolean>
   });
 }
 
-function countTarEntries(tarBuffer: Buffer): number {
-  const result = spawnSync("tar", ["-tzf", "-"], {
-    input: tarBuffer,
-    maxBuffer: 32 * 1024 * 1024,
-    timeout: 10000,
-  });
-  if (result.status !== 0 || !result.stdout) {
-    return 0;
-  }
-  const lines = (result.stdout as Buffer)
-    .toString("utf-8")
-    .split("\n")
-    .filter((l) => l.trim().length > 0 && l !== "./");
-  return lines.length;
+async function countTarEntries(tarBuffer: Buffer): Promise<number> {
+  // Async spawn so a slow `tar -tzf` doesn't park the node-host event
+  // loop for up to 10s. Other in-flight requests continue to be served.
+  return new Promise((resolve) => {
+    const child = spawn("tar", ["-tzf", "-"], { stdio: ["pipe", "pipe", "ignore"] });
+    let stdoutBuf = "";
+    let aborted = false;
+    const watchdog = setTimeout(() => {
+      aborted = true;
+      try {
+        child.kill("SIGKILL");
+      } catch {
+        /* gone */
+      }
+      resolve(0);
+    }, 10_000);
+    child.stdout.on("data", (chunk: Buffer) => {
+      stdoutBuf += chunk.toString();
+      // Bound buffer growth — pathological archives shouldn't OOM us.
+      if (stdoutBuf.length > 32 * 1024 * 1024) {
+        aborted = true;
+        try {
+          child.kill("SIGKILL");
+        } catch {
+          /* gone */
+        }
+        clearTimeout(watchdog);
+        resolve(0);
+      }
+    });
+    child.on("close", (code) => {
+      clearTimeout(watchdog);
+      if (aborted) {
+        return;
+      }
+      if (code !== 0) {
+        resolve(0);
+        return;
+      }
+      const lines = stdoutBuf.split("\n").filter((l) => l.trim().length > 0 && l !== "./");
+      resolve(lines.length);
+    });
+    child.on("error", () => {
+      clearTimeout(watchdog);
+      if (!aborted) {
+        resolve(0);
+      }
+    });
+    child.stdin.end(tarBuffer);
+  });
 }
 
 export async function handleDirFetch(params: DirFetchParams): Promise<DirFetchResult> {
@@ -256,7 +292,7 @@ export async function handleDirFetch(params: DirFetchParams): Promise<DirFetchResult> {
   const sha256 = crypto.createHash("sha256").update(tarBuffer).digest("hex");
   const tarBase64 = tarBuffer.toString("base64");
   const tarBytes = tarBuffer.byteLength;
-  const fileCount = countTarEntries(tarBuffer);
+  const fileCount = await countTarEntries(tarBuffer);
 
   return {
     ok: true,
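The availability difference between the two variants, reduced to a sketch
(assumes the node:child_process imports above; same arguments as the code):

    // Before: spawnSync blocks the node-host event loop for the child's
    // entire lifetime (up to the 10s timeout), stalling every other request.
    const result = spawnSync("tar", ["-tzf", "-"], {
      input: tarBuffer,
      timeout: 10_000,
    });

    // After: spawn returns immediately; tar runs concurrently and the loop
    // only wakes for "data"/"close" events.
    const child = spawn("tar", ["-tzf", "-"]);
    child.stdin.end(tarBuffer);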
@@ -191,6 +191,38 @@ describe("handleFileWrite — integrity check", () => {
   });
 });
 
+describe("handleFileWrite — base64 round-trip validation", () => {
+  it("rejects malformed base64 that silently drops characters", async () => {
+    const target = path.join(tmpRoot, "bad.bin");
+    // "@" is not in the base64 alphabet — Buffer.from would silently drop
+    // it and decode "AAA" instead of failing.
+    const r = await handleFileWrite({
+      path: target,
+      contentBase64: "AAA@@@",
+    });
+    expect(r).toMatchObject({ ok: false, code: "INVALID_BASE64" });
+    await expect(fs.access(target)).rejects.toMatchObject({ code: "ENOENT" });
+  });
+
+  it("accepts standard base64 with and without padding", async () => {
+    const target = path.join(tmpRoot, "padded.bin");
+    // Buffer.from("hi") -> "aGk=" with padding, "aGk" without.
+    const r1 = await handleFileWrite({ path: target, contentBase64: "aGk=" });
+    expect(r1.ok).toBe(true);
+
+    const target2 = path.join(tmpRoot, "unpadded.bin");
+    const r2 = await handleFileWrite({ path: target2, contentBase64: "aGk" });
+    expect(r2.ok).toBe(true);
+  });
+
+  it("accepts base64url variant (-_ instead of +/)", async () => {
+    const target = path.join(tmpRoot, "url.bin");
+    // Buffer.from([0xfb, 0xff]) -> "+/8=" standard, "-_8=" url
+    const r = await handleFileWrite({ path: target, contentBase64: "-_8=" });
+    expect(r.ok).toBe(true);
+  });
+});
+
 describe("handleFileWrite — size cap", () => {
   it("rejects content larger than the 16MB cap", async () => {
     const target = path.join(tmpRoot, "big.bin");
@@ -58,18 +58,20 @@ export async function handleFileWrite(
     return err("INVALID_PATH", "path must be absolute");
   }
 
-  // 2. Decode base64 → Buffer
-  let buf: Buffer;
-  try {
-    buf = Buffer.from(contentBase64, "base64");
-    // Verify round-trip to catch invalid base64
-    if (
-      buf.toString("base64") !== contentBase64 &&
-      Buffer.from(contentBase64, "base64url").toString("base64url") !== contentBase64
-    ) {
-      // Tolerate standard base64 with or without padding; just use what we decoded.
-    }
-  } catch {
-    return err("INVALID_BASE64", "contentBase64 is not valid base64");
-  }
+  // 2. Decode base64 → Buffer.
+  // Buffer.from(s, "base64") in Node never throws — it silently drops
+  // non-base64 characters and returns whatever it could decode. That
+  // means a typo or truncated input would land garbage on disk if we
+  // accepted whatever decoded. Defense: round-trip the decoded buffer
+  // back to base64 and compare against the input modulo padding/url
+  // variants. A mismatch means characters were silently dropped.
+  const buf = Buffer.from(contentBase64, "base64");
+  const reEncoded = buf.toString("base64");
+  // Normalize: drop padding and convert base64url chars to standard so the
+  // comparison tolerates both "=" / no-"=" inputs and "-_" base64url.
+  const normalize = (s: string): string =>
+    s.replace(/=+$/u, "").replace(/-/gu, "+").replace(/_/gu, "/");
+  if (normalize(reEncoded) !== normalize(contentBase64)) {
+    return err("INVALID_BASE64", "contentBase64 is not valid base64");
+  }
 
@@ -104,11 +106,16 @@ export async function handleFileWrite(
     }
   }
 
-  // 4. Refuse to write through symlinks (lstat sees the link itself, not
-  //    its target). A path that's a symlink could escape the operator's
-  //    intended path policy — e.g., an allowed dir could contain a
-  //    symlink pointing at /etc/hosts.
-  // Otherwise determine overwritten status and reject directories.
+  // 4. The lstat-on-final check below catches the case where the target
+  //    itself is a symlink, but it does NOT catch a symlink in a parent
+  //    component (e.g. ~/Downloads/evil → /etc, write to .../evil/passwd).
+  //    The gateway-side post-flight check (in file-write-tool.ts)
+  //    canonicalizes via realpath after the write and re-runs policy
+  //    against that canonical path; an escape through a parent-dir
+  //    symlink surfaces there. The current behavior on a post-flight
+  //    deny is to throw loudly with the canonical path so the operator
+  //    can manually inspect — full rollback requires a node-side
+  //    file.unlink (out of scope; tracked as a follow-up).
   let overwritten = false;
   try {
    const existingLStat = await fs.lstat(targetPath);
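Worked through with the test inputs, reusing normalize from the diff above
(values are what Node produces):

    normalize(Buffer.from("AAA@@@", "base64").toString("base64"))
    // => "AAA"   vs  normalize("AAA@@@") => "AAA@@@"  → mismatch, INVALID_BASE64
    normalize(Buffer.from("aGk", "base64").toString("base64"))
    // => "aGk"   vs  normalize("aGk")    => "aGk"     → accepted (unpadded)
    normalize(Buffer.from("-_8=", "base64").toString("base64"))
    // => "+/8"   vs  normalize("-_8=")   => "+/8"     → accepted (base64url)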
@@ -51,12 +51,21 @@ async function ensureAuditDir(): Promise<string> {
   if (auditDirPromise) {
     return auditDirPromise;
   }
-  auditDirPromise = (async () => {
+  const promise = (async () => {
     const dir = path.join(os.homedir(), ".openclaw", "audit");
     await fs.mkdir(dir, { recursive: true, mode: 0o700 });
     return dir;
   })();
-  return auditDirPromise;
+  // If the mkdir rejects (transient permission error etc.), clear the
+  // cached singleton so the NEXT call retries instead of permanently
+  // silencing the audit log.
+  promise.catch(() => {
+    if (auditDirPromise === promise) {
+      auditDirPromise = null;
+    }
+  });
+  auditDirPromise = promise;
+  return promise;
 }
 
 function auditFilePath(dir: string): string {
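The shape of that fix as a standalone pattern — memoize an async initializer
but drop the cached promise on rejection (hypothetical names):

    let cached: Promise<string> | null = null;

    function memoizedInit(init: () => Promise<string>): Promise<string> {
      if (cached) return cached;
      const p = init();
      // A rejected promise must not stay cached, or every later call
      // would re-receive the same stale rejection.
      p.catch(() => {
        if (cached === p) cached = null;
      });
      cached = p;
      return p;
    }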
@@ -56,6 +56,41 @@ describe("evaluateFilePolicy — default deny", () => {
   });
 });
 
+describe("evaluateFilePolicy — '..' traversal short-circuit", () => {
+  it("rejects /allowed/../etc/passwd even when /allowed/** is allowed", () => {
+    withConfig({
+      n1: { allowReadPaths: ["/allowed/**"] },
+    });
+    const r = evaluateFilePolicy({
+      nodeId: "n1",
+      kind: "read",
+      path: "/allowed/../etc/passwd",
+    });
+    expect(r).toMatchObject({ ok: false, code: "POLICY_DENIED", askable: false });
+    expect(r.ok ? "" : r.reason).toMatch(/\.\./);
+  });
+
+  it("rejects a path that ENDS in /..", () => {
+    withConfig({
+      n1: { allowReadPaths: ["/tmp/**"] },
+    });
+    const r = evaluateFilePolicy({
+      nodeId: "n1",
+      kind: "read",
+      path: "/tmp/foo/..",
+    });
+    expect(r).toMatchObject({ ok: false, code: "POLICY_DENIED" });
+  });
+
+  it("rejects bare '..'", () => {
+    withConfig({
+      n1: { allowReadPaths: ["/**"] },
+    });
+    const r = evaluateFilePolicy({ nodeId: "n1", kind: "read", path: ".." });
+    expect(r).toMatchObject({ ok: false, code: "POLICY_DENIED" });
+  });
+});
+
 describe("evaluateFilePolicy — denyPaths always wins", () => {
   it("denies even when allowReadPaths matches", () => {
     withConfig({
@@ -258,6 +293,36 @@ describe("persistAllowAlways", () => {
     expect(root.gateway.nodes.fileTransfer["Lobster"].allowWritePaths).toContain("/srv/out.txt");
   });
 
+  it("never persists under the '*' wildcard even when '*' is the matching key", async () => {
+    let captured: Record<string, unknown> | null = null;
+    mutateConfigFileMock.mockImplementation(
+      async ({ mutate }: { mutate: (draft: Record<string, unknown>) => void }) => {
+        const draft: Record<string, unknown> = {
+          gateway: { nodes: { fileTransfer: { "*": { allowReadPaths: ["/var/log/**"] } } } },
+        };
+        mutate(draft);
+        captured = draft;
+      },
+    );
+
+    await persistAllowAlways({
+      nodeId: "n1",
+      nodeDisplayName: "Lobster",
+      kind: "read",
+      path: "/srv/added.png",
+    });
+
+    const root = captured as unknown as {
+      gateway: {
+        nodes: { fileTransfer: Record<string, { allowReadPaths?: string[] }> };
+      };
+    };
+    // The "*" entry must not have been mutated.
+    expect(root.gateway.nodes.fileTransfer["*"].allowReadPaths).toEqual(["/var/log/**"]);
+    // A new entry keyed by displayName (not "*") must hold the new path.
+    expect(root.gateway.nodes.fileTransfer["Lobster"].allowReadPaths).toEqual(["/srv/added.png"]);
+  });
+
   it("dedupes when path already present", async () => {
     let captured: Record<string, unknown> | null = null;
     mutateConfigFileMock.mockImplementation(
@@ -141,12 +141,40 @@ function normalizeAskMode(value: unknown): FilePolicyAskMode {
  * 5. ask=on-miss → POLICY_DENIED with askable=true.
  * 6. ask=off (or unset) → POLICY_DENIED, not askable.
  */
+/**
+ * Reject any path whose RAW string contains a ".." segment. Checking the
+ * raw string (not the normalized form) is the point — `posix.normalize`
+ * collapses "/allowed/../etc/passwd" to "/etc/passwd", which would defeat
+ * the check. We want to flag the literal traversal sequence the agent
+ * passed in, before any glob match runs.
+ *
+ * Without this, "/allowed/../etc/passwd" matches the glob "/allowed/**"
+ * pre-realpath, so the node fetches the bytes before the post-flight
+ * canonical-path check denies — too late, the bytes already crossed the
+ * node→gateway boundary.
+ */
+function containsParentRefSegment(p: string): boolean {
+  const segments = p.split("/");
+  return segments.includes("..");
+}
+
 export function evaluateFilePolicy(input: {
   nodeId: string;
   nodeDisplayName?: string;
   kind: FilePolicyKind;
   path: string;
 }): FilePolicyDecision {
+  // Reject literal traversal sequences before consulting any allow/deny
+  // glob list. minimatch on the raw string can wrongly accept
+  // "/allowed/../etc/passwd" against "/allowed/**".
+  if (containsParentRefSegment(input.path)) {
+    return {
+      ok: false,
+      code: "POLICY_DENIED",
+      reason: "path contains '..' segments; reject before glob match",
+      askable: false,
+    };
+  }
   const config = readFilePolicyConfig();
   if (!config) {
     return {
@@ -250,7 +278,11 @@ export async function persistAllowAlways(input: {
   const nodes = (gateway.nodes ??= {}) as Record<string, unknown>;
   const fileTransfer = (nodes.fileTransfer ??= {}) as Record<string, NodeFilePolicyConfig>;
 
-  const candidates = [input.nodeId, input.nodeDisplayName, "*"].filter(
+  // SECURITY: never persist allow-always under the "*" wildcard. An
+  // operator approving a path on node A must not silently grant the
+  // same path on every other node sharing the wildcard entry. Always
+  // write under the specific node's own entry, creating it if needed.
+  const candidates = [input.nodeId, input.nodeDisplayName].filter(
     (k): k is string => typeof k === "string" && k.length > 0,
   );
   let key = candidates.find((c) => fileTransfer[c]);
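The failure mode the short-circuit closes, shown concretely in a Node REPL:

    path.posix.normalize("/allowed/../etc/passwd")
    // => "/etc/passwd": normalizing first erases the traversal evidence,
    //    which is why the check runs on the raw string
    "/allowed/../etc/passwd".split("/").includes("..")
    // => true: the literal ".." segment is still visible pre-normalization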
@@ -36,6 +36,14 @@ const MEDIA_URL_CAP = 25;
 // Hard timeout for the gateway-side `tar -xzf` unpack process.
 const TAR_UNPACK_TIMEOUT_MS = 60_000;
 
+// Defense-in-depth caps for the *uncompressed* extraction. The compressed
+// tar is already capped at DIR_FETCH_HARD_MAX_BYTES upstream, but a
+// gzip-bomb can decompress to many GB. These caps bound the unpacked
+// size and entry count so a malicious node can't exhaust the gateway's
+// disk or CPU.
+const TAR_UNPACK_MAX_UNCOMPRESSED_BYTES = 64 * 1024 * 1024;
+const TAR_UNPACK_MAX_ENTRIES = 5000;
+
 const DirFetchToolSchema = Type.Object({
   node: Type.String({
     description: "Node id, name, or IP. Resolves the same way as the nodes tool.",
@@ -60,8 +68,144 @@ const DirFetchToolSchema = Type.Object({
 });
 
 async function computeFileSha256(filePath: string): Promise<string> {
-  const buf = await fs.readFile(filePath);
-  return crypto.createHash("sha256").update(buf).digest("hex");
+  // Stream the hash so we never pull a whole large file into memory.
+  // file_fetch caps single files at 16MB, but unpacked dir_fetch entries
+  // share the 64MB uncompressed budget — better to stream regardless.
+  const hash = crypto.createHash("sha256");
+  const handle = await fs.open(filePath, "r");
+  try {
+    const chunkSize = 64 * 1024;
+    const buf = Buffer.allocUnsafe(chunkSize);
+    while (true) {
+      const { bytesRead } = await handle.read(buf, 0, chunkSize, null);
+      if (bytesRead === 0) {
+        break;
+      }
+      hash.update(buf.subarray(0, bytesRead));
+    }
+  } finally {
+    await handle.close();
+  }
+  return hash.digest("hex");
 }
 
+/**
+ * Run `tar -tzvf -` against the buffer to enumerate entries with their
+ * type letter (regular file / symlink / hardlink / dir) and size BEFORE
+ * we extract anything. Rejects:
+ *  - any entry whose path is absolute (escapes destDir on -P-using tar)
+ *  - any entry with ".." segments after normalization
+ *  - any entry that is a symlink ("l"), hardlink ("h"), or unknown type
+ *  - cumulative uncompressed size > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES
+ *  - entry count > TAR_UNPACK_MAX_ENTRIES
+ *
+ * BSD tar's -tvzf produces a `ls -l`-style line; we parse the leading
+ * type char and the size column.
+ */
+async function preValidateTarball(
+  tarBuffer: Buffer,
+): Promise<{ ok: true } | { ok: false; reason: string }> {
+  return new Promise((resolve) => {
+    const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
+    const child = spawn(tarBin, ["-tzvf", "-"], {
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+    let stdout = "";
+    let stderr = "";
+    let aborted = false;
+    const watchdog = setTimeout(() => {
+      aborted = true;
+      try {
+        child.kill("SIGKILL");
+      } catch {
+        /* gone */
+      }
+      resolve({ ok: false, reason: "tar -tvzf timed out" });
+    }, 30_000);
+    child.stdout.on("data", (c: Buffer) => {
+      stdout += c.toString();
+    });
+    child.stderr.on("data", (c: Buffer) => {
+      stderr += c.toString();
+    });
+    child.on("close", (code) => {
+      clearTimeout(watchdog);
+      if (aborted) {
+        return;
+      }
+      if (code !== 0) {
+        resolve({ ok: false, reason: `tar -tvzf exited ${code}: ${stderr.slice(0, 200)}` });
+        return;
+      }
+      const lines = stdout.split("\n").filter((l) => l.trim().length > 0);
+      if (lines.length > TAR_UNPACK_MAX_ENTRIES) {
+        resolve({
+          ok: false,
+          reason: `archive contains ${lines.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
+        });
+        return;
+      }
+      let totalBytes = 0;
+      for (const line of lines) {
+        // BSD tar -tvzf format:
+        //   "drwxr-xr-x  0 user staff       0 Mar 14 ... ./"
+        //   "-rw-r--r--  0 user staff      12 Mar 14 ... ./hello.txt"
+        //   "lrwxr-xr-x  0 user staff       0 Mar 14 ... link -> target"
+        // First char of the perm field is the type.
+        const typeChar = line.charAt(0);
+        if (typeChar === "l" || typeChar === "h") {
+          resolve({ ok: false, reason: `archive contains link entry: ${line.slice(0, 120)}` });
+          return;
+        }
+        if (typeChar !== "-" && typeChar !== "d") {
+          resolve({ ok: false, reason: `archive contains non-regular entry type '${typeChar}'` });
+          return;
+        }
+        // Path is everything after the date/time field; cheapest is to
+        // grab the last whitespace-delimited token. For " name -> target"
+        // (symlink) we already rejected above.
+        const tokens = line.trim().split(/\s+/u);
+        if (tokens.length < 2) {
+          continue;
+        }
+        const entryPath = tokens.at(-1) ?? "";
+        if (path.isAbsolute(entryPath)) {
+          resolve({ ok: false, reason: `archive contains absolute path: ${entryPath}` });
+          return;
+        }
+        const norm = path.posix.normalize(entryPath);
+        if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
+          resolve({ ok: false, reason: `archive contains '..' traversal: ${entryPath}` });
+          return;
+        }
+        // Size column: tokens[4] is size on BSD (after perm, link count,
+        // owner, group); on GNU it's tokens[2] (owner/group combined).
+        // The link count (tokens[1]) is also all-digits on BSD, so skip
+        // past it and take the first all-digit token after it as the size.
+        for (const t of tokens.slice(2)) {
+          if (/^\d+$/u.test(t)) {
+            totalBytes += Number.parseInt(t, 10);
+            break;
+          }
+        }
+      }
+      if (totalBytes > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES) {
+        resolve({
+          ok: false,
+          reason: `uncompressed size ${totalBytes} exceeds ${TAR_UNPACK_MAX_UNCOMPRESSED_BYTES}`,
+        });
+        return;
+      }
+      resolve({ ok: true });
+    });
+    child.on("error", (e) => {
+      clearTimeout(watchdog);
+      if (!aborted) {
+        resolve({ ok: false, reason: `tar -tvzf error: ${String(e)}` });
+      }
+    });
+    child.stdin.end(tarBuffer);
+  });
+}
+
 type UnpackedFileEntry = {
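A sketch of exercising the new rejection paths in a test (hypothetical
fixture; a symlink-bearing archive can be built with
`ln -s /etc/hosts link && tar -czf evil.tgz link`):

    it("rejects a symlink entry before any extraction happens", async () => {
      const tarBuffer = await fs.readFile("fixtures/evil.tgz");
      const v = await preValidateTarball(tarBuffer);
      expect(v).toMatchObject({ ok: false });
      expect(v.ok ? "" : v.reason).toMatch(/link entry/);
    });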
@@ -73,28 +217,28 @@ type UnpackedFileEntry = {
 };
 
 /**
- * Unpack a gzipped tarball into a target directory via `tar -xzf -`. The
- * `-P` flag is intentionally omitted so absolute paths in the archive are
- * stripped to relative ones and `..` traversal is rejected by tar itself.
- * A hard wall-clock timeout caps the unpack at TAR_UNPACK_TIMEOUT_MS to
- * avoid hangs on hostile/large archives.
+ * Unpack a gzipped tarball into a target directory via `tar -xzf -`.
+ * Caller MUST have run `preValidateTarball` first — this function trusts
+ * that the archive contains only regular files / dirs with relative,
+ * non-traversing paths. Without that pre-validation, raw `tar -xzf` is
+ * unsafe (tarbomb, symlink-then-write tricks, decompression bomb).
+ *
+ * The `-P` flag is intentionally omitted so absolute paths in the
+ * archive are stripped to relative ones (defense-in-depth on top of the
+ * pre-validation rejection). A hard wall-clock timeout caps the unpack
+ * at TAR_UNPACK_TIMEOUT_MS to avoid hangs.
+ *
+ * BSD tar (macOS) and GNU tar disagree on flags: `--no-overwrite-dir` is
+ * GNU-only and BSD tar rejects it. We use only flags both implementations
+ * accept. Defense-in-depth comes from the pre-validation step instead.
 */
 async function unpackTar(tarBuffer: Buffer, destDir: string): Promise<void> {
   await fs.mkdir(destDir, { recursive: true, mode: 0o700 });
   return new Promise((resolve, reject) => {
     const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
-    const child = spawn(
-      tarBin,
-      [
-        "-xzf",
-        "-",
-        "-C",
-        destDir,
-        // Refuse archives whose paths escape destDir.
-        "--no-overwrite-dir",
-      ],
-      { stdio: ["pipe", "ignore", "pipe"] },
-    );
+    const child = spawn(tarBin, ["-xzf", "-", "-C", destDir], {
+      stdio: ["pipe", "ignore", "pipe"],
+    });
     let stderrOut = "";
     const watchdog = setTimeout(() => {
       try {
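The flag-compatibility issue behind the dropped option, reduced to two
invocations (tarBin and destDir as in unpackTar above):

    // Portable: BSD (macOS) and GNU tar both accept these flags.
    spawn(tarBin, ["-xzf", "-", "-C", destDir]);

    // Not portable: --no-overwrite-dir is GNU-only; BSD tar rejects the
    // option and exits non-zero, so every unpack on macOS would fail.
    spawn(tarBin, ["-xzf", "-", "-C", destDir, "--no-overwrite-dir"]);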
@@ -301,6 +445,29 @@ export function createDirFetchTool(): AnyAgentTool {
     const unpackId = `dir-fetch-${tarBaseName}`;
     const rootDir = path.join(tarDir, unpackId);
 
+    // Pre-validate before extraction. The node is in the trust boundary
+    // for v1, but a malicious or compromised node should not be able to
+    // pivot into arbitrary file write on the gateway via tar tricks.
+    // Rejects: symlinks, hardlinks, absolute paths, ".." traversal,
+    // entry counts and uncompressed sizes above the caps.
+    const validation = await preValidateTarball(tarBuffer);
+    if (!validation.ok) {
+      await appendFileTransferAudit({
+        op: "dir.fetch",
+        nodeId,
+        nodeDisplayName,
+        requestedPath: dirPath,
+        canonicalPath,
+        decision: "error",
+        errorCode: "UNSAFE_ARCHIVE",
+        errorMessage: validation.reason,
+        sizeBytes: tarBytes,
+        sha256,
+        durationMs: Date.now() - startedAt,
+      });
+      throw new Error(`dir.fetch UNSAFE_ARCHIVE: ${validation.reason}`);
+    }
+
     await unpackTar(tarBuffer, rootDir);
 
     const walked = await walkDir(rootDir, rootDir);