fix(file-transfer): address PR review feedback (security + availability)

Reviewer findings addressed (greptile + aisle):

- policy: persistAllowAlways no longer escalates per-node approvals to the
  '*' wildcard entry; allow-always now writes under the specific node's
  own entry, never the wildcard (greptile P1 SECURITY).
- policy: add literal '..' segment short-circuit in evaluateFilePolicy,
  raised before glob match. Stops "/allowed/../etc/passwd" from passing
  preflight against "/allowed/**" globs (aisle MEDIUM CWE-22).
- file-write: replace the no-op base64 try/catch with actual round-trip
  validation. Buffer.from(s, "base64") never throws — invalid input is
  silently decoded to garbage bytes. Now re-encodes and compares,
  modulo padding/url-variant characters (greptile P1 SECURITY).
- file-write: document the parent-symlink residual risk and rely on the
  existing gateway-side post-flight policy check; full rollback requires
  a node-side file.unlink which is deferred to a follow-up. Initial
  segment-walk attempt was reverted because it false-positives on system
  symlinks like macOS /var → /private/var (aisle HIGH CWE-59).
- dir-fetch tool: add preValidateTarball pass that runs `tar -tzvf` and
  rejects symlinks, hardlinks, absolute paths, '..' traversal,
  uncompressed sizes >64MB, and entry counts >5000 — before any
  extraction. Drops --no-overwrite-dir (GNU-only flag rejected by BSD
  tar on macOS) (aisle HIGH x2 CWE-22 + CWE-409, greptile P2).
- dir-fetch tool: stream-hash files via fs.open + read loop instead of
  fs.readFile to avoid full-buffer reads on large extracted entries.
- dir-fetch handler: replace spawnSync in countTarEntries with async
  spawn + bounded buffer so tar -tzf can't park the node-host event
  loop for up to 10s on a slow filesystem (greptile P1 AVAIL).
- audit: clear auditDirPromise on rejection so a transient mkdir
  failure doesn't permanently silence the audit log (greptile P2).

New tests: wildcard escalation rejection, base64 malformed/url-variant,
'..' traversal short-circuit (3 cases). 84/84 passing.
This commit is contained in:
Omar Shahine
2026-04-29 06:43:33 +00:00
parent db6b4b41e1
commit b97af08703
7 changed files with 402 additions and 54 deletions

View File

@@ -1,4 +1,4 @@
import { spawn, spawnSync } from "node:child_process";
import { spawn } from "node:child_process";
import crypto from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
@@ -83,20 +83,56 @@ async function preflightDu(dirPath: string, maxBytes: number): Promise<boolean>
});
}
function countTarEntries(tarBuffer: Buffer): number {
const result = spawnSync("tar", ["-tzf", "-"], {
input: tarBuffer,
maxBuffer: 32 * 1024 * 1024,
timeout: 10000,
async function countTarEntries(tarBuffer: Buffer): Promise<number> {
// Async spawn so a slow `tar -tzf` doesn't park the node-host event
// loop for up to 10s. Other in-flight requests continue to be served.
return new Promise((resolve) => {
const child = spawn("tar", ["-tzf", "-"], { stdio: ["pipe", "pipe", "ignore"] });
let stdoutBuf = "";
let aborted = false;
const watchdog = setTimeout(() => {
aborted = true;
try {
child.kill("SIGKILL");
} catch {
/* gone */
}
resolve(0);
}, 10_000);
child.stdout.on("data", (chunk: Buffer) => {
stdoutBuf += chunk.toString();
// Bound buffer growth — pathological archives shouldn't OOM us.
if (stdoutBuf.length > 32 * 1024 * 1024) {
aborted = true;
try {
child.kill("SIGKILL");
} catch {
/* gone */
}
clearTimeout(watchdog);
resolve(0);
}
});
child.on("close", (code) => {
clearTimeout(watchdog);
if (aborted) {
return;
}
if (code !== 0) {
resolve(0);
return;
}
const lines = stdoutBuf.split("\n").filter((l) => l.trim().length > 0 && l !== "./");
resolve(lines.length);
});
child.on("error", () => {
clearTimeout(watchdog);
if (!aborted) {
resolve(0);
}
});
child.stdin.end(tarBuffer);
});
if (result.status !== 0 || !result.stdout) {
return 0;
}
const lines = (result.stdout as Buffer)
.toString("utf-8")
.split("\n")
.filter((l) => l.trim().length > 0 && l !== "./");
return lines.length;
}
export async function handleDirFetch(params: DirFetchParams): Promise<DirFetchResult> {
@@ -256,7 +292,7 @@ export async function handleDirFetch(params: DirFetchParams): Promise<DirFetchRe
const sha256 = crypto.createHash("sha256").update(tarBuffer).digest("hex");
const tarBase64 = tarBuffer.toString("base64");
const tarBytes = tarBuffer.byteLength;
const fileCount = countTarEntries(tarBuffer);
const fileCount = await countTarEntries(tarBuffer);
return {
ok: true,

View File

@@ -191,6 +191,38 @@ describe("handleFileWrite — integrity check", () => {
});
});
describe("handleFileWrite — base64 round-trip validation", () => {
  it("rejects malformed base64 that silently drops characters", async () => {
    const dest = path.join(tmpRoot, "bad.bin");
    // "@" is outside the base64 alphabet; Buffer.from would quietly
    // discard it and decode "AAA" rather than reporting an error.
    const res = await handleFileWrite({
      path: dest,
      contentBase64: "AAA@@@",
    });
    expect(res).toMatchObject({ ok: false, code: "INVALID_BASE64" });
    // Nothing may land on disk when the input is rejected.
    await expect(fs.access(dest)).rejects.toMatchObject({ code: "ENOENT" });
  });
  it("accepts standard base64 with and without padding", async () => {
    // Buffer.from("hi") encodes as "aGk=" padded, "aGk" unpadded.
    const padded = await handleFileWrite({
      path: path.join(tmpRoot, "padded.bin"),
      contentBase64: "aGk=",
    });
    expect(padded.ok).toBe(true);
    const unpadded = await handleFileWrite({
      path: path.join(tmpRoot, "unpadded.bin"),
      contentBase64: "aGk",
    });
    expect(unpadded.ok).toBe(true);
  });
  it("accepts base64url variant (-_ instead of +/)", async () => {
    // Buffer.from([0xfb, 0xff]) is "+/8=" in standard base64, "-_8=" url-safe.
    const res = await handleFileWrite({
      path: path.join(tmpRoot, "url.bin"),
      contentBase64: "-_8=",
    });
    expect(res.ok).toBe(true);
  });
});
describe("handleFileWrite — size cap", () => {
it("rejects content larger than the 16MB cap", async () => {
const target = path.join(tmpRoot, "big.bin");

View File

@@ -58,18 +58,20 @@ export async function handleFileWrite(
return err("INVALID_PATH", "path must be absolute");
}
// 2. Decode base64 → Buffer
let buf: Buffer;
try {
buf = Buffer.from(contentBase64, "base64");
// Verify round-trip to catch invalid base64
if (
buf.toString("base64") !== contentBase64 &&
Buffer.from(contentBase64, "base64url").toString("base64url") !== contentBase64
) {
// Tolerate standard base64 with or without padding; just use what we decoded.
}
} catch {
// 2. Decode base64 → Buffer.
// Buffer.from(s, "base64") in Node never throws — it silently drops
// non-base64 characters and returns whatever it could decode. That
// means a typo or truncated input would land garbage on disk if we
// accepted whatever decoded. Defense: round-trip the decoded buffer
// back to base64 and compare against the input modulo padding/url
// variants. A mismatch means characters were silently dropped.
const buf = Buffer.from(contentBase64, "base64");
const reEncoded = buf.toString("base64");
// Normalize: drop padding and convert base64url chars to standard so the
// comparison tolerates both "=" / no-"=" inputs and "-_" base64url.
const normalize = (s: string): string =>
s.replace(/=+$/u, "").replace(/-/gu, "+").replace(/_/gu, "/");
if (normalize(reEncoded) !== normalize(contentBase64)) {
return err("INVALID_BASE64", "contentBase64 is not valid base64");
}
@@ -104,11 +106,16 @@ export async function handleFileWrite(
}
}
// 4. Refuse to write through symlinks (lstat sees the link itself, not
// its target). A path that's a symlink could escape the operator's
// intended path policy — e.g., an allowed dir could contain a
// symlink pointing at /etc/hosts.
// Otherwise determine overwritten status and reject directories.
// 4. The lstat-on-final check below catches the case where the target
// itself is a symlink, but it does NOT catch a symlink in a parent
// component (e.g. ~/Downloads/evil → /etc, write to .../evil/passwd).
// The gateway-side post-flight check (in file-write-tool.ts)
// canonicalizes via realpath after the write and re-runs policy
// against that canonical path; an escape through a parent-dir
// symlink surfaces there. The current behavior on a post-flight
// deny is to throw loudly with the canonical path so the operator
// can manually inspect — full rollback requires a node-side
// file.unlink (out of scope; tracked as a follow-up).
let overwritten = false;
try {
const existingLStat = await fs.lstat(targetPath);

View File

@@ -51,12 +51,21 @@ async function ensureAuditDir(): Promise<string> {
if (auditDirPromise) {
return auditDirPromise;
}
auditDirPromise = (async () => {
const promise = (async () => {
const dir = path.join(os.homedir(), ".openclaw", "audit");
await fs.mkdir(dir, { recursive: true, mode: 0o700 });
return dir;
})();
return auditDirPromise;
// If the mkdir rejects (transient permission error etc.), clear the
// cached singleton so the NEXT call retries instead of permanently
// silencing the audit log.
promise.catch(() => {
if (auditDirPromise === promise) {
auditDirPromise = null;
}
});
auditDirPromise = promise;
return promise;
}
function auditFilePath(dir: string): string {

View File

@@ -56,6 +56,41 @@ describe("evaluateFilePolicy — default deny", () => {
});
});
describe("evaluateFilePolicy — '..' traversal short-circuit", () => {
  it("rejects /allowed/../etc/passwd even when /allowed/** is allowed", () => {
    withConfig({
      n1: { allowReadPaths: ["/allowed/**"] },
    });
    const decision = evaluateFilePolicy({
      nodeId: "n1",
      kind: "read",
      path: "/allowed/../etc/passwd",
    });
    // Must be denied outright AND not be escalatable via ask.
    expect(decision).toMatchObject({ ok: false, code: "POLICY_DENIED", askable: false });
    // The denial reason should call out the '..' sequence explicitly.
    expect(decision.ok ? "" : decision.reason).toMatch(/\.\./);
  });
  it("rejects a path that ENDS in /..", () => {
    withConfig({
      n1: { allowReadPaths: ["/tmp/**"] },
    });
    const decision = evaluateFilePolicy({
      nodeId: "n1",
      kind: "read",
      path: "/tmp/foo/..",
    });
    expect(decision).toMatchObject({ ok: false, code: "POLICY_DENIED" });
  });
  it("rejects bare '..'", () => {
    withConfig({
      n1: { allowReadPaths: ["/**"] },
    });
    const decision = evaluateFilePolicy({ nodeId: "n1", kind: "read", path: ".." });
    expect(decision).toMatchObject({ ok: false, code: "POLICY_DENIED" });
  });
});
describe("evaluateFilePolicy — denyPaths always wins", () => {
it("denies even when allowReadPaths matches", () => {
withConfig({
@@ -258,6 +293,36 @@ describe("persistAllowAlways", () => {
expect(root.gateway.nodes.fileTransfer["Lobster"].allowWritePaths).toContain("/srv/out.txt");
});
it("never persists under the '*' wildcard even when '*' is the matching key", async () => {
  let seenDraft: Record<string, unknown> | null = null;
  mutateConfigFileMock.mockImplementation(
    async ({ mutate }: { mutate: (draft: Record<string, unknown>) => void }) => {
      // Start from a config whose ONLY fileTransfer entry is the wildcard,
      // so the old escalation bug would have written into "*".
      const draft: Record<string, unknown> = {
        gateway: { nodes: { fileTransfer: { "*": { allowReadPaths: ["/var/log/**"] } } } },
      };
      mutate(draft);
      seenDraft = draft;
    },
  );
  await persistAllowAlways({
    nodeId: "n1",
    nodeDisplayName: "Lobster",
    kind: "read",
    path: "/srv/added.png",
  });
  const root = seenDraft as unknown as {
    gateway: {
      nodes: { fileTransfer: Record<string, { allowReadPaths?: string[] }> };
    };
  };
  // The "*" entry must not have been mutated.
  expect(root.gateway.nodes.fileTransfer["*"].allowReadPaths).toEqual(["/var/log/**"]);
  // A new entry keyed by displayName (not "*") must hold the new path.
  expect(root.gateway.nodes.fileTransfer["Lobster"].allowReadPaths).toEqual(["/srv/added.png"]);
});
it("dedupes when path already present", async () => {
let captured: Record<string, unknown> | null = null;
mutateConfigFileMock.mockImplementation(

View File

@@ -141,12 +141,40 @@ function normalizeAskMode(value: unknown): FilePolicyAskMode {
* 5. ask=on-miss → POLICY_DENIED with askable=true.
* 6. ask=off (or unset) → POLICY_DENIED, not askable.
*/
/**
 * Reject any path whose RAW string contains a ".." segment. Checking the
 * raw string (not the normalized form) is the point — `posix.normalize`
 * collapses "/allowed/../etc/passwd" to "/etc/passwd", which would defeat
 * the check. We want to flag the literal traversal sequence the agent
 * passed in, before any glob match runs.
 *
 * Without this, "/allowed/../etc/passwd" matches the glob "/allowed/**"
 * pre-realpath, so the node fetches the bytes before the post-flight
 * canonical-path check denies — too late, the bytes already crossed the
 * node→gateway boundary.
 */
function containsParentRefSegment(p: string): boolean {
  // A traversal segment is a bare ".." bounded by a slash (or the string
  // edge) on both sides; names like "..foo" or "foo.." must NOT match.
  return /(?:^|\/)\.\.(?:\/|$)/u.test(p);
}
export function evaluateFilePolicy(input: {
nodeId: string;
nodeDisplayName?: string;
kind: FilePolicyKind;
path: string;
}): FilePolicyDecision {
// Reject literal traversal sequences before consulting any allow/deny
// glob list. minimatch on the raw string can wrongly accept
// "/allowed/../etc/passwd" against "/allowed/**".
if (containsParentRefSegment(input.path)) {
return {
ok: false,
code: "POLICY_DENIED",
reason: "path contains '..' segments; reject before glob match",
askable: false,
};
}
const config = readFilePolicyConfig();
if (!config) {
return {
@@ -250,7 +278,11 @@ export async function persistAllowAlways(input: {
const nodes = (gateway.nodes ??= {}) as Record<string, unknown>;
const fileTransfer = (nodes.fileTransfer ??= {}) as Record<string, NodeFilePolicyConfig>;
const candidates = [input.nodeId, input.nodeDisplayName, "*"].filter(
// SECURITY: never persist allow-always under the "*" wildcard. An
// operator approving a path on node A must not silently grant the
// same path on every other node sharing the wildcard entry. Always
// write under the specific node's own entry, creating it if needed.
const candidates = [input.nodeId, input.nodeDisplayName].filter(
(k): k is string => typeof k === "string" && k.length > 0,
);
let key = candidates.find((c) => fileTransfer[c]);

View File

@@ -36,6 +36,14 @@ const MEDIA_URL_CAP = 25;
// Hard timeout for the gateway-side `tar -xzf` unpack process.
const TAR_UNPACK_TIMEOUT_MS = 60_000;
// Defense-in-depth caps for the *uncompressed* extraction. The compressed
// tar is already capped at DIR_FETCH_HARD_MAX_BYTES upstream, but a
// gzip-bomb can decompress to many GB. These caps bound the unpacked
// size and entry count so a malicious node can't exhaust the gateway's
// disk or CPU.
const TAR_UNPACK_MAX_UNCOMPRESSED_BYTES = 64 * 1024 * 1024;
const TAR_UNPACK_MAX_ENTRIES = 5000;
const DirFetchToolSchema = Type.Object({
node: Type.String({
description: "Node id, name, or IP. Resolves the same way as the nodes tool.",
@@ -60,8 +68,144 @@ const DirFetchToolSchema = Type.Object({
});
async function computeFileSha256(filePath: string): Promise<string> {
  // Stream the hash so we never pull a whole large file into memory.
  // file_fetch caps single files at 16MB, but unpacked dir_fetch entries
  // share the 64MB uncompressed budget — better to stream regardless.
  // (The old full-buffer fs.readFile body was removed; it previously sat
  // above this code and returned early, making the loop dead code.)
  const hash = crypto.createHash("sha256");
  const handle = await fs.open(filePath, "r");
  try {
    const chunkSize = 64 * 1024;
    const buf = Buffer.allocUnsafe(chunkSize);
    while (true) {
      // position=null → read sequentially from the current file offset.
      const { bytesRead } = await handle.read(buf, 0, chunkSize, null);
      if (bytesRead === 0) {
        break;
      }
      hash.update(buf.subarray(0, bytesRead));
    }
  } finally {
    await handle.close();
  }
  return hash.digest("hex");
}
/**
 * Run `tar -tzvf -` against the buffer to enumerate entries with their
 * type letter (regular file / symlink / hardlink / dir) and size BEFORE
 * we extract anything. Rejects:
 *   - any entry whose path is absolute (escapes destDir on -P-using tar)
 *   - any entry with ".." segments after normalization
 *   - any entry that is a symlink ("l"), hardlink ("h"), or unknown type
 *   - cumulative uncompressed size > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES
 *   - entry count > TAR_UNPACK_MAX_ENTRIES
 *   - any listing line we cannot parse (fail closed — this is a
 *     security gate, not a best-effort heuristic)
 *
 * Both BSD tar (macOS) and GNU tar print an `ls -l`-style line:
 *   BSD:  "-rw-r--r--  0 user  staff      12 Mar 14 06:43 ./hello.txt"
 *   GNU:  "-rw-r--r-- user/group       12 2024-03-14 06:43 ./hello.txt"
 * We anchor on the "<size> <date> <path>" tail because:
 *   (a) the size is the token immediately BEFORE the date — a naive
 *       "first all-digit token" grabs BSD's link-count column (usually
 *       0) and silently defeats the uncompressed-size cap; and
 *   (b) the path is everything AFTER the date — a naive "last
 *       whitespace token" lets a file name containing a space (e.g.
 *       "evil ../../x") hide traversal from the check.
 */
async function preValidateTarball(
  tarBuffer: Buffer,
): Promise<{ ok: true } | { ok: false; reason: string }> {
  // <size> then a BSD date ("Mar 14 06:43" or "Mar 14 2023") or a GNU
  // date ("2024-03-14 06:43"), then the entry path (spaces included).
  const sizeDatePath =
    /\s(\d+)\s+(?:[A-Za-z]{3}\s+\d{1,2}\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2})\s(.+)$/u;
  return new Promise((resolve) => {
    const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
    const child = spawn(tarBin, ["-tzvf", "-"], {
      stdio: ["pipe", "pipe", "pipe"],
    });
    let stdout = "";
    let stderr = "";
    let settled = false;
    // Single resolution point: clears the watchdog and ignores any
    // later close/error events once a verdict is in.
    const finish = (result: { ok: true } | { ok: false; reason: string }): void => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(watchdog);
      resolve(result);
    };
    const watchdog = setTimeout(() => {
      try {
        child.kill("SIGKILL");
      } catch {
        /* gone */
      }
      finish({ ok: false, reason: "tar -tvzf timed out" });
    }, 30_000);
    child.stdout.on("data", (c: Buffer) => {
      stdout += c.toString();
    });
    child.stderr.on("data", (c: Buffer) => {
      stderr += c.toString();
    });
    // tar can exit before draining stdin; an unhandled EPIPE on the
    // stdin stream would crash the gateway process. The close/error
    // handlers decide the outcome, so the write error is ignored here.
    child.stdin.on("error", () => {
      /* ignored — see comment above */
    });
    child.on("close", (code) => {
      if (code !== 0) {
        finish({ ok: false, reason: `tar -tvzf exited ${code}: ${stderr.slice(0, 200)}` });
        return;
      }
      const lines = stdout.split("\n").filter((l) => l.trim().length > 0);
      if (lines.length > TAR_UNPACK_MAX_ENTRIES) {
        finish({
          ok: false,
          reason: `archive contains ${lines.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
        });
        return;
      }
      let totalBytes = 0;
      for (const line of lines) {
        // First char of the perm field is the entry type.
        const typeChar = line.charAt(0);
        if (typeChar === "l" || typeChar === "h") {
          finish({ ok: false, reason: `archive contains link entry: ${line.slice(0, 120)}` });
          return;
        }
        if (typeChar !== "-" && typeChar !== "d") {
          finish({ ok: false, reason: `archive contains non-regular entry type '${typeChar}'` });
          return;
        }
        const m = sizeDatePath.exec(line);
        if (!m) {
          // Fail closed: if we can't locate size+path we can't vet the
          // entry, and vetting entries is this function's whole job.
          finish({ ok: false, reason: `unparseable tar listing line: ${line.slice(0, 120)}` });
          return;
        }
        const entrySize = Number.parseInt(m[1] ?? "", 10);
        const entryPath = m[2] ?? "";
        if (!Number.isFinite(entrySize) || entryPath.length === 0) {
          finish({ ok: false, reason: `unparseable tar listing line: ${line.slice(0, 120)}` });
          return;
        }
        if (path.isAbsolute(entryPath)) {
          finish({ ok: false, reason: `archive contains absolute path: ${entryPath}` });
          return;
        }
        const norm = path.posix.normalize(entryPath);
        if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
          finish({ ok: false, reason: `archive contains '..' traversal: ${entryPath}` });
          return;
        }
        totalBytes += entrySize;
      }
      if (totalBytes > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES) {
        finish({
          ok: false,
          reason: `uncompressed size ${totalBytes} exceeds ${TAR_UNPACK_MAX_UNCOMPRESSED_BYTES}`,
        });
        return;
      }
      finish({ ok: true });
    });
    child.on("error", (e) => {
      finish({ ok: false, reason: `tar -tvzf error: ${String(e)}` });
    });
    child.stdin.end(tarBuffer);
  });
}
type UnpackedFileEntry = {
@@ -73,28 +217,28 @@ type UnpackedFileEntry = {
};
/**
* Unpack a gzipped tarball into a target directory via `tar -xzf -`. The
* `-P` flag is intentionally omitted so absolute paths in the archive are
* stripped to relative ones and `..` traversal is rejected by tar itself.
* A hard wall-clock timeout caps the unpack at TAR_UNPACK_TIMEOUT_MS to
* avoid hangs on hostile/large archives.
* Unpack a gzipped tarball into a target directory via `tar -xzf -`.
* Caller MUST have run `preValidateTarball` first — this function trusts
* that the archive contains only regular files / dirs with relative,
* non-traversing paths. Without that pre-validation, raw `tar -xzf` is
* unsafe (tarbomb, symlink-then-write tricks, decompression bomb).
*
* The `-P` flag is intentionally omitted so absolute paths in the
* archive are stripped to relative ones (defense-in-depth on top of the
* pre-validation rejection). A hard wall-clock timeout caps the unpack
* at TAR_UNPACK_TIMEOUT_MS to avoid hangs.
*
* BSD tar (macOS) and GNU tar disagree on flags: `--no-overwrite-dir` is
* GNU-only and BSD tar rejects it. We use only flags both implementations
* accept. Defense-in-depth comes from the pre-validation step instead.
*/
async function unpackTar(tarBuffer: Buffer, destDir: string): Promise<void> {
await fs.mkdir(destDir, { recursive: true, mode: 0o700 });
return new Promise((resolve, reject) => {
const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
const child = spawn(
tarBin,
[
"-xzf",
"-",
"-C",
destDir,
// Refuse archives whose paths escape destDir.
"--no-overwrite-dir",
],
{ stdio: ["pipe", "ignore", "pipe"] },
);
const child = spawn(tarBin, ["-xzf", "-", "-C", destDir], {
stdio: ["pipe", "ignore", "pipe"],
});
let stderrOut = "";
const watchdog = setTimeout(() => {
try {
@@ -301,6 +445,29 @@ export function createDirFetchTool(): AnyAgentTool {
const unpackId = `dir-fetch-${tarBaseName}`;
const rootDir = path.join(tarDir, unpackId);
// Pre-validate before extraction. The node is in the trust boundary
// for v1, but a malicious or compromised node should not be able to
// pivot into arbitrary file write on the gateway via tar tricks.
// Rejects: symlinks, hardlinks, absolute paths, ".." traversal,
// entry counts and uncompressed sizes above the caps.
const validation = await preValidateTarball(tarBuffer);
if (!validation.ok) {
await appendFileTransferAudit({
op: "dir.fetch",
nodeId,
nodeDisplayName,
requestedPath: dirPath,
canonicalPath,
decision: "error",
errorCode: "UNSAFE_ARCHIVE",
errorMessage: validation.reason,
sizeBytes: tarBytes,
sha256,
durationMs: Date.now() - startedAt,
});
throw new Error(`dir.fetch UNSAFE_ARCHIVE: ${validation.reason}`);
}
await unpackTar(tarBuffer, rootDir);
const walked = await walkDir(rootDir, rootDir);