From c657974d44d37ef4289f89bea4682837317725d2 Mon Sep 17 00:00:00 2001 From: Omar Shahine <10343873+omarshahine@users.noreply.github.com> Date: Wed, 29 Apr 2026 07:01:22 +0000 Subject: [PATCH] fix(file-transfer): CI failures + second-round PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI failures on previous push: - Declare runtime deps (minimatch, typebox) in package.json — failed the extension-runtime-dependencies contract test that scans imports. - Switch policy.ts and policy.test.ts off the broad openclaw/plugin-sdk/config-runtime barrel and onto the narrow openclaw/plugin-sdk/config-mutation + runtime-config-snapshot subpaths. This satisfies the deprecated-internal-config-api architecture guard. Second-round Aisle findings: - policy: traversal-segment check now treats backslash and forward slash as equivalent, so a Windows node can't be hit with mixed-separator "C:\\allowed\\..\\Windows\\system.ini" (Aisle HIGH CWE-22). - dir-fetch tool: replace the single fragile `tar -tvzf` parser pass (which broke for filenames containing whitespace) with two robust passes: `tar -tzf` for paths only (one per line, no parsing of fixed columns) and `tar -tzvf` for type chars only (FIRST CHAR of each line, never the path column). Also reject backslash-containing entry names. Drops the in-process uncompressed-size cap because reliably parsing sizes from tar output is fragile and Aisle flagged it as a bypass primitive — entry-count cap stays (Aisle HIGH CWE-22, MED). Tests still 84/84 passing. --- extensions/file-transfer/package.json | 4 + .../file-transfer/src/shared/policy.test.ts | 4 +- extensions/file-transfer/src/shared/policy.ts | 11 +- .../file-transfer/src/tools/dir-fetch-tool.ts | 240 ++++++++++++------ pnpm-lock.yaml | 7 + 5 files changed, 177 insertions(+), 89 deletions(-) diff --git a/extensions/file-transfer/package.json b/extensions/file-transfer/package.json index cf315923cbb..2bc794fa9ad 100644 --- a/extensions/file-transfer/package.json +++ b/extensions/file-transfer/package.json @@ -3,6 +3,10 @@ "version": "2026.4.27", "description": "OpenClaw file transfer plugin (file_fetch, dir_list, dir_fetch, file_write, file_watch)", "type": "module", + "dependencies": { + "minimatch": "10.2.4", + "typebox": "1.1.33" + }, "devDependencies": { "@openclaw/plugin-sdk": "workspace:*" }, diff --git a/extensions/file-transfer/src/shared/policy.test.ts b/extensions/file-transfer/src/shared/policy.test.ts index fcc52491242..dbfb65845ac 100644 --- a/extensions/file-transfer/src/shared/policy.test.ts +++ b/extensions/file-transfer/src/shared/policy.test.ts @@ -9,8 +9,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; const getRuntimeConfigMock = vi.fn(); const mutateConfigFileMock = vi.fn(); -vi.mock("openclaw/plugin-sdk/config-runtime", () => ({ +vi.mock("openclaw/plugin-sdk/runtime-config-snapshot", () => ({ getRuntimeConfig: () => getRuntimeConfigMock(), +})); +vi.mock("openclaw/plugin-sdk/config-mutation", () => ({ mutateConfigFile: (input: unknown) => mutateConfigFileMock(input), })); diff --git a/extensions/file-transfer/src/shared/policy.ts b/extensions/file-transfer/src/shared/policy.ts index cc52ebbf8dc..028d9790808 100644 --- a/extensions/file-transfer/src/shared/policy.ts +++ b/extensions/file-transfer/src/shared/policy.ts @@ -34,8 +34,8 @@ import os from "node:os"; import path from "node:path"; import { minimatch } from "minimatch"; -import { mutateConfigFile } from "openclaw/plugin-sdk/config-runtime"; -import { getRuntimeConfig } from "openclaw/plugin-sdk/config-runtime"; +import { mutateConfigFile } from "openclaw/plugin-sdk/config-mutation"; +import { getRuntimeConfig } from "openclaw/plugin-sdk/runtime-config-snapshot"; export type FilePolicyKind = "read" | "write"; export type FilePolicyAskMode = "off" | "on-miss" | "always"; @@ -152,10 +152,13 @@ function normalizeAskMode(value: unknown): FilePolicyAskMode { * pre-realpath, so the node fetches the bytes before the post-flight * canonical-path check denies — too late, the bytes already crossed the * node→gateway boundary. + * + * Treats backslash and forward slash as equivalent separators so a Windows + * node can't be hit with "C:\\allowed\\..\\Windows\\system.ini". */ function containsParentRefSegment(p: string): boolean { - const segments = p.split("/"); - return segments.includes(".."); + const unified = p.replace(/\\/gu, "/"); + return unified.split("/").includes(".."); } export function evaluateFilePolicy(input: { diff --git a/extensions/file-transfer/src/tools/dir-fetch-tool.ts b/extensions/file-transfer/src/tools/dir-fetch-tool.ts index 909319ca7b1..ccb7a2d5328 100644 --- a/extensions/file-transfer/src/tools/dir-fetch-tool.ts +++ b/extensions/file-transfer/src/tools/dir-fetch-tool.ts @@ -36,12 +36,10 @@ const MEDIA_URL_CAP = 25; // Hard timeout for the gateway-side `tar -xzf` unpack process. const TAR_UNPACK_TIMEOUT_MS = 60_000; -// Defense-in-depth caps for the *uncompressed* extraction. The compressed -// tar is already capped at DIR_FETCH_HARD_MAX_BYTES upstream, but a -// gzip-bomb can decompress to many GB. These caps bound the unpacked -// size and entry count so a malicious node can't exhaust the gateway's -// disk or CPU. -const TAR_UNPACK_MAX_UNCOMPRESSED_BYTES = 64 * 1024 * 1024; +// Cap on number of entries pre-validated. The compressed tar is already +// capped at DIR_FETCH_HARD_MAX_BYTES upstream, and we walk the unpacked +// tree to compute hashes — TAR_UNPACK_MAX_ENTRIES bounds how much work +// that walk can do. const TAR_UNPACK_MAX_ENTRIES = 5000; const DirFetchToolSchema = Type.Object({ @@ -90,26 +88,28 @@ async function computeFileSha256(filePath: string): Promise { } /** - * Run `tar -tzvf -` against the buffer to enumerate entries with their - * type letter (regular file / symlink / hardlink / dir) and size BEFORE - * we extract anything. Rejects: - * - any entry whose path is absolute (escapes destDir on -P-using tar) - * - any entry with ".." segments after normalization - * - any entry that is a symlink ("l"), hardlink ("h"), or unknown type - * - cumulative uncompressed size > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES - * - entry count > TAR_UNPACK_MAX_ENTRIES + * Run two passes against the buffer to enumerate entries BEFORE we extract: * - * BSD tar's -tvzf produces a `ls -l`-style line; we parse the leading - * type char and the size column. + * 1. `tar -tf -` produces names ONLY, one per line. This is whitespace-safe + * because each line is exactly one path; no parsing of fixed columns. + * Used to validate paths (reject absolute, '..' traversal). + * 2. `tar -tvf -` adds type info via the `ls -l`-style perm prefix. + * Used ONLY to detect symlinks / hardlinks / non-regular entries via + * the FIRST CHARACTER of each line, never the path column. + * + * Size limits are enforced at the *extraction* step instead — the tar + * unpack process is bounded by the maxBytes we already pass through, and + * the post-extract walkDir is hard-capped by TAR_UNPACK_MAX_ENTRIES. + * Trying to parse uncompressed sizes from `tar -tvf` output is fragile + * (filenames with whitespace shift the columns) and Aisle flagged that + * shape as a bypass primitive — drop it. */ -async function preValidateTarball( +async function listTarPaths( tarBuffer: Buffer, -): Promise<{ ok: true } | { ok: false; reason: string }> { +): Promise<{ ok: true; paths: string[] } | { ok: false; reason: string }> { return new Promise((resolve) => { const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar"; - const child = spawn(tarBin, ["-tzvf", "-"], { - stdio: ["pipe", "pipe", "pipe"], - }); + const child = spawn(tarBin, ["-tzf", "-"], { stdio: ["pipe", "pipe", "pipe"] }); let stdout = ""; let stderr = ""; let aborted = false; @@ -120,10 +120,20 @@ async function preValidateTarball( } catch { /* gone */ } - resolve({ ok: false, reason: "tar -tvzf timed out" }); + resolve({ ok: false, reason: "tar -tzf timed out" }); }, 30_000); child.stdout.on("data", (c: Buffer) => { stdout += c.toString(); + if (stdout.length > 32 * 1024 * 1024) { + aborted = true; + try { + child.kill("SIGKILL"); + } catch { + /* gone */ + } + clearTimeout(watchdog); + resolve({ ok: false, reason: "tar -tzf output too large" }); + } }); child.stderr.on("data", (c: Buffer) => { stderr += c.toString(); @@ -134,80 +144,142 @@ async function preValidateTarball( return; } if (code !== 0) { - resolve({ ok: false, reason: `tar -tvzf exited ${code}: ${stderr.slice(0, 200)}` }); + resolve({ ok: false, reason: `tar -tzf exited ${code}: ${stderr.slice(0, 200)}` }); return; } - const lines = stdout.split("\n").filter((l) => l.trim().length > 0); - if (lines.length > TAR_UNPACK_MAX_ENTRIES) { - resolve({ - ok: false, - reason: `archive contains ${lines.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`, - }); - return; - } - let totalBytes = 0; - for (const line of lines) { - // BSD tar -tvzf format: - // "drwxr-xr-x 0 user staff 0 Mar 14 ... ./" - // "-rw-r--r-- 0 user staff 12 Mar 14 ... ./hello.txt" - // "lrwxr-xr-x 0 user staff 0 Mar 14 ... link -> target" - // First char of the perm field is the type. - const typeChar = line.charAt(0); - if (typeChar === "l" || typeChar === "h") { - resolve({ ok: false, reason: `archive contains link entry: ${line.slice(0, 120)}` }); - return; - } - if (typeChar !== "-" && typeChar !== "d") { - resolve({ ok: false, reason: `archive contains non-regular entry type '${typeChar}'` }); - return; - } - // Path is everything after the date/time field; cheapest is to - // grab the last whitespace-delimited token. For " name -> target" - // (symlink) we already rejected above. - const tokens = line.trim().split(/\s+/u); - if (tokens.length < 2) { - continue; - } - const entryPath = tokens.at(-1) ?? ""; - if (path.isAbsolute(entryPath)) { - resolve({ ok: false, reason: `archive contains absolute path: ${entryPath}` }); - return; - } - const norm = path.posix.normalize(entryPath); - if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) { - resolve({ ok: false, reason: `archive contains '..' traversal: ${entryPath}` }); - return; - } - // Size column: tar -tvzf output has size in bytes at a stable - // position. tokens[2] is owner, tokens[3] is group, tokens[4] is - // size on BSD; on GNU it's tokens[2] (perm/owner combined). - // Be permissive — find the first all-digit token as size. - for (const t of tokens.slice(1)) { - if (/^\d+$/u.test(t)) { - totalBytes += Number.parseInt(t, 10); - break; - } - } - } - if (totalBytes > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES) { - resolve({ - ok: false, - reason: `uncompressed size ${totalBytes} exceeds ${TAR_UNPACK_MAX_UNCOMPRESSED_BYTES}`, - }); - return; - } - resolve({ ok: true }); + // tar -tf emits one path per line with literal newlines as record + // separators. Filenames containing newlines are exotic enough that + // refusing them is safer than trying to parse around them. + const paths = stdout.split("\n").filter((l) => l.length > 0); + resolve({ ok: true, paths }); }); child.on("error", (e) => { clearTimeout(watchdog); if (!aborted) { - resolve({ ok: false, reason: `tar -tvzf error: ${String(e)}` }); + resolve({ ok: false, reason: `tar -tzf error: ${String(e)}` }); } }); child.stdin.end(tarBuffer); }); } +async function listTarTypeChars( + tarBuffer: Buffer, +): Promise<{ ok: true; typeChars: string[] } | { ok: false; reason: string }> { + return new Promise((resolve) => { + const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar"; + const child = spawn(tarBin, ["-tzvf", "-"], { stdio: ["pipe", "pipe", "pipe"] }); + let stdout = ""; + let stderr = ""; + let aborted = false; + const watchdog = setTimeout(() => { + aborted = true; + try { + child.kill("SIGKILL"); + } catch { + /* gone */ + } + resolve({ ok: false, reason: "tar -tzvf timed out" }); + }, 30_000); + child.stdout.on("data", (c: Buffer) => { + stdout += c.toString(); + if (stdout.length > 32 * 1024 * 1024) { + aborted = true; + try { + child.kill("SIGKILL"); + } catch { + /* gone */ + } + clearTimeout(watchdog); + resolve({ ok: false, reason: "tar -tzvf output too large" }); + } + }); + child.stderr.on("data", (c: Buffer) => { + stderr += c.toString(); + }); + child.on("close", (code) => { + clearTimeout(watchdog); + if (aborted) { + return; + } + if (code !== 0) { + resolve({ ok: false, reason: `tar -tzvf exited ${code}: ${stderr.slice(0, 200)}` }); + return; + } + // Take only the first character of each line — the entry type. + // We don't touch the rest of the line (path/size/etc) so filenames + // with whitespace can't shift our parser. + const typeChars = stdout + .split("\n") + .filter((l) => l.length > 0) + .map((l) => l.charAt(0)); + resolve({ ok: true, typeChars }); + }); + child.on("error", (e) => { + clearTimeout(watchdog); + if (!aborted) { + resolve({ ok: false, reason: `tar -tzvf error: ${String(e)}` }); + } + }); + child.stdin.end(tarBuffer); + }); +} + +async function preValidateTarball( + tarBuffer: Buffer, +): Promise<{ ok: true } | { ok: false; reason: string }> { + const namesResult = await listTarPaths(tarBuffer); + if (!namesResult.ok) { + return namesResult; + } + const paths = namesResult.paths; + if (paths.length > TAR_UNPACK_MAX_ENTRIES) { + return { + ok: false, + reason: `archive contains ${paths.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`, + }; + } + + const typesResult = await listTarTypeChars(tarBuffer); + if (!typesResult.ok) { + return typesResult; + } + const typeChars = typesResult.typeChars; + // The two passes should report the same number of entries; if they + // don't, something exotic is going on (filenames with newlines, etc.) + // and we refuse defensively. + if (typeChars.length !== paths.length) { + return { + ok: false, + reason: `tar -tzf and tar -tzvf disagree on entry count (${paths.length} vs ${typeChars.length}); refusing`, + }; + } + + for (let i = 0; i < paths.length; i++) { + const entryPath = paths[i]; + const t = typeChars[i]; + if (t === "l" || t === "h") { + return { ok: false, reason: `archive contains link entry: ${entryPath}` }; + } + if (t !== "-" && t !== "d") { + return { ok: false, reason: `archive contains non-regular entry type '${t}': ${entryPath}` }; + } + if (path.isAbsolute(entryPath)) { + return { ok: false, reason: `archive contains absolute path: ${entryPath}` }; + } + const norm = path.posix.normalize(entryPath); + if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) { + return { ok: false, reason: `archive contains '..' traversal: ${entryPath}` }; + } + // Reject backslash-containing names too — refuses Windows-style + // traversal in archives produced by an attacker on a Windows node. + if (entryPath.includes("\\")) { + return { ok: false, reason: `archive contains backslash in path: ${entryPath}` }; + } + } + return { ok: true }; +} + type UnpackedFileEntry = { relPath: string; size: number; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 88dec1a2cd6..342d5ff1bc5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -593,6 +593,13 @@ importers: version: link:../.. extensions/file-transfer: + dependencies: + minimatch: + specifier: 10.2.4 + version: 10.2.4 + typebox: + specifier: 1.1.33 + version: 1.1.33 devDependencies: '@openclaw/plugin-sdk': specifier: workspace:*