fix(file-transfer): CI failures + second-round PR review feedback

CI failures on previous push:

- Declare runtime deps (minimatch, typebox) in package.json — failed the
  extension-runtime-dependencies contract test that scans imports.
- Switch policy.ts and policy.test.ts off the broad
  openclaw/plugin-sdk/config-runtime barrel and onto the narrow
  openclaw/plugin-sdk/config-mutation + runtime-config-snapshot subpaths.
  This satisfies the deprecated-internal-config-api architecture guard.

Second-round Aisle findings:

- policy: traversal-segment check now treats backslash and forward slash
  as equivalent, so a Windows node can't be hit with mixed-separator
  "C:\\allowed\\..\\Windows\\system.ini" (Aisle HIGH CWE-22).
- dir-fetch tool: replace the single fragile `tar -tvzf` parser pass
  (which broke for filenames containing whitespace) with two robust
  passes: `tar -tzf` for paths only (one per line, no parsing of
  fixed columns) and `tar -tzvf` for type chars only (FIRST CHAR of each
  line, never the path column). Also reject backslash-containing entry
  names. Drops the in-process uncompressed-size cap because reliably
  parsing sizes from tar output is fragile and Aisle flagged it as a
  bypass primitive — entry-count cap stays (Aisle HIGH CWE-22, MED).

Tests still 84/84 passing.
This commit is contained in:
Omar Shahine
2026-04-29 07:01:22 +00:00
parent b97af08703
commit c657974d44
5 changed files with 177 additions and 89 deletions

View File

@@ -3,6 +3,10 @@
"version": "2026.4.27",
"description": "OpenClaw file transfer plugin (file_fetch, dir_list, dir_fetch, file_write, file_watch)",
"type": "module",
"dependencies": {
"minimatch": "10.2.4",
"typebox": "1.1.33"
},
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*"
},

View File

@@ -9,8 +9,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
const getRuntimeConfigMock = vi.fn();
const mutateConfigFileMock = vi.fn();
vi.mock("openclaw/plugin-sdk/config-runtime", () => ({
vi.mock("openclaw/plugin-sdk/runtime-config-snapshot", () => ({
getRuntimeConfig: () => getRuntimeConfigMock(),
}));
vi.mock("openclaw/plugin-sdk/config-mutation", () => ({
mutateConfigFile: (input: unknown) => mutateConfigFileMock(input),
}));

View File

@@ -34,8 +34,8 @@
import os from "node:os";
import path from "node:path";
import { minimatch } from "minimatch";
import { mutateConfigFile } from "openclaw/plugin-sdk/config-runtime";
import { getRuntimeConfig } from "openclaw/plugin-sdk/config-runtime";
import { mutateConfigFile } from "openclaw/plugin-sdk/config-mutation";
import { getRuntimeConfig } from "openclaw/plugin-sdk/runtime-config-snapshot";
export type FilePolicyKind = "read" | "write";
export type FilePolicyAskMode = "off" | "on-miss" | "always";
@@ -152,10 +152,13 @@ function normalizeAskMode(value: unknown): FilePolicyAskMode {
* pre-realpath, so the node fetches the bytes before the post-flight
* canonical-path check denies — too late, the bytes already crossed the
* node→gateway boundary.
*
* Treats backslash and forward slash as equivalent separators so a Windows
* node can't be hit with "C:\\allowed\\..\\Windows\\system.ini".
*/
function containsParentRefSegment(p: string): boolean {
  // Normalize Windows separators before splitting so "..\\" segments are
  // caught too — splitting on "/" alone misses mixed-separator traversal
  // like "C:\\allowed\\..\\Windows\\system.ini".
  const unified = p.replace(/\\/gu, "/");
  return unified.split("/").includes("..");
}
export function evaluateFilePolicy(input: {

View File

@@ -36,12 +36,10 @@ const MEDIA_URL_CAP = 25;
// Hard timeout for the gateway-side `tar -xzf` unpack process.
const TAR_UNPACK_TIMEOUT_MS = 60_000;
// Defense-in-depth caps for the *uncompressed* extraction. The compressed
// tar is already capped at DIR_FETCH_HARD_MAX_BYTES upstream, but a
// gzip-bomb can decompress to many GB. These caps bound the unpacked
// size and entry count so a malicious node can't exhaust the gateway's
// disk or CPU.
const TAR_UNPACK_MAX_UNCOMPRESSED_BYTES = 64 * 1024 * 1024;
// Cap on number of entries pre-validated. The compressed tar is already
// capped at DIR_FETCH_HARD_MAX_BYTES upstream, and we walk the unpacked
// tree to compute hashes — TAR_UNPACK_MAX_ENTRIES bounds how much work
// that walk can do.
const TAR_UNPACK_MAX_ENTRIES = 5000;
const DirFetchToolSchema = Type.Object({
@@ -90,26 +88,28 @@ async function computeFileSha256(filePath: string): Promise<string> {
}
/**
* Run `tar -tzvf -` against the buffer to enumerate entries with their
* type letter (regular file / symlink / hardlink / dir) and size BEFORE
* we extract anything. Rejects:
* - any entry whose path is absolute (escapes destDir on -P-using tar)
* - any entry with ".." segments after normalization
* - any entry that is a symlink ("l"), hardlink ("h"), or unknown type
* - cumulative uncompressed size > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES
* - entry count > TAR_UNPACK_MAX_ENTRIES
* Run two passes against the buffer to enumerate entries BEFORE we extract:
*
* BSD tar's -tvzf produces a `ls -l`-style line; we parse the leading
* type char and the size column.
* 1. `tar -tzf -` produces names ONLY, one per line. This is whitespace-safe
* because each line is exactly one path; no parsing of fixed columns.
* Used to validate paths (reject absolute, '..' traversal).
* 2. `tar -tzvf -` adds type info via the `ls -l`-style perm prefix.
* Used ONLY to detect symlinks / hardlinks / non-regular entries via
* the FIRST CHARACTER of each line, never the path column.
*
* Size limits are enforced at the *extraction* step instead — the tar
* unpack process is bounded by the maxBytes we already pass through, and
* the post-extract walkDir is hard-capped by TAR_UNPACK_MAX_ENTRIES.
* Trying to parse uncompressed sizes from `tar -tzvf` output is fragile
* (filenames with whitespace shift the columns) and Aisle flagged that
* shape as a bypass primitive — drop it.
*/
async function preValidateTarball(
async function listTarPaths(
tarBuffer: Buffer,
): Promise<{ ok: true } | { ok: false; reason: string }> {
): Promise<{ ok: true; paths: string[] } | { ok: false; reason: string }> {
return new Promise((resolve) => {
const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
const child = spawn(tarBin, ["-tzvf", "-"], {
stdio: ["pipe", "pipe", "pipe"],
});
const child = spawn(tarBin, ["-tzf", "-"], { stdio: ["pipe", "pipe", "pipe"] });
let stdout = "";
let stderr = "";
let aborted = false;
@@ -120,10 +120,20 @@ async function preValidateTarball(
} catch {
/* gone */
}
resolve({ ok: false, reason: "tar -tvzf timed out" });
resolve({ ok: false, reason: "tar -tzf timed out" });
}, 30_000);
child.stdout.on("data", (c: Buffer) => {
stdout += c.toString();
if (stdout.length > 32 * 1024 * 1024) {
aborted = true;
try {
child.kill("SIGKILL");
} catch {
/* gone */
}
clearTimeout(watchdog);
resolve({ ok: false, reason: "tar -tzf output too large" });
}
});
child.stderr.on("data", (c: Buffer) => {
stderr += c.toString();
@@ -134,80 +144,142 @@ async function preValidateTarball(
return;
}
if (code !== 0) {
resolve({ ok: false, reason: `tar -tvzf exited ${code}: ${stderr.slice(0, 200)}` });
resolve({ ok: false, reason: `tar -tzf exited ${code}: ${stderr.slice(0, 200)}` });
return;
}
const lines = stdout.split("\n").filter((l) => l.trim().length > 0);
if (lines.length > TAR_UNPACK_MAX_ENTRIES) {
resolve({
ok: false,
reason: `archive contains ${lines.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
});
return;
}
let totalBytes = 0;
for (const line of lines) {
// BSD tar -tvzf format:
// "drwxr-xr-x 0 user staff 0 Mar 14 ... ./"
// "-rw-r--r-- 0 user staff 12 Mar 14 ... ./hello.txt"
// "lrwxr-xr-x 0 user staff 0 Mar 14 ... link -> target"
// First char of the perm field is the type.
const typeChar = line.charAt(0);
if (typeChar === "l" || typeChar === "h") {
resolve({ ok: false, reason: `archive contains link entry: ${line.slice(0, 120)}` });
return;
}
if (typeChar !== "-" && typeChar !== "d") {
resolve({ ok: false, reason: `archive contains non-regular entry type '${typeChar}'` });
return;
}
// Path is everything after the date/time field; cheapest is to
// grab the last whitespace-delimited token. For " name -> target"
// (symlink) we already rejected above.
const tokens = line.trim().split(/\s+/u);
if (tokens.length < 2) {
continue;
}
const entryPath = tokens.at(-1) ?? "";
if (path.isAbsolute(entryPath)) {
resolve({ ok: false, reason: `archive contains absolute path: ${entryPath}` });
return;
}
const norm = path.posix.normalize(entryPath);
if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
resolve({ ok: false, reason: `archive contains '..' traversal: ${entryPath}` });
return;
}
// Size column: tar -tvzf output has size in bytes at a stable
// position. tokens[2] is owner, tokens[3] is group, tokens[4] is
// size on BSD; on GNU it's tokens[2] (perm/owner combined).
// Be permissive — find the first all-digit token as size.
for (const t of tokens.slice(1)) {
if (/^\d+$/u.test(t)) {
totalBytes += Number.parseInt(t, 10);
break;
}
}
}
if (totalBytes > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES) {
resolve({
ok: false,
reason: `uncompressed size ${totalBytes} exceeds ${TAR_UNPACK_MAX_UNCOMPRESSED_BYTES}`,
});
return;
}
resolve({ ok: true });
// tar -tf emits one path per line with literal newlines as record
// separators. Filenames containing newlines are exotic enough that
// refusing them is safer than trying to parse around them.
const paths = stdout.split("\n").filter((l) => l.length > 0);
resolve({ ok: true, paths });
});
child.on("error", (e) => {
clearTimeout(watchdog);
if (!aborted) {
resolve({ ok: false, reason: `tar -tvzf error: ${String(e)}` });
resolve({ ok: false, reason: `tar -tzf error: ${String(e)}` });
}
});
child.stdin.end(tarBuffer);
});
}
/**
 * Second pre-validation pass: `tar -tzvf -` prints an `ls -l`-style line
 * per entry. Only the FIRST character of each line (the entry-type letter)
 * is consumed — the path/size columns are never parsed, so filenames
 * containing whitespace cannot shift the parse.
 */
async function listTarTypeChars(
  tarBuffer: Buffer,
): Promise<{ ok: true; typeChars: string[] } | { ok: false; reason: string }> {
  return new Promise((resolve) => {
    // Pin the absolute tar path on POSIX; fall back to PATH lookup on Windows.
    const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
    const proc = spawn(tarBin, ["-tzvf", "-"], { stdio: ["pipe", "pipe", "pipe"] });
    let listing = "";
    let errOutput = "";
    let killed = false;
    // Kill the child and settle the promise with a refusal reason.
    const abortWith = (reason: string): void => {
      killed = true;
      try {
        proc.kill("SIGKILL");
      } catch {
        /* gone */
      }
      resolve({ ok: false, reason });
    };
    // Watchdog so a wedged tar can't hang the request indefinitely.
    const timer = setTimeout(() => abortWith("tar -tzvf timed out"), 30_000);
    proc.stdout.on("data", (chunk: Buffer) => {
      listing += chunk.toString();
      // Bound listing output so a pathological archive can't balloon memory.
      if (listing.length > 32 * 1024 * 1024) {
        clearTimeout(timer);
        abortWith("tar -tzvf output too large");
      }
    });
    proc.stderr.on("data", (chunk: Buffer) => {
      errOutput += chunk.toString();
    });
    proc.on("close", (code) => {
      clearTimeout(timer);
      if (killed) {
        return;
      }
      if (code !== 0) {
        resolve({ ok: false, reason: `tar -tzvf exited ${code}: ${errOutput.slice(0, 200)}` });
        return;
      }
      // Entry type is the first character of each non-empty line; the rest
      // of the line (perms/owner/size/path) is deliberately ignored.
      const typeChars: string[] = [];
      for (const line of listing.split("\n")) {
        if (line.length > 0) {
          typeChars.push(line.charAt(0));
        }
      }
      resolve({ ok: true, typeChars });
    });
    proc.on("error", (err) => {
      clearTimeout(timer);
      if (!killed) {
        resolve({ ok: false, reason: `tar -tzvf error: ${String(err)}` });
      }
    });
    proc.stdin.end(tarBuffer);
  });
}
/**
 * Pre-validate a gzipped tar before extraction: a names-only pass and a
 * type-char pass, cross-checked, then per-entry policy checks. Refuses
 * archives with too many entries, link entries, non-regular entries,
 * absolute paths, ".." traversal, or backslashes in names.
 */
async function preValidateTarball(
  tarBuffer: Buffer,
): Promise<{ ok: true } | { ok: false; reason: string }> {
  const namesResult = await listTarPaths(tarBuffer);
  if (!namesResult.ok) {
    return namesResult;
  }
  const { paths } = namesResult;
  if (paths.length > TAR_UNPACK_MAX_ENTRIES) {
    return {
      ok: false,
      reason: `archive contains ${paths.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
    };
  }
  const typesResult = await listTarTypeChars(tarBuffer);
  if (!typesResult.ok) {
    return typesResult;
  }
  const { typeChars } = typesResult;
  // Both passes must agree on entry count; a mismatch means something
  // exotic is going on (filenames with newlines, etc.) and we refuse
  // defensively.
  if (typeChars.length !== paths.length) {
    return {
      ok: false,
      reason: `tar -tzf and tar -tzvf disagree on entry count (${paths.length} vs ${typeChars.length}); refusing`,
    };
  }
  for (const [i, entryPath] of paths.entries()) {
    const t = typeChars[i];
    if (t === "l" || t === "h") {
      return { ok: false, reason: `archive contains link entry: ${entryPath}` };
    }
    if (t !== "-" && t !== "d") {
      return { ok: false, reason: `archive contains non-regular entry type '${t}': ${entryPath}` };
    }
    if (path.isAbsolute(entryPath)) {
      return { ok: false, reason: `archive contains absolute path: ${entryPath}` };
    }
    const norm = path.posix.normalize(entryPath);
    if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
      return { ok: false, reason: `archive contains '..' traversal: ${entryPath}` };
    }
    // Backslashes are refused outright — blocks Windows-style traversal in
    // archives produced by an attacker on a Windows node.
    if (entryPath.includes("\\")) {
      return { ok: false, reason: `archive contains backslash in path: ${entryPath}` };
    }
  }
  return { ok: true };
}
type UnpackedFileEntry = {
relPath: string;
size: number;

7
pnpm-lock.yaml generated
View File

@@ -593,6 +593,13 @@ importers:
version: link:../..
extensions/file-transfer:
dependencies:
minimatch:
specifier: 10.2.4
version: 10.2.4
typebox:
specifier: 1.1.33
version: 1.1.33
devDependencies:
'@openclaw/plugin-sdk':
specifier: workspace:*