Mirror of https://github.com/openclaw/openclaw.git (synced 2026-05-06 09:30:43 +00:00)
fix(file-transfer): CI failures + second-round PR review feedback
CI failures on previous push:

- Declare runtime deps (minimatch, typebox) in package.json — failed the extension-runtime-dependencies contract test that scans imports.
- Switch policy.ts and policy.test.ts off the broad openclaw/plugin-sdk/config-runtime barrel and onto the narrow openclaw/plugin-sdk/config-mutation + runtime-config-snapshot subpaths. This satisfies the deprecated-internal-config-api architecture guard.

Second-round Aisle findings:

- policy: the traversal-segment check now treats backslash and forward slash as equivalent, so a Windows node can't be hit with the mixed-separator "C:\\allowed\\..\\Windows\\system.ini" (Aisle HIGH CWE-22); see the sketch below.
- dir-fetch tool: replace the single fragile `tar -tvzf` parser pass (which broke for filenames containing whitespace) with two robust passes: `tar -tzf` for paths only (one per line, no parsing of fixed columns) and `tar -tzvf` for type chars only (FIRST CHAR of each line, never the path column). Also reject backslash-containing entry names. Drop the in-process uncompressed-size cap because reliably parsing sizes from tar output is fragile and Aisle flagged it as a bypass primitive — the entry-count cap stays (Aisle HIGH CWE-22, MED).

Tests still 84/84 passing.
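A minimal sketch of the mixed-separator case the policy fix targets. The helper mirrors the containsParentRefSegment change in the policy.ts hunk further down; the surrounding comparison with the old check is illustrative only, not part of the commit.

// Separator-normalizing parent-ref check, as in the policy.ts hunk below.
function containsParentRefSegment(p: string): boolean {
  // Normalize Windows-style separators first so "..\" segments become visible.
  const unified = p.replace(/\\/gu, "/");
  return unified.split("/").includes("..");
}

// Before the fix, splitting on "/" alone missed the backslash-delimited case:
const mixed = "C:\\allowed\\..\\Windows\\system.ini";
console.log(mixed.split("/").includes(".."));  // false — the old check misses it
console.log(containsParentRefSegment(mixed));  // true  — the new check rejects it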
@@ -3,6 +3,10 @@
   "version": "2026.4.27",
   "description": "OpenClaw file transfer plugin (file_fetch, dir_list, dir_fetch, file_write, file_watch)",
   "type": "module",
+  "dependencies": {
+    "minimatch": "10.2.4",
+    "typebox": "1.1.33"
+  },
   "devDependencies": {
     "@openclaw/plugin-sdk": "workspace:*"
   },
@@ -9,8 +9,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 const getRuntimeConfigMock = vi.fn();
 const mutateConfigFileMock = vi.fn();
 
-vi.mock("openclaw/plugin-sdk/config-runtime", () => ({
+vi.mock("openclaw/plugin-sdk/runtime-config-snapshot", () => ({
   getRuntimeConfig: () => getRuntimeConfigMock(),
+}));
+vi.mock("openclaw/plugin-sdk/config-mutation", () => ({
   mutateConfigFile: (input: unknown) => mutateConfigFileMock(input),
 }));
 
@@ -34,8 +34,8 @@
 import os from "node:os";
 import path from "node:path";
 import { minimatch } from "minimatch";
-import { mutateConfigFile } from "openclaw/plugin-sdk/config-runtime";
-import { getRuntimeConfig } from "openclaw/plugin-sdk/config-runtime";
+import { mutateConfigFile } from "openclaw/plugin-sdk/config-mutation";
+import { getRuntimeConfig } from "openclaw/plugin-sdk/runtime-config-snapshot";
 
 export type FilePolicyKind = "read" | "write";
 export type FilePolicyAskMode = "off" | "on-miss" | "always";
@@ -152,10 +152,13 @@ function normalizeAskMode(value: unknown): FilePolicyAskMode {
  * pre-realpath, so the node fetches the bytes before the post-flight
  * canonical-path check denies — too late, the bytes already crossed the
  * node→gateway boundary.
  *
+ * Treats backslash and forward slash as equivalent separators so a Windows
+ * node can't be hit with "C:\\allowed\\..\\Windows\\system.ini".
  */
 function containsParentRefSegment(p: string): boolean {
-  const segments = p.split("/");
-  return segments.includes("..");
+  const unified = p.replace(/\\/gu, "/");
+  return unified.split("/").includes("..");
 }
 
 export function evaluateFilePolicy(input: {
@@ -36,12 +36,10 @@ const MEDIA_URL_CAP = 25;
 // Hard timeout for the gateway-side `tar -xzf` unpack process.
 const TAR_UNPACK_TIMEOUT_MS = 60_000;
 
-// Defense-in-depth caps for the *uncompressed* extraction. The compressed
-// tar is already capped at DIR_FETCH_HARD_MAX_BYTES upstream, but a
-// gzip-bomb can decompress to many GB. These caps bound the unpacked
-// size and entry count so a malicious node can't exhaust the gateway's
-// disk or CPU.
-const TAR_UNPACK_MAX_UNCOMPRESSED_BYTES = 64 * 1024 * 1024;
+// Cap on number of entries pre-validated. The compressed tar is already
+// capped at DIR_FETCH_HARD_MAX_BYTES upstream, and we walk the unpacked
+// tree to compute hashes — TAR_UNPACK_MAX_ENTRIES bounds how much work
+// that walk can do.
 const TAR_UNPACK_MAX_ENTRIES = 5000;
 
 const DirFetchToolSchema = Type.Object({
@@ -90,26 +88,28 @@ async function computeFileSha256(filePath: string): Promise<string> {
 }
 
 /**
- * Run `tar -tzvf -` against the buffer to enumerate entries with their
- * type letter (regular file / symlink / hardlink / dir) and size BEFORE
- * we extract anything. Rejects:
- * - any entry whose path is absolute (escapes destDir on -P-using tar)
- * - any entry with ".." segments after normalization
- * - any entry that is a symlink ("l"), hardlink ("h"), or unknown type
- * - cumulative uncompressed size > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES
- * - entry count > TAR_UNPACK_MAX_ENTRIES
+ * Run two passes against the buffer to enumerate entries BEFORE we extract:
  *
- * BSD tar's -tvzf produces a `ls -l`-style line; we parse the leading
- * type char and the size column.
+ * 1. `tar -tf -` produces names ONLY, one per line. This is whitespace-safe
+ *    because each line is exactly one path; no parsing of fixed columns.
+ *    Used to validate paths (reject absolute, '..' traversal).
+ * 2. `tar -tvf -` adds type info via the `ls -l`-style perm prefix.
+ *    Used ONLY to detect symlinks / hardlinks / non-regular entries via
+ *    the FIRST CHARACTER of each line, never the path column.
+ *
+ * Size limits are enforced at the *extraction* step instead — the tar
+ * unpack process is bounded by the maxBytes we already pass through, and
+ * the post-extract walkDir is hard-capped by TAR_UNPACK_MAX_ENTRIES.
+ * Trying to parse uncompressed sizes from `tar -tvf` output is fragile
+ * (filenames with whitespace shift the columns) and Aisle flagged that
+ * shape as a bypass primitive — drop it.
 */
-async function preValidateTarball(
+async function listTarPaths(
   tarBuffer: Buffer,
-): Promise<{ ok: true } | { ok: false; reason: string }> {
+): Promise<{ ok: true; paths: string[] } | { ok: false; reason: string }> {
   return new Promise((resolve) => {
     const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
-    const child = spawn(tarBin, ["-tzvf", "-"], {
-      stdio: ["pipe", "pipe", "pipe"],
-    });
+    const child = spawn(tarBin, ["-tzf", "-"], { stdio: ["pipe", "pipe", "pipe"] });
     let stdout = "";
     let stderr = "";
     let aborted = false;
@@ -120,10 +120,20 @@ async function preValidateTarball(
       } catch {
         /* gone */
       }
-      resolve({ ok: false, reason: "tar -tvzf timed out" });
+      resolve({ ok: false, reason: "tar -tzf timed out" });
     }, 30_000);
     child.stdout.on("data", (c: Buffer) => {
       stdout += c.toString();
+      if (stdout.length > 32 * 1024 * 1024) {
+        aborted = true;
+        try {
+          child.kill("SIGKILL");
+        } catch {
+          /* gone */
+        }
+        clearTimeout(watchdog);
+        resolve({ ok: false, reason: "tar -tzf output too large" });
+      }
     });
     child.stderr.on("data", (c: Buffer) => {
       stderr += c.toString();
@@ -134,80 +144,142 @@ async function preValidateTarball(
         return;
       }
       if (code !== 0) {
-        resolve({ ok: false, reason: `tar -tvzf exited ${code}: ${stderr.slice(0, 200)}` });
+        resolve({ ok: false, reason: `tar -tzf exited ${code}: ${stderr.slice(0, 200)}` });
         return;
       }
-      const lines = stdout.split("\n").filter((l) => l.trim().length > 0);
-      if (lines.length > TAR_UNPACK_MAX_ENTRIES) {
-        resolve({
-          ok: false,
-          reason: `archive contains ${lines.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
-        });
-        return;
-      }
-      let totalBytes = 0;
-      for (const line of lines) {
-        // BSD tar -tvzf format:
-        //   "drwxr-xr-x 0 user staff 0 Mar 14 ... ./"
-        //   "-rw-r--r-- 0 user staff 12 Mar 14 ... ./hello.txt"
-        //   "lrwxr-xr-x 0 user staff 0 Mar 14 ... link -> target"
-        // First char of the perm field is the type.
-        const typeChar = line.charAt(0);
-        if (typeChar === "l" || typeChar === "h") {
-          resolve({ ok: false, reason: `archive contains link entry: ${line.slice(0, 120)}` });
-          return;
-        }
-        if (typeChar !== "-" && typeChar !== "d") {
-          resolve({ ok: false, reason: `archive contains non-regular entry type '${typeChar}'` });
-          return;
-        }
-        // Path is everything after the date/time field; cheapest is to
-        // grab the last whitespace-delimited token. For " name -> target"
-        // (symlink) we already rejected above.
-        const tokens = line.trim().split(/\s+/u);
-        if (tokens.length < 2) {
-          continue;
-        }
-        const entryPath = tokens.at(-1) ?? "";
-        if (path.isAbsolute(entryPath)) {
-          resolve({ ok: false, reason: `archive contains absolute path: ${entryPath}` });
-          return;
-        }
-        const norm = path.posix.normalize(entryPath);
-        if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
-          resolve({ ok: false, reason: `archive contains '..' traversal: ${entryPath}` });
-          return;
-        }
-        // Size column: tar -tvzf output has size in bytes at a stable
-        // position. tokens[2] is owner, tokens[3] is group, tokens[4] is
-        // size on BSD; on GNU it's tokens[2] (perm/owner combined).
-        // Be permissive — find the first all-digit token as size.
-        for (const t of tokens.slice(1)) {
-          if (/^\d+$/u.test(t)) {
-            totalBytes += Number.parseInt(t, 10);
-            break;
-          }
-        }
-      }
-      if (totalBytes > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES) {
-        resolve({
-          ok: false,
-          reason: `uncompressed size ${totalBytes} exceeds ${TAR_UNPACK_MAX_UNCOMPRESSED_BYTES}`,
-        });
-        return;
-      }
-      resolve({ ok: true });
+      // tar -tf emits one path per line with literal newlines as record
+      // separators. Filenames containing newlines are exotic enough that
+      // refusing them is safer than trying to parse around them.
+      const paths = stdout.split("\n").filter((l) => l.length > 0);
+      resolve({ ok: true, paths });
     });
     child.on("error", (e) => {
       clearTimeout(watchdog);
       if (!aborted) {
-        resolve({ ok: false, reason: `tar -tvzf error: ${String(e)}` });
+        resolve({ ok: false, reason: `tar -tzf error: ${String(e)}` });
       }
     });
     child.stdin.end(tarBuffer);
   });
 }
 
+async function listTarTypeChars(
+  tarBuffer: Buffer,
+): Promise<{ ok: true; typeChars: string[] } | { ok: false; reason: string }> {
+  return new Promise((resolve) => {
+    const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
+    const child = spawn(tarBin, ["-tzvf", "-"], { stdio: ["pipe", "pipe", "pipe"] });
+    let stdout = "";
+    let stderr = "";
+    let aborted = false;
+    const watchdog = setTimeout(() => {
+      aborted = true;
+      try {
+        child.kill("SIGKILL");
+      } catch {
+        /* gone */
+      }
+      resolve({ ok: false, reason: "tar -tzvf timed out" });
+    }, 30_000);
+    child.stdout.on("data", (c: Buffer) => {
+      stdout += c.toString();
+      if (stdout.length > 32 * 1024 * 1024) {
+        aborted = true;
+        try {
+          child.kill("SIGKILL");
+        } catch {
+          /* gone */
+        }
+        clearTimeout(watchdog);
+        resolve({ ok: false, reason: "tar -tzvf output too large" });
+      }
+    });
+    child.stderr.on("data", (c: Buffer) => {
+      stderr += c.toString();
+    });
+    child.on("close", (code) => {
+      clearTimeout(watchdog);
+      if (aborted) {
+        return;
+      }
+      if (code !== 0) {
+        resolve({ ok: false, reason: `tar -tzvf exited ${code}: ${stderr.slice(0, 200)}` });
+        return;
+      }
+      // Take only the first character of each line — the entry type.
+      // We don't touch the rest of the line (path/size/etc) so filenames
+      // with whitespace can't shift our parser.
+      const typeChars = stdout
+        .split("\n")
+        .filter((l) => l.length > 0)
+        .map((l) => l.charAt(0));
+      resolve({ ok: true, typeChars });
+    });
+    child.on("error", (e) => {
+      clearTimeout(watchdog);
+      if (!aborted) {
+        resolve({ ok: false, reason: `tar -tzvf error: ${String(e)}` });
+      }
+    });
+    child.stdin.end(tarBuffer);
+  });
+}
+
+async function preValidateTarball(
+  tarBuffer: Buffer,
+): Promise<{ ok: true } | { ok: false; reason: string }> {
+  const namesResult = await listTarPaths(tarBuffer);
+  if (!namesResult.ok) {
+    return namesResult;
+  }
+  const paths = namesResult.paths;
+  if (paths.length > TAR_UNPACK_MAX_ENTRIES) {
+    return {
+      ok: false,
+      reason: `archive contains ${paths.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
+    };
+  }
+
+  const typesResult = await listTarTypeChars(tarBuffer);
+  if (!typesResult.ok) {
+    return typesResult;
+  }
+  const typeChars = typesResult.typeChars;
+  // The two passes should report the same number of entries; if they
+  // don't, something exotic is going on (filenames with newlines, etc.)
+  // and we refuse defensively.
+  if (typeChars.length !== paths.length) {
+    return {
+      ok: false,
+      reason: `tar -tzf and tar -tzvf disagree on entry count (${paths.length} vs ${typeChars.length}); refusing`,
+    };
+  }
+
+  for (let i = 0; i < paths.length; i++) {
+    const entryPath = paths[i];
+    const t = typeChars[i];
+    if (t === "l" || t === "h") {
+      return { ok: false, reason: `archive contains link entry: ${entryPath}` };
+    }
+    if (t !== "-" && t !== "d") {
+      return { ok: false, reason: `archive contains non-regular entry type '${t}': ${entryPath}` };
+    }
+    if (path.isAbsolute(entryPath)) {
+      return { ok: false, reason: `archive contains absolute path: ${entryPath}` };
+    }
+    const norm = path.posix.normalize(entryPath);
+    if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
+      return { ok: false, reason: `archive contains '..' traversal: ${entryPath}` };
+    }
+    // Reject backslash-containing names too — refuses Windows-style
+    // traversal in archives produced by an attacker on a Windows node.
+    if (entryPath.includes("\\")) {
+      return { ok: false, reason: `archive contains backslash in path: ${entryPath}` };
+    }
+  }
+  return { ok: true };
+}
+
 type UnpackedFileEntry = {
   relPath: string;
   size: number;
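To make the whitespace-safety argument from the docstring above concrete, here is a small standalone sketch contrasting the old column-based parse with the new first-character type check. The sample listing line and file name are made up; only the parsing contrast is the point.

// Hypothetical `tar -tzvf` listing line for a file whose name contains spaces.
const line = "-rw-r--r--  0 user staff      12 Mar 14 12:00 notes from march.txt";

// Old approach: take the last whitespace-delimited token as the path.
// Whitespace in the name shifts the columns and truncates the result.
const lastToken = line.trim().split(/\s+/u).at(-1);
console.log(lastToken); // "march.txt" — the wrong string would be validated

// New approach: use the verbose listing ONLY for the entry type, which is
// always the first character of the line, and take the full path from a
// separate `tar -tzf` pass that prints exactly one path per line.
const typeChar = line.charAt(0);
console.log(typeChar); // "-" — regular file, regardless of what the name contains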
pnpm-lock.yaml (generated; 7 lines changed)
@@ -593,6 +593,13 @@ importers:
       version: link:../..
 
   extensions/file-transfer:
+    dependencies:
+      minimatch:
+        specifier: 10.2.4
+        version: 10.2.4
+      typebox:
+        specifier: 1.1.33
+        version: 1.1.33
     devDependencies:
       '@openclaw/plugin-sdk':
         specifier: workspace:*