mirror of https://github.com/openclaw/openclaw.git (synced 2026-05-12 22:10:44 +00:00)
[codex] Extract filesystem safety primitives (#77918)
* refactor: extract filesystem safety primitives
* refactor: use fs-safe for file access helpers
* refactor: reuse fs-safe for media reads
* refactor: use fs-safe for image reads
* refactor: reuse fs-safe in qqbot media opener
* refactor: reuse fs-safe for local media checks
* refactor: consume cleaner fs-safe api
* refactor: align fs-safe json option names
* fix: preserve fs-safe migration contracts
* refactor: use fs-safe primitive subpaths
* refactor: use grouped fs-safe subpaths
* refactor: align fs-safe api usage
* refactor: adapt private state store api
* chore: refresh proof gate
* refactor: follow fs-safe json api split
* refactor: follow reduced fs-safe surface
* build: default fs-safe python helper off
* fix: preserve fs-safe plugin sdk aliases
* refactor: consolidate fs-safe usage
* refactor: unify fs-safe store usage
* refactor: trim fs-safe temp workspace usage
* refactor: hide low-level fs-safe primitives
* build: use published fs-safe package
* fix: preserve outbound recovery durability after rebase
* chore: refresh pr checks
committed by GitHub
parent 61481eb34f
commit 538605ff44
@@ -3,6 +3,7 @@ import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { normalizeSource } from "../messaging/media-source.js";
 import {
   ApiError,
   MediaFileType,
@@ -333,4 +334,51 @@ describe("media-chunked: ChunkedMediaApi.uploadChunked", () => {
       await fs.promises.rm(tmp, { recursive: true, force: true });
     }
   });
+
+  it("uses the verified localPath handle if the path is replaced before chunked upload", async () => {
+    const tmp = await fs.promises.mkdtemp(path.join(os.tmpdir(), "chunked-verified-"));
+    const filePath = path.join(tmp, "fixture.bin");
+    await fs.promises.writeFile(filePath, FIXTURE_BUFFER);
+    const source = await normalizeSource({ localPath: filePath }, { maxSize: 1_000_000 });
+    await fs.promises.rm(filePath);
+    await fs.promises.writeFile(filePath, Buffer.from("replacement bytes"));
+    try {
+      const client = mockApiClient();
+      const tm = mockTokenManager();
+      stubFetchOk();
+
+      client.request.mockImplementation(async (_t, _m, p) => {
+        if (p.endsWith("/upload_prepare")) {
+          return makePrepareResponse("uid-verified", 3);
+        }
+        if (p.endsWith("/upload_part_finish")) {
+          return {};
+        }
+        if (p.endsWith("/files")) {
+          return { file_uuid: "u", file_info: "fi", ttl: 10 } satisfies UploadMediaResponse;
+        }
+        throw new Error(`unexpected ${p}`);
+      });
+
+      const api = new ChunkedMediaApi(client, tm);
+      await api.uploadChunked({
+        scope: "c2c",
+        targetId: "u1",
+        fileType: MediaFileType.VIDEO,
+        source,
+        creds: { appId: "a", clientSecret: "s" },
+      });
+
+      const prepareCall = client.request.mock.calls.find((c) =>
+        String(c[2]).endsWith("/upload_prepare"),
+      )!;
+      const prepareBody = prepareCall[3] as { md5: string };
+      expect(prepareBody.md5).toBe(crypto.createHash("md5").update(FIXTURE_BUFFER).digest("hex"));
+    } finally {
+      if (source.kind === "localPath") {
+        await source.opened?.close().catch(() => undefined);
+      }
+      await fs.promises.rm(tmp, { recursive: true, force: true });
+    }
+  });
 });
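Note: this test pins down the time-of-check/time-of-use property the refactor is built around. Once normalizeSource has validated a localPath, every later read must go through the file handle opened at validation time, so replacing the path on disk cannot change the bytes that get hashed and uploaded. A minimal sketch of that open-once-then-fstat pattern (openVerified is an illustrative name, not the actual fs-safe API):

import * as fs from "node:fs";
import type { FileHandle } from "node:fs/promises";

// Illustrative sketch: open once, validate via the descriptor, keep the handle.
// A later rm + rewrite (or symlink flip) of the path cannot affect reads that
// go through this descriptor.
async function openVerified(
  path: string,
  maxSize: number,
): Promise<{ handle: FileHandle; size: number }> {
  const handle = await fs.promises.open(path, "r");
  try {
    const stat = await handle.stat(); // fstat on the descriptor, not the path
    if (!stat.isFile() || stat.size > maxSize) {
      throw new Error(`rejected ${path}: not a regular file within ${maxSize} bytes`);
    }
    return { handle, size: stat.size };
  } catch (err) {
    await handle.close().catch(() => undefined);
    throw err;
  }
}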
@@ -35,8 +35,9 @@
  */

 import * as crypto from "node:crypto";
-import * as fs from "node:fs";
-import type { MediaSource } from "../messaging/media-source.js";
+import type { FileHandle } from "node:fs/promises";
+import type { MediaSource, OpenedLocalFile } from "../messaging/media-source.js";
+import { openLocalFile } from "../messaging/media-source.js";
 import {
   ApiError,
   MediaFileType,
@@ -178,138 +179,137 @@ export class ChunkedMediaApi {
   async uploadChunked(opts: UploadChunkedOptions): Promise<UploadMediaResponse> {
     const prefix = opts.logPrefix ?? "[qqbot:chunked-upload]";

-    // 1. Resolve input: size + local path (or temp buffer handle).
-    const input = resolveSource(opts.source, opts.fileName);
-
-    const displayName = input.fileName;
-    const fileSize = input.size;
-    const pathLabel = input.kind === "localPath" ? input.path : "<buffer>";
-
-    this.logger?.info?.(
-      `${prefix} Start: file=${displayName} size=${formatFileSize(fileSize)} type=${opts.fileType}`,
-    );
-
-    // 2. Compute md5 / sha1 / md5_10m. Identical for buffer and localPath,
-    // but the localPath path streams so it never has to materialize the
-    // whole file twice.
-    const hashes = await computeHashes(input);
-    this.logger?.debug?.(
-      `${prefix} hashes: md5=${hashes.md5} sha1=${hashes.sha1} md5_10m=${hashes.md5_10m}`,
-    );
-
-    // 3. Upload-cache fast path: the md5 hash is already a strong content
-    // identifier, so we can short-circuit before even calling upload_prepare.
-    if (this.cache) {
-      const cached = this.cache.get(hashes.md5, opts.scope, opts.targetId, opts.fileType);
-      if (cached) {
-        this.logger?.info?.(
-          `${prefix} cache HIT (md5=${hashes.md5.slice(0, 8)}) — skipping chunked upload`,
-        );
-        return { file_uuid: "", file_info: cached, ttl: 0 };
-      }
-    }
-
-    // 4. upload_prepare.
-    const fileNameForPrepare =
-      opts.fileType === MediaFileType.FILE ? this.sanitize(displayName) : displayName;
-    const prepareResp = await this.callUploadPrepare(
-      opts,
-      fileNameForPrepare,
-      fileSize,
-      hashes,
-      pathLabel,
-    );
-
-    const { upload_id, parts } = prepareResp;
-    const block_size = prepareResp.block_size;
-    const maxConcurrent = Math.min(
-      prepareResp.concurrency ? prepareResp.concurrency : DEFAULT_CONCURRENT_PARTS,
-      MAX_CONCURRENT_PARTS,
-    );
-    const retryTimeoutMs = prepareResp.retry_timeout
-      ? Math.min(prepareResp.retry_timeout * 1000, MAX_PART_FINISH_RETRY_TIMEOUT_MS)
-      : undefined;
-
-    this.logger?.info?.(
-      `${prefix} prepared: upload_id=${upload_id} block=${formatFileSize(block_size)} parts=${parts.length} concurrency=${maxConcurrent}`,
-    );
-
-    // 5. Upload every part. Concurrency is per-upload, not global.
-    let completedParts = 0;
-    let uploadedBytes = 0;
-
-    const uploadPart = async (part: UploadPart): Promise<void> => {
-      const partIndex = part.index; // 1-based.
-      const offset = (partIndex - 1) * block_size;
-      const length = Math.min(block_size, fileSize - offset);
-
-      const partBuffer = await readPart(input, offset, length);
-      const md5Hex = crypto.createHash("md5").update(partBuffer).digest("hex");
-
-      this.logger?.debug?.(
-        `${prefix} part ${partIndex}/${parts.length}: ${formatFileSize(length)} offset=${offset} md5=${md5Hex}`,
-      );
-
-      // 5a. PUT to pre-signed COS URL.
-      await putToPresignedUrl(
-        part.presigned_url,
-        partBuffer,
-        partIndex,
-        parts.length,
-        this.logger,
-        prefix,
-      );
-
-      // 5b. upload_part_finish — fetch a fresh token each time to defend
-      // against long uploads exceeding the token TTL.
-      await this.callUploadPartFinish(opts, upload_id, partIndex, length, md5Hex, retryTimeoutMs);
-
-      completedParts++;
-      uploadedBytes += length;
-      this.logger?.info?.(
-        `${prefix} part ${partIndex}/${parts.length} done (${completedParts}/${parts.length})`,
-      );
-
-      opts.onProgress?.({
-        completedParts,
-        totalParts: parts.length,
-        uploadedBytes,
-        totalBytes: fileSize,
-      });
-    };
+    // 1. Resolve input: size + verified local file descriptor (or buffer).
+    const input = await resolveSource(opts.source, opts.fileName);
+
+    try {
+      const displayName = input.fileName;
+      const fileSize = input.size;
+      const pathLabel = input.kind === "localPath" ? input.path : "<buffer>";
+
+      this.logger?.info?.(
+        `${prefix} Start: file=${displayName} size=${formatFileSize(fileSize)} type=${opts.fileType}`,
+      );
+
+      // 2. Compute md5 / sha1 / md5_10m. Identical for buffer and localPath,
+      // but the localPath descriptor streams so it never has to materialize the
+      // whole file twice or reopen a path after validation.
+      const hashes = await computeHashes(input);
+      this.logger?.debug?.(
+        `${prefix} hashes: md5=${hashes.md5} sha1=${hashes.sha1} md5_10m=${hashes.md5_10m}`,
+      );
+
+      // 3. Upload-cache fast path: the md5 hash is already a strong content
+      // identifier, so we can short-circuit before even calling upload_prepare.
+      if (this.cache) {
+        const cached = this.cache.get(hashes.md5, opts.scope, opts.targetId, opts.fileType);
+        if (cached) {
+          this.logger?.info?.(
+            `${prefix} cache HIT (md5=${hashes.md5.slice(0, 8)}) — skipping chunked upload`,
+          );
+          return { file_uuid: "", file_info: cached, ttl: 0 };
+        }
+      }
+
+      // 4. upload_prepare.
+      const fileNameForPrepare =
+        opts.fileType === MediaFileType.FILE ? this.sanitize(displayName) : displayName;
+      const prepareResp = await this.callUploadPrepare(
+        opts,
+        fileNameForPrepare,
+        fileSize,
+        hashes,
+        pathLabel,
+      );
+
+      const { upload_id, parts } = prepareResp;
+      const block_size = prepareResp.block_size;
+      const maxConcurrent = Math.min(
+        prepareResp.concurrency ? prepareResp.concurrency : DEFAULT_CONCURRENT_PARTS,
+        MAX_CONCURRENT_PARTS,
+      );
+      const retryTimeoutMs = prepareResp.retry_timeout
+        ? Math.min(prepareResp.retry_timeout * 1000, MAX_PART_FINISH_RETRY_TIMEOUT_MS)
+        : undefined;
+
+      this.logger?.info?.(
+        `${prefix} prepared: upload_id=${upload_id} block=${formatFileSize(block_size)} parts=${parts.length} concurrency=${maxConcurrent}`,
+      );
+
+      // 5. Upload every part. Concurrency is per-upload, not global.
+      let completedParts = 0;
+      let uploadedBytes = 0;
+
+      const uploadPart = async (part: UploadPart): Promise<void> => {
+        const partIndex = part.index; // 1-based.
+        const offset = (partIndex - 1) * block_size;
+        const length = Math.min(block_size, fileSize - offset);
+
+        const partBuffer = await readPart(input, offset, length);
+        const md5Hex = crypto.createHash("md5").update(partBuffer).digest("hex");
+
+        this.logger?.debug?.(
+          `${prefix} part ${partIndex}/${parts.length}: ${formatFileSize(length)} offset=${offset} md5=${md5Hex}`,
+        );
+
+        // 5a. PUT to pre-signed COS URL.
+        await putToPresignedUrl(
+          part.presigned_url,
+          partBuffer,
+          partIndex,
+          parts.length,
+          this.logger,
+          prefix,
+        );
+
+        // 5b. upload_part_finish — fetch a fresh token each time to defend
+        // against long uploads exceeding the token TTL.
+        await this.callUploadPartFinish(opts, upload_id, partIndex, length, md5Hex, retryTimeoutMs);
+
+        completedParts++;
+        uploadedBytes += length;
+        this.logger?.info?.(
+          `${prefix} part ${partIndex}/${parts.length} done (${completedParts}/${parts.length})`,
+        );
+
+        opts.onProgress?.({
+          completedParts,
+          totalParts: parts.length,
+          uploadedBytes,
+          totalBytes: fileSize,
+        });
+      };
+
+      await runWithConcurrency(
+        parts.map((part) => () => uploadPart(part)),
+        maxConcurrent,
+      );
+
+      this.logger?.info?.(`${prefix} all parts uploaded, completing...`);
+
+      // 6. complete_upload.
+      const result = await this.callCompleteUpload(opts, upload_id);
+      this.logger?.info?.(`${prefix} completed: file_uuid=${result.file_uuid} ttl=${result.ttl}s`);
+
+      // 7. Populate the shared upload cache so subsequent sends skip re-uploading.
+      if (this.cache && result.file_info && result.ttl > 0) {
+        this.cache.set(
+          hashes.md5,
+          opts.scope,
+          opts.targetId,
+          opts.fileType,
+          result.file_info,
+          result.file_uuid,
+          result.ttl,
+        );
+      }
+
+      return result;
+    } finally {
+      // localPath inputs share one verified descriptor across hashing and
+      // per-part reads, so release it here, unless the caller supplied the
+      // open handle and therefore still owns it (closeWhenDone === false).
+      if (input.kind === "localPath" && input.closeWhenDone) {
+        await input.opened.close().catch(() => undefined);
+      }
+    }
-
-    this.logger?.info?.(`${prefix} all parts uploaded, completing...`);
-
-    // 6. complete_upload.
-    const result = await this.callCompleteUpload(opts, upload_id);
-    this.logger?.info?.(`${prefix} completed: file_uuid=${result.file_uuid} ttl=${result.ttl}s`);
-
-    // 7. Populate the shared upload cache so subsequent sends skip re-uploading.
-    if (this.cache && result.file_info && result.ttl > 0) {
-      this.cache.set(
-        hashes.md5,
-        opts.scope,
-        opts.targetId,
-        opts.fileType,
-        result.file_info,
-        result.file_uuid,
-        result.ttl,
-      );
-    }
-
-    return result;
   }

   // -------- Internal call wrappers --------
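runWithConcurrency is referenced above but defined elsewhere in the module. One plausible shape for it, a bounded worker pool over task thunks, is sketched here as an assumption rather than the repo's actual implementation:

// Hypothetical sketch: run task thunks with at most `limit` in flight.
// Promise.all rejects on the first task failure.
async function runWithConcurrency(
  tasks: Array<() => Promise<void>>,
  limit: number,
): Promise<void> {
  let next = 0;
  const workers = Array.from({ length: Math.min(limit, tasks.length) }, async () => {
    while (next < tasks.length) {
      const task = tasks[next++]; // index handoff is synchronous, so no races
      await task();
    }
  });
  await Promise.all(workers);
}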
@@ -429,17 +429,31 @@ export function isChunkedUploadImplemented(): boolean {
  * the bytes plus the metadata required by `upload_prepare`.
  */
 type ChunkedInput =
-  | { kind: "localPath"; path: string; size: number; fileName: string }
+  | {
+      kind: "localPath";
+      path: string;
+      size: number;
+      fileName: string;
+      opened: OpenedLocalFile;
+      closeWhenDone: boolean;
+    }
   | { kind: "buffer"; buffer: Buffer; size: number; fileName: string };

-function resolveSource(source: MediaSource, fileNameOverride?: string): ChunkedInput {
+async function resolveSource(
+  source: MediaSource,
+  fileNameOverride?: string,
+): Promise<ChunkedInput> {
   if (source.kind === "localPath") {
     const inferredName = source.path.split(/[/\\]/).pop() || "file";
+    const opened =
+      source.opened ?? (await openLocalFile(source.path, { maxSize: Number.MAX_SAFE_INTEGER }));
     return {
       kind: "localPath",
       path: source.path,
-      size: source.size,
+      size: opened.size,
       fileName: fileNameOverride ?? inferredName,
+      opened,
+      closeWhenDone: source.opened === undefined,
     };
   }
   if (source.kind === "buffer") {
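The closeWhenDone flag encodes a narrow ownership rule: resolveSource closes only what it opened itself, while a caller that passes source.opened keeps responsibility for that handle. Roughly, assuming some localPath MediaSource value src:

// Illustrative usage of the ownership contract.
const input = await resolveSource(src);
try {
  // ... hash and read parts through input.opened.handle ...
} finally {
  // Close only a handle resolveSource opened; a caller-supplied src.opened
  // stays open and remains the caller's to close.
  if (input.kind === "localPath" && input.closeWhenDone) {
    await input.opened.close().catch(() => undefined);
  }
}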
@@ -460,14 +474,9 @@ async function readPart(input: ChunkedInput, offset: number, length: number): Pr
   if (input.kind === "buffer") {
     return input.buffer.subarray(offset, offset + length);
   }
-  const handle = await fs.promises.open(input.path, "r");
-  try {
-    const buf = Buffer.alloc(length);
-    const { bytesRead } = await handle.read(buf, 0, length, offset);
-    return bytesRead < length ? buf.subarray(0, bytesRead) : buf;
-  } finally {
-    await handle.close();
-  }
+  const buf = Buffer.alloc(length);
+  const { bytesRead } = await input.opened.handle.read(buf, 0, length, offset);
+  return bytesRead < length ? buf.subarray(0, bytesRead) : buf;
 }
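Sharing one FileHandle across concurrent part readers is safe because handle.read with an explicit position argument is a positional read (pread on POSIX): it never advances a shared file offset. For example, two parts can read in parallel from the same descriptor:

// Assumes an open FileHandle `fh`; both reads address absolute offsets.
const a = Buffer.alloc(1024);
const b = Buffer.alloc(1024);
await Promise.all([
  fh.read(a, 0, a.length, 0),    // part 1: bytes [0, 1024)
  fh.read(b, 0, b.length, 1024), // part 2: bytes [1024, 2048)
]);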
// ============ Hash computation ============

@@ -476,8 +485,8 @@ async function readPart(input: ChunkedInput, offset: number, length: number): Pr
  * Stream the source once to compute md5 + sha1 + md5_10m.
  *
  * For buffer inputs the three hashes are computed in a single pass over
- * the existing memory. For localPath inputs a ReadStream drives the
- * hashers so memory use stays constant.
+ * the existing memory. For localPath inputs the verified descriptor drives
+ * the hashers so memory use stays constant.
  */
 async function computeHashes(input: ChunkedInput): Promise<UploadPrepareHashes> {
   if (input.kind === "buffer") {
@@ -497,7 +506,7 @@ async function computeHashes(input: ChunkedInput): Promise<UploadPrepareHashes>
   let consumed = 0;
   const needsMd5_10m = input.size > MD5_10M_SIZE;

-  const stream = fs.createReadStream(input.path);
+  const stream = createReadStreamFromHandle(input.opened.handle);
   stream.on("data", (chunk: Buffer | string) => {
     const buf = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
     md5.update(buf);
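The consumed counter feeds md5_10m, which by its name appears to hash only the file's first 10 MiB. A sketch of that partial-hash bookkeeping, assuming MD5_10M_SIZE is 10 MiB (the constant's actual value is not shown in this diff):

import * as crypto from "node:crypto";

const MD5_10M_SIZE = 10 * 1024 * 1024; // assumed value
const md5_10m = crypto.createHash("md5");
let consumed = 0;

// Called per streamed chunk: only bytes inside the first-10-MiB window
// reach the md5_10m hasher; consumed tracks the absolute position.
function updateMd5_10m(buf: Buffer): void {
  if (consumed < MD5_10M_SIZE) {
    md5_10m.update(buf.subarray(0, Math.min(buf.length, MD5_10M_SIZE - consumed)));
  }
  consumed += buf.length;
}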
@@ -523,6 +532,10 @@ async function computeHashes(input: ChunkedInput): Promise<UploadPrepareHashes>
   });
 }

+function createReadStreamFromHandle(handle: FileHandle): NodeJS.ReadableStream {
+  return handle.createReadStream({ autoClose: false, start: 0 });
+}
+
 // ============ COS PUT ============

 /** Per-part retry budget for the COS PUT call (exponential backoff). */
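With autoClose: false the stream borrows the handle instead of owning it, so after hashing, the same descriptor can still serve the positional part reads above. A short sketch, assuming an already-open FileHandle fh:

import * as crypto from "node:crypto";

const sha1 = crypto.createHash("sha1");
for await (const chunk of fh.createReadStream({ autoClose: false, start: 0 })) {
  sha1.update(chunk as Buffer);
}
const digest = sha1.digest("hex");

// The handle survives the drained stream, so positional reads still work:
const head = Buffer.alloc(16);
await fh.read(head, 0, 16, 0);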