[codex] Extract filesystem safety primitives (#77918)

* refactor: extract filesystem safety primitives

* refactor: use fs-safe for file access helpers

* refactor: reuse fs-safe for media reads

* refactor: use fs-safe for image reads

* refactor: reuse fs-safe in qqbot media opener

* refactor: reuse fs-safe for local media checks

* refactor: consume cleaner fs-safe api

* refactor: align fs-safe json option names

* fix: preserve fs-safe migration contracts

* refactor: use fs-safe primitive subpaths

* refactor: use grouped fs-safe subpaths

* refactor: align fs-safe api usage

* refactor: adapt private state store api

* chore: refresh proof gate

* refactor: follow fs-safe json api split

* refactor: follow reduced fs-safe surface

* build: default fs-safe python helper off

* fix: preserve fs-safe plugin sdk aliases

* refactor: consolidate fs-safe usage

* refactor: unify fs-safe store usage

* refactor: trim fs-safe temp workspace usage

* refactor: hide low-level fs-safe primitives

* build: use published fs-safe package

* fix: preserve outbound recovery durability after rebase

* chore: refresh pr checks
Author: Peter Steinberger
Date: 2026-05-06 02:15:17 +01:00
Committed by: GitHub
Parent: 61481eb34f
Commit: 538605ff44
356 changed files with 4918 additions and 11913 deletions
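
The refactor routes file access through verified handles instead of raw paths. The diffs below import `openLocalFile` and `OpenedLocalFile` from `media-source.js` without showing their implementation; a minimal sketch of the pattern the call sites assume (the shape is inferred from usage in this commit, not taken from the actual fs-safe code) could look like this:

import * as fs from "node:fs";
import type { FileHandle } from "node:fs/promises";

// Hypothetical shape; the real OpenedLocalFile in media-source.js is not
// shown in this diff, but call sites use .handle, .size, and .close().
interface OpenedLocalFile {
  handle: FileHandle;
  size: number;
  close(): Promise<void>;
}

async function openLocalFile(
  filePath: string,
  opts: { maxSize: number },
): Promise<OpenedLocalFile> {
  const handle = await fs.promises.open(filePath, "r");
  try {
    // fstat on the open descriptor, not a second stat on the path: size and
    // file type are guaranteed to describe the bytes the handle will actually
    // read, even if the path is swapped afterwards.
    const stat = await handle.stat();
    if (!stat.isFile()) {
      throw new Error(`not a regular file: ${filePath}`);
    }
    if (stat.size > opts.maxSize) {
      throw new Error(`file too large: ${stat.size} > ${opts.maxSize}`);
    }
    return { handle, size: stat.size, close: () => handle.close() };
  } catch (err) {
    await handle.close().catch(() => undefined);
    throw err;
  }
}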


@@ -3,6 +3,7 @@ import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { normalizeSource } from "../messaging/media-source.js";
import {
ApiError,
MediaFileType,
@@ -333,4 +334,51 @@ describe("media-chunked: ChunkedMediaApi.uploadChunked", () => {
await fs.promises.rm(tmp, { recursive: true, force: true });
}
});
it("uses the verified localPath handle if the path is replaced before chunked upload", async () => {
const tmp = await fs.promises.mkdtemp(path.join(os.tmpdir(), "chunked-verified-"));
const filePath = path.join(tmp, "fixture.bin");
await fs.promises.writeFile(filePath, FIXTURE_BUFFER);
const source = await normalizeSource({ localPath: filePath }, { maxSize: 1_000_000 });
await fs.promises.rm(filePath);
await fs.promises.writeFile(filePath, Buffer.from("replacement bytes"));
try {
const client = mockApiClient();
const tm = mockTokenManager();
stubFetchOk();
client.request.mockImplementation(async (_t, _m, p) => {
if (p.endsWith("/upload_prepare")) {
return makePrepareResponse("uid-verified", 3);
}
if (p.endsWith("/upload_part_finish")) {
return {};
}
if (p.endsWith("/files")) {
return { file_uuid: "u", file_info: "fi", ttl: 10 } satisfies UploadMediaResponse;
}
throw new Error(`unexpected ${p}`);
});
const api = new ChunkedMediaApi(client, tm);
await api.uploadChunked({
scope: "c2c",
targetId: "u1",
fileType: MediaFileType.VIDEO,
source,
creds: { appId: "a", clientSecret: "s" },
});
const prepareCall = client.request.mock.calls.find((c) =>
String(c[2]).endsWith("/upload_prepare"),
)!;
const prepareBody = prepareCall[3] as { md5: string };
expect(prepareBody.md5).toBe(crypto.createHash("md5").update(FIXTURE_BUFFER).digest("hex"));
} finally {
if (source.kind === "localPath") {
await source.opened?.close().catch(() => undefined);
}
await fs.promises.rm(tmp, { recursive: true, force: true });
}
});
});
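
The new test above pins down the TOCTOU guarantee: hashing and part reads go through the FileHandle captured at verification time, so replacing the path afterwards changes nothing. A standalone illustration of the descriptor semantics the test relies on (an illustrative demo, not repository code; POSIX behavior, as removing an open file may fail on Windows):

import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";

// An open descriptor pins the original inode, so deleting and rewriting the
// path does not change what the handle reads.
async function demo(): Promise<void> {
  const dir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "toctou-"));
  const file = path.join(dir, "fixture.bin");
  await fs.promises.writeFile(file, "original");
  const handle = await fs.promises.open(file, "r");
  await fs.promises.rm(file); // replace the path after the handle was opened
  await fs.promises.writeFile(file, "replacement");
  const buf = Buffer.alloc(8);
  await handle.read(buf, 0, 8, 0); // positional read through the descriptor
  console.log(buf.toString()); // prints "original", not the replacement bytes
  await handle.close();
  await fs.promises.rm(dir, { recursive: true, force: true });
}

demo().catch(console.error);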


@@ -35,8 +35,9 @@
*/
import * as crypto from "node:crypto";
import * as fs from "node:fs";
import type { MediaSource } from "../messaging/media-source.js";
import type { FileHandle } from "node:fs/promises";
import type { MediaSource, OpenedLocalFile } from "../messaging/media-source.js";
import { openLocalFile } from "../messaging/media-source.js";
import {
ApiError,
MediaFileType,
@@ -178,138 +179,137 @@ export class ChunkedMediaApi {
async uploadChunked(opts: UploadChunkedOptions): Promise<UploadMediaResponse> {
const prefix = opts.logPrefix ?? "[qqbot:chunked-upload]";
// 1. Resolve input: size + local path (or temp buffer handle).
const input = resolveSource(opts.source, opts.fileName);
const displayName = input.fileName;
const fileSize = input.size;
const pathLabel = input.kind === "localPath" ? input.path : "<buffer>";
this.logger?.info?.(
`${prefix} Start: file=${displayName} size=${formatFileSize(fileSize)} type=${opts.fileType}`,
);
// 2. Compute md5 / sha1 / md5_10m. Identical for buffer and localPath,
// but the localPath path streams so it never has to materialize the
// whole file twice.
const hashes = await computeHashes(input);
this.logger?.debug?.(
`${prefix} hashes: md5=${hashes.md5} sha1=${hashes.sha1} md5_10m=${hashes.md5_10m}`,
);
// 3. Upload-cache fast path: the md5 hash is already a strong content
// identifier, so we can short-circuit before even calling upload_prepare.
if (this.cache) {
const cached = this.cache.get(hashes.md5, opts.scope, opts.targetId, opts.fileType);
if (cached) {
this.logger?.info?.(
`${prefix} cache HIT (md5=${hashes.md5.slice(0, 8)}) — skipping chunked upload`,
);
return { file_uuid: "", file_info: cached, ttl: 0 };
}
}
// 4. upload_prepare.
const fileNameForPrepare =
opts.fileType === MediaFileType.FILE ? this.sanitize(displayName) : displayName;
const prepareResp = await this.callUploadPrepare(
opts,
fileNameForPrepare,
fileSize,
hashes,
pathLabel,
);
const { upload_id, parts } = prepareResp;
const block_size = prepareResp.block_size;
const maxConcurrent = Math.min(
prepareResp.concurrency ? prepareResp.concurrency : DEFAULT_CONCURRENT_PARTS,
MAX_CONCURRENT_PARTS,
);
const retryTimeoutMs = prepareResp.retry_timeout
? Math.min(prepareResp.retry_timeout * 1000, MAX_PART_FINISH_RETRY_TIMEOUT_MS)
: undefined;
this.logger?.info?.(
`${prefix} prepared: upload_id=${upload_id} block=${formatFileSize(block_size)} parts=${parts.length} concurrency=${maxConcurrent}`,
);
// 5. Upload every part. Concurrency is per-upload, not global.
let completedParts = 0;
let uploadedBytes = 0;
const uploadPart = async (part: UploadPart): Promise<void> => {
const partIndex = part.index; // 1-based.
const offset = (partIndex - 1) * block_size;
const length = Math.min(block_size, fileSize - offset);
const partBuffer = await readPart(input, offset, length);
const md5Hex = crypto.createHash("md5").update(partBuffer).digest("hex");
this.logger?.debug?.(
`${prefix} part ${partIndex}/${parts.length}: ${formatFileSize(length)} offset=${offset} md5=${md5Hex}`,
);
// 5a. PUT to pre-signed COS URL.
await putToPresignedUrl(
part.presigned_url,
partBuffer,
partIndex,
parts.length,
this.logger,
prefix,
);
// 5b. upload_part_finish — fetch a fresh token each time to defend
// against long uploads exceeding the token TTL.
await this.callUploadPartFinish(opts, upload_id, partIndex, length, md5Hex, retryTimeoutMs);
completedParts++;
uploadedBytes += length;
this.logger?.info?.(
`${prefix} part ${partIndex}/${parts.length} done (${completedParts}/${parts.length})`,
);
opts.onProgress?.({
completedParts,
totalParts: parts.length,
uploadedBytes,
totalBytes: fileSize,
});
};
// 1. Resolve input: size + verified local file descriptor (or buffer).
const input = await resolveSource(opts.source, opts.fileName);
try {
const displayName = input.fileName;
const fileSize = input.size;
const pathLabel = input.kind === "localPath" ? input.path : "<buffer>";
this.logger?.info?.(
`${prefix} Start: file=${displayName} size=${formatFileSize(fileSize)} type=${opts.fileType}`,
);
// 2. Compute md5 / sha1 / md5_10m. Identical for buffer and localPath,
// but the localPath descriptor streams so it never has to materialize the
// whole file twice or reopen a path after validation.
const hashes = await computeHashes(input);
this.logger?.debug?.(
`${prefix} hashes: md5=${hashes.md5} sha1=${hashes.sha1} md5_10m=${hashes.md5_10m}`,
);
// 3. Upload-cache fast path: the md5 hash is already a strong content
// identifier, so we can short-circuit before even calling upload_prepare.
if (this.cache) {
const cached = this.cache.get(hashes.md5, opts.scope, opts.targetId, opts.fileType);
if (cached) {
this.logger?.info?.(
`${prefix} cache HIT (md5=${hashes.md5.slice(0, 8)}) — skipping chunked upload`,
);
return { file_uuid: "", file_info: cached, ttl: 0 };
}
}
// 4. upload_prepare.
const fileNameForPrepare =
opts.fileType === MediaFileType.FILE ? this.sanitize(displayName) : displayName;
const prepareResp = await this.callUploadPrepare(
opts,
fileNameForPrepare,
fileSize,
hashes,
pathLabel,
);
const { upload_id, parts } = prepareResp;
const block_size = prepareResp.block_size;
const maxConcurrent = Math.min(
prepareResp.concurrency ? prepareResp.concurrency : DEFAULT_CONCURRENT_PARTS,
MAX_CONCURRENT_PARTS,
);
const retryTimeoutMs = prepareResp.retry_timeout
? Math.min(prepareResp.retry_timeout * 1000, MAX_PART_FINISH_RETRY_TIMEOUT_MS)
: undefined;
this.logger?.info?.(
`${prefix} prepared: upload_id=${upload_id} block=${formatFileSize(block_size)} parts=${parts.length} concurrency=${maxConcurrent}`,
);
// 5. Upload every part. Concurrency is per-upload, not global.
let completedParts = 0;
let uploadedBytes = 0;
const uploadPart = async (part: UploadPart): Promise<void> => {
const partIndex = part.index; // 1-based.
const offset = (partIndex - 1) * block_size;
const length = Math.min(block_size, fileSize - offset);
const partBuffer = await readPart(input, offset, length);
const md5Hex = crypto.createHash("md5").update(partBuffer).digest("hex");
this.logger?.debug?.(
`${prefix} part ${partIndex}/${parts.length}: ${formatFileSize(length)} offset=${offset} md5=${md5Hex}`,
);
// 5a. PUT to pre-signed COS URL.
await putToPresignedUrl(
part.presigned_url,
partBuffer,
partIndex,
parts.length,
this.logger,
prefix,
);
// 5b. upload_part_finish — fetch a fresh token each time to defend
// against long uploads exceeding the token TTL.
await this.callUploadPartFinish(opts, upload_id, partIndex, length, md5Hex, retryTimeoutMs);
completedParts++;
uploadedBytes += length;
this.logger?.info?.(
`${prefix} part ${partIndex}/${parts.length} done (${completedParts}/${parts.length})`,
);
opts.onProgress?.({
completedParts,
totalParts: parts.length,
uploadedBytes,
totalBytes: fileSize,
});
};
await runWithConcurrency(
parts.map((part) => () => uploadPart(part)),
maxConcurrent,
);
this.logger?.info?.(`${prefix} all parts uploaded, completing...`);
// 6. complete_upload.
const result = await this.callCompleteUpload(opts, upload_id);
this.logger?.info?.(`${prefix} completed: file_uuid=${result.file_uuid} ttl=${result.ttl}s`);
// 7. Populate the shared upload cache so subsequent sends skip re-uploading.
if (this.cache && result.file_info && result.ttl > 0) {
this.cache.set(
hashes.md5,
opts.scope,
opts.targetId,
opts.fileType,
result.file_info,
result.file_uuid,
result.ttl,
);
}
return result;
} finally {
// Close the handle only if resolveSource opened it for this upload;
// handles passed in on the source (source.opened) stay owned, and are
// closed, by the caller.
if (input.kind === "localPath" && input.closeWhenDone) {
await input.opened.close().catch(() => undefined);
}
}
this.logger?.info?.(`${prefix} all parts uploaded, completing...`);
// 6. complete_upload.
const result = await this.callCompleteUpload(opts, upload_id);
this.logger?.info?.(`${prefix} completed: file_uuid=${result.file_uuid} ttl=${result.ttl}s`);
// 7. Populate the shared upload cache so subsequent sends skip re-uploading.
if (this.cache && result.file_info && result.ttl > 0) {
this.cache.set(
hashes.md5,
opts.scope,
opts.targetId,
opts.fileType,
result.file_info,
result.file_uuid,
result.ttl,
);
}
return result;
}
// -------- Internal call wrappers --------
@@ -429,17 +429,31 @@ export function isChunkedUploadImplemented(): boolean {
* the bytes plus the metadata required by `upload_prepare`.
*/
type ChunkedInput =
| { kind: "localPath"; path: string; size: number; fileName: string }
| {
kind: "localPath";
path: string;
size: number;
fileName: string;
opened: OpenedLocalFile;
closeWhenDone: boolean;
}
| { kind: "buffer"; buffer: Buffer; size: number; fileName: string };
function resolveSource(source: MediaSource, fileNameOverride?: string): ChunkedInput {
async function resolveSource(
source: MediaSource,
fileNameOverride?: string,
): Promise<ChunkedInput> {
if (source.kind === "localPath") {
const inferredName = source.path.split(/[/\\]/).pop() || "file";
const opened =
source.opened ?? (await openLocalFile(source.path, { maxSize: Number.MAX_SAFE_INTEGER }));
return {
kind: "localPath",
path: source.path,
size: source.size,
size: opened.size,
fileName: fileNameOverride ?? inferredName,
opened,
closeWhenDone: source.opened === undefined,
};
}
if (source.kind === "buffer") {
@@ -460,14 +474,9 @@ async function readPart(input: ChunkedInput, offset: number, length: number): Pr
if (input.kind === "buffer") {
return input.buffer.subarray(offset, offset + length);
}
const handle = await fs.promises.open(input.path, "r");
try {
const buf = Buffer.alloc(length);
const { bytesRead } = await handle.read(buf, 0, length, offset);
return bytesRead < length ? buf.subarray(0, bytesRead) : buf;
} finally {
await handle.close();
}
const buf = Buffer.alloc(length);
const { bytesRead } = await input.opened.handle.read(buf, 0, length, offset);
return bytesRead < length ? buf.subarray(0, bytesRead) : buf;
}
// ============ Hash computation ============
@@ -476,8 +485,8 @@ async function readPart(input: ChunkedInput, offset: number, length: number): Pr
* Stream the source once to compute md5 + sha1 + md5_10m.
*
* For buffer inputs the three hashes are computed in a single pass over
* the existing memory. For localPath inputs a ReadStream drives the
* hashers so memory use stays constant.
* the existing memory. For localPath inputs the verified descriptor drives
* the hashers so memory use stays constant.
*/
async function computeHashes(input: ChunkedInput): Promise<UploadPrepareHashes> {
if (input.kind === "buffer") {
@@ -497,7 +506,7 @@ async function computeHashes(input: ChunkedInput): Promise<UploadPrepareHashes>
let consumed = 0;
const needsMd5_10m = input.size > MD5_10M_SIZE;
const stream = fs.createReadStream(input.path);
const stream = createReadStreamFromHandle(input.opened.handle);
stream.on("data", (chunk: Buffer | string) => {
const buf = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
md5.update(buf);
@@ -523,6 +532,10 @@ async function computeHashes(input: ChunkedInput): Promise<UploadPrepareHashes>
});
}
function createReadStreamFromHandle(handle: FileHandle): NodeJS.ReadableStream {
return handle.createReadStream({ autoClose: false, start: 0 });
}
// ============ COS PUT ============
/** Per-part retry budget for the COS PUT call (exponential backoff). */
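
A usage sketch of why `createReadStreamFromHandle` passes `autoClose: false`: the hashing pass streams through the handle first, and that same handle must stay open for the per-part positional reads that follow. This is an illustrative flow under that assumption, not the repository's code:

import * as fs from "node:fs";

// Hash pass first, then per-part positional reads over the same handle.
// autoClose: false keeps the descriptor open when the stream ends, which is
// what lets readPart reuse input.opened.handle afterwards.
async function hashThenReadParts(filePath: string): Promise<void> {
  const handle = await fs.promises.open(filePath, "r");
  try {
    const stream = handle.createReadStream({ autoClose: false, start: 0 });
    for await (const chunk of stream) {
      void chunk; // feed md5 / sha1 / md5_10m hashers here
    }
    // The handle is still open: a positional part read still succeeds.
    const buf = Buffer.alloc(1024);
    const { bytesRead } = await handle.read(buf, 0, buf.length, 0);
    void bytesRead;
  } finally {
    await handle.close();
  }
}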