fix: allow single-byte host-read text

This commit is contained in:
Frank Yang
2026-04-15 18:28:55 +08:00
parent cff3445a6f
commit ac89e9d964
2 changed files with 75 additions and 1 deletions

View File

@@ -303,6 +303,57 @@ describe("loadWebMedia", () => {
},
);
// Legacy single-byte text (one byte per character, not valid UTF-8) must still load as a
// document when the host-read capability is enabled.
it.each([
  {
    label: "CSV",
    fileName: "legacy.csv",
    contentType: "text/csv",
    // 0xE9 and 0xF1 are followed by ASCII bytes, so this sequence is not valid UTF-8.
    body: Buffer.from("caf\xe9,ni\xf1o\n", "latin1"),
  },
  {
    label: "Markdown",
    fileName: "legacy.md",
    contentType: "text/markdown",
    body: Buffer.from("R\xe9sum\xe9\nni\xf1o\n", "latin1"),
  },
])(
  // `$label` interpolates the row's `label` property. A printf-style `%s` would receive the
  // whole row object and render it into the test title instead of the short label.
  "loads valid single-byte encoded $label files when host-read capability is enabled",
  async ({ fileName, contentType, body }) => {
    const textFile = path.join(fixtureRoot, fileName);
    await fs.writeFile(textFile, body);
    const result = await loadWebMedia(textFile, {
      maxBytes: 1024 * 1024,
      localRoots: "any",
      readFile: async (filePath) => await fs.readFile(filePath),
      hostReadCapability: true,
    });
    // The file is classified by its extension-derived content type, not rejected as binary.
    expect(result.kind).toBe("document");
    expect(result.contentType).toBe(contentType);
  },
);
// A text-like file extension must not be enough to get opaque binary content through the
// host-read path: a payload made only of high bytes has to be rejected.
it.each([
  { label: "CSV", fileName: "high-bytes.csv" },
  { label: "Markdown", fileName: "high-bytes.md" },
  // `$label` interpolates the row's `label` property. A printf-style `%s` would receive the
  // whole row object and render it into the test title instead of the short label.
])("rejects high-byte opaque data disguised as $label", async ({ fileName }) => {
  const fakeTextFile = path.join(fixtureRoot, fileName);
  const opaqueBinary = Buffer.alloc(9000);
  // Fill with a repeating 0xA0..0xFF ramp: no ASCII bytes and no control bytes at all.
  for (let i = 0; i < opaqueBinary.length; i += 1) {
    opaqueBinary[i] = 0xa0 + (i % 96);
  }
  await fs.writeFile(fakeTextFile, opaqueBinary);
  await expect(
    loadWebMedia(fakeTextFile, {
      maxBytes: 1024 * 1024,
      localRoots: "any",
      readFile: async (filePath) => await fs.readFile(filePath),
      hostReadCapability: true,
    }),
  ).rejects.toMatchObject({
    code: "path-not-allowed",
  });
});
it("rejects traversal-style canvas media paths before filesystem access", async () => {
await expect(
loadWebMedia(`${CANVAS_HOST_PATH}/documents/../collection.media/tiny.png`),

View File

@@ -148,6 +148,25 @@ function getTextStats(text: string): { printableRatio: number } {
return { printableRatio: printable / total };
}
/**
 * Heuristic check that a buffer looks like text in a single-byte encoding.
 *
 * A buffer passes when it contains no ASCII control bytes other than tab, line feed and
 * carriage return (DEL, 0x7f, counts as a control byte), and at least half of its bytes are
 * ASCII text (tab, LF, CR, or 0x20-0x7e). Bytes in the 0x80-0xff range are tolerated but do
 * not count towards the ASCII share.
 *
 * @param buffer Raw file bytes to inspect.
 * @returns `true` for an empty buffer or one that satisfies both conditions above.
 */
function hasSingleByteTextShape(buffer: Buffer): boolean {
  const total = buffer.length;
  if (total === 0) {
    return true;
  }

  let asciiCount = 0;
  for (const value of buffer) {
    const isAsciiText =
      value === 0x09 || value === 0x0a || value === 0x0d || (value >= 0x20 && value <= 0x7e);
    if (isAsciiText) {
      asciiCount += 1;
    } else if (value < 0x80) {
      // Every remaining value below 0x80 is a C0 control byte or DEL; one is disqualifying.
      return false;
    }
  }

  return asciiCount / total >= 0.5;
}
function decodeHostReadText(buffer: Buffer): string | undefined {
if (buffer.length === 0) {
return "";
@@ -172,7 +191,11 @@ function decodeHostReadText(buffer: Buffer): string | undefined {
}
return new TextDecoder("utf-8", { fatal: true }).decode(buffer);
} catch {
return undefined;
if (!hasSingleByteTextShape(buffer)) {
return undefined;
}
// WHATWG latin1 decodes common Excel-style single-byte exports via Windows-1252 mapping.
return new TextDecoder("latin1").decode(buffer);
}
}