mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 15:00:41 +00:00
fix: allow punctuation-heavy host-read text
This commit is contained in:
@@ -274,6 +274,35 @@ describe("loadWebMedia", () => {
|
||||
},
|
||||
);
|
||||
|
||||
it.each([
|
||||
{
|
||||
label: "CSV",
|
||||
fileName: "punctuation.csv",
|
||||
contentType: "text/csv",
|
||||
body: ",,,,,,,,,,\n",
|
||||
},
|
||||
{
|
||||
label: "Markdown",
|
||||
fileName: "punctuation.md",
|
||||
contentType: "text/markdown",
|
||||
body: "---\n***\n> > >\n",
|
||||
},
|
||||
])(
|
||||
"loads valid punctuation-heavy %s files when host-read capability is enabled",
|
||||
async ({ fileName, contentType, body }) => {
|
||||
const textFile = path.join(fixtureRoot, fileName);
|
||||
await fs.writeFile(textFile, Buffer.from(body, "utf8"));
|
||||
const result = await loadWebMedia(textFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
});
|
||||
expect(result.kind).toBe("document");
|
||||
expect(result.contentType).toBe(contentType);
|
||||
},
|
||||
);
|
||||
|
||||
it("rejects traversal-style canvas media paths before filesystem access", async () => {
|
||||
await expect(
|
||||
loadWebMedia(`${CANVAS_HOST_PATH}/documents/../collection.media/tiny.png`),
|
||||
|
||||
@@ -90,7 +90,6 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([
|
||||
// Markdown, so host-read needs an explicit "this really decodes as text" fallback.
|
||||
const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
|
||||
const MB = 1024 * 1024;
|
||||
const WORDISH_CHAR = /[\p{L}\p{N}]/u;
|
||||
|
||||
function resolveUtf16Charset(buffer?: Buffer): "utf-16le" | "utf-16be" | undefined {
|
||||
if (!buffer || buffer.length < 2) {
|
||||
@@ -124,18 +123,16 @@ function resolveUtf16Charset(buffer?: Buffer): "utf-16le" | "utf-16be" | undefin
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function getTextStats(text: string): { printableRatio: number; wordishRatio: number } {
|
||||
function getTextStats(text: string): { printableRatio: number } {
|
||||
if (!text) {
|
||||
return { printableRatio: 0, wordishRatio: 0 };
|
||||
return { printableRatio: 0 };
|
||||
}
|
||||
let printable = 0;
|
||||
let control = 0;
|
||||
let wordish = 0;
|
||||
for (const char of text) {
|
||||
const code = char.codePointAt(0) ?? 0;
|
||||
if (code === 9 || code === 10 || code === 13 || code === 32) {
|
||||
printable += 1;
|
||||
wordish += 1;
|
||||
continue;
|
||||
}
|
||||
if (code < 32 || (code >= 0x7f && code <= 0x9f)) {
|
||||
@@ -143,15 +140,12 @@ function getTextStats(text: string): { printableRatio: number; wordishRatio: num
|
||||
continue;
|
||||
}
|
||||
printable += 1;
|
||||
if (WORDISH_CHAR.test(char)) {
|
||||
wordish += 1;
|
||||
}
|
||||
}
|
||||
const total = printable + control;
|
||||
if (total === 0) {
|
||||
return { printableRatio: 0, wordishRatio: 0 };
|
||||
return { printableRatio: 0 };
|
||||
}
|
||||
return { printableRatio: printable / total, wordishRatio: wordish / total };
|
||||
return { printableRatio: printable / total };
|
||||
}
|
||||
|
||||
function decodeHostReadText(buffer: Buffer): string | undefined {
|
||||
@@ -193,8 +187,8 @@ function isValidatedHostReadText(buffer?: Buffer): boolean {
|
||||
if (text === undefined) {
|
||||
return false;
|
||||
}
|
||||
const { printableRatio, wordishRatio } = getTextStats(text);
|
||||
return printableRatio > 0.95 && wordishRatio > 0.2;
|
||||
const { printableRatio } = getTextStats(text);
|
||||
return printableRatio > 0.95;
|
||||
}
|
||||
|
||||
function formatMb(bytes: number, digits = 2): string {
|
||||
|
||||
Reference in New Issue
Block a user