mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:10:45 +00:00
fix(media): tighten hasSingleByteTextShape to reject mixed ASCII/high-byte blobs
Raise the ASCII floor to 70% and add an explicit 30% high-byte cap. The previous 50% threshold accepted alternating 0x41/0xFF buffers (50% ASCII, 0 control bytes), which decoded through Latin-1 and passed the printable-ratio gate — allowing opaque binary data to slip through as a CSV or Markdown document.

Real single-byte text exports (e.g. Excel Latin-1 CSVs with accented chars like é, ñ) rarely exceed 20-25% high bytes, so the tighter thresholds do not regress legitimate input.

Adds a regression test: 9000 bytes alternating 'A'/0xFF must be rejected as path-not-allowed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
committed by
Frank Yang
parent
ac89e9d964
commit
a55d38ed6c
@@ -332,6 +332,31 @@ describe("loadWebMedia", () => {
|
||||
},
|
||||
);
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "alternating-high.csv" },
|
||||
{ label: "Markdown", fileName: "alternating-high.md" },
|
||||
])("rejects alternating ASCII/high-byte data disguised as %s", async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
// Alternating 0x41 ('A') and 0xFF — exactly 50% ASCII, 50% high bytes.
|
||||
// With the old 50% threshold hasSingleByteTextShape would accept this;
|
||||
// the tightened 70%/30% thresholds must reject it.
|
||||
const mixed = Buffer.alloc(9000);
|
||||
for (let i = 0; i < mixed.length; i += 1) {
|
||||
mixed[i] = i % 2 === 0 ? 0x41 : 0xff;
|
||||
}
|
||||
await fs.writeFile(fakeTextFile, mixed);
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "high-bytes.csv" },
|
||||
{ label: "Markdown", fileName: "high-bytes.md" },
|
||||
|
||||
@@ -164,7 +164,8 @@ function hasSingleByteTextShape(buffer: Buffer): boolean {
|
||||
}
|
||||
}
|
||||
const total = buffer.length;
|
||||
- return control === 0 && asciiText / total >= 0.5;
|
||||
+ const highBytes = total - asciiText - control;
|
||||
+ return control === 0 && asciiText / total >= 0.7 && highBytes / total <= 0.3;
|
||||
}
|
||||
|
||||
function decodeHostReadText(buffer: Buffer): string | undefined {
|
||||
|
||||
Reference in New Issue
Block a user