mirror of
https://github.com/openclaw/openclaw.git
synced 2026-07-01 08:13:35 +00:00
fix(agents): truncate console text on code-point boundaries (#96296)
sanitizeForConsole filtered control characters in a code-point-aware way but then truncated with sanitized.slice(0, maxChars), which cuts on UTF-16 code units. When the cap landed between the two code units of an astral character (emoji, CJK extension, etc.), the output ended in a lone high surrogate before the ellipsis. Cap on code points instead. Co-authored-by: ly-wang19 <ly-wang19@users.noreply.github.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
20
src/agents/console-sanitize.test.ts
Normal file
20
src/agents/console-sanitize.test.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
// Console sanitizer tests cover control-char filtering and code-point-safe truncation.
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { sanitizeForConsole } from "./console-sanitize.js";
|
||||
|
||||
/**
 * Reports whether `value` contains an unpaired UTF-16 surrogate code unit.
 *
 * The pattern has no `u` flag, so it matches individual code units: either a
 * high surrogate (U+D800–U+DBFF) that is not immediately followed by a low
 * surrogate (U+DC00–U+DFFF), or a low surrogate that is not immediately
 * preceded by a high surrogate.
 *
 * @param value - The string to inspect.
 * @returns `true` when at least one lone surrogate is present, otherwise `false`.
 */
const hasLoneSurrogate = (value: string): boolean =>
  /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/.test(value);
|
||||
|
||||
// Test suite for sanitizeForConsole: one case for truncation with a small maxChars cap,
// one case for whitespace handling and the undefined pass-through.
describe("sanitizeForConsole", () => {
|
||||
it("truncates on code-point boundaries without splitting a surrogate pair", () => {
|
||||
const grin = String.fromCodePoint(0x1f600); // 😀 — two UTF-16 code units
|
||||
// Six emoji (twelve UTF-16 code units) with a cap of 3: a code-unit cut at index 3 would land
// in the middle of the second emoji.
const out = sanitizeForConsole(grin.repeat(6), 3);
|
||||
// Expect exactly three whole emoji followed by the ellipsis.
expect(out).toBe(`${grin.repeat(3)}…`);
|
||||
// The `out !== undefined` guard narrows the type before the surrogate scan.
expect(out !== undefined && hasLoneSurrogate(out)).toBe(false);
|
||||
});
|
||||
|
||||
it("filters control chars, flattens whitespace, and leaves short strings intact", () => {
|
||||
// The input exercises a tab plus leading/trailing spaces; the default maxChars is used.
expect(sanitizeForConsole(" hello\tworld ")).toBe("hello world");
|
||||
// An undefined input is expected to come back as undefined.
expect(sanitizeForConsole(undefined)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -22,5 +22,11 @@ export function sanitizeForConsole(text: string | undefined, maxChars = 200): st
|
||||
.replace(/[\r\n\t]+/g, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
return sanitized.length > maxChars ? `${sanitized.slice(0, maxChars)}…` : sanitized;
|
||||
const codePoints = Array.from(sanitized);
|
||||
if (codePoints.length <= maxChars) {
|
||||
return sanitized;
|
||||
}
|
||||
// Cap on code-point boundaries so a maxChars cut never splits a surrogate pair (emoji/astral) and
|
||||
// leaves a lone surrogate before the ellipsis.
|
||||
return `${codePoints.slice(0, maxChars).join("")}…`;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user