diff --git a/src/agents/console-sanitize.test.ts b/src/agents/console-sanitize.test.ts
new file mode 100644
index 00000000000..6c5849f93a5
--- /dev/null
+++ b/src/agents/console-sanitize.test.ts
@@ -0,0 +1,20 @@
+// Console sanitizer tests cover control-char filtering and code-point-safe truncation.
+import { describe, expect, it } from "vitest";
+import { sanitizeForConsole } from "./console-sanitize.js";
+
+const hasLoneSurrogate = (value: string) =>
+  /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/.test(value);
+
+describe("sanitizeForConsole", () => {
+  it("truncates on code-point boundaries without splitting a surrogate pair", () => {
+    const grin = String.fromCodePoint(0x1f600); // 😀 — two UTF-16 code units
+    const out = sanitizeForConsole(grin.repeat(6), 3);
+    expect(out).toBe(`${grin.repeat(3)}…`);
+    expect(out !== undefined && hasLoneSurrogate(out)).toBe(false);
+  });
+
+  it("filters control chars, flattens whitespace, and leaves short strings intact", () => {
+    expect(sanitizeForConsole(" hello\tworld ")).toBe("hello world");
+    expect(sanitizeForConsole(undefined)).toBeUndefined();
+  });
+});
diff --git a/src/agents/console-sanitize.ts b/src/agents/console-sanitize.ts
index 59c3cf2d621..d852e069337 100644
--- a/src/agents/console-sanitize.ts
+++ b/src/agents/console-sanitize.ts
@@ -22,5 +22,11 @@ export function sanitizeForConsole(text: string | undefined, maxChars = 200): st
     .replace(/[\r\n\t]+/g, " ")
     .replace(/\s+/g, " ")
     .trim();
-  return sanitized.length > maxChars ? `${sanitized.slice(0, maxChars)}…` : sanitized;
+  const codePoints = Array.from(sanitized);
+  if (codePoints.length <= maxChars) {
+    return sanitized;
+  }
+  // Cap on code-point boundaries so a maxChars cut never splits a surrogate pair (emoji/astral) and
+  // leaves a lone surrogate before the ellipsis.
+  return `${codePoints.slice(0, maxChars).join("")}…`;
 }