fix(agents): truncate console text on code-point boundaries (#96296)

sanitizeForConsole filtered control characters in a code-point-aware way but
then truncated with sanitized.slice(0, maxChars), which cuts on UTF-16 code
units. When the cap landed between the two code units of an astral character
(emoji, CJK extension, etc.), the output ended in a lone high surrogate before
the ellipsis. Cap on code points instead.

Co-authored-by: ly-wang19 <ly-wang19@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ly-wang19
2026-06-29 02:07:37 +08:00
committed by GitHub
parent 55d7b5b36c
commit ad8e7dcfe2
2 changed files with 27 additions and 1 deletions

View File

@@ -0,0 +1,20 @@
// Console sanitizer tests cover control-char filtering and code-point-safe truncation.
import { describe, expect, it } from "vitest";
import { sanitizeForConsole } from "./console-sanitize.js";
/**
 * Reports whether `value` contains an unpaired UTF-16 surrogate: a high
 * surrogate (0xD800-0xDBFF) not immediately followed by a low surrogate, or a
 * low surrogate (0xDC00-0xDFFF) not immediately preceded by a high surrogate.
 *
 * @param value - String to scan, one UTF-16 code unit at a time.
 * @returns `true` if at least one lone surrogate is present, otherwise `false`.
 */
const hasLoneSurrogate = (value: string): boolean => {
  for (let index = 0; index < value.length; index += 1) {
    const unit = value.charCodeAt(index);
    if (unit >= 0xd800 && unit <= 0xdbff) {
      // charCodeAt yields NaN past the end, which fails both comparisons, so a
      // trailing high surrogate is reported as lone.
      const following = value.charCodeAt(index + 1);
      if (following >= 0xdc00 && following <= 0xdfff) {
        // Well-formed pair: step over its low half.
        index += 1;
        continue;
      }
      return true;
    }
    // A low surrogate reached here was not consumed as the second half of a
    // pair, so nothing valid precedes it.
    if (unit >= 0xdc00 && unit <= 0xdfff) {
      return true;
    }
  }
  return false;
};
describe("sanitizeForConsole", () => {
  it("truncates on code-point boundaries without splitting a surrogate pair", () => {
    // U+1F600 lies outside the BMP, so each copy occupies two UTF-16 code units.
    const emoji = String.fromCodePoint(0x1f600);
    const sixEmoji = emoji.repeat(6);

    const truncated = sanitizeForConsole(sixEmoji, 3);

    expect(truncated).toBe(`${emoji.repeat(3)}`);
    // An undefined result short-circuits to false, so this only checks string output.
    const endsInBrokenPair = truncated !== undefined && hasLoneSurrogate(truncated);
    expect(endsInBrokenPair).toBe(false);
  });

  it("filters control chars, flattens whitespace, and leaves short strings intact", () => {
    // Outer spaces are trimmed and the tab becomes a single space.
    const flattened = sanitizeForConsole(" hello\tworld ");
    expect(flattened).toBe("hello world");

    // An undefined input yields an undefined result.
    const missing = sanitizeForConsole(undefined);
    expect(missing).toBeUndefined();
  });
});

View File

@@ -22,5 +22,11 @@ export function sanitizeForConsole(text: string | undefined, maxChars = 200): st
.replace(/[\r\n\t]+/g, " ")
.replace(/\s+/g, " ")
.trim();
return sanitized.length > maxChars ? `${sanitized.slice(0, maxChars)}` : sanitized;
const codePoints = Array.from(sanitized);
if (codePoints.length <= maxChars) {
return sanitized;
}
// Cap on code-point boundaries so a maxChars cut never splits a surrogate pair (emoji/astral) and
// leaves a lone surrogate before the ellipsis.
return `${codePoints.slice(0, maxChars).join("")}`;
}