Files
openclaw/src/agents/pi-embedded-utils.strip-model-special-tokens.test.ts
George Zhang 309162f9a2 fix: strip leaked model control tokens from user-facing text (#42173)
Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens in their responses. This change strips them with a generic pattern in the text-extraction pipeline, following the same architecture as stripMinimaxToolCallXml.

Closes #40020
Supersedes #40573

Co-authored-by: imwyvern <100903837+imwyvern@users.noreply.github.com>
2026-03-10 06:27:59 -07:00

26 lines
973 B
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, expect, it } from "vitest";
import { stripModelSpecialTokens } from "./pi-embedded-utils.js";
/**
 * Regression tests for `stripModelSpecialTokens`, which removes leaked model
 * control tokens (for example `<|user|>`) from user-facing text.
 *
 * Covered cases:
 * - ASCII-pipe tokens are removed and adjacent words stay separated by a space.
 * - Full-width-pipe (U+FF5C) tokens are removed as well.
 * - Ordinary angle brackets and HTML tags are left alone.
 * - Text without any tokens is returned unchanged.
 *
 * @see https://github.com/openclaw/openclaw/issues/40020
 */
describe("stripModelSpecialTokens", () => {
  it("strips tokens and inserts space between adjacent words", () => {
    expect(stripModelSpecialTokens("<|user|>Question<|assistant|>Answer")).toBe("Question Answer");
  });

  it("strips full-width pipe variants (DeepSeek U+FF5C)", () => {
    // The delimiters are spelled as \uFF5C escapes on purpose: the literal
    // full-width vertical line is visually confusable with "|" and is easily
    // dropped or normalized by editors and web views, which would leave this
    // test exercising a plain "<...>" tag instead of a special token.
    const fullWidthToken = "<\uFF5Cbegin▁of▁sentence\uFF5C>";
    expect(stripModelSpecialTokens(`${fullWidthToken}Hello there`)).toBe("Hello there");
  });

  it("does not strip normal angle brackets or HTML", () => {
    expect(stripModelSpecialTokens("a < b && c > d")).toBe("a < b && c > d");
    expect(stripModelSpecialTokens("<div>hello</div>")).toBe("<div>hello</div>");
  });

  it("passes through text without tokens unchanged", () => {
    const text = "Just a normal response.";
    expect(stripModelSpecialTokens(text)).toBe(text);
  });
});