mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-05 14:50:21 +00:00
Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens in their responses. Uses generic pattern in the text extraction pipeline, following the same architecture as stripMinimaxToolCallXml. Closes #40020 Supersedes #40573 Co-authored-by: imwyvern <100903837+imwyvern@users.noreply.github.com>
26 lines
973 B
TypeScript
26 lines
973 B
TypeScript
import { describe, expect, it } from "vitest";
|
||
import { stripModelSpecialTokens } from "./pi-embedded-utils.js";
|
||
|
||
/**
 * Tests for `stripModelSpecialTokens`, which removes internal delimiter tokens
 * (e.g. `<|user|>`) that some models emit in their responses.
 *
 * @see https://github.com/openclaw/openclaw/issues/40020
 */
describe("stripModelSpecialTokens", () => {
  it("strips tokens and inserts space between adjacent words", () => {
    // Two words separated only by a token must not be fused together.
    expect(stripModelSpecialTokens("<|user|>Question<|assistant|>Answer")).toBe("Question Answer");
  });

  it("strips full-width pipe variants (DeepSeek U+FF5C)", () => {
    // The delimiter is written as an explicit \uFF5C escape so this case really
    // exercises the full-width pipe and cannot be silently normalized to the
    // ASCII "|" already covered by the previous case.
    const input = "<\uFF5Cbegin▁of▁sentence\uFF5C>Hello there";
    expect(stripModelSpecialTokens(input)).toBe("Hello there");
  });

  it("does not strip normal angle brackets or HTML", () => {
    // Comparison operators and ordinary tags contain no pipe delimiters.
    expect(stripModelSpecialTokens("a < b && c > d")).toBe("a < b && c > d");
    expect(stripModelSpecialTokens("<div>hello</div>")).toBe("<div>hello</div>");
  });

  it("passes through text without tokens unchanged", () => {
    const text = "Just a normal response.";
    expect(stripModelSpecialTokens(text)).toBe(text);
  });
});
|