mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-20 05:31:30 +00:00
test: dedupe routing and text suites
This commit is contained in:
@@ -17,6 +17,30 @@ function expectFencesBalanced(chunks: string[]) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Asserts that `chunks` has exactly one entry per expected length and that
 * each chunk's character length equals the expected value at the same index.
 *
 * @param chunks - Chunks produced by the chunker under test.
 * @param expectedLengths - Expected `length` of each chunk, in order.
 */
function expectChunkLengths(chunks: string[], expectedLengths: number[]) {
  expect(chunks).toHaveLength(expectedLengths.length);
  for (const [position, wantedLength] of expectedLengths.entries()) {
    // Optional chaining turns a missing chunk into `undefined`, so the
    // assertion fails with a readable diff instead of throwing a TypeError.
    expect(chunks[position]?.length).toBe(wantedLength);
  }
}
|
||||
|
||||
/**
 * Asserts that the chunks, joined with single spaces, match `text` once both
 * sides have every whitespace run collapsed to one space and are trimmed.
 *
 * @param chunks - Chunks produced by the chunker under test.
 * @param text - The original, un-chunked input.
 */
function expectNormalizedChunkJoin(chunks: string[], text: string) {
  const collapseWhitespace = (value: string) => value.replace(/\s+/g, " ").trim();
  const rejoined = collapseWhitespace(chunks.join(" "));
  expect(rejoined).toBe(collapseWhitespace(text));
}
|
||||
|
||||
/**
 * Runs `chunkText` for one table-driven case and hands the result to the
 * case's own assertion callback.
 *
 * @param params.text - Input passed to `chunkText`.
 * @param params.limit - Chunk size limit passed to `chunkText`.
 * @param params.assert - Callback receiving the produced chunks and the original text.
 */
function expectChunkTextCase(params: {
  text: string;
  limit: number;
  assert: (chunks: string[], text: string) => void;
}) {
  const { text, limit } = params;
  // Invoked through `params` so the callback is called exactly as before.
  params.assert(chunkText(text, limit), text);
}
|
||||
|
||||
/**
 * Executes a self-contained test case body.
 *
 * Used as the shared runner for `it.each` tables whose rows carry their own
 * `run` closure instead of plain input/expected data.
 *
 * @param run - The case body; it is invoked once, synchronously.
 */
function expectChunkSpecialCase(run: () => void) {
  run();
}
|
||||
|
||||
type ChunkCase = {
|
||||
name: string;
|
||||
text: string;
|
||||
@@ -30,6 +54,19 @@ function runChunkCases(chunker: (text: string, limit: number) => string[], cases
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Runs a mode-aware chunker for one table-driven case and asserts that the
 * produced chunks deep-equal the expected list.
 *
 * @param params.chunker - Chunking function taking `(text, limit, mode)`.
 * @param params.text - Input text for the chunker.
 * @param params.limit - Chunk size limit for the chunker.
 * @param params.mode - Chunking mode, `"length"` or `"newline"`.
 * @param params.expected - Chunks the chunker is expected to return.
 * @param params.name - Optional case label, passed to `expect` as the failure message.
 */
function expectChunkModeCase(params: {
  chunker: (text: string, limit: number, mode: "length" | "newline") => string[];
  text: string;
  limit: number;
  mode: "length" | "newline";
  expected: readonly string[];
  name?: string;
}) {
  const { text, limit, mode, expected, name } = params;
  const actual = params.chunker(text, limit, mode);
  expect(actual, name).toEqual(expected);
}
|
||||
|
||||
function expectMarkdownFenceSplitCases(
|
||||
cases: ReadonlyArray<{
|
||||
name: string;
|
||||
@@ -39,7 +76,7 @@ function expectMarkdownFenceSplitCases(
|
||||
expectedSuffix: string;
|
||||
}>,
|
||||
) {
|
||||
for (const { name, text, limit, expectedPrefix, expectedSuffix } of cases) {
|
||||
cases.forEach(({ name, text, limit, expectedPrefix, expectedSuffix }) => {
|
||||
const chunks = chunkMarkdownText(text, limit);
|
||||
expect(chunks.length, name).toBeGreaterThan(1);
|
||||
for (const chunk of chunks) {
|
||||
@@ -48,9 +85,28 @@ function expectMarkdownFenceSplitCases(
|
||||
expect(chunk.trimEnd().endsWith(expectedSuffix), name).toBe(true);
|
||||
}
|
||||
expectFencesBalanced(chunks);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Chunks `text` with `chunkMarkdownText` and asserts that every chunk still
 * has non-blank content after its fence marker lines are removed.
 *
 * @param text - Markdown input to chunk.
 * @param limit - Chunk size limit passed to `chunkMarkdownText`.
 */
function expectNoEmptyFencedChunks(text: string, limit: number) {
  // Matches a fence line: up to three leading spaces, then a run of three or
  // more backticks or tildes, then anything else on the line.
  const fenceLinePattern = /^( {0,3})(`{3,}|~{3,})(.*)$/;
  for (const chunk of chunkMarkdownText(text, limit)) {
    const contentWithoutFences = chunk
      .split("\n")
      .filter((line) => !fenceLinePattern.test(line))
      .join("\n");
    expect(contentWithoutFences.trim()).not.toBe("");
  }
}
|
||||
|
||||
/**
 * Chunks `text` with `chunkMarkdownText` while spying on
 * `fences.parseFenceSpans`, then asserts that more than two chunks were
 * produced and that the fence parser was called exactly once.
 *
 * The spy is restored in a `finally` block so that a failing expectation
 * cannot leave `parseFenceSpans` wrapped for tests that run afterwards.
 *
 * @param text - Markdown input to chunk.
 * @param limit - Chunk size limit passed to `chunkMarkdownText`.
 */
function expectFenceParseOccursOnce(text: string, limit: number) {
  const parseSpy = vi.spyOn(fences, "parseFenceSpans");
  try {
    const chunks = chunkMarkdownText(text, limit);

    expect(chunks.length).toBeGreaterThan(2);
    expect(parseSpy).toHaveBeenCalledTimes(1);
  } finally {
    // Always undo the spy, even when one of the expectations above throws.
    parseSpy.mockRestore();
  }
}
|
||||
|
||||
const parentheticalCases: ChunkCase[] = [
|
||||
{
|
||||
name: "keeps parenthetical phrases together",
|
||||
@@ -72,232 +128,355 @@ const parentheticalCases: ChunkCase[] = [
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Table of newline-mode cases involving fenced code blocks, consumed by
 * `it.each`. Built inside an IIFE so the shared fixture strings stay private
 * to the table.
 */
const newlineModeFenceCases = (() => {
  // Fence that is longer than the small limit used by the last case.
  const oversizedFence = `\`\`\`js\n${"const a = 1;\n".repeat(20)}\`\`\``;
  // Fence that contains a blank line inside its body.
  const pythonFence = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```";
  // Fence followed by a paragraph after only a single newline.
  const fenceThenParagraph = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter";

  return [
    {
      name: "keeps single-newline fence+paragraph together",
      text: fenceThenParagraph,
      limit: 1000,
      expected: [fenceThenParagraph],
    },
    {
      name: "keeps blank lines inside fence together",
      text: pythonFence,
      limit: 1000,
      expected: [pythonFence],
    },
    {
      name: "splits between fence and following paragraph",
      text: `${pythonFence}\n\nAfter`,
      limit: 1000,
      expected: [pythonFence, "After"],
    },
    {
      name: "defers long markdown blocks to markdown chunker",
      text: oversizedFence,
      limit: 40,
      // Expected output is whatever the markdown chunker itself yields.
      expected: chunkMarkdownText(oversizedFence, 40),
    },
  ] as const;
})();
|
||||
|
||||
describe("chunkText", () => {
|
||||
it("keeps multi-line text in one chunk when under limit", () => {
|
||||
const text = "Line one\n\nLine two\n\nLine three";
|
||||
const chunks = chunkText(text, 1600);
|
||||
expect(chunks).toEqual([text]);
|
||||
});
|
||||
|
||||
it("splits only when text exceeds the limit", () => {
|
||||
const part = "a".repeat(20);
|
||||
const text = part.repeat(5); // 100 chars
|
||||
const chunks = chunkText(text, 60);
|
||||
expect(chunks.length).toBe(2);
|
||||
expect(chunks[0].length).toBe(60);
|
||||
expect(chunks[1].length).toBe(40);
|
||||
expect(chunks.join("")).toBe(text);
|
||||
});
|
||||
|
||||
it("prefers breaking at a newline before the limit", () => {
|
||||
const text = `paragraph one line\n\nparagraph two starts here and continues`;
|
||||
const chunks = chunkText(text, 40);
|
||||
expect(chunks).toEqual(["paragraph one line", "paragraph two starts here and continues"]);
|
||||
});
|
||||
|
||||
it("otherwise breaks at the last whitespace under the limit", () => {
|
||||
const text = "This is a message that should break nicely near a word boundary.";
|
||||
const chunks = chunkText(text, 30);
|
||||
expect(chunks[0].length).toBeLessThanOrEqual(30);
|
||||
expect(chunks[1].length).toBeLessThanOrEqual(30);
|
||||
expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe(text.replace(/\s+/g, " ").trim());
|
||||
});
|
||||
|
||||
it("falls back to a hard break when no whitespace is present", () => {
|
||||
const text = "Supercalifragilisticexpialidocious"; // 34 chars
|
||||
const chunks = chunkText(text, 10);
|
||||
expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
|
||||
it.each([
|
||||
{
|
||||
name: "keeps multi-line text in one chunk when under limit",
|
||||
text: "Line one\n\nLine two\n\nLine three",
|
||||
limit: 1600,
|
||||
assert: (chunks: string[], text: string) => {
|
||||
expect(chunks).toEqual([text]);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "splits only when text exceeds the limit",
|
||||
text: "a".repeat(20).repeat(5),
|
||||
limit: 60,
|
||||
assert: (chunks: string[], text: string) => {
|
||||
expectChunkLengths(chunks, [60, 40]);
|
||||
expect(chunks.join("")).toBe(text);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "prefers breaking at a newline before the limit",
|
||||
text: "paragraph one line\n\nparagraph two starts here and continues",
|
||||
limit: 40,
|
||||
assert: (chunks: string[]) => {
|
||||
expect(chunks).toEqual(["paragraph one line", "paragraph two starts here and continues"]);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otherwise breaks at the last whitespace under the limit",
|
||||
text: "This is a message that should break nicely near a word boundary.",
|
||||
limit: 30,
|
||||
assert: (chunks: string[], text: string) => {
|
||||
expect(chunks[0]?.length).toBeLessThanOrEqual(30);
|
||||
expect(chunks[1]?.length).toBeLessThanOrEqual(30);
|
||||
expectNormalizedChunkJoin(chunks, text);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "falls back to a hard break when no whitespace is present",
|
||||
text: "Supercalifragilisticexpialidocious",
|
||||
limit: 10,
|
||||
assert: (chunks: string[]) => {
|
||||
expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
|
||||
},
|
||||
},
|
||||
] as const)("$name", ({ text, limit, assert }) => {
|
||||
expectChunkTextCase({ text, limit, assert });
|
||||
});
|
||||
|
||||
runChunkCases(chunkText, [parentheticalCases[0]]);
|
||||
});
|
||||
|
||||
describe("resolveTextChunkLimit", () => {
|
||||
it("uses per-provider defaults", () => {
|
||||
expect(resolveTextChunkLimit(undefined, "whatsapp")).toBe(4000);
|
||||
expect(resolveTextChunkLimit(undefined, "telegram")).toBe(4000);
|
||||
expect(resolveTextChunkLimit(undefined, "slack")).toBe(4000);
|
||||
expect(resolveTextChunkLimit(undefined, "signal")).toBe(4000);
|
||||
expect(resolveTextChunkLimit(undefined, "imessage")).toBe(4000);
|
||||
expect(resolveTextChunkLimit(undefined, "discord")).toBe(4000);
|
||||
expect(
|
||||
resolveTextChunkLimit(undefined, "discord", undefined, {
|
||||
fallbackLimit: 2000,
|
||||
it.each([
|
||||
...(["whatsapp", "telegram", "slack", "signal", "imessage", "discord"] as const).map(
|
||||
(provider) => ({
|
||||
name: `uses default limit for ${provider}`,
|
||||
cfg: undefined,
|
||||
provider,
|
||||
accountId: undefined,
|
||||
options: undefined,
|
||||
expected: 4000,
|
||||
}),
|
||||
).toBe(2000);
|
||||
});
|
||||
|
||||
it("supports provider overrides", () => {
|
||||
const cfg = { channels: { telegram: { textChunkLimit: 1234 } } };
|
||||
expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000);
|
||||
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234);
|
||||
});
|
||||
|
||||
it("prefers account overrides when provided", () => {
|
||||
const cfg = {
|
||||
channels: {
|
||||
telegram: {
|
||||
textChunkLimit: 2000,
|
||||
accounts: {
|
||||
default: { textChunkLimit: 1234 },
|
||||
primary: { textChunkLimit: 777 },
|
||||
),
|
||||
{
|
||||
name: "uses fallback limit override when provided",
|
||||
cfg: undefined,
|
||||
provider: "discord" as const,
|
||||
accountId: undefined,
|
||||
options: { fallbackLimit: 2000 },
|
||||
expected: 2000,
|
||||
},
|
||||
{
|
||||
name: "supports provider overrides for telegram",
|
||||
cfg: { channels: { telegram: { textChunkLimit: 1234 } } },
|
||||
provider: "telegram" as const,
|
||||
accountId: undefined,
|
||||
options: undefined,
|
||||
expected: 1234,
|
||||
},
|
||||
{
|
||||
name: "falls back when provider override does not match",
|
||||
cfg: { channels: { telegram: { textChunkLimit: 1234 } } },
|
||||
provider: "whatsapp" as const,
|
||||
accountId: undefined,
|
||||
options: undefined,
|
||||
expected: 4000,
|
||||
},
|
||||
{
|
||||
name: "prefers account overrides when provided",
|
||||
cfg: {
|
||||
channels: {
|
||||
telegram: {
|
||||
textChunkLimit: 2000,
|
||||
accounts: {
|
||||
default: { textChunkLimit: 1234 },
|
||||
primary: { textChunkLimit: 777 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(resolveTextChunkLimit(cfg, "telegram", "primary")).toBe(777);
|
||||
expect(resolveTextChunkLimit(cfg, "telegram", "default")).toBe(1234);
|
||||
});
|
||||
|
||||
it("uses the matching provider override", () => {
|
||||
const cfg = {
|
||||
channels: {
|
||||
discord: { textChunkLimit: 111 },
|
||||
slack: { textChunkLimit: 222 },
|
||||
provider: "telegram" as const,
|
||||
accountId: "primary",
|
||||
options: undefined,
|
||||
expected: 777,
|
||||
},
|
||||
{
|
||||
name: "uses default account override when requested",
|
||||
cfg: {
|
||||
channels: {
|
||||
telegram: {
|
||||
textChunkLimit: 2000,
|
||||
accounts: {
|
||||
default: { textChunkLimit: 1234 },
|
||||
primary: { textChunkLimit: 777 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
|
||||
expect(resolveTextChunkLimit(cfg, "slack")).toBe(222);
|
||||
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
|
||||
provider: "telegram" as const,
|
||||
accountId: "default",
|
||||
options: undefined,
|
||||
expected: 1234,
|
||||
},
|
||||
{
|
||||
name: "uses the matching provider override for discord",
|
||||
cfg: {
|
||||
channels: {
|
||||
discord: { textChunkLimit: 111 },
|
||||
slack: { textChunkLimit: 222 },
|
||||
},
|
||||
},
|
||||
provider: "discord" as const,
|
||||
accountId: undefined,
|
||||
options: undefined,
|
||||
expected: 111,
|
||||
},
|
||||
{
|
||||
name: "uses the matching provider override for slack",
|
||||
cfg: {
|
||||
channels: {
|
||||
discord: { textChunkLimit: 111 },
|
||||
slack: { textChunkLimit: 222 },
|
||||
},
|
||||
},
|
||||
provider: "slack" as const,
|
||||
accountId: undefined,
|
||||
options: undefined,
|
||||
expected: 222,
|
||||
},
|
||||
{
|
||||
name: "falls back when multi-provider override does not match",
|
||||
cfg: {
|
||||
channels: {
|
||||
discord: { textChunkLimit: 111 },
|
||||
slack: { textChunkLimit: 222 },
|
||||
},
|
||||
},
|
||||
provider: "telegram" as const,
|
||||
accountId: undefined,
|
||||
options: undefined,
|
||||
expected: 4000,
|
||||
},
|
||||
] as const)("$name", ({ cfg, provider, accountId, options, expected }) => {
|
||||
expect(resolveTextChunkLimit(cfg as never, provider, accountId, options)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkMarkdownText", () => {
|
||||
it("keeps fenced blocks intact when a safe break exists", () => {
|
||||
const prefix = "p".repeat(60);
|
||||
const fence = "```bash\nline1\nline2\n```";
|
||||
const suffix = "s".repeat(60);
|
||||
const text = `${prefix}\n\n${fence}\n\n${suffix}`;
|
||||
it.each([
|
||||
{
|
||||
name: "keeps fenced blocks intact when a safe break exists",
|
||||
run: () => {
|
||||
const prefix = "p".repeat(60);
|
||||
const fence = "```bash\nline1\nline2\n```";
|
||||
const suffix = "s".repeat(60);
|
||||
const text = `${prefix}\n\n${fence}\n\n${suffix}`;
|
||||
|
||||
const chunks = chunkMarkdownText(text, 40);
|
||||
expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true);
|
||||
expectFencesBalanced(chunks);
|
||||
});
|
||||
|
||||
it("handles multiple fence marker styles when splitting inside fences", () => {
|
||||
const cases = [
|
||||
{
|
||||
name: "backtick fence",
|
||||
text: `\`\`\`txt\n${"a".repeat(500)}\n\`\`\``,
|
||||
limit: 120,
|
||||
expectedPrefix: "```txt\n",
|
||||
expectedSuffix: "```",
|
||||
const chunks = chunkMarkdownText(text, 40);
|
||||
expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true);
|
||||
expectFencesBalanced(chunks);
|
||||
},
|
||||
{
|
||||
name: "tilde fence",
|
||||
text: `~~~sh\n${"x".repeat(600)}\n~~~`,
|
||||
limit: 140,
|
||||
expectedPrefix: "~~~sh\n",
|
||||
expectedSuffix: "~~~",
|
||||
},
|
||||
{
|
||||
name: "long backtick fence",
|
||||
text: `\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``,
|
||||
limit: 140,
|
||||
expectedPrefix: "````md\n",
|
||||
expectedSuffix: "````",
|
||||
},
|
||||
{
|
||||
name: "indented fence",
|
||||
text: ` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``,
|
||||
limit: 160,
|
||||
expectedPrefix: " ```js\n",
|
||||
expectedSuffix: " ```",
|
||||
},
|
||||
] as const;
|
||||
|
||||
expectMarkdownFenceSplitCases(cases);
|
||||
});
|
||||
|
||||
it("never produces an empty fenced chunk when splitting", () => {
|
||||
const text = `\`\`\`txt\n${"a".repeat(300)}\n\`\`\``;
|
||||
const chunks = chunkMarkdownText(text, 60);
|
||||
for (const chunk of chunks) {
|
||||
const nonFenceLines = chunk
|
||||
.split("\n")
|
||||
.filter((line) => !/^( {0,3})(`{3,}|~{3,})(.*)$/.test(line));
|
||||
expect(nonFenceLines.join("\n").trim()).not.toBe("");
|
||||
}
|
||||
},
|
||||
{
|
||||
name: "handles multiple fence marker styles when splitting inside fences",
|
||||
run: () =>
|
||||
expectMarkdownFenceSplitCases([
|
||||
{
|
||||
name: "backtick fence",
|
||||
text: `\`\`\`txt\n${"a".repeat(500)}\n\`\`\``,
|
||||
limit: 120,
|
||||
expectedPrefix: "```txt\n",
|
||||
expectedSuffix: "```",
|
||||
},
|
||||
{
|
||||
name: "tilde fence",
|
||||
text: `~~~sh\n${"x".repeat(600)}\n~~~`,
|
||||
limit: 140,
|
||||
expectedPrefix: "~~~sh\n",
|
||||
expectedSuffix: "~~~",
|
||||
},
|
||||
{
|
||||
name: "long backtick fence",
|
||||
text: `\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``,
|
||||
limit: 140,
|
||||
expectedPrefix: "````md\n",
|
||||
expectedSuffix: "````",
|
||||
},
|
||||
{
|
||||
name: "indented fence",
|
||||
text: ` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``,
|
||||
limit: 160,
|
||||
expectedPrefix: " ```js\n",
|
||||
expectedSuffix: " ```",
|
||||
},
|
||||
]),
|
||||
},
|
||||
] as const)("$name", ({ run }) => {
|
||||
expectChunkSpecialCase(run);
|
||||
});
|
||||
|
||||
runChunkCases(chunkMarkdownText, parentheticalCases);
|
||||
|
||||
it("hard-breaks when a parenthetical exceeds the limit", () => {
|
||||
const text = `(${"a".repeat(80)})`;
|
||||
const chunks = chunkMarkdownText(text, 20);
|
||||
expect(chunks[0]?.length).toBe(20);
|
||||
expect(chunks.join("")).toBe(text);
|
||||
});
|
||||
|
||||
it("parses fence spans once for long fenced payloads", () => {
|
||||
const parseSpy = vi.spyOn(fences, "parseFenceSpans");
|
||||
const text = `\`\`\`txt\n${"line\n".repeat(600)}\`\`\``;
|
||||
|
||||
const chunks = chunkMarkdownText(text, 80);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(2);
|
||||
expect(parseSpy).toHaveBeenCalledTimes(1);
|
||||
parseSpy.mockRestore();
|
||||
it.each([
|
||||
{
|
||||
name: "never produces an empty fenced chunk when splitting",
|
||||
run: () => {
|
||||
expectNoEmptyFencedChunks(`\`\`\`txt\n${"a".repeat(300)}\n\`\`\``, 60);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "hard-breaks when a parenthetical exceeds the limit",
|
||||
run: () => {
|
||||
const text = `(${"a".repeat(80)})`;
|
||||
const chunks = chunkMarkdownText(text, 20);
|
||||
expect(chunks[0]?.length).toBe(20);
|
||||
expect(chunks.join("")).toBe(text);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "parses fence spans once for long fenced payloads",
|
||||
run: () => {
|
||||
expectFenceParseOccursOnce(`\`\`\`txt\n${"line\n".repeat(600)}\`\`\``, 80);
|
||||
},
|
||||
},
|
||||
] as const)("$name", ({ run }) => {
|
||||
expectChunkSpecialCase(run);
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkByNewline", () => {
|
||||
it("splits text on newlines", () => {
|
||||
const text = "Line one\nLine two\nLine three";
|
||||
const chunks = chunkByNewline(text, 1000);
|
||||
expect(chunks).toEqual(["Line one", "Line two", "Line three"]);
|
||||
it.each([
|
||||
{
|
||||
name: "splits text on newlines",
|
||||
text: "Line one\nLine two\nLine three",
|
||||
limit: 1000,
|
||||
expected: ["Line one", "Line two", "Line three"],
|
||||
},
|
||||
{
|
||||
name: "preserves blank lines by folding into the next chunk",
|
||||
text: "Line one\n\n\nLine two\n\nLine three",
|
||||
limit: 1000,
|
||||
expected: ["Line one", "\n\nLine two", "\nLine three"],
|
||||
},
|
||||
{
|
||||
name: "trims whitespace from lines",
|
||||
text: " Line one \n Line two ",
|
||||
limit: 1000,
|
||||
expected: ["Line one", "Line two"],
|
||||
},
|
||||
{
|
||||
name: "preserves leading blank lines on the first chunk",
|
||||
text: "\n\nLine one\nLine two",
|
||||
limit: 1000,
|
||||
expected: ["\n\nLine one", "Line two"],
|
||||
},
|
||||
{
|
||||
name: "preserves trailing blank lines on the last chunk",
|
||||
text: "Line one\n\n",
|
||||
limit: 1000,
|
||||
expected: ["Line one\n\n"],
|
||||
},
|
||||
{
|
||||
name: "keeps whitespace when trimLines is false",
|
||||
text: " indented line \nNext",
|
||||
limit: 1000,
|
||||
options: { trimLines: false },
|
||||
expected: [" indented line ", "Next"],
|
||||
},
|
||||
] as const)("$name", ({ text, limit, options, expected }) => {
|
||||
expect(chunkByNewline(text, limit, options)).toEqual(expected);
|
||||
});
|
||||
|
||||
it("preserves blank lines by folding into the next chunk", () => {
|
||||
const text = "Line one\n\n\nLine two\n\nLine three";
|
||||
const chunks = chunkByNewline(text, 1000);
|
||||
expect(chunks).toEqual(["Line one", "\n\nLine two", "\nLine three"]);
|
||||
});
|
||||
|
||||
it("trims whitespace from lines", () => {
|
||||
const text = " Line one \n Line two ";
|
||||
const chunks = chunkByNewline(text, 1000);
|
||||
expect(chunks).toEqual(["Line one", "Line two"]);
|
||||
});
|
||||
|
||||
it("preserves leading blank lines on the first chunk", () => {
|
||||
const text = "\n\nLine one\nLine two";
|
||||
const chunks = chunkByNewline(text, 1000);
|
||||
expect(chunks).toEqual(["\n\nLine one", "Line two"]);
|
||||
});
|
||||
|
||||
it("falls back to length-based for long lines", () => {
|
||||
const text = "Short line\n" + "a".repeat(50) + "\nAnother short";
|
||||
const chunks = chunkByNewline(text, 20);
|
||||
expect(chunks[0]).toBe("Short line");
|
||||
// Long line gets split into multiple chunks
|
||||
expect(chunks[1].length).toBe(20);
|
||||
expect(chunks[2].length).toBe(20);
|
||||
expect(chunks[3].length).toBe(10);
|
||||
expect(chunks[4]).toBe("Another short");
|
||||
});
|
||||
|
||||
it("does not split long lines when splitLongLines is false", () => {
|
||||
const text = "a".repeat(50);
|
||||
const chunks = chunkByNewline(text, 20, { splitLongLines: false });
|
||||
expect(chunks).toEqual([text]);
|
||||
it.each([
|
||||
{
|
||||
name: "falls back to length-based for long lines",
|
||||
run: () => {
|
||||
const text = "Short line\n" + "a".repeat(50) + "\nAnother short";
|
||||
const chunks = chunkByNewline(text, 20);
|
||||
expect(chunks[0]).toBe("Short line");
|
||||
expectChunkLengths(chunks.slice(1, 4), [20, 20, 10]);
|
||||
expect(chunks[4]).toBe("Another short");
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "does not split long lines when splitLongLines is false",
|
||||
run: () => {
|
||||
const text = "a".repeat(50);
|
||||
expect(chunkByNewline(text, 20, { splitLongLines: false })).toEqual([text]);
|
||||
},
|
||||
},
|
||||
] as const)("$name", ({ run }) => {
|
||||
expectChunkSpecialCase(run);
|
||||
});
|
||||
|
||||
it.each(["", " \n\n "] as const)("returns empty array for input %j", (text) => {
|
||||
expect(chunkByNewline(text, 100)).toEqual([]);
|
||||
});
|
||||
|
||||
it("preserves trailing blank lines on the last chunk", () => {
|
||||
const text = "Line one\n\n";
|
||||
const chunks = chunkByNewline(text, 1000);
|
||||
expect(chunks).toEqual(["Line one\n\n"]);
|
||||
});
|
||||
|
||||
it("keeps whitespace when trimLines is false", () => {
|
||||
const text = " indented line \nNext";
|
||||
const chunks = chunkByNewline(text, 1000, { trimLines: false });
|
||||
expect(chunks).toEqual([" indented line ", "Next"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkTextWithMode", () => {
|
||||
@@ -323,7 +502,14 @@ describe("chunkTextWithMode", () => {
|
||||
] as const)(
|
||||
"applies mode-specific chunking behavior: $name",
|
||||
({ text, mode, expected, name }) => {
|
||||
expect(chunkTextWithMode(text, 1000, mode), name).toEqual(expected);
|
||||
expectChunkModeCase({
|
||||
chunker: chunkTextWithMode,
|
||||
text,
|
||||
limit: 1000,
|
||||
mode,
|
||||
expected,
|
||||
name,
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
@@ -349,71 +535,50 @@ describe("chunkMarkdownTextWithMode", () => {
|
||||
expected: ["Para one", "Para two"],
|
||||
},
|
||||
] as const)("applies markdown/newline mode behavior: $name", ({ text, mode, expected, name }) => {
|
||||
expect(chunkMarkdownTextWithMode(text, 1000, mode), name).toEqual(expected);
|
||||
expectChunkModeCase({
|
||||
chunker: chunkMarkdownTextWithMode,
|
||||
text,
|
||||
limit: 1000,
|
||||
mode,
|
||||
expected,
|
||||
name,
|
||||
});
|
||||
});
|
||||
|
||||
it("handles newline mode fence splitting rules", () => {
|
||||
const fence = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```";
|
||||
const longFence = `\`\`\`js\n${"const a = 1;\n".repeat(20)}\`\`\``;
|
||||
const cases = [
|
||||
{
|
||||
name: "keeps single-newline fence+paragraph together",
|
||||
text: "```js\nconst a = 1;\nconst b = 2;\n```\nAfter",
|
||||
limit: 1000,
|
||||
expected: ["```js\nconst a = 1;\nconst b = 2;\n```\nAfter"],
|
||||
},
|
||||
{
|
||||
name: "keeps blank lines inside fence together",
|
||||
text: fence,
|
||||
limit: 1000,
|
||||
expected: [fence],
|
||||
},
|
||||
{
|
||||
name: "splits between fence and following paragraph",
|
||||
text: `${fence}\n\nAfter`,
|
||||
limit: 1000,
|
||||
expected: [fence, "After"],
|
||||
},
|
||||
{
|
||||
name: "defers long markdown blocks to markdown chunker",
|
||||
text: longFence,
|
||||
limit: 40,
|
||||
expected: chunkMarkdownText(longFence, 40),
|
||||
},
|
||||
] as const;
|
||||
|
||||
for (const { text, limit, expected, name } of cases) {
|
||||
it.each(newlineModeFenceCases)(
|
||||
"handles newline mode fence splitting rules: $name",
|
||||
({ text, limit, expected, name }) => {
|
||||
expect(chunkMarkdownTextWithMode(text, limit, "newline"), name).toEqual(expected);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
describe("resolveChunkMode", () => {
|
||||
it("resolves default, provider, account, and internal channel modes", () => {
|
||||
const providerCfg = { channels: { slack: { chunkMode: "newline" as const } } };
|
||||
const accountCfg = {
|
||||
channels: {
|
||||
slack: {
|
||||
chunkMode: "length" as const,
|
||||
accounts: {
|
||||
primary: { chunkMode: "newline" as const },
|
||||
},
|
||||
const providerCfg = { channels: { slack: { chunkMode: "newline" as const } } };
|
||||
const accountCfg = {
|
||||
channels: {
|
||||
slack: {
|
||||
chunkMode: "length" as const,
|
||||
accounts: {
|
||||
primary: { chunkMode: "newline" as const },
|
||||
},
|
||||
},
|
||||
};
|
||||
const cases = [
|
||||
{ cfg: undefined, provider: "telegram", accountId: undefined, expected: "length" },
|
||||
{ cfg: {}, provider: "discord", accountId: undefined, expected: "length" },
|
||||
{ cfg: undefined, provider: "bluebubbles", accountId: undefined, expected: "length" },
|
||||
{ cfg: providerCfg, provider: "__internal__", accountId: undefined, expected: "length" },
|
||||
{ cfg: providerCfg, provider: "slack", accountId: undefined, expected: "newline" },
|
||||
{ cfg: providerCfg, provider: "discord", accountId: undefined, expected: "length" },
|
||||
{ cfg: accountCfg, provider: "slack", accountId: "primary", expected: "newline" },
|
||||
{ cfg: accountCfg, provider: "slack", accountId: "other", expected: "length" },
|
||||
] as const;
|
||||
},
|
||||
};
|
||||
|
||||
for (const { cfg, provider, accountId, expected } of cases) {
|
||||
it.each([
|
||||
{ cfg: undefined, provider: "telegram", accountId: undefined, expected: "length" },
|
||||
{ cfg: {}, provider: "discord", accountId: undefined, expected: "length" },
|
||||
{ cfg: undefined, provider: "bluebubbles", accountId: undefined, expected: "length" },
|
||||
{ cfg: providerCfg, provider: "__internal__", accountId: undefined, expected: "length" },
|
||||
{ cfg: providerCfg, provider: "slack", accountId: undefined, expected: "newline" },
|
||||
{ cfg: providerCfg, provider: "discord", accountId: undefined, expected: "length" },
|
||||
{ cfg: accountCfg, provider: "slack", accountId: "primary", expected: "newline" },
|
||||
{ cfg: accountCfg, provider: "slack", accountId: "other", expected: "length" },
|
||||
] as const)(
|
||||
"resolves default/provider/account/internal chunk mode for $provider $accountId",
|
||||
({ cfg, provider, accountId, expected }) => {
|
||||
expect(resolveChunkMode(cfg as never, provider, accountId)).toBe(expected);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
@@ -6,33 +6,59 @@ import {
|
||||
} from "./account-id.js";
|
||||
|
||||
describe("account id normalization", () => {
|
||||
it("defaults missing values to default account", () => {
|
||||
expect(normalizeAccountId(undefined)).toBe(DEFAULT_ACCOUNT_ID);
|
||||
expect(normalizeAccountId(null)).toBe(DEFAULT_ACCOUNT_ID);
|
||||
expect(normalizeAccountId(" ")).toBe(DEFAULT_ACCOUNT_ID);
|
||||
const reservedAccountIdCases = [
|
||||
{ name: "rejects __proto__ pollution keys", input: "__proto__" },
|
||||
{ name: "rejects constructor pollution keys", input: "constructor" },
|
||||
{ name: "rejects prototype pollution keys", input: "prototype" },
|
||||
] as const;
|
||||
|
||||
/**
 * Normalizes one account id input and asserts the result.
 *
 * @param params.input - Raw account id handed to the normalizer.
 * @param params.expected - Value the normalizer must return.
 * @param params.optional - When truthy, `normalizeOptionalAccountId` is used;
 *   otherwise `normalizeAccountId` is used.
 */
function expectNormalizedAccountIdCase(params: {
  input: string | null | undefined;
  expected: string | undefined;
  optional?: boolean;
}) {
  const { input, expected, optional } = params;
  const actual = optional ? normalizeOptionalAccountId(input) : normalizeAccountId(input);
  expect(actual).toBe(expected);
}
|
||||
|
||||
it.each([
|
||||
{
|
||||
name: "defaults undefined to default account",
|
||||
input: undefined,
|
||||
expected: DEFAULT_ACCOUNT_ID,
|
||||
},
|
||||
{ name: "defaults null to default account", input: null, expected: DEFAULT_ACCOUNT_ID },
|
||||
{
|
||||
name: "defaults blank strings to default account",
|
||||
input: " ",
|
||||
expected: DEFAULT_ACCOUNT_ID,
|
||||
},
|
||||
{ name: "normalizes valid ids to lowercase", input: " Business_1 ", expected: "business_1" },
|
||||
{
|
||||
name: "sanitizes invalid characters into canonical ids",
|
||||
input: " Prod/US East ",
|
||||
expected: "prod-us-east",
|
||||
},
|
||||
...reservedAccountIdCases.map(({ name, input }) => ({
|
||||
name,
|
||||
input,
|
||||
expected: DEFAULT_ACCOUNT_ID,
|
||||
})),
|
||||
] as const)("$name", ({ input, expected }) => {
|
||||
expectNormalizedAccountIdCase({ input, expected });
|
||||
});
|
||||
|
||||
it("normalizes valid ids to lowercase", () => {
|
||||
expect(normalizeAccountId(" Business_1 ")).toBe("business_1");
|
||||
});
|
||||
|
||||
it("sanitizes invalid characters into canonical ids", () => {
|
||||
expect(normalizeAccountId(" Prod/US East ")).toBe("prod-us-east");
|
||||
});
|
||||
|
||||
it("rejects prototype-pollution key vectors", () => {
|
||||
expect(normalizeAccountId("__proto__")).toBe(DEFAULT_ACCOUNT_ID);
|
||||
expect(normalizeAccountId("constructor")).toBe(DEFAULT_ACCOUNT_ID);
|
||||
expect(normalizeAccountId("prototype")).toBe(DEFAULT_ACCOUNT_ID);
|
||||
expect(normalizeOptionalAccountId("__proto__")).toBeUndefined();
|
||||
expect(normalizeOptionalAccountId("constructor")).toBeUndefined();
|
||||
expect(normalizeOptionalAccountId("prototype")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("preserves optional semantics without forcing default", () => {
|
||||
expect(normalizeOptionalAccountId(undefined)).toBeUndefined();
|
||||
expect(normalizeOptionalAccountId(" ")).toBeUndefined();
|
||||
expect(normalizeOptionalAccountId(" !!! ")).toBeUndefined();
|
||||
expect(normalizeOptionalAccountId(" Business ")).toBe("business");
|
||||
it.each([
|
||||
{ name: "keeps undefined optional values unset", input: undefined, expected: undefined },
|
||||
{ name: "keeps blank optional values unset", input: " ", expected: undefined },
|
||||
{ name: "keeps invalid optional values unset", input: " !!! ", expected: undefined },
|
||||
...reservedAccountIdCases.map(({ name, input }) => ({
|
||||
name: name.replace(" pollution keys", " optional values"),
|
||||
input,
|
||||
expected: undefined,
|
||||
})),
|
||||
{ name: "normalizes valid optional values", input: " Business ", expected: "business" },
|
||||
] as const)("$name", ({ input, expected }) => {
|
||||
expectNormalizedAccountIdCase({ input, expected, optional: true });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,42 +1,98 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { resolveAccountEntry, resolveNormalizedAccountEntry } from "./account-lookup.js";
|
||||
|
||||
/**
 * Builds an accounts map with no own entries whose prototype carries a
 * `default` account, so lookups that walk the prototype chain would see a
 * "polluted" value.
 *
 * @returns A fresh object, created on every call, that inherits `default`.
 */
function createAccountsWithPrototypePollution() {
  const pollutedPrototype = { default: { id: "polluted" } };
  const accounts: Record<string, { id: string }> = Object.create(pollutedPrototype);
  return accounts;
}
|
||||
|
||||
/**
 * Asserts that a resolved account entry deep-equals the expected entry.
 *
 * @param actual - Entry returned by the lookup under test, or `undefined`.
 * @param expected - Entry the lookup should have returned, or `undefined`.
 */
function expectResolvedAccountLookupCase(
  actual: { id: string } | undefined,
  expected: { id: string } | undefined,
) {
  expect(actual).toEqual(expected);
}
|
||||
|
||||
/**
 * Asserts that a lookup returns `undefined` when the only matching entry
 * lives on the accounts object's prototype rather than on the object itself.
 *
 * @param resolve - Lookup under test; receives the polluted accounts map.
 */
function expectPrototypePollutionIgnoredCase(
  resolve: (accounts: Record<string, { id: string }>) => { id: string } | undefined,
) {
  const resolved = resolve(createAccountsWithPrototypePollution());
  expect(resolved).toBeUndefined();
}
|
||||
|
||||
/**
 * Runs one table-driven account lookup case and checks its result.
 *
 * @param params.accounts - Accounts map handed to `resolve`; an empty object
 *   is used when it is omitted.
 * @param params.resolve - Lookup under test.
 * @param params.expected - Entry the lookup should return, or `undefined`.
 */
function expectAccountLookupCase(params: {
  accounts?: Record<string, { id: string }>;
  resolve: (accounts: Record<string, { id: string }>) => { id: string } | undefined;
  expected: { id: string } | undefined;
}) {
  const actual = params.resolve(params.accounts ?? {});
  expectResolvedAccountLookupCase(actual, params.expected);
}
|
||||
|
||||
describe("resolveAccountEntry", () => {
|
||||
it("resolves direct and case-insensitive account keys", () => {
|
||||
const accounts = {
|
||||
default: { id: "default" },
|
||||
Business: { id: "business" },
|
||||
};
|
||||
expect(resolveAccountEntry(accounts, "default")).toEqual({ id: "default" });
|
||||
expect(resolveAccountEntry(accounts, "business")).toEqual({ id: "business" });
|
||||
const accounts = {
|
||||
default: { id: "default" },
|
||||
Business: { id: "business" },
|
||||
};
|
||||
|
||||
it.each([
|
||||
{
|
||||
name: "resolves the default account key",
|
||||
resolve: (localAccounts: Record<string, { id: string }>) =>
|
||||
resolveAccountEntry(localAccounts, "default"),
|
||||
expected: { id: "default" },
|
||||
},
|
||||
{
|
||||
name: "resolves a normalized business account key",
|
||||
resolve: (localAccounts: Record<string, { id: string }>) =>
|
||||
resolveAccountEntry(localAccounts, "business"),
|
||||
expected: { id: "business" },
|
||||
},
|
||||
] as const)("$name", ({ resolve, expected }) => {
|
||||
expectAccountLookupCase({ accounts, resolve, expected });
|
||||
});
|
||||
|
||||
it("ignores prototype-chain values", () => {
|
||||
const inherited = { default: { id: "polluted" } };
|
||||
const accounts = Object.create(inherited) as Record<string, { id: string }>;
|
||||
expect(resolveAccountEntry(accounts, "default")).toBeUndefined();
|
||||
expectPrototypePollutionIgnoredCase((localAccounts) =>
|
||||
resolveAccountEntry(localAccounts, "default"),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveNormalizedAccountEntry", () => {
|
||||
it("resolves normalized account keys with a custom normalizer", () => {
|
||||
const accounts = {
|
||||
"Ops Team": { id: "ops" },
|
||||
};
|
||||
const normalizeAccountId = (accountId: string) =>
|
||||
accountId.trim().toLowerCase().replaceAll(" ", "-");
|
||||
|
||||
expect(
|
||||
resolveNormalizedAccountEntry(accounts, "ops-team", (accountId) =>
|
||||
accountId.trim().toLowerCase().replaceAll(" ", "-"),
|
||||
),
|
||||
).toEqual({ id: "ops" });
|
||||
});
|
||||
it.each([
|
||||
{
|
||||
name: "resolves normalized account keys with a custom normalizer",
|
||||
accounts: {
|
||||
"Ops Team": { id: "ops" },
|
||||
},
|
||||
resolve: (accounts: Record<string, { id: string }>) =>
|
||||
resolveNormalizedAccountEntry(accounts, "ops-team", normalizeAccountId),
|
||||
expected: {
|
||||
id: "ops",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ignores prototype-chain values",
|
||||
resolve: () => undefined,
|
||||
expected: undefined,
|
||||
assert: () =>
|
||||
expectPrototypePollutionIgnoredCase((accounts) =>
|
||||
resolveNormalizedAccountEntry(accounts, "default", (accountId) => accountId),
|
||||
),
|
||||
},
|
||||
] as const)("$name", ({ accounts, resolve, expected, assert }) => {
|
||||
if (assert) {
|
||||
assert();
|
||||
return;
|
||||
}
|
||||
|
||||
it("ignores prototype-chain values", () => {
|
||||
const inherited = { default: { id: "polluted" } };
|
||||
const accounts = Object.create(inherited) as Record<string, { id: string }>;
|
||||
|
||||
expect(
|
||||
resolveNormalizedAccountEntry(accounts, "default", (accountId) => accountId),
|
||||
).toBeUndefined();
|
||||
expectAccountLookupCase({
|
||||
accounts,
|
||||
resolve,
|
||||
expected,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,65 +6,62 @@ describe("Discord Session Key Continuity", () => {
|
||||
const channel = "discord";
|
||||
const accountId = "default";
|
||||
|
||||
it("generates distinct keys for DM vs Channel (dmScope=main)", () => {
|
||||
// Scenario: Default config (dmScope=main)
|
||||
const dmKey = buildAgentSessionKey({
|
||||
function buildDiscordSessionKey(params: {
|
||||
peer: { kind: "direct" | "channel"; id: string };
|
||||
dmScope?: "main" | "per-peer";
|
||||
}) {
|
||||
return buildAgentSessionKey({
|
||||
agentId,
|
||||
channel,
|
||||
accountId,
|
||||
dmScope: params.dmScope ?? "main",
|
||||
peer: params.peer,
|
||||
});
|
||||
}
|
||||
|
||||
function expectDistinctDmAndChannelKeys(params: {
|
||||
dmScope: "main" | "per-peer";
|
||||
expectedDmKey: string;
|
||||
}) {
|
||||
const dmKey = buildDiscordSessionKey({
|
||||
peer: { kind: "direct", id: "user123" },
|
||||
dmScope: "main",
|
||||
dmScope: params.dmScope,
|
||||
});
|
||||
|
||||
const groupKey = buildAgentSessionKey({
|
||||
agentId,
|
||||
channel,
|
||||
accountId,
|
||||
const groupKey = buildDiscordSessionKey({
|
||||
peer: { kind: "channel", id: "channel456" },
|
||||
dmScope: "main",
|
||||
});
|
||||
|
||||
expect(dmKey).toBe("agent:main:main");
|
||||
expect(dmKey).toBe(params.expectedDmKey);
|
||||
expect(groupKey).toBe("agent:main:discord:channel:channel456");
|
||||
expect(dmKey).not.toBe(groupKey);
|
||||
});
|
||||
}
|
||||
|
||||
it("generates distinct keys for DM vs Channel (dmScope=per-peer)", () => {
|
||||
// Scenario: Multi-user bot config
|
||||
const dmKey = buildAgentSessionKey({
|
||||
agentId,
|
||||
channel,
|
||||
accountId,
|
||||
peer: { kind: "direct", id: "user123" },
|
||||
dmScope: "per-peer",
|
||||
});
|
||||
|
||||
const groupKey = buildAgentSessionKey({
|
||||
agentId,
|
||||
channel,
|
||||
accountId,
|
||||
peer: { kind: "channel", id: "channel456" },
|
||||
dmScope: "per-peer",
|
||||
});
|
||||
|
||||
expect(dmKey).toBe("agent:main:direct:user123");
|
||||
expect(groupKey).toBe("agent:main:discord:channel:channel456");
|
||||
expect(dmKey).not.toBe(groupKey);
|
||||
});
|
||||
|
||||
it("handles empty/invalid IDs safely without collision", () => {
|
||||
// If ID is missing, does it collide?
|
||||
const missingIdKey = buildAgentSessionKey({
|
||||
agentId,
|
||||
channel,
|
||||
accountId,
|
||||
peer: { kind: "channel", id: "" }, // Empty string
|
||||
dmScope: "main",
|
||||
function expectUnknownChannelKeyCase(channelId: string) {
|
||||
const missingIdKey = buildDiscordSessionKey({
|
||||
peer: { kind: "channel", id: channelId },
|
||||
});
|
||||
|
||||
expect(missingIdKey).toContain("unknown");
|
||||
|
||||
// Should still be distinct from main
|
||||
expect(missingIdKey).not.toBe("agent:main:main");
|
||||
}
|
||||
|
||||
it.each([
|
||||
{
|
||||
name: "keeps main-scoped DMs distinct from channel sessions",
|
||||
dmScope: "main" as const,
|
||||
expectedDmKey: "agent:main:main",
|
||||
},
|
||||
{
|
||||
name: "keeps per-peer DMs distinct from channel sessions",
|
||||
dmScope: "per-peer" as const,
|
||||
expectedDmKey: "agent:main:direct:user123",
|
||||
},
|
||||
])("$name", ({ dmScope, expectedDmKey }) => {
|
||||
expectDistinctDmAndChannelKeys({ dmScope, expectedDmKey });
|
||||
});
|
||||
|
||||
it.each(["", " "] as const)("handles invalid channel id %j without collision", (channelId) => {
|
||||
expectUnknownChannelKeyCase(channelId);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -12,121 +12,116 @@ import {
|
||||
} from "./session-key.js";
|
||||
|
||||
describe("classifySessionKeyShape", () => {
|
||||
it("classifies empty keys as missing", () => {
|
||||
expect(classifySessionKeyShape(undefined)).toBe("missing");
|
||||
expect(classifySessionKeyShape(" ")).toBe("missing");
|
||||
});
|
||||
|
||||
it("classifies valid agent keys", () => {
|
||||
expect(classifySessionKeyShape("agent:main:main")).toBe("agent");
|
||||
expect(classifySessionKeyShape("agent:research:subagent:worker")).toBe("agent");
|
||||
});
|
||||
|
||||
it("classifies malformed agent keys", () => {
|
||||
expect(classifySessionKeyShape("agent::broken")).toBe("malformed_agent");
|
||||
expect(classifySessionKeyShape("agent:main")).toBe("malformed_agent");
|
||||
});
|
||||
|
||||
it("treats non-agent legacy or alias keys as non-malformed", () => {
|
||||
expect(classifySessionKeyShape("main")).toBe("legacy_or_alias");
|
||||
expect(classifySessionKeyShape("custom-main")).toBe("legacy_or_alias");
|
||||
expect(classifySessionKeyShape("subagent:worker")).toBe("legacy_or_alias");
|
||||
it.each([
|
||||
{ input: undefined, expected: "missing" },
|
||||
{ input: " ", expected: "missing" },
|
||||
{ input: "agent:main:main", expected: "agent" },
|
||||
{ input: "agent:research:subagent:worker", expected: "agent" },
|
||||
{ input: "agent::broken", expected: "malformed_agent" },
|
||||
{ input: "agent:main", expected: "malformed_agent" },
|
||||
{ input: "main", expected: "legacy_or_alias" },
|
||||
{ input: "custom-main", expected: "legacy_or_alias" },
|
||||
{ input: "subagent:worker", expected: "legacy_or_alias" },
|
||||
] as const)("classifies %j as $expected", ({ input, expected }) => {
|
||||
expect(classifySessionKeyShape(input)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("session key backward compatibility", () => {
|
||||
it("classifies legacy :dm: session keys as valid agent keys", () => {
|
||||
// Legacy session keys use :dm: instead of :direct:
|
||||
// Both should be recognized as valid agent keys
|
||||
expect(classifySessionKeyShape("agent:main:telegram:dm:123456")).toBe("agent");
|
||||
expect(classifySessionKeyShape("agent:main:whatsapp:dm:+15551234567")).toBe("agent");
|
||||
expect(classifySessionKeyShape("agent:main:discord:dm:user123")).toBe("agent");
|
||||
});
|
||||
function expectBackwardCompatibleDirectSessionKey(key: string) {
|
||||
expect(classifySessionKeyShape(key)).toBe("agent");
|
||||
}
|
||||
|
||||
it("classifies new :direct: session keys as valid agent keys", () => {
|
||||
expect(classifySessionKeyShape("agent:main:telegram:direct:123456")).toBe("agent");
|
||||
expect(classifySessionKeyShape("agent:main:whatsapp:direct:+15551234567")).toBe("agent");
|
||||
expect(classifySessionKeyShape("agent:main:discord:direct:user123")).toBe("agent");
|
||||
it.each([
|
||||
"agent:main:telegram:dm:123456",
|
||||
"agent:main:whatsapp:dm:+15551234567",
|
||||
"agent:main:discord:dm:user123",
|
||||
"agent:main:telegram:direct:123456",
|
||||
"agent:main:whatsapp:direct:+15551234567",
|
||||
"agent:main:discord:direct:user123",
|
||||
] as const)("classifies backward-compatible direct session key %s as valid", (key) => {
|
||||
expectBackwardCompatibleDirectSessionKey(key);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getSubagentDepth", () => {
|
||||
it("returns 0 for non-subagent session keys", () => {
|
||||
expect(getSubagentDepth("agent:main:main")).toBe(0);
|
||||
expect(getSubagentDepth("main")).toBe(0);
|
||||
expect(getSubagentDepth(undefined)).toBe(0);
|
||||
});
|
||||
|
||||
it("returns 2 for nested subagent session keys", () => {
|
||||
expect(getSubagentDepth("agent:main:subagent:parent:subagent:child")).toBe(2);
|
||||
it.each([
|
||||
{ key: "agent:main:main", expected: 0 },
|
||||
{ key: "main", expected: 0 },
|
||||
{ key: undefined, expected: 0 },
|
||||
{ key: "agent:main:subagent:parent:subagent:child", expected: 2 },
|
||||
] as const)("returns $expected for session key %j", ({ key, expected }) => {
|
||||
expect(getSubagentDepth(key)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isCronSessionKey", () => {
|
||||
it("matches base and run cron agent session keys", () => {
|
||||
expect(isCronSessionKey("agent:main:cron:job-1")).toBe(true);
|
||||
expect(isCronSessionKey("agent:main:cron:job-1:run:run-1")).toBe(true);
|
||||
});
|
||||
|
||||
it("does not match non-cron sessions", () => {
|
||||
expect(isCronSessionKey("agent:main:main")).toBe(false);
|
||||
expect(isCronSessionKey("agent:main:subagent:worker")).toBe(false);
|
||||
expect(isCronSessionKey("cron:job-1")).toBe(false);
|
||||
expect(isCronSessionKey(undefined)).toBe(false);
|
||||
it.each([
|
||||
{ key: "agent:main:cron:job-1", expected: true },
|
||||
{ key: "agent:main:cron:job-1:run:run-1", expected: true },
|
||||
{ key: "agent:main:main", expected: false },
|
||||
{ key: "agent:main:subagent:worker", expected: false },
|
||||
{ key: "cron:job-1", expected: false },
|
||||
{ key: undefined, expected: false },
|
||||
] as const)("matches cron key %j => $expected", ({ key, expected }) => {
|
||||
expect(isCronSessionKey(key)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("deriveSessionChatType", () => {
|
||||
it("detects canonical direct/group/channel session keys", () => {
|
||||
expect(deriveSessionChatType("agent:main:discord:direct:user1")).toBe("direct");
|
||||
expect(deriveSessionChatType("agent:main:telegram:group:g1")).toBe("group");
|
||||
expect(deriveSessionChatType("agent:main:discord:channel:c1")).toBe("channel");
|
||||
});
|
||||
|
||||
it("detects legacy direct markers", () => {
|
||||
expect(deriveSessionChatType("agent:main:telegram:dm:123456")).toBe("direct");
|
||||
expect(deriveSessionChatType("telegram:dm:123456")).toBe("direct");
|
||||
});
|
||||
|
||||
it("detects legacy discord guild channel keys", () => {
|
||||
expect(deriveSessionChatType("discord:acc-1:guild-123:channel-456")).toBe("channel");
|
||||
});
|
||||
|
||||
it("returns unknown for main or malformed session keys", () => {
|
||||
expect(deriveSessionChatType("agent:main:main")).toBe("unknown");
|
||||
expect(deriveSessionChatType("agent:main")).toBe("unknown");
|
||||
expect(deriveSessionChatType("")).toBe("unknown");
|
||||
it.each([
|
||||
{ key: "agent:main:discord:direct:user1", expected: "direct" },
|
||||
{ key: "agent:main:telegram:group:g1", expected: "group" },
|
||||
{ key: "agent:main:discord:channel:c1", expected: "channel" },
|
||||
{ key: "agent:main:telegram:dm:123456", expected: "direct" },
|
||||
{ key: "telegram:dm:123456", expected: "direct" },
|
||||
{ key: "discord:acc-1:guild-123:channel-456", expected: "channel" },
|
||||
{ key: "agent:main:main", expected: "unknown" },
|
||||
{ key: "agent:main", expected: "unknown" },
|
||||
{ key: "", expected: "unknown" },
|
||||
] as const)("derives chat type for %j => $expected", ({ key, expected }) => {
|
||||
expect(deriveSessionChatType(key)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("session key canonicalization", () => {
|
||||
it("parses agent keys case-insensitively and returns lowercase tokens", () => {
|
||||
expect(parseAgentSessionKey("AGENT:Main:Hook:Webhook:42")).toEqual({
|
||||
agentId: "main",
|
||||
rest: "hook:webhook:42",
|
||||
});
|
||||
});
|
||||
function expectSessionKeyCanonicalizationCase(params: { run: () => void }) {
|
||||
params.run();
|
||||
}
|
||||
|
||||
it("does not double-prefix already-qualified agent keys", () => {
|
||||
expect(
|
||||
toAgentStoreSessionKey({
|
||||
agentId: "main",
|
||||
requestKey: "agent:main:main",
|
||||
}),
|
||||
).toBe("agent:main:main");
|
||||
it.each([
|
||||
{
|
||||
name: "parses agent keys case-insensitively and returns lowercase tokens",
|
||||
run: () =>
|
||||
expect(parseAgentSessionKey("AGENT:Main:Hook:Webhook:42")).toEqual({
|
||||
agentId: "main",
|
||||
rest: "hook:webhook:42",
|
||||
}),
|
||||
},
|
||||
{
|
||||
name: "does not double-prefix already-qualified agent keys",
|
||||
run: () =>
|
||||
expect(
|
||||
toAgentStoreSessionKey({
|
||||
agentId: "main",
|
||||
requestKey: "agent:main:main",
|
||||
}),
|
||||
).toBe("agent:main:main"),
|
||||
},
|
||||
] as const)("$name", ({ run }) => {
|
||||
expectSessionKeyCanonicalizationCase({ run });
|
||||
});
|
||||
});
|
||||
|
||||
describe("isValidAgentId", () => {
|
||||
it("accepts valid agent ids", () => {
|
||||
expect(isValidAgentId("main")).toBe(true);
|
||||
expect(isValidAgentId("my-research_agent01")).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects malformed agent ids", () => {
|
||||
expect(isValidAgentId("")).toBe(false);
|
||||
expect(isValidAgentId("Agent not found: xyz")).toBe(false);
|
||||
expect(isValidAgentId("../../../etc/passwd")).toBe(false);
|
||||
expect(isValidAgentId("a".repeat(65))).toBe(false);
|
||||
it.each([
|
||||
{ input: "main", expected: true },
|
||||
{ input: "my-research_agent01", expected: true },
|
||||
{ input: "", expected: false },
|
||||
{ input: "Agent not found: xyz", expected: false },
|
||||
{ input: "../../../etc/passwd", expected: false },
|
||||
{ input: "a".repeat(65), expected: false },
|
||||
] as const)("validates agent id %j => $expected", ({ input, expected }) => {
|
||||
expect(isValidAgentId(input)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,35 +2,12 @@ import { describe, expect, it } from "vitest";
|
||||
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
|
||||
|
||||
describe("stripAssistantInternalScaffolding", () => {
|
||||
it("strips reasoning tags", () => {
|
||||
const input = ["<thinking>", "secret", "</thinking>", "Visible"].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
|
||||
});
|
||||
function expectVisibleText(input: string, expected: string) {
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe(expected);
|
||||
}
|
||||
|
||||
it("strips relevant-memories scaffolding blocks", () => {
|
||||
const input = [
|
||||
"<relevant-memories>",
|
||||
"The following memories may be relevant to this conversation:",
|
||||
"- Internal memory note",
|
||||
"</relevant-memories>",
|
||||
"",
|
||||
"User-visible answer",
|
||||
].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe("User-visible answer");
|
||||
});
|
||||
|
||||
it("supports relevant_memories tag variants", () => {
|
||||
const input = [
|
||||
"<relevant_memories>",
|
||||
"Internal memory note",
|
||||
"</relevant_memories>",
|
||||
"Visible",
|
||||
].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
|
||||
});
|
||||
|
||||
it("keeps relevant-memories tags inside fenced code", () => {
|
||||
const input = [
|
||||
function createLiteralRelevantMemoriesCodeBlock() {
|
||||
return [
|
||||
"```xml",
|
||||
"<relevant-memories>",
|
||||
"sample",
|
||||
@@ -39,43 +16,87 @@ describe("stripAssistantInternalScaffolding", () => {
|
||||
"",
|
||||
"Visible text",
|
||||
].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe(input);
|
||||
});
|
||||
}
|
||||
|
||||
it("keeps relevant-memories tags inside inline code", () => {
|
||||
const input = "Use `<relevant-memories>example</relevant-memories>` literally.";
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe(input);
|
||||
});
|
||||
function expectLiteralVisibleText(input: string) {
|
||||
expectVisibleText(input, input);
|
||||
}
|
||||
|
||||
it("hides unfinished relevant-memories blocks", () => {
|
||||
const input = ["Hello", "<relevant-memories>", "internal-only"].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe("Hello\n");
|
||||
});
|
||||
|
||||
it("trims leading whitespace after stripping scaffolding", () => {
|
||||
const input = [
|
||||
"<thinking>",
|
||||
"secret",
|
||||
"</thinking>",
|
||||
" ",
|
||||
"<relevant-memories>",
|
||||
"internal note",
|
||||
"</relevant-memories>",
|
||||
" Visible",
|
||||
].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
|
||||
});
|
||||
|
||||
it("preserves unfinished reasoning text while still stripping memory blocks", () => {
|
||||
const input = [
|
||||
"Before",
|
||||
"<thinking>",
|
||||
"secret",
|
||||
"<relevant-memories>",
|
||||
"internal note",
|
||||
"</relevant-memories>",
|
||||
"After",
|
||||
].join("\n");
|
||||
expect(stripAssistantInternalScaffolding(input)).toBe("Before\n\nsecret\n\nAfter");
|
||||
it.each([
|
||||
{
|
||||
name: "strips reasoning tags",
|
||||
input: ["<thinking>", "secret", "</thinking>", "Visible"].join("\n"),
|
||||
expected: "Visible",
|
||||
},
|
||||
{
|
||||
name: "strips relevant-memories scaffolding blocks",
|
||||
input: [
|
||||
"<relevant-memories>",
|
||||
"The following memories may be relevant to this conversation:",
|
||||
"- Internal memory note",
|
||||
"</relevant-memories>",
|
||||
"",
|
||||
"User-visible answer",
|
||||
].join("\n"),
|
||||
expected: "User-visible answer",
|
||||
},
|
||||
{
|
||||
name: "supports relevant_memories tag variants",
|
||||
input: [
|
||||
"<relevant_memories>",
|
||||
"Internal memory note",
|
||||
"</relevant_memories>",
|
||||
"Visible",
|
||||
].join("\n"),
|
||||
expected: "Visible",
|
||||
},
|
||||
{
|
||||
name: "hides unfinished relevant-memories blocks",
|
||||
input: ["Hello", "<relevant-memories>", "internal-only"].join("\n"),
|
||||
expected: "Hello\n",
|
||||
},
|
||||
{
|
||||
name: "trims leading whitespace after stripping scaffolding",
|
||||
input: [
|
||||
"<thinking>",
|
||||
"secret",
|
||||
"</thinking>",
|
||||
" ",
|
||||
"<relevant-memories>",
|
||||
"internal note",
|
||||
"</relevant-memories>",
|
||||
" Visible",
|
||||
].join("\n"),
|
||||
expected: "Visible",
|
||||
},
|
||||
{
|
||||
name: "preserves unfinished reasoning text while still stripping memory blocks",
|
||||
input: [
|
||||
"Before",
|
||||
"<thinking>",
|
||||
"secret",
|
||||
"<relevant-memories>",
|
||||
"internal note",
|
||||
"</relevant-memories>",
|
||||
"After",
|
||||
].join("\n"),
|
||||
expected: "Before\n\nsecret\n\nAfter",
|
||||
},
|
||||
{
|
||||
name: "keeps relevant-memories tags inside fenced code",
|
||||
input: createLiteralRelevantMemoriesCodeBlock(),
|
||||
expected: undefined,
|
||||
},
|
||||
{
|
||||
name: "keeps literal relevant-memories prose",
|
||||
input: "Use `<relevant-memories>example</relevant-memories>` literally.",
|
||||
expected: undefined,
|
||||
},
|
||||
] as const)("$name", ({ input, expected }) => {
|
||||
if (expected === undefined) {
|
||||
expectLiteralVisibleText(input);
|
||||
return;
|
||||
}
|
||||
expectVisibleText(input, expected);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,50 +2,61 @@ import { describe, expect, it } from "vitest";
|
||||
import { findCodeRegions, isInsideCode } from "./code-regions.js";
|
||||
|
||||
describe("shared/text/code-regions", () => {
|
||||
it("finds fenced and inline code regions without double-counting inline code inside fences", () => {
|
||||
const text = [
|
||||
"before `inline` after",
|
||||
"```ts",
|
||||
"const a = `inside fence`;",
|
||||
"```",
|
||||
"tail",
|
||||
].join("\n");
|
||||
|
||||
function expectCodeRegionSlices(text: string, expectedSlices: readonly string[]) {
|
||||
const regions = findCodeRegions(text);
|
||||
expect(regions).toHaveLength(expectedSlices.length);
|
||||
expect(regions.map((region) => text.slice(region.start, region.end))).toEqual(expectedSlices);
|
||||
}
|
||||
|
||||
expect(regions).toHaveLength(2);
|
||||
expect(text.slice(regions[0].start, regions[0].end)).toBe("`inline`");
|
||||
expect(text.slice(regions[1].start, regions[1].end)).toContain("```ts");
|
||||
});
|
||||
|
||||
it("accepts alternate fence markers and unterminated trailing fences", () => {
|
||||
const text = "~~~js\nconsole.log(1)\n~~~\nplain\n```\nunterminated";
|
||||
const regions = findCodeRegions(text);
|
||||
|
||||
expect(regions).toHaveLength(2);
|
||||
expect(text.slice(regions[0].start, regions[0].end)).toContain("~~~js");
|
||||
expect(text.slice(regions[1].start, regions[1].end)).toBe("```\nunterminated");
|
||||
});
|
||||
|
||||
it("keeps adjacent inline code outside fenced regions", () => {
|
||||
const text = ["```ts", "const a = 1;", "```", "after `inline` tail"].join("\n");
|
||||
|
||||
const regions = findCodeRegions(text);
|
||||
|
||||
expect(regions).toHaveLength(2);
|
||||
expect(text.slice(regions[0].start, regions[0].end)).toContain("```ts");
|
||||
expect(text.slice(regions[1].start, regions[1].end)).toBe("`inline`");
|
||||
});
|
||||
|
||||
it("reports whether positions are inside discovered regions", () => {
|
||||
function expectInsideCodeCase(params: {
|
||||
positionSelector: (text: string, regionEnd: number) => number;
|
||||
expected: boolean;
|
||||
}) {
|
||||
const text = "plain `code` done";
|
||||
const regions = findCodeRegions(text);
|
||||
const codeStart = text.indexOf("code");
|
||||
const plainStart = text.indexOf("plain");
|
||||
const regionEnd = regions[0]?.end ?? -1;
|
||||
expect(isInsideCode(params.positionSelector(text, regionEnd), regions)).toBe(params.expected);
|
||||
}
|
||||
|
||||
expect(isInsideCode(codeStart, regions)).toBe(true);
|
||||
expect(isInsideCode(plainStart, regions)).toBe(false);
|
||||
expect(isInsideCode(regionEnd, regions)).toBe(false);
|
||||
it.each([
|
||||
{
|
||||
name: "finds fenced and inline code regions without double-counting inline code inside fences",
|
||||
text: ["before `inline` after", "```ts", "const a = `inside fence`;", "```", "tail"].join(
|
||||
"\n",
|
||||
),
|
||||
expectedSlices: ["`inline`", "```ts\nconst a = `inside fence`;\n```"],
|
||||
},
|
||||
{
|
||||
name: "accepts alternate fence markers and unterminated trailing fences",
|
||||
text: "~~~js\nconsole.log(1)\n~~~\nplain\n```\nunterminated",
|
||||
expectedSlices: ["~~~js\nconsole.log(1)\n~~~", "```\nunterminated"],
|
||||
},
|
||||
{
|
||||
name: "keeps adjacent inline code outside fenced regions",
|
||||
text: ["```ts", "const a = 1;", "```", "after `inline` tail"].join("\n"),
|
||||
expectedSlices: ["```ts\nconst a = 1;\n```", "`inline`"],
|
||||
},
|
||||
] as const)("$name", ({ text, expectedSlices }) => {
|
||||
expectCodeRegionSlices(text, expectedSlices);
|
||||
});
|
||||
|
||||
it.each([
|
||||
{
|
||||
name: "inside code",
|
||||
positionSelector: (text: string) => text.indexOf("code"),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "outside code",
|
||||
positionSelector: (text: string) => text.indexOf("plain"),
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "at region end",
|
||||
positionSelector: (_text: string, regionEnd: number) => regionEnd,
|
||||
expected: false,
|
||||
},
|
||||
] as const)("reports whether positions are inside discovered regions: $name", (testCase) => {
|
||||
expectInsideCodeCase(testCase);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,45 +1,51 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { concatOptionalTextSegments, joinPresentTextSegments } from "./join-segments.js";
|
||||
|
||||
function expectTextSegmentsCase<T>(actual: T, expected: T) {
|
||||
expect(actual).toBe(expected);
|
||||
}
|
||||
|
||||
function expectJoinedTextSegmentsCase<T>(params: { run: () => T; expected: T }) {
|
||||
expectTextSegmentsCase(params.run(), params.expected);
|
||||
}
|
||||
|
||||
describe("concatOptionalTextSegments", () => {
|
||||
it("concatenates left and right with default separator", () => {
|
||||
expect(concatOptionalTextSegments({ left: "A", right: "B" })).toBe("A\n\nB");
|
||||
});
|
||||
|
||||
it("keeps explicit empty-string right value", () => {
|
||||
expect(concatOptionalTextSegments({ left: "A", right: "" })).toBe("");
|
||||
});
|
||||
|
||||
it("falls back to whichever side is present and honors custom separators", () => {
|
||||
expect(concatOptionalTextSegments({ left: "A" })).toBe("A");
|
||||
expect(concatOptionalTextSegments({ right: "B" })).toBe("B");
|
||||
expect(concatOptionalTextSegments({ left: "", right: "B" })).toBe("B");
|
||||
expect(concatOptionalTextSegments({ left: "" })).toBe("");
|
||||
expect(concatOptionalTextSegments({ left: "A", right: "B", separator: " | " })).toBe("A | B");
|
||||
it.each([
|
||||
{ params: { left: "A", right: "B" }, expected: "A\n\nB" },
|
||||
{ params: { left: "A", right: "" }, expected: "" },
|
||||
{ params: { left: "A" }, expected: "A" },
|
||||
{ params: { right: "B" }, expected: "B" },
|
||||
{ params: { left: "", right: "B" }, expected: "B" },
|
||||
{ params: { left: "" }, expected: "" },
|
||||
{ params: { left: "A", right: "B", separator: " | " }, expected: "A | B" },
|
||||
] as const)("concatenates optional segments %#", ({ params, expected }) => {
|
||||
expectJoinedTextSegmentsCase({
|
||||
run: () => concatOptionalTextSegments(params),
|
||||
expected,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("joinPresentTextSegments", () => {
|
||||
it("joins non-empty segments", () => {
|
||||
expect(joinPresentTextSegments(["A", undefined, "B"])).toBe("A\n\nB");
|
||||
});
|
||||
|
||||
it("returns undefined when all segments are empty", () => {
|
||||
expect(joinPresentTextSegments(["", undefined, null])).toBeUndefined();
|
||||
});
|
||||
|
||||
it("trims segments when requested", () => {
|
||||
expect(joinPresentTextSegments([" A ", " B "], { trim: true })).toBe("A\n\nB");
|
||||
});
|
||||
|
||||
it("keeps whitespace-only segments unless trim is enabled and supports custom separators", () => {
|
||||
expect(joinPresentTextSegments(["A", " ", "B"], { separator: " | " })).toBe("A | | B");
|
||||
expect(joinPresentTextSegments(["A", " ", "B"], { trim: true, separator: " | " })).toBe(
|
||||
"A | B",
|
||||
);
|
||||
});
|
||||
|
||||
it("preserves segment whitespace when trim is disabled", () => {
|
||||
expect(joinPresentTextSegments(["A", " B "], { separator: "|" })).toBe("A| B ");
|
||||
it.each([
|
||||
{ segments: ["A", undefined, "B"], options: undefined, expected: "A\n\nB" },
|
||||
{ segments: ["", undefined, null], options: undefined, expected: undefined },
|
||||
{ segments: [" A ", " B "], options: { trim: true }, expected: "A\n\nB" },
|
||||
{
|
||||
segments: ["A", " ", "B"],
|
||||
options: { separator: " | " },
|
||||
expected: "A | | B",
|
||||
},
|
||||
{
|
||||
segments: ["A", " ", "B"],
|
||||
options: { trim: true, separator: " | " },
|
||||
expected: "A | B",
|
||||
},
|
||||
{ segments: ["A", " B "], options: { separator: "|" }, expected: "A| B " },
|
||||
] as const)("joins present segments %#", ({ segments, options, expected }) => {
|
||||
expectJoinedTextSegmentsCase({
|
||||
run: () => joinPresentTextSegments(segments, options),
|
||||
expected,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,181 +2,200 @@ import { describe, expect, it } from "vitest";
|
||||
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
|
||||
|
||||
describe("stripReasoningTagsFromText", () => {
|
||||
const expectStrippedCases = (
|
||||
cases: ReadonlyArray<{
|
||||
input: string;
|
||||
expected: string;
|
||||
opts?: Parameters<typeof stripReasoningTagsFromText>[1];
|
||||
name?: string;
|
||||
}>,
|
||||
) => {
|
||||
for (const { input, expected, opts, name } of cases) {
|
||||
expect(stripReasoningTagsFromText(input, opts), name).toBe(expected);
|
||||
function expectStrippedCase(params: {
|
||||
input: string | null;
|
||||
expected: string | null;
|
||||
opts?: Parameters<typeof stripReasoningTagsFromText>[1];
|
||||
}) {
|
||||
expect(stripReasoningTagsFromText(params.input as unknown as string, params.opts)).toBe(
|
||||
params.expected,
|
||||
);
|
||||
}
|
||||
|
||||
function expectPreservedReasoningTagCodeExample(input: string) {
|
||||
expect(stripReasoningTagsFromText(input)).toBe(input);
|
||||
}
|
||||
|
||||
function expectReasoningCodeCase(params: { input: string; expected?: string }) {
|
||||
if (params.expected === undefined) {
|
||||
expectPreservedReasoningTagCodeExample(params.input);
|
||||
return;
|
||||
}
|
||||
};
|
||||
expectStrippedCase({
|
||||
input: params.input,
|
||||
expected: params.expected,
|
||||
});
|
||||
}
|
||||
|
||||
describe("basic functionality", () => {
|
||||
it("returns text unchanged when no reasoning tags present", () => {
|
||||
const input = "Hello, this is a normal message.";
|
||||
expect(stripReasoningTagsFromText(input)).toBe(input);
|
||||
});
|
||||
|
||||
it("strips reasoning-tag variants", () => {
|
||||
const cases = [
|
||||
{
|
||||
name: "strips proper think tags",
|
||||
input: "Hello <think>internal reasoning</think> world!",
|
||||
expected: "Hello world!",
|
||||
},
|
||||
{
|
||||
name: "strips thinking tags",
|
||||
input: "Before <thinking>some thought</thinking> after",
|
||||
expected: "Before after",
|
||||
},
|
||||
{ name: "strips thought tags", input: "A <thought>hmm</thought> B", expected: "A B" },
|
||||
{
|
||||
name: "strips antthinking tags",
|
||||
input: "X <antthinking>internal</antthinking> Y",
|
||||
expected: "X Y",
|
||||
},
|
||||
] as const;
|
||||
expectStrippedCases(cases);
|
||||
});
|
||||
|
||||
it("strips multiple reasoning blocks", () => {
|
||||
const input = "<think>first</think>A<think>second</think>B";
|
||||
expect(stripReasoningTagsFromText(input)).toBe("AB");
|
||||
it.each([
|
||||
{
|
||||
name: "returns text unchanged when no reasoning tags present",
|
||||
input: "Hello, this is a normal message.",
|
||||
expected: "Hello, this is a normal message.",
|
||||
},
|
||||
{
|
||||
name: "strips proper think tags",
|
||||
input: "Hello <think>internal reasoning</think> world!",
|
||||
expected: "Hello world!",
|
||||
},
|
||||
{
|
||||
name: "strips thinking tags",
|
||||
input: "Before <thinking>some thought</thinking> after",
|
||||
expected: "Before after",
|
||||
},
|
||||
{ name: "strips thought tags", input: "A <thought>hmm</thought> B", expected: "A B" },
|
||||
{
|
||||
name: "strips antthinking tags",
|
||||
input: "X <antthinking>internal</antthinking> Y",
|
||||
expected: "X Y",
|
||||
},
|
||||
{
|
||||
name: "strips multiple reasoning blocks",
|
||||
input: "<think>first</think>A<think>second</think>B",
|
||||
expected: "AB",
|
||||
},
|
||||
] as const)("$name", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
});
|
||||
|
||||
describe("code block preservation (issue #3952)", () => {
|
||||
it("preserves tags inside code examples", () => {
|
||||
const cases = [
|
||||
"Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!",
|
||||
"The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.",
|
||||
"Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!",
|
||||
"Use `<think>` to open and `</think>` to close.",
|
||||
"Example:\n```\n<think>reasoning</think>\n```",
|
||||
"Use `<final>` for final answers in code: ```\n<final>42</final>\n```",
|
||||
"First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`",
|
||||
] as const;
|
||||
for (const input of cases) {
|
||||
expect(stripReasoningTagsFromText(input)).toBe(input);
|
||||
}
|
||||
});
|
||||
|
||||
it("handles mixed code-tag and real-tag content", () => {
|
||||
const cases = [
|
||||
{
|
||||
input: "<think>hidden</think>Visible text with `<think>` example.",
|
||||
expected: "Visible text with `<think>` example.",
|
||||
},
|
||||
{
|
||||
input: "```\n<think>code</think>\n```\n<think>real hidden</think>visible",
|
||||
expected: "```\n<think>code</think>\n```\nvisible",
|
||||
},
|
||||
] as const;
|
||||
expectStrippedCases(cases);
|
||||
it.each([
|
||||
{
|
||||
name: "preserves plain code example",
|
||||
input: "Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!",
|
||||
},
|
||||
{
|
||||
name: "preserves inline literal think tag documentation",
|
||||
input: "The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.",
|
||||
},
|
||||
{
|
||||
name: "preserves xml fenced examples",
|
||||
input: "Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!",
|
||||
},
|
||||
{
|
||||
name: "preserves plain literal opening and closing tags",
|
||||
input: "Use `<think>` to open and `</think>` to close.",
|
||||
},
|
||||
{
|
||||
name: "preserves fenced think example",
|
||||
input: "Example:\n```\n<think>reasoning</think>\n```",
|
||||
},
|
||||
{
|
||||
name: "preserves final tags inside code examples",
|
||||
input: "Use `<final>` for final answers in code: ```\n<final>42</final>\n```",
|
||||
},
|
||||
{
|
||||
name: "preserves mixed literal think tags and code blocks",
|
||||
input: "First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`",
|
||||
},
|
||||
{
|
||||
name: "strips real tags while preserving literal think examples",
|
||||
input: "<think>hidden</think>Visible text with `<think>` example.",
|
||||
expected: "Visible text with `<think>` example.",
|
||||
},
|
||||
{
|
||||
name: "strips real tags after fenced code block",
|
||||
input: "```\n<think>code</think>\n```\n<think>real hidden</think>visible",
|
||||
expected: "```\n<think>code</think>\n```\nvisible",
|
||||
},
|
||||
] as const)("$name", ({ input, expected }) => {
|
||||
expectReasoningCodeCase({ input, expected });
|
||||
});
|
||||
});
|
||||
|
||||
describe("edge cases", () => {
|
||||
it("handles malformed tags and null-ish inputs", () => {
|
||||
const cases = [
|
||||
{
|
||||
input: "Here is how to use <think tags in your code",
|
||||
expected: "Here is how to use <think tags in your code",
|
||||
},
|
||||
{
|
||||
input: "You can start with <think and then close with </think>",
|
||||
expected: "You can start with <think and then close with",
|
||||
},
|
||||
{
|
||||
input: "A < think >content< /think > B",
|
||||
expected: "A B",
|
||||
},
|
||||
{
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
input: null as unknown as string,
|
||||
expected: null,
|
||||
},
|
||||
] as const;
|
||||
for (const { input, expected } of cases) {
|
||||
expect(stripReasoningTagsFromText(input)).toBe(expected);
|
||||
}
|
||||
it.each([
|
||||
{
|
||||
input: "Here is how to use <think tags in your code",
|
||||
expected: "Here is how to use <think tags in your code",
|
||||
},
|
||||
{
|
||||
input: "You can start with <think and then close with </think>",
|
||||
expected: "You can start with <think and then close with",
|
||||
},
|
||||
{
|
||||
input: "A < think >content< /think > B",
|
||||
expected: "A B",
|
||||
},
|
||||
{
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
input: null as unknown as string,
|
||||
expected: null,
|
||||
},
|
||||
] as const)("handles malformed/null-ish input %j", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
|
||||
it("handles fenced and inline code edge behavior", () => {
|
||||
const cases = [
|
||||
{
|
||||
input: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
|
||||
expected: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
|
||||
},
|
||||
{
|
||||
input: "Example:\n~~~js\n<think>code</think>\n~~~",
|
||||
expected: "Example:\n~~~js\n<think>code</think>\n~~~",
|
||||
},
|
||||
{
|
||||
input: "Use ``code`` with <think>hidden</think> text",
|
||||
expected: "Use ``code`` with text",
|
||||
},
|
||||
{
|
||||
input: "Before\n```\ncode\n```\nAfter with <think>hidden</think>",
|
||||
expected: "Before\n```\ncode\n```\nAfter with",
|
||||
},
|
||||
{
|
||||
input: "```\n<think>not protected\n~~~\n</think>text",
|
||||
expected: "```\n<think>not protected\n~~~\n</think>text",
|
||||
},
|
||||
{
|
||||
input: "Start `unclosed <think>hidden</think> end",
|
||||
expected: "Start `unclosed end",
|
||||
},
|
||||
] as const;
|
||||
expectStrippedCases(cases);
|
||||
it.each([
|
||||
{
|
||||
input: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
|
||||
expected: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
|
||||
},
|
||||
{
|
||||
input: "Example:\n~~~js\n<think>code</think>\n~~~",
|
||||
expected: "Example:\n~~~js\n<think>code</think>\n~~~",
|
||||
},
|
||||
{
|
||||
input: "Use ``code`` with <think>hidden</think> text",
|
||||
expected: "Use ``code`` with text",
|
||||
},
|
||||
{
|
||||
input: "Before\n```\ncode\n```\nAfter with <think>hidden</think>",
|
||||
expected: "Before\n```\ncode\n```\nAfter with",
|
||||
},
|
||||
{
|
||||
input: "```\n<think>not protected\n~~~\n</think>text",
|
||||
expected: "```\n<think>not protected\n~~~\n</think>text",
|
||||
},
|
||||
{
|
||||
input: "Start `unclosed <think>hidden</think> end",
|
||||
expected: "Start `unclosed end",
|
||||
},
|
||||
] as const)("handles fenced/inline code edge behavior: %j", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
|
||||
it("handles nested and final tag behavior", () => {
|
||||
const cases = [
|
||||
{
|
||||
input: "<think>outer <think>inner</think> still outer</think>visible",
|
||||
expected: "still outervisible",
|
||||
},
|
||||
{
|
||||
input: "A<final>1</final>B<final>2</final>C",
|
||||
expected: "A1B2C",
|
||||
},
|
||||
{
|
||||
input: "`<final>` in code, <final>visible</final> outside",
|
||||
expected: "`<final>` in code, visible outside",
|
||||
},
|
||||
{
|
||||
input: "A <FINAL data-x='1'>visible</Final> B",
|
||||
expected: "A visible B",
|
||||
},
|
||||
] as const;
|
||||
expectStrippedCases(cases);
|
||||
it.each([
|
||||
{
|
||||
input: "<think>outer <think>inner</think> still outer</think>visible",
|
||||
expected: "still outervisible",
|
||||
},
|
||||
{
|
||||
input: "A<final>1</final>B<final>2</final>C",
|
||||
expected: "A1B2C",
|
||||
},
|
||||
{
|
||||
input: "`<final>` in code, <final>visible</final> outside",
|
||||
expected: "`<final>` in code, visible outside",
|
||||
},
|
||||
{
|
||||
input: "A <FINAL data-x='1'>visible</Final> B",
|
||||
expected: "A visible B",
|
||||
},
|
||||
] as const)("handles nested/final tag behavior: %j", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
|
||||
it("handles unicode, attributes, and case-insensitive tag names", () => {
|
||||
const cases = [
|
||||
{
|
||||
input: "你好 <think>思考 🤔</think> 世界",
|
||||
expected: "你好 世界",
|
||||
},
|
||||
{
|
||||
input: "A <think id='test' class=\"foo\">hidden</think> B",
|
||||
expected: "A B",
|
||||
},
|
||||
{
|
||||
input: "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B",
|
||||
expected: "A B",
|
||||
},
|
||||
] as const;
|
||||
expectStrippedCases(cases);
|
||||
it.each([
|
||||
{
|
||||
input: "你好 <think>思考 🤔</think> 世界",
|
||||
expected: "你好 世界",
|
||||
},
|
||||
{
|
||||
input: "A <think id='test' class=\"foo\">hidden</think> B",
|
||||
expected: "A B",
|
||||
},
|
||||
{
|
||||
input: "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B",
|
||||
expected: "A B",
|
||||
},
|
||||
] as const)("handles unicode/attributes/case-insensitive names: %j", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
|
||||
it("handles long content and pathological backtick patterns efficiently", () => {
|
||||
@@ -192,50 +211,60 @@ describe("stripReasoningTagsFromText", () => {
|
||||
});
|
||||
|
||||
describe("strict vs preserve mode", () => {
|
||||
it("applies strict and preserve modes to unclosed tags", () => {
|
||||
const input = "Before <think>unclosed content after";
|
||||
const cases = [
|
||||
{ mode: "strict" as const, expected: "Before" },
|
||||
{ mode: "preserve" as const, expected: "Before unclosed content after" },
|
||||
] as const;
|
||||
for (const { mode, expected } of cases) {
|
||||
expect(stripReasoningTagsFromText(input, { mode })).toBe(expected);
|
||||
}
|
||||
});
|
||||
|
||||
it("still strips fully closed reasoning blocks in preserve mode", () => {
|
||||
expect(stripReasoningTagsFromText("A <think>hidden</think> B", { mode: "preserve" })).toBe(
|
||||
"A B",
|
||||
);
|
||||
it.each([
|
||||
{
|
||||
name: "applies strict mode to unclosed tags",
|
||||
input: "Before <think>unclosed content after",
|
||||
expected: "Before",
|
||||
opts: { mode: "strict" as const },
|
||||
},
|
||||
{
|
||||
name: "applies preserve mode to unclosed tags",
|
||||
input: "Before <think>unclosed content after",
|
||||
expected: "Before unclosed content after",
|
||||
opts: { mode: "preserve" as const },
|
||||
},
|
||||
{
|
||||
name: "still strips fully closed reasoning blocks in preserve mode",
|
||||
input: "A <think>hidden</think> B",
|
||||
expected: "A B",
|
||||
opts: { mode: "preserve" as const },
|
||||
},
|
||||
] as const)("$name", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
});
|
||||
|
||||
describe("trim options", () => {
|
||||
it("applies configured trim strategies", () => {
|
||||
const cases = [
|
||||
{
|
||||
input: " <think>x</think> result <think>y</think> ",
|
||||
expected: "result",
|
||||
opts: undefined,
|
||||
},
|
||||
{
|
||||
input: " <think>x</think> result ",
|
||||
expected: " result ",
|
||||
opts: { trim: "none" as const },
|
||||
},
|
||||
{
|
||||
input: " <think>x</think> result ",
|
||||
expected: "result ",
|
||||
opts: { trim: "start" as const },
|
||||
},
|
||||
] as const;
|
||||
expectStrippedCases(cases);
|
||||
it.each([
|
||||
{
|
||||
name: "applies default trim strategy",
|
||||
input: " <think>x</think> result <think>y</think> ",
|
||||
expected: "result",
|
||||
opts: undefined,
|
||||
},
|
||||
{
|
||||
name: "supports trim=none",
|
||||
input: " <think>x</think> result ",
|
||||
expected: " result ",
|
||||
opts: { trim: "none" as const },
|
||||
},
|
||||
{
|
||||
name: "supports trim=start",
|
||||
input: " <think>x</think> result ",
|
||||
expected: "result ",
|
||||
opts: { trim: "start" as const },
|
||||
},
|
||||
] as const)("$name", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
});
|
||||
|
||||
it("does not leak regex state across repeated calls", () => {
|
||||
expect(stripReasoningTagsFromText("A <final>1</final> B")).toBe("A 1 B");
|
||||
expect(stripReasoningTagsFromText("C <final>2</final> D")).toBe("C 2 D");
|
||||
expect(stripReasoningTagsFromText("E <think>x</think> F")).toBe("E F");
|
||||
it.each([
|
||||
{ input: "A <final>1</final> B", expected: "A 1 B" },
|
||||
{ input: "C <final>2</final> D", expected: "C 2 D" },
|
||||
{ input: "E <think>x</think> F", expected: "E F" },
|
||||
] as const)("does not leak regex state across repeated calls: %j", (testCase) => {
|
||||
expectStrippedCase(testCase);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user