test: dedupe routing and text suites

This commit is contained in:
Peter Steinberger
2026-03-28 07:31:24 +00:00
parent 30be04cd87
commit 155915e7dc
10 changed files with 1766 additions and 1351 deletions

View File

@@ -17,6 +17,30 @@ function expectFencesBalanced(chunks: string[]) {
}
}
/**
 * Asserts that `chunks` contains exactly one entry per expected length and
 * that each chunk's length equals the expected value at the same position.
 */
function expectChunkLengths(chunks: string[], expectedLengths: number[]) {
  expect(chunks).toHaveLength(expectedLengths.length);
  for (const [position, wantedLength] of expectedLengths.entries()) {
    // Optional chaining keeps a missing chunk as `undefined` so the assertion fails cleanly.
    expect(chunks[position]?.length).toBe(wantedLength);
  }
}
/**
 * Asserts that the chunks, joined with single spaces, equal `text` once every
 * run of whitespace on both sides is collapsed to one space and the ends are trimmed.
 */
function expectNormalizedChunkJoin(chunks: string[], text: string) {
  const collapseWhitespace = (value: string) => value.replace(/\s+/g, " ").trim();
  const rejoined = collapseWhitespace(chunks.join(" "));
  expect(rejoined).toBe(collapseWhitespace(text));
}
/**
 * Runs `chunkText` for one table-driven case and hands the produced chunks,
 * together with the original text, to the case's own assertion callback.
 */
function expectChunkTextCase(params: {
  text: string;
  limit: number;
  assert: (chunks: string[], text: string) => void;
}) {
  const { text, limit } = params;
  const produced = chunkText(text, limit);
  params.assert(produced, text);
}
/**
 * Invokes a self-contained test-case body. Used as the shared runner for
 * `it.each` tables whose rows carry their whole scenario in a `run` closure.
 */
function expectChunkSpecialCase(run: () => void) {
run();
}
type ChunkCase = {
name: string;
text: string;
@@ -30,6 +54,19 @@ function runChunkCases(chunker: (text: string, limit: number) => string[], cases
});
}
/**
 * Runs a mode-aware chunker for one case and asserts that its output deep-equals
 * `expected`. The optional `name` is forwarded to `expect` as the failure message.
 */
function expectChunkModeCase(params: {
  chunker: (text: string, limit: number, mode: "length" | "newline") => string[];
  text: string;
  limit: number;
  mode: "length" | "newline";
  expected: readonly string[];
  name?: string;
}) {
  const { text, limit, mode, expected, name } = params;
  const actual = params.chunker(text, limit, mode);
  expect(actual, name).toEqual(expected);
}
function expectMarkdownFenceSplitCases(
cases: ReadonlyArray<{
name: string;
@@ -39,7 +76,7 @@ function expectMarkdownFenceSplitCases(
expectedSuffix: string;
}>,
) {
for (const { name, text, limit, expectedPrefix, expectedSuffix } of cases) {
cases.forEach(({ name, text, limit, expectedPrefix, expectedSuffix }) => {
const chunks = chunkMarkdownText(text, limit);
expect(chunks.length, name).toBeGreaterThan(1);
for (const chunk of chunks) {
@@ -48,9 +85,28 @@ function expectMarkdownFenceSplitCases(
expect(chunk.trimEnd().endsWith(expectedSuffix), name).toBe(true);
}
expectFencesBalanced(chunks);
});
}
/**
 * Chunks `text` with `chunkMarkdownText` and asserts that every chunk still has
 * non-whitespace content after its fence marker lines are removed.
 */
function expectNoEmptyFencedChunks(text: string, limit: number) {
  // A fence marker line: up to three leading spaces, then 3+ backticks or tildes.
  const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
  chunkMarkdownText(text, limit).forEach((chunk) => {
    const body = chunk
      .split("\n")
      .filter((line) => !fenceLine.test(line))
      .join("\n");
    expect(body.trim()).not.toBe("");
  });
}
/**
 * Chunks `text` with `chunkMarkdownText` while spying on `fences.parseFenceSpans`
 * and asserts that more than two chunks were produced with exactly one parse call.
 *
 * The spy is restored in a `finally` block so that a failing assertion cannot
 * leave `parseFenceSpans` spied for subsequent tests.
 */
function expectFenceParseOccursOnce(text: string, limit: number) {
  const parseSpy = vi.spyOn(fences, "parseFenceSpans");
  try {
    const chunks = chunkMarkdownText(text, limit);
    expect(chunks.length).toBeGreaterThan(2);
    expect(parseSpy).toHaveBeenCalledTimes(1);
  } finally {
    // Always undo the spy, even when one of the assertions above throws.
    parseSpy.mockRestore();
  }
}
const parentheticalCases: ChunkCase[] = [
{
name: "keeps parenthetical phrases together",
@@ -72,232 +128,355 @@ const parentheticalCases: ChunkCase[] = [
},
];
/**
 * Shared table of newline-mode cases for `chunkMarkdownTextWithMode` around
 * fenced code blocks. Built inside an IIFE so the fixture strings stay private.
 */
const newlineModeFenceCases = (() => {
  // Fenced block that contains a blank line inside the fence.
  const pythonFence = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```";
  // Fenced block made of 20 repeated statements; used with a limit of 40 below.
  const oversizedFence = `\`\`\`js\n${"const a = 1;\n".repeat(20)}\`\`\``;
  // Fence followed by a paragraph after a single newline (no blank line between).
  const fenceThenParagraph = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter";
  return [
    {
      name: "keeps single-newline fence+paragraph together",
      text: fenceThenParagraph,
      limit: 1000,
      expected: [fenceThenParagraph],
    },
    {
      name: "keeps blank lines inside fence together",
      text: pythonFence,
      limit: 1000,
      expected: [pythonFence],
    },
    {
      name: "splits between fence and following paragraph",
      text: `${pythonFence}\n\nAfter`,
      limit: 1000,
      expected: [pythonFence, "After"],
    },
    {
      name: "defers long markdown blocks to markdown chunker",
      text: oversizedFence,
      limit: 40,
      // The expectation is whatever the plain markdown chunker yields for the same input.
      expected: chunkMarkdownText(oversizedFence, 40),
    },
  ] as const;
})();
describe("chunkText", () => {
it("keeps multi-line text in one chunk when under limit", () => {
const text = "Line one\n\nLine two\n\nLine three";
const chunks = chunkText(text, 1600);
expect(chunks).toEqual([text]);
});
it("splits only when text exceeds the limit", () => {
const part = "a".repeat(20);
const text = part.repeat(5); // 100 chars
const chunks = chunkText(text, 60);
expect(chunks.length).toBe(2);
expect(chunks[0].length).toBe(60);
expect(chunks[1].length).toBe(40);
expect(chunks.join("")).toBe(text);
});
it("prefers breaking at a newline before the limit", () => {
const text = `paragraph one line\n\nparagraph two starts here and continues`;
const chunks = chunkText(text, 40);
expect(chunks).toEqual(["paragraph one line", "paragraph two starts here and continues"]);
});
it("otherwise breaks at the last whitespace under the limit", () => {
const text = "This is a message that should break nicely near a word boundary.";
const chunks = chunkText(text, 30);
expect(chunks[0].length).toBeLessThanOrEqual(30);
expect(chunks[1].length).toBeLessThanOrEqual(30);
expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe(text.replace(/\s+/g, " ").trim());
});
it("falls back to a hard break when no whitespace is present", () => {
const text = "Supercalifragilisticexpialidocious"; // 34 chars
const chunks = chunkText(text, 10);
expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
it.each([
{
name: "keeps multi-line text in one chunk when under limit",
text: "Line one\n\nLine two\n\nLine three",
limit: 1600,
assert: (chunks: string[], text: string) => {
expect(chunks).toEqual([text]);
},
},
{
name: "splits only when text exceeds the limit",
text: "a".repeat(20).repeat(5),
limit: 60,
assert: (chunks: string[], text: string) => {
expectChunkLengths(chunks, [60, 40]);
expect(chunks.join("")).toBe(text);
},
},
{
name: "prefers breaking at a newline before the limit",
text: "paragraph one line\n\nparagraph two starts here and continues",
limit: 40,
assert: (chunks: string[]) => {
expect(chunks).toEqual(["paragraph one line", "paragraph two starts here and continues"]);
},
},
{
name: "otherwise breaks at the last whitespace under the limit",
text: "This is a message that should break nicely near a word boundary.",
limit: 30,
assert: (chunks: string[], text: string) => {
expect(chunks[0]?.length).toBeLessThanOrEqual(30);
expect(chunks[1]?.length).toBeLessThanOrEqual(30);
expectNormalizedChunkJoin(chunks, text);
},
},
{
name: "falls back to a hard break when no whitespace is present",
text: "Supercalifragilisticexpialidocious",
limit: 10,
assert: (chunks: string[]) => {
expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
},
},
] as const)("$name", ({ text, limit, assert }) => {
expectChunkTextCase({ text, limit, assert });
});
runChunkCases(chunkText, [parentheticalCases[0]]);
});
describe("resolveTextChunkLimit", () => {
it("uses per-provider defaults", () => {
expect(resolveTextChunkLimit(undefined, "whatsapp")).toBe(4000);
expect(resolveTextChunkLimit(undefined, "telegram")).toBe(4000);
expect(resolveTextChunkLimit(undefined, "slack")).toBe(4000);
expect(resolveTextChunkLimit(undefined, "signal")).toBe(4000);
expect(resolveTextChunkLimit(undefined, "imessage")).toBe(4000);
expect(resolveTextChunkLimit(undefined, "discord")).toBe(4000);
expect(
resolveTextChunkLimit(undefined, "discord", undefined, {
fallbackLimit: 2000,
it.each([
...(["whatsapp", "telegram", "slack", "signal", "imessage", "discord"] as const).map(
(provider) => ({
name: `uses default limit for ${provider}`,
cfg: undefined,
provider,
accountId: undefined,
options: undefined,
expected: 4000,
}),
).toBe(2000);
});
it("supports provider overrides", () => {
const cfg = { channels: { telegram: { textChunkLimit: 1234 } } };
expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000);
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234);
});
it("prefers account overrides when provided", () => {
const cfg = {
channels: {
telegram: {
textChunkLimit: 2000,
accounts: {
default: { textChunkLimit: 1234 },
primary: { textChunkLimit: 777 },
),
{
name: "uses fallback limit override when provided",
cfg: undefined,
provider: "discord" as const,
accountId: undefined,
options: { fallbackLimit: 2000 },
expected: 2000,
},
{
name: "supports provider overrides for telegram",
cfg: { channels: { telegram: { textChunkLimit: 1234 } } },
provider: "telegram" as const,
accountId: undefined,
options: undefined,
expected: 1234,
},
{
name: "falls back when provider override does not match",
cfg: { channels: { telegram: { textChunkLimit: 1234 } } },
provider: "whatsapp" as const,
accountId: undefined,
options: undefined,
expected: 4000,
},
{
name: "prefers account overrides when provided",
cfg: {
channels: {
telegram: {
textChunkLimit: 2000,
accounts: {
default: { textChunkLimit: 1234 },
primary: { textChunkLimit: 777 },
},
},
},
},
};
expect(resolveTextChunkLimit(cfg, "telegram", "primary")).toBe(777);
expect(resolveTextChunkLimit(cfg, "telegram", "default")).toBe(1234);
});
it("uses the matching provider override", () => {
const cfg = {
channels: {
discord: { textChunkLimit: 111 },
slack: { textChunkLimit: 222 },
provider: "telegram" as const,
accountId: "primary",
options: undefined,
expected: 777,
},
{
name: "uses default account override when requested",
cfg: {
channels: {
telegram: {
textChunkLimit: 2000,
accounts: {
default: { textChunkLimit: 1234 },
primary: { textChunkLimit: 777 },
},
},
},
},
};
expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
expect(resolveTextChunkLimit(cfg, "slack")).toBe(222);
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
provider: "telegram" as const,
accountId: "default",
options: undefined,
expected: 1234,
},
{
name: "uses the matching provider override for discord",
cfg: {
channels: {
discord: { textChunkLimit: 111 },
slack: { textChunkLimit: 222 },
},
},
provider: "discord" as const,
accountId: undefined,
options: undefined,
expected: 111,
},
{
name: "uses the matching provider override for slack",
cfg: {
channels: {
discord: { textChunkLimit: 111 },
slack: { textChunkLimit: 222 },
},
},
provider: "slack" as const,
accountId: undefined,
options: undefined,
expected: 222,
},
{
name: "falls back when multi-provider override does not match",
cfg: {
channels: {
discord: { textChunkLimit: 111 },
slack: { textChunkLimit: 222 },
},
},
provider: "telegram" as const,
accountId: undefined,
options: undefined,
expected: 4000,
},
] as const)("$name", ({ cfg, provider, accountId, options, expected }) => {
expect(resolveTextChunkLimit(cfg as never, provider, accountId, options)).toBe(expected);
});
});
describe("chunkMarkdownText", () => {
it("keeps fenced blocks intact when a safe break exists", () => {
const prefix = "p".repeat(60);
const fence = "```bash\nline1\nline2\n```";
const suffix = "s".repeat(60);
const text = `${prefix}\n\n${fence}\n\n${suffix}`;
it.each([
{
name: "keeps fenced blocks intact when a safe break exists",
run: () => {
const prefix = "p".repeat(60);
const fence = "```bash\nline1\nline2\n```";
const suffix = "s".repeat(60);
const text = `${prefix}\n\n${fence}\n\n${suffix}`;
const chunks = chunkMarkdownText(text, 40);
expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true);
expectFencesBalanced(chunks);
});
it("handles multiple fence marker styles when splitting inside fences", () => {
const cases = [
{
name: "backtick fence",
text: `\`\`\`txt\n${"a".repeat(500)}\n\`\`\``,
limit: 120,
expectedPrefix: "```txt\n",
expectedSuffix: "```",
const chunks = chunkMarkdownText(text, 40);
expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true);
expectFencesBalanced(chunks);
},
{
name: "tilde fence",
text: `~~~sh\n${"x".repeat(600)}\n~~~`,
limit: 140,
expectedPrefix: "~~~sh\n",
expectedSuffix: "~~~",
},
{
name: "long backtick fence",
text: `\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``,
limit: 140,
expectedPrefix: "````md\n",
expectedSuffix: "````",
},
{
name: "indented fence",
text: ` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``,
limit: 160,
expectedPrefix: " ```js\n",
expectedSuffix: " ```",
},
] as const;
expectMarkdownFenceSplitCases(cases);
});
it("never produces an empty fenced chunk when splitting", () => {
const text = `\`\`\`txt\n${"a".repeat(300)}\n\`\`\``;
const chunks = chunkMarkdownText(text, 60);
for (const chunk of chunks) {
const nonFenceLines = chunk
.split("\n")
.filter((line) => !/^( {0,3})(`{3,}|~{3,})(.*)$/.test(line));
expect(nonFenceLines.join("\n").trim()).not.toBe("");
}
},
{
name: "handles multiple fence marker styles when splitting inside fences",
run: () =>
expectMarkdownFenceSplitCases([
{
name: "backtick fence",
text: `\`\`\`txt\n${"a".repeat(500)}\n\`\`\``,
limit: 120,
expectedPrefix: "```txt\n",
expectedSuffix: "```",
},
{
name: "tilde fence",
text: `~~~sh\n${"x".repeat(600)}\n~~~`,
limit: 140,
expectedPrefix: "~~~sh\n",
expectedSuffix: "~~~",
},
{
name: "long backtick fence",
text: `\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``,
limit: 140,
expectedPrefix: "````md\n",
expectedSuffix: "````",
},
{
name: "indented fence",
text: ` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``,
limit: 160,
expectedPrefix: " ```js\n",
expectedSuffix: " ```",
},
]),
},
] as const)("$name", ({ run }) => {
expectChunkSpecialCase(run);
});
runChunkCases(chunkMarkdownText, parentheticalCases);
it("hard-breaks when a parenthetical exceeds the limit", () => {
const text = `(${"a".repeat(80)})`;
const chunks = chunkMarkdownText(text, 20);
expect(chunks[0]?.length).toBe(20);
expect(chunks.join("")).toBe(text);
});
it("parses fence spans once for long fenced payloads", () => {
const parseSpy = vi.spyOn(fences, "parseFenceSpans");
const text = `\`\`\`txt\n${"line\n".repeat(600)}\`\`\``;
const chunks = chunkMarkdownText(text, 80);
expect(chunks.length).toBeGreaterThan(2);
expect(parseSpy).toHaveBeenCalledTimes(1);
parseSpy.mockRestore();
it.each([
{
name: "never produces an empty fenced chunk when splitting",
run: () => {
expectNoEmptyFencedChunks(`\`\`\`txt\n${"a".repeat(300)}\n\`\`\``, 60);
},
},
{
name: "hard-breaks when a parenthetical exceeds the limit",
run: () => {
const text = `(${"a".repeat(80)})`;
const chunks = chunkMarkdownText(text, 20);
expect(chunks[0]?.length).toBe(20);
expect(chunks.join("")).toBe(text);
},
},
{
name: "parses fence spans once for long fenced payloads",
run: () => {
expectFenceParseOccursOnce(`\`\`\`txt\n${"line\n".repeat(600)}\`\`\``, 80);
},
},
] as const)("$name", ({ run }) => {
expectChunkSpecialCase(run);
});
});
describe("chunkByNewline", () => {
it("splits text on newlines", () => {
const text = "Line one\nLine two\nLine three";
const chunks = chunkByNewline(text, 1000);
expect(chunks).toEqual(["Line one", "Line two", "Line three"]);
it.each([
{
name: "splits text on newlines",
text: "Line one\nLine two\nLine three",
limit: 1000,
expected: ["Line one", "Line two", "Line three"],
},
{
name: "preserves blank lines by folding into the next chunk",
text: "Line one\n\n\nLine two\n\nLine three",
limit: 1000,
expected: ["Line one", "\n\nLine two", "\nLine three"],
},
{
name: "trims whitespace from lines",
text: " Line one \n Line two ",
limit: 1000,
expected: ["Line one", "Line two"],
},
{
name: "preserves leading blank lines on the first chunk",
text: "\n\nLine one\nLine two",
limit: 1000,
expected: ["\n\nLine one", "Line two"],
},
{
name: "preserves trailing blank lines on the last chunk",
text: "Line one\n\n",
limit: 1000,
expected: ["Line one\n\n"],
},
{
name: "keeps whitespace when trimLines is false",
text: " indented line \nNext",
limit: 1000,
options: { trimLines: false },
expected: [" indented line ", "Next"],
},
] as const)("$name", ({ text, limit, options, expected }) => {
expect(chunkByNewline(text, limit, options)).toEqual(expected);
});
it("preserves blank lines by folding into the next chunk", () => {
const text = "Line one\n\n\nLine two\n\nLine three";
const chunks = chunkByNewline(text, 1000);
expect(chunks).toEqual(["Line one", "\n\nLine two", "\nLine three"]);
});
it("trims whitespace from lines", () => {
const text = " Line one \n Line two ";
const chunks = chunkByNewline(text, 1000);
expect(chunks).toEqual(["Line one", "Line two"]);
});
it("preserves leading blank lines on the first chunk", () => {
const text = "\n\nLine one\nLine two";
const chunks = chunkByNewline(text, 1000);
expect(chunks).toEqual(["\n\nLine one", "Line two"]);
});
it("falls back to length-based for long lines", () => {
const text = "Short line\n" + "a".repeat(50) + "\nAnother short";
const chunks = chunkByNewline(text, 20);
expect(chunks[0]).toBe("Short line");
// Long line gets split into multiple chunks
expect(chunks[1].length).toBe(20);
expect(chunks[2].length).toBe(20);
expect(chunks[3].length).toBe(10);
expect(chunks[4]).toBe("Another short");
});
it("does not split long lines when splitLongLines is false", () => {
const text = "a".repeat(50);
const chunks = chunkByNewline(text, 20, { splitLongLines: false });
expect(chunks).toEqual([text]);
it.each([
{
name: "falls back to length-based for long lines",
run: () => {
const text = "Short line\n" + "a".repeat(50) + "\nAnother short";
const chunks = chunkByNewline(text, 20);
expect(chunks[0]).toBe("Short line");
expectChunkLengths(chunks.slice(1, 4), [20, 20, 10]);
expect(chunks[4]).toBe("Another short");
},
},
{
name: "does not split long lines when splitLongLines is false",
run: () => {
const text = "a".repeat(50);
expect(chunkByNewline(text, 20, { splitLongLines: false })).toEqual([text]);
},
},
] as const)("$name", ({ run }) => {
expectChunkSpecialCase(run);
});
it.each(["", " \n\n "] as const)("returns empty array for input %j", (text) => {
expect(chunkByNewline(text, 100)).toEqual([]);
});
it("preserves trailing blank lines on the last chunk", () => {
const text = "Line one\n\n";
const chunks = chunkByNewline(text, 1000);
expect(chunks).toEqual(["Line one\n\n"]);
});
it("keeps whitespace when trimLines is false", () => {
const text = " indented line \nNext";
const chunks = chunkByNewline(text, 1000, { trimLines: false });
expect(chunks).toEqual([" indented line ", "Next"]);
});
});
describe("chunkTextWithMode", () => {
@@ -323,7 +502,14 @@ describe("chunkTextWithMode", () => {
] as const)(
"applies mode-specific chunking behavior: $name",
({ text, mode, expected, name }) => {
expect(chunkTextWithMode(text, 1000, mode), name).toEqual(expected);
expectChunkModeCase({
chunker: chunkTextWithMode,
text,
limit: 1000,
mode,
expected,
name,
});
},
);
});
@@ -349,71 +535,50 @@ describe("chunkMarkdownTextWithMode", () => {
expected: ["Para one", "Para two"],
},
] as const)("applies markdown/newline mode behavior: $name", ({ text, mode, expected, name }) => {
expect(chunkMarkdownTextWithMode(text, 1000, mode), name).toEqual(expected);
expectChunkModeCase({
chunker: chunkMarkdownTextWithMode,
text,
limit: 1000,
mode,
expected,
name,
});
});
it("handles newline mode fence splitting rules", () => {
const fence = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```";
const longFence = `\`\`\`js\n${"const a = 1;\n".repeat(20)}\`\`\``;
const cases = [
{
name: "keeps single-newline fence+paragraph together",
text: "```js\nconst a = 1;\nconst b = 2;\n```\nAfter",
limit: 1000,
expected: ["```js\nconst a = 1;\nconst b = 2;\n```\nAfter"],
},
{
name: "keeps blank lines inside fence together",
text: fence,
limit: 1000,
expected: [fence],
},
{
name: "splits between fence and following paragraph",
text: `${fence}\n\nAfter`,
limit: 1000,
expected: [fence, "After"],
},
{
name: "defers long markdown blocks to markdown chunker",
text: longFence,
limit: 40,
expected: chunkMarkdownText(longFence, 40),
},
] as const;
for (const { text, limit, expected, name } of cases) {
it.each(newlineModeFenceCases)(
"handles newline mode fence splitting rules: $name",
({ text, limit, expected, name }) => {
expect(chunkMarkdownTextWithMode(text, limit, "newline"), name).toEqual(expected);
}
});
},
);
});
describe("resolveChunkMode", () => {
it("resolves default, provider, account, and internal channel modes", () => {
const providerCfg = { channels: { slack: { chunkMode: "newline" as const } } };
const accountCfg = {
channels: {
slack: {
chunkMode: "length" as const,
accounts: {
primary: { chunkMode: "newline" as const },
},
const providerCfg = { channels: { slack: { chunkMode: "newline" as const } } };
const accountCfg = {
channels: {
slack: {
chunkMode: "length" as const,
accounts: {
primary: { chunkMode: "newline" as const },
},
},
};
const cases = [
{ cfg: undefined, provider: "telegram", accountId: undefined, expected: "length" },
{ cfg: {}, provider: "discord", accountId: undefined, expected: "length" },
{ cfg: undefined, provider: "bluebubbles", accountId: undefined, expected: "length" },
{ cfg: providerCfg, provider: "__internal__", accountId: undefined, expected: "length" },
{ cfg: providerCfg, provider: "slack", accountId: undefined, expected: "newline" },
{ cfg: providerCfg, provider: "discord", accountId: undefined, expected: "length" },
{ cfg: accountCfg, provider: "slack", accountId: "primary", expected: "newline" },
{ cfg: accountCfg, provider: "slack", accountId: "other", expected: "length" },
] as const;
},
};
for (const { cfg, provider, accountId, expected } of cases) {
it.each([
{ cfg: undefined, provider: "telegram", accountId: undefined, expected: "length" },
{ cfg: {}, provider: "discord", accountId: undefined, expected: "length" },
{ cfg: undefined, provider: "bluebubbles", accountId: undefined, expected: "length" },
{ cfg: providerCfg, provider: "__internal__", accountId: undefined, expected: "length" },
{ cfg: providerCfg, provider: "slack", accountId: undefined, expected: "newline" },
{ cfg: providerCfg, provider: "discord", accountId: undefined, expected: "length" },
{ cfg: accountCfg, provider: "slack", accountId: "primary", expected: "newline" },
{ cfg: accountCfg, provider: "slack", accountId: "other", expected: "length" },
] as const)(
"resolves default/provider/account/internal chunk mode for $provider $accountId",
({ cfg, provider, accountId, expected }) => {
expect(resolveChunkMode(cfg as never, provider, accountId)).toBe(expected);
}
});
},
);
});

View File

@@ -6,33 +6,59 @@ import {
} from "./account-id.js";
describe("account id normalization", () => {
it("defaults missing values to default account", () => {
expect(normalizeAccountId(undefined)).toBe(DEFAULT_ACCOUNT_ID);
expect(normalizeAccountId(null)).toBe(DEFAULT_ACCOUNT_ID);
expect(normalizeAccountId(" ")).toBe(DEFAULT_ACCOUNT_ID);
// Inputs named after Object prototype members. The tables below reuse them for
// both normalizers: expected DEFAULT_ACCOUNT_ID for the required variant and
// undefined for the optional variant.
const reservedAccountIdCases = [
{ name: "rejects __proto__ pollution keys", input: "__proto__" },
{ name: "rejects constructor pollution keys", input: "constructor" },
{ name: "rejects prototype pollution keys", input: "prototype" },
] as const;
/**
 * Normalizes `input` with `normalizeOptionalAccountId` when `optional` is set,
 * otherwise with `normalizeAccountId`, and asserts the result is `expected`.
 */
function expectNormalizedAccountIdCase(params: {
  input: string | null | undefined;
  expected: string | undefined;
  optional?: boolean;
}) {
  const { input, expected, optional } = params;
  const actual = optional ? normalizeOptionalAccountId(input) : normalizeAccountId(input);
  expect(actual).toBe(expected);
}
it.each([
{
name: "defaults undefined to default account",
input: undefined,
expected: DEFAULT_ACCOUNT_ID,
},
{ name: "defaults null to default account", input: null, expected: DEFAULT_ACCOUNT_ID },
{
name: "defaults blank strings to default account",
input: " ",
expected: DEFAULT_ACCOUNT_ID,
},
{ name: "normalizes valid ids to lowercase", input: " Business_1 ", expected: "business_1" },
{
name: "sanitizes invalid characters into canonical ids",
input: " Prod/US East ",
expected: "prod-us-east",
},
...reservedAccountIdCases.map(({ name, input }) => ({
name,
input,
expected: DEFAULT_ACCOUNT_ID,
})),
] as const)("$name", ({ input, expected }) => {
expectNormalizedAccountIdCase({ input, expected });
});
it("normalizes valid ids to lowercase", () => {
expect(normalizeAccountId(" Business_1 ")).toBe("business_1");
});
it("sanitizes invalid characters into canonical ids", () => {
expect(normalizeAccountId(" Prod/US East ")).toBe("prod-us-east");
});
it("rejects prototype-pollution key vectors", () => {
expect(normalizeAccountId("__proto__")).toBe(DEFAULT_ACCOUNT_ID);
expect(normalizeAccountId("constructor")).toBe(DEFAULT_ACCOUNT_ID);
expect(normalizeAccountId("prototype")).toBe(DEFAULT_ACCOUNT_ID);
expect(normalizeOptionalAccountId("__proto__")).toBeUndefined();
expect(normalizeOptionalAccountId("constructor")).toBeUndefined();
expect(normalizeOptionalAccountId("prototype")).toBeUndefined();
});
it("preserves optional semantics without forcing default", () => {
expect(normalizeOptionalAccountId(undefined)).toBeUndefined();
expect(normalizeOptionalAccountId(" ")).toBeUndefined();
expect(normalizeOptionalAccountId(" !!! ")).toBeUndefined();
expect(normalizeOptionalAccountId(" Business ")).toBe("business");
it.each([
{ name: "keeps undefined optional values unset", input: undefined, expected: undefined },
{ name: "keeps blank optional values unset", input: " ", expected: undefined },
{ name: "keeps invalid optional values unset", input: " !!! ", expected: undefined },
...reservedAccountIdCases.map(({ name, input }) => ({
name: name.replace(" pollution keys", " optional values"),
input,
expected: undefined,
})),
{ name: "normalizes valid optional values", input: " Business ", expected: "business" },
] as const)("$name", ({ input, expected }) => {
expectNormalizedAccountIdCase({ input, expected, optional: true });
});
});

View File

@@ -1,42 +1,98 @@
import { describe, expect, it } from "vitest";
import { resolveAccountEntry, resolveNormalizedAccountEntry } from "./account-lookup.js";
/**
 * Builds an accounts map with no own keys whose prototype carries a `default`
 * entry, so the entry is reachable only through the prototype chain.
 */
function createAccountsWithPrototypePollution() {
  const pollutedPrototype = { default: { id: "polluted" } };
  const accounts: Record<string, { id: string }> = Object.create(pollutedPrototype);
  return accounts;
}
/**
 * Asserts that a resolved account entry deep-equals the expected entry
 * (both may be `undefined`).
 */
function expectResolvedAccountLookupCase(
actual: { id: string } | undefined,
expected: { id: string } | undefined,
) {
expect(actual).toEqual(expected);
}
/**
 * Feeds `resolve` an accounts map whose only entry lives on the prototype chain
 * and asserts that the lookup yields `undefined`.
 */
function expectPrototypePollutionIgnoredCase(
  resolve: (accounts: Record<string, { id: string }>) => { id: string } | undefined,
) {
  const resolved = resolve(createAccountsWithPrototypePollution());
  expect(resolved).toBeUndefined();
}
/**
 * Runs one account-lookup case: resolves against `params.accounts` (an empty map
 * when omitted) and checks the result against `params.expected`.
 */
function expectAccountLookupCase(params: {
  accounts?: Record<string, { id: string }>;
  resolve: (accounts: Record<string, { id: string }>) => { id: string } | undefined;
  expected: { id: string } | undefined;
}) {
  const accounts = params.accounts ?? {};
  const actual = params.resolve(accounts);
  expectResolvedAccountLookupCase(actual, params.expected);
}
describe("resolveAccountEntry", () => {
it("resolves direct and case-insensitive account keys", () => {
const accounts = {
default: { id: "default" },
Business: { id: "business" },
};
expect(resolveAccountEntry(accounts, "default")).toEqual({ id: "default" });
expect(resolveAccountEntry(accounts, "business")).toEqual({ id: "business" });
const accounts = {
default: { id: "default" },
Business: { id: "business" },
};
it.each([
{
name: "resolves the default account key",
resolve: (localAccounts: Record<string, { id: string }>) =>
resolveAccountEntry(localAccounts, "default"),
expected: { id: "default" },
},
{
name: "resolves a normalized business account key",
resolve: (localAccounts: Record<string, { id: string }>) =>
resolveAccountEntry(localAccounts, "business"),
expected: { id: "business" },
},
] as const)("$name", ({ resolve, expected }) => {
expectAccountLookupCase({ accounts, resolve, expected });
});
it("ignores prototype-chain values", () => {
const inherited = { default: { id: "polluted" } };
const accounts = Object.create(inherited) as Record<string, { id: string }>;
expect(resolveAccountEntry(accounts, "default")).toBeUndefined();
expectPrototypePollutionIgnoredCase((localAccounts) =>
resolveAccountEntry(localAccounts, "default"),
);
});
});
describe("resolveNormalizedAccountEntry", () => {
it("resolves normalized account keys with a custom normalizer", () => {
const accounts = {
"Ops Team": { id: "ops" },
};
const normalizeAccountId = (accountId: string) =>
accountId.trim().toLowerCase().replaceAll(" ", "-");
expect(
resolveNormalizedAccountEntry(accounts, "ops-team", (accountId) =>
accountId.trim().toLowerCase().replaceAll(" ", "-"),
),
).toEqual({ id: "ops" });
});
it.each([
{
name: "resolves normalized account keys with a custom normalizer",
accounts: {
"Ops Team": { id: "ops" },
},
resolve: (accounts: Record<string, { id: string }>) =>
resolveNormalizedAccountEntry(accounts, "ops-team", normalizeAccountId),
expected: {
id: "ops",
},
},
{
name: "ignores prototype-chain values",
resolve: () => undefined,
expected: undefined,
assert: () =>
expectPrototypePollutionIgnoredCase((accounts) =>
resolveNormalizedAccountEntry(accounts, "default", (accountId) => accountId),
),
},
] as const)("$name", ({ accounts, resolve, expected, assert }) => {
if (assert) {
assert();
return;
}
it("ignores prototype-chain values", () => {
const inherited = { default: { id: "polluted" } };
const accounts = Object.create(inherited) as Record<string, { id: string }>;
expect(
resolveNormalizedAccountEntry(accounts, "default", (accountId) => accountId),
).toBeUndefined();
expectAccountLookupCase({
accounts,
resolve,
expected,
});
});
});

File diff suppressed because it is too large Load Diff

View File

@@ -6,65 +6,62 @@ describe("Discord Session Key Continuity", () => {
const channel = "discord";
const accountId = "default";
it("generates distinct keys for DM vs Channel (dmScope=main)", () => {
// Scenario: Default config (dmScope=main)
const dmKey = buildAgentSessionKey({
/**
 * Builds a session key for the suite's fixed agent/channel/account, varying only
 * the peer and the DM scope (which falls back to "main" when not supplied).
 */
function buildDiscordSessionKey(params: {
  peer: { kind: "direct" | "channel"; id: string };
  dmScope?: "main" | "per-peer";
}) {
  const dmScope = params.dmScope ?? "main";
  const { peer } = params;
  return buildAgentSessionKey({ agentId, channel, accountId, dmScope, peer });
}
function expectDistinctDmAndChannelKeys(params: {
dmScope: "main" | "per-peer";
expectedDmKey: string;
}) {
const dmKey = buildDiscordSessionKey({
peer: { kind: "direct", id: "user123" },
dmScope: "main",
dmScope: params.dmScope,
});
const groupKey = buildAgentSessionKey({
agentId,
channel,
accountId,
const groupKey = buildDiscordSessionKey({
peer: { kind: "channel", id: "channel456" },
dmScope: "main",
});
expect(dmKey).toBe("agent:main:main");
expect(dmKey).toBe(params.expectedDmKey);
expect(groupKey).toBe("agent:main:discord:channel:channel456");
expect(dmKey).not.toBe(groupKey);
});
}
it("generates distinct keys for DM vs Channel (dmScope=per-peer)", () => {
// Scenario: Multi-user bot config
const dmKey = buildAgentSessionKey({
agentId,
channel,
accountId,
peer: { kind: "direct", id: "user123" },
dmScope: "per-peer",
});
const groupKey = buildAgentSessionKey({
agentId,
channel,
accountId,
peer: { kind: "channel", id: "channel456" },
dmScope: "per-peer",
});
expect(dmKey).toBe("agent:main:direct:user123");
expect(groupKey).toBe("agent:main:discord:channel:channel456");
expect(dmKey).not.toBe(groupKey);
});
it("handles empty/invalid IDs safely without collision", () => {
// If ID is missing, does it collide?
const missingIdKey = buildAgentSessionKey({
agentId,
channel,
accountId,
peer: { kind: "channel", id: "" }, // Empty string
dmScope: "main",
/**
 * Builds a channel session key for the given channel id and asserts that it
 * contains "unknown" and differs from the shared main session key.
 */
function expectUnknownChannelKeyCase(channelId: string) {
  const peer = { kind: "channel" as const, id: channelId };
  const fallbackKey = buildDiscordSessionKey({ peer });
  expect(fallbackKey).toContain("unknown");
  // The fallback key must stay distinct from the main session key.
  expect(fallbackKey).not.toBe("agent:main:main");
}
it.each([
{
name: "keeps main-scoped DMs distinct from channel sessions",
dmScope: "main" as const,
expectedDmKey: "agent:main:main",
},
{
name: "keeps per-peer DMs distinct from channel sessions",
dmScope: "per-peer" as const,
expectedDmKey: "agent:main:direct:user123",
},
])("$name", ({ dmScope, expectedDmKey }) => {
expectDistinctDmAndChannelKeys({ dmScope, expectedDmKey });
});
it.each(["", " "] as const)("handles invalid channel id %j without collision", (channelId) => {
expectUnknownChannelKeyCase(channelId);
});
});

View File

@@ -12,121 +12,116 @@ import {
} from "./session-key.js";
describe("classifySessionKeyShape", () => {
it("classifies empty keys as missing", () => {
expect(classifySessionKeyShape(undefined)).toBe("missing");
expect(classifySessionKeyShape(" ")).toBe("missing");
});
it("classifies valid agent keys", () => {
expect(classifySessionKeyShape("agent:main:main")).toBe("agent");
expect(classifySessionKeyShape("agent:research:subagent:worker")).toBe("agent");
});
it("classifies malformed agent keys", () => {
expect(classifySessionKeyShape("agent::broken")).toBe("malformed_agent");
expect(classifySessionKeyShape("agent:main")).toBe("malformed_agent");
});
it("treats non-agent legacy or alias keys as non-malformed", () => {
expect(classifySessionKeyShape("main")).toBe("legacy_or_alias");
expect(classifySessionKeyShape("custom-main")).toBe("legacy_or_alias");
expect(classifySessionKeyShape("subagent:worker")).toBe("legacy_or_alias");
// Each row pairs a raw session key with the shape classifySessionKeyShape must
// report for it. The title interpolates row properties via `$input` /
// `$expected`; a positional `%j` would print the entire row object as JSON
// (including `expected`) because the rows are objects, not bare values.
it.each([
  { input: undefined, expected: "missing" },
  { input: " ", expected: "missing" },
  { input: "agent:main:main", expected: "agent" },
  { input: "agent:research:subagent:worker", expected: "agent" },
  { input: "agent::broken", expected: "malformed_agent" },
  { input: "agent:main", expected: "malformed_agent" },
  { input: "main", expected: "legacy_or_alias" },
  { input: "custom-main", expected: "legacy_or_alias" },
  { input: "subagent:worker", expected: "legacy_or_alias" },
] as const)("classifies $input as $expected", ({ input, expected }) => {
  expect(classifySessionKeyShape(input)).toBe(expected);
});
});
describe("session key backward compatibility", () => {
it("classifies legacy :dm: session keys as valid agent keys", () => {
// Legacy session keys use :dm: instead of :direct:
// Both should be recognized as valid agent keys
expect(classifySessionKeyShape("agent:main:telegram:dm:123456")).toBe("agent");
expect(classifySessionKeyShape("agent:main:whatsapp:dm:+15551234567")).toBe("agent");
expect(classifySessionKeyShape("agent:main:discord:dm:user123")).toBe("agent");
});
/**
 * Asserts that the given session key is classified with the "agent" shape.
 */
function expectBackwardCompatibleDirectSessionKey(key: string) {
  const shape = classifySessionKeyShape(key);
  expect(shape).toBe("agent");
}
it("classifies new :direct: session keys as valid agent keys", () => {
expect(classifySessionKeyShape("agent:main:telegram:direct:123456")).toBe("agent");
expect(classifySessionKeyShape("agent:main:whatsapp:direct:+15551234567")).toBe("agent");
expect(classifySessionKeyShape("agent:main:discord:direct:user123")).toBe("agent");
// The first three keys use the `:dm:` marker and the last three use
// `:direct:`; every one of them must be classified as a valid agent key.
it.each([
"agent:main:telegram:dm:123456",
"agent:main:whatsapp:dm:+15551234567",
"agent:main:discord:dm:user123",
"agent:main:telegram:direct:123456",
"agent:main:whatsapp:direct:+15551234567",
"agent:main:discord:direct:user123",
] as const)("classifies backward-compatible direct session key %s as valid", (key) => {
expectBackwardCompatibleDirectSessionKey(key);
});
});
describe("getSubagentDepth", () => {
it("returns 0 for non-subagent session keys", () => {
expect(getSubagentDepth("agent:main:main")).toBe(0);
expect(getSubagentDepth("main")).toBe(0);
expect(getSubagentDepth(undefined)).toBe(0);
});
it("returns 2 for nested subagent session keys", () => {
expect(getSubagentDepth("agent:main:subagent:parent:subagent:child")).toBe(2);
// Rows map a session key to the subagent nesting depth getSubagentDepth must
// return. The title uses `$key` rather than `%j`, which would dump the whole
// row object as JSON since each case is an object.
it.each([
  { key: "agent:main:main", expected: 0 },
  { key: "main", expected: 0 },
  { key: undefined, expected: 0 },
  { key: "agent:main:subagent:parent:subagent:child", expected: 2 },
] as const)("returns $expected for session key $key", ({ key, expected }) => {
  expect(getSubagentDepth(key)).toBe(expected);
});
});
describe("isCronSessionKey", () => {
it("matches base and run cron agent session keys", () => {
expect(isCronSessionKey("agent:main:cron:job-1")).toBe(true);
expect(isCronSessionKey("agent:main:cron:job-1:run:run-1")).toBe(true);
});
it("does not match non-cron sessions", () => {
expect(isCronSessionKey("agent:main:main")).toBe(false);
expect(isCronSessionKey("agent:main:subagent:worker")).toBe(false);
expect(isCronSessionKey("cron:job-1")).toBe(false);
expect(isCronSessionKey(undefined)).toBe(false);
// Rows map a session key to whether isCronSessionKey must accept it. The title
// uses `$key` rather than `%j`, which would print the full row object as JSON
// because each case is an object.
it.each([
  { key: "agent:main:cron:job-1", expected: true },
  { key: "agent:main:cron:job-1:run:run-1", expected: true },
  { key: "agent:main:main", expected: false },
  { key: "agent:main:subagent:worker", expected: false },
  { key: "cron:job-1", expected: false },
  { key: undefined, expected: false },
] as const)("matches cron key $key => $expected", ({ key, expected }) => {
  expect(isCronSessionKey(key)).toBe(expected);
});
});
describe("deriveSessionChatType", () => {
it("detects canonical direct/group/channel session keys", () => {
expect(deriveSessionChatType("agent:main:discord:direct:user1")).toBe("direct");
expect(deriveSessionChatType("agent:main:telegram:group:g1")).toBe("group");
expect(deriveSessionChatType("agent:main:discord:channel:c1")).toBe("channel");
});
it("detects legacy direct markers", () => {
expect(deriveSessionChatType("agent:main:telegram:dm:123456")).toBe("direct");
expect(deriveSessionChatType("telegram:dm:123456")).toBe("direct");
});
it("detects legacy discord guild channel keys", () => {
expect(deriveSessionChatType("discord:acc-1:guild-123:channel-456")).toBe("channel");
});
it("returns unknown for main or malformed session keys", () => {
expect(deriveSessionChatType("agent:main:main")).toBe("unknown");
expect(deriveSessionChatType("agent:main")).toBe("unknown");
expect(deriveSessionChatType("")).toBe("unknown");
// Rows map a session key to the chat type deriveSessionChatType must report.
// The title uses `$key` rather than `%j`, which would print the full row object
// as JSON because each case is an object.
it.each([
  { key: "agent:main:discord:direct:user1", expected: "direct" },
  { key: "agent:main:telegram:group:g1", expected: "group" },
  { key: "agent:main:discord:channel:c1", expected: "channel" },
  { key: "agent:main:telegram:dm:123456", expected: "direct" },
  { key: "telegram:dm:123456", expected: "direct" },
  { key: "discord:acc-1:guild-123:channel-456", expected: "channel" },
  { key: "agent:main:main", expected: "unknown" },
  { key: "agent:main", expected: "unknown" },
  { key: "", expected: "unknown" },
] as const)("derives chat type for $key => $expected", ({ key, expected }) => {
  expect(deriveSessionChatType(key)).toBe(expected);
});
});
describe("session key canonicalization", () => {
it("parses agent keys case-insensitively and returns lowercase tokens", () => {
expect(parseAgentSessionKey("AGENT:Main:Hook:Webhook:42")).toEqual({
agentId: "main",
rest: "hook:webhook:42",
});
});
/**
 * Executes a single canonicalization case by invoking its `run` callback once.
 * Any assertions live inside the callback itself.
 */
function expectSessionKeyCanonicalizationCase(params: { run: () => void }): void {
  params.run();
}
it("does not double-prefix already-qualified agent keys", () => {
expect(
toAgentStoreSessionKey({
agentId: "main",
requestKey: "agent:main:main",
}),
).toBe("agent:main:main");
it.each([
{
name: "parses agent keys case-insensitively and returns lowercase tokens",
run: () =>
expect(parseAgentSessionKey("AGENT:Main:Hook:Webhook:42")).toEqual({
agentId: "main",
rest: "hook:webhook:42",
}),
},
{
name: "does not double-prefix already-qualified agent keys",
run: () =>
expect(
toAgentStoreSessionKey({
agentId: "main",
requestKey: "agent:main:main",
}),
).toBe("agent:main:main"),
},
] as const)("$name", ({ run }) => {
expectSessionKeyCanonicalizationCase({ run });
});
});
describe("isValidAgentId", () => {
it("accepts valid agent ids", () => {
expect(isValidAgentId("main")).toBe(true);
expect(isValidAgentId("my-research_agent01")).toBe(true);
});
it("rejects malformed agent ids", () => {
expect(isValidAgentId("")).toBe(false);
expect(isValidAgentId("Agent not found: xyz")).toBe(false);
expect(isValidAgentId("../../../etc/passwd")).toBe(false);
expect(isValidAgentId("a".repeat(65))).toBe(false);
// Rows map a candidate agent id to whether isValidAgentId must accept it. The
// title uses `$input` rather than `%j`, which would print the full row object
// as JSON because each case is an object.
it.each([
  { input: "main", expected: true },
  { input: "my-research_agent01", expected: true },
  { input: "", expected: false },
  { input: "Agent not found: xyz", expected: false },
  { input: "../../../etc/passwd", expected: false },
  { input: "a".repeat(65), expected: false },
] as const)("validates agent id $input => $expected", ({ input, expected }) => {
  expect(isValidAgentId(input)).toBe(expected);
});
});

View File

@@ -2,35 +2,12 @@ import { describe, expect, it } from "vitest";
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
describe("stripAssistantInternalScaffolding", () => {
it("strips reasoning tags", () => {
const input = ["<thinking>", "secret", "</thinking>", "Visible"].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
});
/**
 * Asserts that stripping internal scaffolding from `input` yields exactly
 * `expected`.
 */
function expectVisibleText(input: string, expected: string) {
  const visible = stripAssistantInternalScaffolding(input);
  expect(visible).toBe(expected);
}
it("strips relevant-memories scaffolding blocks", () => {
const input = [
"<relevant-memories>",
"The following memories may be relevant to this conversation:",
"- Internal memory note",
"</relevant-memories>",
"",
"User-visible answer",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("User-visible answer");
});
it("supports relevant_memories tag variants", () => {
const input = [
"<relevant_memories>",
"Internal memory note",
"</relevant_memories>",
"Visible",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
});
it("keeps relevant-memories tags inside fenced code", () => {
const input = [
function createLiteralRelevantMemoriesCodeBlock() {
return [
"```xml",
"<relevant-memories>",
"sample",
@@ -39,43 +16,87 @@ describe("stripAssistantInternalScaffolding", () => {
"",
"Visible text",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe(input);
});
}
it("keeps relevant-memories tags inside inline code", () => {
const input = "Use `<relevant-memories>example</relevant-memories>` literally.";
expect(stripAssistantInternalScaffolding(input)).toBe(input);
});
/**
 * Asserts that `input` passes through scaffolding stripping unchanged, i.e.
 * the stripped output is identical to the input itself.
 */
function expectLiteralVisibleText(input: string) {
  const visible = stripAssistantInternalScaffolding(input);
  expect(visible).toBe(input);
}
it("hides unfinished relevant-memories blocks", () => {
const input = ["Hello", "<relevant-memories>", "internal-only"].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Hello\n");
});
it("trims leading whitespace after stripping scaffolding", () => {
const input = [
"<thinking>",
"secret",
"</thinking>",
" ",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
" Visible",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
});
it("preserves unfinished reasoning text while still stripping memory blocks", () => {
const input = [
"Before",
"<thinking>",
"secret",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
"After",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Before\n\nsecret\n\nAfter");
it.each([
{
name: "strips reasoning tags",
input: ["<thinking>", "secret", "</thinking>", "Visible"].join("\n"),
expected: "Visible",
},
{
name: "strips relevant-memories scaffolding blocks",
input: [
"<relevant-memories>",
"The following memories may be relevant to this conversation:",
"- Internal memory note",
"</relevant-memories>",
"",
"User-visible answer",
].join("\n"),
expected: "User-visible answer",
},
{
name: "supports relevant_memories tag variants",
input: [
"<relevant_memories>",
"Internal memory note",
"</relevant_memories>",
"Visible",
].join("\n"),
expected: "Visible",
},
{
name: "hides unfinished relevant-memories blocks",
input: ["Hello", "<relevant-memories>", "internal-only"].join("\n"),
expected: "Hello\n",
},
{
name: "trims leading whitespace after stripping scaffolding",
input: [
"<thinking>",
"secret",
"</thinking>",
" ",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
" Visible",
].join("\n"),
expected: "Visible",
},
{
name: "preserves unfinished reasoning text while still stripping memory blocks",
input: [
"Before",
"<thinking>",
"secret",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
"After",
].join("\n"),
expected: "Before\n\nsecret\n\nAfter",
},
{
name: "keeps relevant-memories tags inside fenced code",
input: createLiteralRelevantMemoriesCodeBlock(),
expected: undefined,
},
{
name: "keeps literal relevant-memories prose",
input: "Use `<relevant-memories>example</relevant-memories>` literally.",
expected: undefined,
},
] as const)("$name", ({ input, expected }) => {
if (expected === undefined) {
expectLiteralVisibleText(input);
return;
}
expectVisibleText(input, expected);
});
});

View File

@@ -2,50 +2,61 @@ import { describe, expect, it } from "vitest";
import { findCodeRegions, isInsideCode } from "./code-regions.js";
describe("shared/text/code-regions", () => {
it("finds fenced and inline code regions without double-counting inline code inside fences", () => {
const text = [
"before `inline` after",
"```ts",
"const a = `inside fence`;",
"```",
"tail",
].join("\n");
/**
 * Finds the code regions in `text` and asserts that the substrings they cover
 * match `expectedSlices`, in order. The region count is checked first, then
 * the sliced contents.
 */
function expectCodeRegionSlices(text: string, expectedSlices: readonly string[]) {
  const found = findCodeRegions(text);
  const actualSlices = found.map(({ start, end }) => text.slice(start, end));
  expect(found).toHaveLength(expectedSlices.length);
  expect(actualSlices).toEqual(expectedSlices);
}
expect(regions).toHaveLength(2);
expect(text.slice(regions[0].start, regions[0].end)).toBe("`inline`");
expect(text.slice(regions[1].start, regions[1].end)).toContain("```ts");
});
it("accepts alternate fence markers and unterminated trailing fences", () => {
const text = "~~~js\nconsole.log(1)\n~~~\nplain\n```\nunterminated";
const regions = findCodeRegions(text);
expect(regions).toHaveLength(2);
expect(text.slice(regions[0].start, regions[0].end)).toContain("~~~js");
expect(text.slice(regions[1].start, regions[1].end)).toBe("```\nunterminated");
});
it("keeps adjacent inline code outside fenced regions", () => {
const text = ["```ts", "const a = 1;", "```", "after `inline` tail"].join("\n");
const regions = findCodeRegions(text);
expect(regions).toHaveLength(2);
expect(text.slice(regions[0].start, regions[0].end)).toContain("```ts");
expect(text.slice(regions[1].start, regions[1].end)).toBe("`inline`");
});
it("reports whether positions are inside discovered regions", () => {
/**
 * Checks isInsideCode against a fixed sample containing one inline code span.
 *
 * @param params.positionSelector Picks the position to probe; receives the
 *   sample text and the end offset of the first discovered region.
 * @param params.expected Whether that position should count as inside code.
 */
function expectInsideCodeCase(params: {
  positionSelector: (text: string, regionEnd: number) => number;
  expected: boolean;
}) {
  const text = "plain `code` done";
  const regions = findCodeRegions(text);
  // Falls back to -1 when no region was discovered.
  const regionEnd = regions[0]?.end ?? -1;
  const position = params.positionSelector(text, regionEnd);
  expect(isInsideCode(position, regions)).toBe(params.expected);
}
expect(isInsideCode(codeStart, regions)).toBe(true);
expect(isInsideCode(plainStart, regions)).toBe(false);
expect(isInsideCode(regionEnd, regions)).toBe(false);
it.each([
{
name: "finds fenced and inline code regions without double-counting inline code inside fences",
text: ["before `inline` after", "```ts", "const a = `inside fence`;", "```", "tail"].join(
"\n",
),
expectedSlices: ["`inline`", "```ts\nconst a = `inside fence`;\n```"],
},
{
name: "accepts alternate fence markers and unterminated trailing fences",
text: "~~~js\nconsole.log(1)\n~~~\nplain\n```\nunterminated",
expectedSlices: ["~~~js\nconsole.log(1)\n~~~", "```\nunterminated"],
},
{
name: "keeps adjacent inline code outside fenced regions",
text: ["```ts", "const a = 1;", "```", "after `inline` tail"].join("\n"),
expectedSlices: ["```ts\nconst a = 1;\n```", "`inline`"],
},
] as const)("$name", ({ text, expectedSlices }) => {
expectCodeRegionSlices(text, expectedSlices);
});
it.each([
{
name: "inside code",
positionSelector: (text: string) => text.indexOf("code"),
expected: true,
},
{
name: "outside code",
positionSelector: (text: string) => text.indexOf("plain"),
expected: false,
},
{
name: "at region end",
positionSelector: (_text: string, regionEnd: number) => regionEnd,
expected: false,
},
] as const)("reports whether positions are inside discovered regions: $name", (testCase) => {
expectInsideCodeCase(testCase);
});
});

View File

@@ -1,45 +1,51 @@
import { describe, expect, it } from "vitest";
import { concatOptionalTextSegments, joinPresentTextSegments } from "./join-segments.js";
/**
 * Asserts with `toBe` that `actual` is the same value as `expected`.
 */
function expectTextSegmentsCase<T>(actual: T, expected: T): void {
  expect(actual).toBe(expected);
}
/**
 * Runs the supplied thunk and asserts that its result matches
 * `params.expected`.
 */
function expectJoinedTextSegmentsCase<T>(params: { run: () => T; expected: T }) {
  const actual = params.run();
  expectTextSegmentsCase(actual, params.expected);
}
describe("concatOptionalTextSegments", () => {
it("concatenates left and right with default separator", () => {
expect(concatOptionalTextSegments({ left: "A", right: "B" })).toBe("A\n\nB");
});
it("keeps explicit empty-string right value", () => {
expect(concatOptionalTextSegments({ left: "A", right: "" })).toBe("");
});
it("falls back to whichever side is present and honors custom separators", () => {
expect(concatOptionalTextSegments({ left: "A" })).toBe("A");
expect(concatOptionalTextSegments({ right: "B" })).toBe("B");
expect(concatOptionalTextSegments({ left: "", right: "B" })).toBe("B");
expect(concatOptionalTextSegments({ left: "" })).toBe("");
expect(concatOptionalTextSegments({ left: "A", right: "B", separator: " | " })).toBe("A | B");
it.each([
{ params: { left: "A", right: "B" }, expected: "A\n\nB" },
{ params: { left: "A", right: "" }, expected: "" },
{ params: { left: "A" }, expected: "A" },
{ params: { right: "B" }, expected: "B" },
{ params: { left: "", right: "B" }, expected: "B" },
{ params: { left: "" }, expected: "" },
{ params: { left: "A", right: "B", separator: " | " }, expected: "A | B" },
] as const)("concatenates optional segments %#", ({ params, expected }) => {
expectJoinedTextSegmentsCase({
run: () => concatOptionalTextSegments(params),
expected,
});
});
});
describe("joinPresentTextSegments", () => {
it("joins non-empty segments", () => {
expect(joinPresentTextSegments(["A", undefined, "B"])).toBe("A\n\nB");
});
it("returns undefined when all segments are empty", () => {
expect(joinPresentTextSegments(["", undefined, null])).toBeUndefined();
});
it("trims segments when requested", () => {
expect(joinPresentTextSegments([" A ", " B "], { trim: true })).toBe("A\n\nB");
});
it("keeps whitespace-only segments unless trim is enabled and supports custom separators", () => {
expect(joinPresentTextSegments(["A", " ", "B"], { separator: " | " })).toBe("A | | B");
expect(joinPresentTextSegments(["A", " ", "B"], { trim: true, separator: " | " })).toBe(
"A | B",
);
});
it("preserves segment whitespace when trim is disabled", () => {
expect(joinPresentTextSegments(["A", " B "], { separator: "|" })).toBe("A| B ");
it.each([
{ segments: ["A", undefined, "B"], options: undefined, expected: "A\n\nB" },
{ segments: ["", undefined, null], options: undefined, expected: undefined },
{ segments: [" A ", " B "], options: { trim: true }, expected: "A\n\nB" },
{
segments: ["A", " ", "B"],
options: { separator: " | " },
expected: "A | | B",
},
{
segments: ["A", " ", "B"],
options: { trim: true, separator: " | " },
expected: "A | B",
},
{ segments: ["A", " B "], options: { separator: "|" }, expected: "A| B " },
] as const)("joins present segments %#", ({ segments, options, expected }) => {
expectJoinedTextSegmentsCase({
run: () => joinPresentTextSegments(segments, options),
expected,
});
});
});

View File

@@ -2,181 +2,200 @@ import { describe, expect, it } from "vitest";
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
describe("stripReasoningTagsFromText", () => {
const expectStrippedCases = (
cases: ReadonlyArray<{
input: string;
expected: string;
opts?: Parameters<typeof stripReasoningTagsFromText>[1];
name?: string;
}>,
) => {
for (const { input, expected, opts, name } of cases) {
expect(stripReasoningTagsFromText(input, opts), name).toBe(expected);
/**
 * Strips reasoning tags from `params.input` (with optional `params.opts`) and
 * asserts that the result is `params.expected`.
 */
function expectStrippedCase(params: {
  input: string | null;
  expected: string | null;
  opts?: Parameters<typeof stripReasoningTagsFromText>[1];
}) {
  const { input, expected, opts } = params;
  // The cast lets null-ish inputs be passed through to the function under test.
  const stripped = stripReasoningTagsFromText(input as unknown as string, opts);
  expect(stripped).toBe(expected);
}
/**
 * Asserts that stripping reasoning tags leaves `input` completely unchanged.
 */
function expectPreservedReasoningTagCodeExample(input: string) {
  const output = stripReasoningTagsFromText(input);
  expect(output).toBe(input);
}
/**
 * Dispatches a code-preservation case.
 *
 * When `params.expected` is omitted the input must survive stripping
 * unchanged; otherwise the stripped output must equal `params.expected`.
 */
function expectReasoningCodeCase(params: { input: string; expected?: string }) {
  if (params.expected === undefined) {
    expectPreservedReasoningTagCodeExample(params.input);
    return;
  }
  // Reached only when an explicit expected output was supplied.
  expectStrippedCase({
    input: params.input,
    expected: params.expected,
  });
}
describe("basic functionality", () => {
it("returns text unchanged when no reasoning tags present", () => {
const input = "Hello, this is a normal message.";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("strips reasoning-tag variants", () => {
const cases = [
{
name: "strips proper think tags",
input: "Hello <think>internal reasoning</think> world!",
expected: "Hello world!",
},
{
name: "strips thinking tags",
input: "Before <thinking>some thought</thinking> after",
expected: "Before after",
},
{ name: "strips thought tags", input: "A <thought>hmm</thought> B", expected: "A B" },
{
name: "strips antthinking tags",
input: "X <antthinking>internal</antthinking> Y",
expected: "X Y",
},
] as const;
expectStrippedCases(cases);
});
it("strips multiple reasoning blocks", () => {
const input = "<think>first</think>A<think>second</think>B";
expect(stripReasoningTagsFromText(input)).toBe("AB");
it.each([
{
name: "returns text unchanged when no reasoning tags present",
input: "Hello, this is a normal message.",
expected: "Hello, this is a normal message.",
},
{
name: "strips proper think tags",
input: "Hello <think>internal reasoning</think> world!",
expected: "Hello world!",
},
{
name: "strips thinking tags",
input: "Before <thinking>some thought</thinking> after",
expected: "Before after",
},
{ name: "strips thought tags", input: "A <thought>hmm</thought> B", expected: "A B" },
{
name: "strips antthinking tags",
input: "X <antthinking>internal</antthinking> Y",
expected: "X Y",
},
{
name: "strips multiple reasoning blocks",
input: "<think>first</think>A<think>second</think>B",
expected: "AB",
},
] as const)("$name", (testCase) => {
expectStrippedCase(testCase);
});
});
describe("code block preservation (issue #3952)", () => {
it("preserves tags inside code examples", () => {
const cases = [
"Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!",
"The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.",
"Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!",
"Use `<think>` to open and `</think>` to close.",
"Example:\n```\n<think>reasoning</think>\n```",
"Use `<final>` for final answers in code: ```\n<final>42</final>\n```",
"First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`",
] as const;
for (const input of cases) {
expect(stripReasoningTagsFromText(input)).toBe(input);
}
});
it("handles mixed code-tag and real-tag content", () => {
const cases = [
{
input: "<think>hidden</think>Visible text with `<think>` example.",
expected: "Visible text with `<think>` example.",
},
{
input: "```\n<think>code</think>\n```\n<think>real hidden</think>visible",
expected: "```\n<think>code</think>\n```\nvisible",
},
] as const;
expectStrippedCases(cases);
it.each([
{
name: "preserves plain code example",
input: "Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!",
},
{
name: "preserves inline literal think tag documentation",
input: "The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.",
},
{
name: "preserves xml fenced examples",
input: "Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!",
},
{
name: "preserves plain literal opening and closing tags",
input: "Use `<think>` to open and `</think>` to close.",
},
{
name: "preserves fenced think example",
input: "Example:\n```\n<think>reasoning</think>\n```",
},
{
name: "preserves final tags inside code examples",
input: "Use `<final>` for final answers in code: ```\n<final>42</final>\n```",
},
{
name: "preserves mixed literal think tags and code blocks",
input: "First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`",
},
{
name: "strips real tags while preserving literal think examples",
input: "<think>hidden</think>Visible text with `<think>` example.",
expected: "Visible text with `<think>` example.",
},
{
name: "strips real tags after fenced code block",
input: "```\n<think>code</think>\n```\n<think>real hidden</think>visible",
expected: "```\n<think>code</think>\n```\nvisible",
},
] as const)("$name", ({ input, expected }) => {
expectReasoningCodeCase({ input, expected });
});
});
describe("edge cases", () => {
it("handles malformed tags and null-ish inputs", () => {
const cases = [
{
input: "Here is how to use <think tags in your code",
expected: "Here is how to use <think tags in your code",
},
{
input: "You can start with <think and then close with </think>",
expected: "You can start with <think and then close with",
},
{
input: "A < think >content< /think > B",
expected: "A B",
},
{
input: "",
expected: "",
},
{
input: null as unknown as string,
expected: null,
},
] as const;
for (const { input, expected } of cases) {
expect(stripReasoningTagsFromText(input)).toBe(expected);
}
it.each([
{
input: "Here is how to use <think tags in your code",
expected: "Here is how to use <think tags in your code",
},
{
input: "You can start with <think and then close with </think>",
expected: "You can start with <think and then close with",
},
{
input: "A < think >content< /think > B",
expected: "A B",
},
{
input: "",
expected: "",
},
{
input: null as unknown as string,
expected: null,
},
] as const)("handles malformed/null-ish input %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles fenced and inline code edge behavior", () => {
const cases = [
{
input: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
expected: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
},
{
input: "Example:\n~~~js\n<think>code</think>\n~~~",
expected: "Example:\n~~~js\n<think>code</think>\n~~~",
},
{
input: "Use ``code`` with <think>hidden</think> text",
expected: "Use ``code`` with text",
},
{
input: "Before\n```\ncode\n```\nAfter with <think>hidden</think>",
expected: "Before\n```\ncode\n```\nAfter with",
},
{
input: "```\n<think>not protected\n~~~\n</think>text",
expected: "```\n<think>not protected\n~~~\n</think>text",
},
{
input: "Start `unclosed <think>hidden</think> end",
expected: "Start `unclosed end",
},
] as const;
expectStrippedCases(cases);
it.each([
{
input: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
expected: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
},
{
input: "Example:\n~~~js\n<think>code</think>\n~~~",
expected: "Example:\n~~~js\n<think>code</think>\n~~~",
},
{
input: "Use ``code`` with <think>hidden</think> text",
expected: "Use ``code`` with text",
},
{
input: "Before\n```\ncode\n```\nAfter with <think>hidden</think>",
expected: "Before\n```\ncode\n```\nAfter with",
},
{
input: "```\n<think>not protected\n~~~\n</think>text",
expected: "```\n<think>not protected\n~~~\n</think>text",
},
{
input: "Start `unclosed <think>hidden</think> end",
expected: "Start `unclosed end",
},
] as const)("handles fenced/inline code edge behavior: %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles nested and final tag behavior", () => {
const cases = [
{
input: "<think>outer <think>inner</think> still outer</think>visible",
expected: "still outervisible",
},
{
input: "A<final>1</final>B<final>2</final>C",
expected: "A1B2C",
},
{
input: "`<final>` in code, <final>visible</final> outside",
expected: "`<final>` in code, visible outside",
},
{
input: "A <FINAL data-x='1'>visible</Final> B",
expected: "A visible B",
},
] as const;
expectStrippedCases(cases);
it.each([
{
input: "<think>outer <think>inner</think> still outer</think>visible",
expected: "still outervisible",
},
{
input: "A<final>1</final>B<final>2</final>C",
expected: "A1B2C",
},
{
input: "`<final>` in code, <final>visible</final> outside",
expected: "`<final>` in code, visible outside",
},
{
input: "A <FINAL data-x='1'>visible</Final> B",
expected: "A visible B",
},
] as const)("handles nested/final tag behavior: %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles unicode, attributes, and case-insensitive tag names", () => {
const cases = [
{
input: "你好 <think>思考 🤔</think> 世界",
expected: "你好 世界",
},
{
input: "A <think id='test' class=\"foo\">hidden</think> B",
expected: "A B",
},
{
input: "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B",
expected: "A B",
},
] as const;
expectStrippedCases(cases);
it.each([
{
input: "你好 <think>思考 🤔</think> 世界",
expected: "你好 世界",
},
{
input: "A <think id='test' class=\"foo\">hidden</think> B",
expected: "A B",
},
{
input: "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B",
expected: "A B",
},
] as const)("handles unicode/attributes/case-insensitive names: %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles long content and pathological backtick patterns efficiently", () => {
@@ -192,50 +211,60 @@ describe("stripReasoningTagsFromText", () => {
});
describe("strict vs preserve mode", () => {
it("applies strict and preserve modes to unclosed tags", () => {
const input = "Before <think>unclosed content after";
const cases = [
{ mode: "strict" as const, expected: "Before" },
{ mode: "preserve" as const, expected: "Before unclosed content after" },
] as const;
for (const { mode, expected } of cases) {
expect(stripReasoningTagsFromText(input, { mode })).toBe(expected);
}
});
it("still strips fully closed reasoning blocks in preserve mode", () => {
expect(stripReasoningTagsFromText("A <think>hidden</think> B", { mode: "preserve" })).toBe(
"A B",
);
it.each([
{
name: "applies strict mode to unclosed tags",
input: "Before <think>unclosed content after",
expected: "Before",
opts: { mode: "strict" as const },
},
{
name: "applies preserve mode to unclosed tags",
input: "Before <think>unclosed content after",
expected: "Before unclosed content after",
opts: { mode: "preserve" as const },
},
{
name: "still strips fully closed reasoning blocks in preserve mode",
input: "A <think>hidden</think> B",
expected: "A B",
opts: { mode: "preserve" as const },
},
] as const)("$name", (testCase) => {
expectStrippedCase(testCase);
});
});
describe("trim options", () => {
it("applies configured trim strategies", () => {
const cases = [
{
input: " <think>x</think> result <think>y</think> ",
expected: "result",
opts: undefined,
},
{
input: " <think>x</think> result ",
expected: " result ",
opts: { trim: "none" as const },
},
{
input: " <think>x</think> result ",
expected: "result ",
opts: { trim: "start" as const },
},
] as const;
expectStrippedCases(cases);
it.each([
{
name: "applies default trim strategy",
input: " <think>x</think> result <think>y</think> ",
expected: "result",
opts: undefined,
},
{
name: "supports trim=none",
input: " <think>x</think> result ",
expected: " result ",
opts: { trim: "none" as const },
},
{
name: "supports trim=start",
input: " <think>x</think> result ",
expected: "result ",
opts: { trim: "start" as const },
},
] as const)("$name", (testCase) => {
expectStrippedCase(testCase);
});
});
it("does not leak regex state across repeated calls", () => {
expect(stripReasoningTagsFromText("A <final>1</final> B")).toBe("A 1 B");
expect(stripReasoningTagsFromText("C <final>2</final> D")).toBe("C 2 D");
expect(stripReasoningTagsFromText("E <think>x</think> F")).toBe("E F");
it.each([
{ input: "A <final>1</final> B", expected: "A 1 B" },
{ input: "C <final>2</final> D", expected: "C 2 D" },
{ input: "E <think>x</think> F", expected: "E F" },
] as const)("does not leak regex state across repeated calls: %j", (testCase) => {
expectStrippedCase(testCase);
});
});