diff --git a/src/auto-reply/chunk.test.ts b/src/auto-reply/chunk.test.ts index ec29928fc71..5778d7f681c 100644 --- a/src/auto-reply/chunk.test.ts +++ b/src/auto-reply/chunk.test.ts @@ -153,9 +153,15 @@ const newlineModeFenceCases = (() => { expected: [fence], }, { - name: "splits between fence and following paragraph", + name: "keeps short fenced block and following paragraph together", text: `${fence}\n\nAfter`, limit: 1000, + expected: [`${fence}\n\nAfter`], + }, + { + name: "splits oversized fenced block away from following paragraph", + text: `${fence}\n\nAfter`, + limit: fence.length + 1, expected: [fence, "After"], }, { @@ -523,10 +529,10 @@ describe("chunkTextWithMode", () => { expected: ["Line one\nLine two"], }, { - name: "newline mode (blank-line split)", + name: "newline mode packs short blank-line-separated paragraphs", text: "Para one\n\nPara two", mode: "newline" as const, - expected: ["Para one", "Para two"], + expected: ["Para one\n\nPara two"], }, ] as const)( "applies mode-specific chunking behavior: $name", @@ -558,10 +564,10 @@ describe("chunkMarkdownTextWithMode", () => { expected: ["Line one\nLine two"], }, { - name: "newline mode splits by blank line", + name: "newline mode packs short blank-line-separated paragraphs", text: "Para one\n\nPara two", mode: "newline" as const, - expected: ["Para one", "Para two"], + expected: ["Para one\n\nPara two"], }, ] as const)("applies markdown/newline mode behavior: $name", ({ text, mode, expected, name }) => { expectChunkModeCase({ @@ -580,6 +586,13 @@ describe("chunkMarkdownTextWithMode", () => { expect(chunkMarkdownTextWithMode(text, limit, "newline"), name).toEqual(expected); }, ); + + it("packs multiple paragraphs up to the limit in newline mode", () => { + expect(chunkMarkdownTextWithMode("Alpha\n\nBeta\n\nGamma", 14, "newline")).toEqual([ + "Alpha\n\nBeta", + "Gamma", + ]); + }); }); describe("resolveChunkMode", () => { diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts index 99709f021db..ac894e2c533 100644 --- a/src/auto-reply/chunk.ts +++ b/src/auto-reply/chunk.ts @@ -217,6 +217,7 @@ export function chunkByParagraph( const spans = parseFenceSpans(normalized); const parts: string[] = []; + const separators: string[] = []; const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace let lastIndex = 0; for (const match of normalized.matchAll(re)) { @@ -228,23 +229,49 @@ export function chunkByParagraph( } parts.push(normalized.slice(lastIndex, idx)); + separators.push(match[0]); lastIndex = idx + match[0].length; } parts.push(normalized.slice(lastIndex)); const chunks: string[] = []; - for (const part of parts) { + let currentChunk = ""; + + const pushParagraph = (paragraph: string, separatorBefore?: string) => { + if (!currentChunk) { + if (paragraph.length <= limit) { + currentChunk = paragraph; + return; + } + if (!splitLongParagraphs) { + chunks.push(paragraph); + return; + } + chunks.push(...chunkText(paragraph, limit)); + return; + } + + const candidate = `${currentChunk}${separatorBefore ?? "\n\n"}${paragraph}`; + if (candidate.length <= limit) { + currentChunk = candidate; + return; + } + + chunks.push(currentChunk); + currentChunk = ""; + pushParagraph(paragraph); + }; + + for (const [index, part] of parts.entries()) { const paragraph = part.replace(/\s+$/g, ""); if (!paragraph.trim()) { continue; } - if (paragraph.length <= limit) { - chunks.push(paragraph); - } else if (!splitLongParagraphs) { - chunks.push(paragraph); - } else { - chunks.push(...chunkText(paragraph, limit)); - } + pushParagraph(paragraph, separators[index - 1]); + } + + if (currentChunk) { + chunks.push(currentChunk); } return chunks; @@ -266,13 +293,12 @@ export function chunkMarkdownTextWithMode(text: string, limit: number, mode: Chu // If a paragraph must be split by length, defer to the markdown-aware chunker. const paragraphChunks = chunkByParagraph(text, limit, { splitLongParagraphs: false }); const out: string[] = []; - for (const chunk of paragraphChunks) { - const nested = chunkMarkdownText(chunk, limit); - if (!nested.length && chunk) { - out.push(chunk); - } else { - out.push(...nested); - } + for (const chunk of paragraphChunks.flatMap((paragraphChunk) => + paragraphChunk.length > limit + ? splitPackedFenceParagraphChunk(paragraphChunk) + : paragraphChunk, + )) { + out.push(...chunkMarkdownText(chunk, limit)); } return out; } @@ -295,6 +321,34 @@ function splitByNewline( return lines; } +function splitPackedFenceParagraphChunk(chunk: string): string[] { + const chunks: string[] = []; + let start = 0; + for (const span of parseFenceSpans(chunk)) { + if (span.end <= start) { + continue; + } + const separator = chunk.slice(span.end).match(/^\n[\t ]*\n+/)?.[0]; + if (!separator) { + continue; + } + const tail = chunk.slice(span.end + separator.length); + if (!tail.trim()) { + continue; + } + chunks.push(chunk.slice(start, span.end)); + start = span.end + separator.length; + } + if (chunks.length === 0) { + return [chunk]; + } + const tail = chunk.slice(start); + if (tail) { + chunks.push(tail); + } + return chunks; +} + function resolveChunkEarlyReturn(text: string, limit: number): string[] | undefined { if (!text) { return []; diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts index b500424a396..b74b468b2f0 100644 --- a/src/infra/outbound/deliver.test.ts +++ b/src/infra/outbound/deliver.test.ts @@ -2233,7 +2233,7 @@ describe("deliverOutboundPayloads", () => { expect(results.map((r) => r.messageId)).toEqual(["m1", "m2"]); }); - it("respects newline chunk mode for plugin text", async () => { + it("respects newline chunk mode for plugin text without splitting short messages", async () => { const sendMatrix = vi.fn().mockResolvedValue({ messageId: "m1", roomId: "!room:example" }); const cfg: OpenClawConfig = { channels: { @@ -2249,14 +2249,40 @@ describe("deliverOutboundPayloads", () => { deps: { matrix: sendMatrix }, }); + expect(sendMatrix).toHaveBeenCalledTimes(1); + const firstChunkCall = requireMatrixSendCall(sendMatrix); + expect(firstChunkCall?.[0]).toBe("!room:example"); + expect(firstChunkCall?.[1]).toBe("Line one\n\nLine two"); + expect((firstChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg); + }); + + it("splits long plugin text on packed paragraph boundaries in newline mode", async () => { + const sendMatrix = vi + .fn() + .mockResolvedValueOnce({ messageId: "m1", roomId: "!room:example" }) + .mockResolvedValueOnce({ messageId: "m2", roomId: "!room:example" }); + const cfg: OpenClawConfig = { + channels: { + matrix: { textChunkLimit: 14, chunkMode: "newline" }, + } as OpenClawConfig["channels"], + }; + + await deliverOutboundPayloads({ + cfg, + channel: "matrix", + to: "!room:example", + payloads: [{ text: "Alpha\n\nBeta\n\nGamma" }], + deps: { matrix: sendMatrix }, + }); + expect(sendMatrix).toHaveBeenCalledTimes(2); const firstChunkCall = requireMatrixSendCall(sendMatrix); expect(firstChunkCall?.[0]).toBe("!room:example"); - expect(firstChunkCall?.[1]).toBe("Line one"); + expect(firstChunkCall?.[1]).toBe("Alpha\n\nBeta"); expect((firstChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg); const secondChunkCall = sendMatrix.mock.calls[1]; expect(secondChunkCall?.[0]).toBe("!room:example"); - expect(secondChunkCall?.[1]).toBe("Line two"); + expect(secondChunkCall?.[1]).toBe("Gamma"); expect((secondChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg); });