fix(outbound): pack newline-mode paragraphs up to limit

Pack newline-mode outbound paragraphs up to the configured text limit instead of sending one message per blank-line-separated paragraph. Preserves markdown fence guardrails and adds focused chunking and outbound delivery regression tests.

Verified: autoreview clean; node scripts/run-vitest.mjs src/auto-reply/chunk.test.ts src/infra/outbound/deliver.test.ts; git diff --check origin/main...HEAD.

Thanks @kesslerio.
This commit is contained in:
Martin Kessler
2026-05-30 09:24:57 -07:00
committed by GitHub
parent b1911a7cd3
commit 73a69d9e64
3 changed files with 116 additions and 23 deletions

View File

@@ -153,9 +153,15 @@ const newlineModeFenceCases = (() => {
expected: [fence],
},
{
name: "splits between fence and following paragraph",
name: "keeps short fenced block and following paragraph together",
text: `${fence}\n\nAfter`,
limit: 1000,
expected: [`${fence}\n\nAfter`],
},
{
name: "splits oversized fenced block away from following paragraph",
text: `${fence}\n\nAfter`,
limit: fence.length + 1,
expected: [fence, "After"],
},
{
@@ -523,10 +529,10 @@ describe("chunkTextWithMode", () => {
expected: ["Line one\nLine two"],
},
{
name: "newline mode (blank-line split)",
name: "newline mode packs short blank-line-separated paragraphs",
text: "Para one\n\nPara two",
mode: "newline" as const,
expected: ["Para one", "Para two"],
expected: ["Para one\n\nPara two"],
},
] as const)(
"applies mode-specific chunking behavior: $name",
@@ -558,10 +564,10 @@ describe("chunkMarkdownTextWithMode", () => {
expected: ["Line one\nLine two"],
},
{
name: "newline mode splits by blank line",
name: "newline mode packs short blank-line-separated paragraphs",
text: "Para one\n\nPara two",
mode: "newline" as const,
expected: ["Para one", "Para two"],
expected: ["Para one\n\nPara two"],
},
] as const)("applies markdown/newline mode behavior: $name", ({ text, mode, expected, name }) => {
expectChunkModeCase({
@@ -580,6 +586,13 @@ describe("chunkMarkdownTextWithMode", () => {
expect(chunkMarkdownTextWithMode(text, limit, "newline"), name).toEqual(expected);
},
);
it("packs multiple paragraphs up to the limit in newline mode", () => {
  // "Alpha\n\nBeta" is 11 characters and fits within the limit of 14;
  // appending "\n\nGamma" would make it 18, so "Gamma" is expected on its own.
  const packed = chunkMarkdownTextWithMode("Alpha\n\nBeta\n\nGamma", 14, "newline");
  expect(packed).toEqual(["Alpha\n\nBeta", "Gamma"]);
});
});
describe("resolveChunkMode", () => {

View File

@@ -217,6 +217,7 @@ export function chunkByParagraph(
const spans = parseFenceSpans(normalized);
const parts: string[] = [];
const separators: string[] = [];
const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace
let lastIndex = 0;
for (const match of normalized.matchAll(re)) {
@@ -228,23 +229,49 @@ export function chunkByParagraph(
}
parts.push(normalized.slice(lastIndex, idx));
separators.push(match[0]);
lastIndex = idx + match[0].length;
}
parts.push(normalized.slice(lastIndex));
const chunks: string[] = [];
for (const part of parts) {
let currentChunk = "";
const pushParagraph = (paragraph: string, separatorBefore?: string) => {
if (!currentChunk) {
if (paragraph.length <= limit) {
currentChunk = paragraph;
return;
}
if (!splitLongParagraphs) {
chunks.push(paragraph);
return;
}
chunks.push(...chunkText(paragraph, limit));
return;
}
const candidate = `${currentChunk}${separatorBefore ?? "\n\n"}${paragraph}`;
if (candidate.length <= limit) {
currentChunk = candidate;
return;
}
chunks.push(currentChunk);
currentChunk = "";
pushParagraph(paragraph);
};
for (const [index, part] of parts.entries()) {
const paragraph = part.replace(/\s+$/g, "");
if (!paragraph.trim()) {
continue;
}
if (paragraph.length <= limit) {
chunks.push(paragraph);
} else if (!splitLongParagraphs) {
chunks.push(paragraph);
} else {
chunks.push(...chunkText(paragraph, limit));
}
pushParagraph(paragraph, separators[index - 1]);
}
if (currentChunk) {
chunks.push(currentChunk);
}
return chunks;
@@ -266,13 +293,12 @@ export function chunkMarkdownTextWithMode(text: string, limit: number, mode: Chu
// If a paragraph must be split by length, defer to the markdown-aware chunker.
const paragraphChunks = chunkByParagraph(text, limit, { splitLongParagraphs: false });
const out: string[] = [];
for (const chunk of paragraphChunks) {
const nested = chunkMarkdownText(chunk, limit);
if (!nested.length && chunk) {
out.push(chunk);
} else {
out.push(...nested);
}
for (const chunk of paragraphChunks.flatMap((paragraphChunk) =>
paragraphChunk.length > limit
? splitPackedFenceParagraphChunk(paragraphChunk)
: paragraphChunk,
)) {
out.push(...chunkMarkdownText(chunk, limit));
}
return out;
}
@@ -295,6 +321,34 @@ function splitByNewline(
return lines;
}
/**
 * Splits a packed paragraph chunk at paragraph breaks that immediately follow
 * a fenced span, so the fenced content and the text after it end up in
 * separate pieces.
 *
 * A cut is made after a span only when the span's end is directly followed by
 * a paragraph break (a newline, optional tabs/spaces, then one or more
 * newlines) and something other than whitespace remains after that break. The
 * break itself is dropped from the output.
 *
 * NOTE(review): assumes `span.end` from `parseFenceSpans` is the exclusive end
 * offset of the fence within `chunk` — confirm against that helper.
 *
 * @param chunk - Text that may contain one or more fenced spans.
 * @returns The pieces in source order, or `[chunk]` unchanged when no cut
 *   point qualifies.
 */
function splitPackedFenceParagraphChunk(chunk: string): string[] {
  // Sticky: the break must begin exactly at `lastIndex`, i.e. right at the span end.
  const breakAfterFence = /\n[\t ]*\n+/y;
  const pieces: string[] = [];
  let cursor = 0;

  for (const { end } of parseFenceSpans(chunk)) {
    // Ignore spans that finish at or before the position already consumed.
    if (end <= cursor) {
      continue;
    }

    breakAfterFence.lastIndex = end;
    const breakText = breakAfterFence.exec(chunk)?.[0];
    if (breakText === undefined) {
      continue;
    }

    // Only cut when real content follows; a trailing break stays attached.
    const resumeAt = end + breakText.length;
    if (chunk.slice(resumeAt).trim() === "") {
      continue;
    }

    pieces.push(chunk.slice(cursor, end));
    cursor = resumeAt;
  }

  if (pieces.length === 0) {
    return [chunk];
  }

  const remainder = chunk.slice(cursor);
  if (remainder) {
    pieces.push(remainder);
  }
  return pieces;
}
function resolveChunkEarlyReturn(text: string, limit: number): string[] | undefined {
if (!text) {
return [];

View File

@@ -2233,7 +2233,7 @@ describe("deliverOutboundPayloads", () => {
expect(results.map((r) => r.messageId)).toEqual(["m1", "m2"]);
});
it("respects newline chunk mode for plugin text", async () => {
it("respects newline chunk mode for plugin text without splitting short messages", async () => {
const sendMatrix = vi.fn().mockResolvedValue({ messageId: "m1", roomId: "!room:example" });
const cfg: OpenClawConfig = {
channels: {
@@ -2249,14 +2249,40 @@ describe("deliverOutboundPayloads", () => {
deps: { matrix: sendMatrix },
});
expect(sendMatrix).toHaveBeenCalledTimes(1);
const firstChunkCall = requireMatrixSendCall(sendMatrix);
expect(firstChunkCall?.[0]).toBe("!room:example");
expect(firstChunkCall?.[1]).toBe("Line one\n\nLine two");
expect((firstChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg);
});
it("splits long plugin text on packed paragraph boundaries in newline mode", async () => {
const sendMatrix = vi
.fn()
.mockResolvedValueOnce({ messageId: "m1", roomId: "!room:example" })
.mockResolvedValueOnce({ messageId: "m2", roomId: "!room:example" });
const cfg: OpenClawConfig = {
channels: {
matrix: { textChunkLimit: 14, chunkMode: "newline" },
} as OpenClawConfig["channels"],
};
await deliverOutboundPayloads({
cfg,
channel: "matrix",
to: "!room:example",
payloads: [{ text: "Alpha\n\nBeta\n\nGamma" }],
deps: { matrix: sendMatrix },
});
expect(sendMatrix).toHaveBeenCalledTimes(2);
const firstChunkCall = requireMatrixSendCall(sendMatrix);
expect(firstChunkCall?.[0]).toBe("!room:example");
expect(firstChunkCall?.[1]).toBe("Line one");
expect(firstChunkCall?.[1]).toBe("Alpha\n\nBeta");
expect((firstChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg);
const secondChunkCall = sendMatrix.mock.calls[1];
expect(secondChunkCall?.[0]).toBe("!room:example");
expect(secondChunkCall?.[1]).toBe("Line two");
expect(secondChunkCall?.[1]).toBe("Gamma");
expect((secondChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg);
});