mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 07:02:52 +00:00
fix(outbound): pack newline-mode paragraphs up to limit
Pack newline-mode outbound paragraphs up to the configured text limit instead of sending one message per blank-line-separated paragraph. Preserves markdown fence guardrails and adds focused chunking plus outbound delivery regressions.

Verified: autoreview clean; node scripts/run-vitest.mjs src/auto-reply/chunk.test.ts src/infra/outbound/deliver.test.ts; git diff --check origin/main...HEAD.

Thanks @kesslerio.
This commit is contained in:
@@ -153,9 +153,15 @@ const newlineModeFenceCases = (() => {
|
||||
expected: [fence],
|
||||
},
|
||||
{
|
||||
name: "splits between fence and following paragraph",
|
||||
name: "keeps short fenced block and following paragraph together",
|
||||
text: `${fence}\n\nAfter`,
|
||||
limit: 1000,
|
||||
expected: [`${fence}\n\nAfter`],
|
||||
},
|
||||
{
|
||||
name: "splits oversized fenced block away from following paragraph",
|
||||
text: `${fence}\n\nAfter`,
|
||||
limit: fence.length + 1,
|
||||
expected: [fence, "After"],
|
||||
},
|
||||
{
|
||||
@@ -523,10 +529,10 @@ describe("chunkTextWithMode", () => {
|
||||
expected: ["Line one\nLine two"],
|
||||
},
|
||||
{
|
||||
name: "newline mode (blank-line split)",
|
||||
name: "newline mode packs short blank-line-separated paragraphs",
|
||||
text: "Para one\n\nPara two",
|
||||
mode: "newline" as const,
|
||||
expected: ["Para one", "Para two"],
|
||||
expected: ["Para one\n\nPara two"],
|
||||
},
|
||||
] as const)(
|
||||
"applies mode-specific chunking behavior: $name",
|
||||
@@ -558,10 +564,10 @@ describe("chunkMarkdownTextWithMode", () => {
|
||||
expected: ["Line one\nLine two"],
|
||||
},
|
||||
{
|
||||
name: "newline mode splits by blank line",
|
||||
name: "newline mode packs short blank-line-separated paragraphs",
|
||||
text: "Para one\n\nPara two",
|
||||
mode: "newline" as const,
|
||||
expected: ["Para one", "Para two"],
|
||||
expected: ["Para one\n\nPara two"],
|
||||
},
|
||||
] as const)("applies markdown/newline mode behavior: $name", ({ text, mode, expected, name }) => {
|
||||
expectChunkModeCase({
|
||||
@@ -580,6 +586,13 @@ describe("chunkMarkdownTextWithMode", () => {
|
||||
expect(chunkMarkdownTextWithMode(text, limit, "newline"), name).toEqual(expected);
|
||||
},
|
||||
);
|
||||
|
||||
it("packs multiple paragraphs up to the limit in newline mode", () => {
|
||||
expect(chunkMarkdownTextWithMode("Alpha\n\nBeta\n\nGamma", 14, "newline")).toEqual([
|
||||
"Alpha\n\nBeta",
|
||||
"Gamma",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveChunkMode", () => {
|
||||
|
||||
@@ -217,6 +217,7 @@ export function chunkByParagraph(
|
||||
const spans = parseFenceSpans(normalized);
|
||||
|
||||
const parts: string[] = [];
|
||||
const separators: string[] = [];
|
||||
const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace
|
||||
let lastIndex = 0;
|
||||
for (const match of normalized.matchAll(re)) {
|
||||
@@ -228,23 +229,49 @@ export function chunkByParagraph(
|
||||
}
|
||||
|
||||
parts.push(normalized.slice(lastIndex, idx));
|
||||
separators.push(match[0]);
|
||||
lastIndex = idx + match[0].length;
|
||||
}
|
||||
parts.push(normalized.slice(lastIndex));
|
||||
|
||||
const chunks: string[] = [];
|
||||
for (const part of parts) {
|
||||
let currentChunk = "";
|
||||
|
||||
const pushParagraph = (paragraph: string, separatorBefore?: string) => {
|
||||
if (!currentChunk) {
|
||||
if (paragraph.length <= limit) {
|
||||
currentChunk = paragraph;
|
||||
return;
|
||||
}
|
||||
if (!splitLongParagraphs) {
|
||||
chunks.push(paragraph);
|
||||
return;
|
||||
}
|
||||
chunks.push(...chunkText(paragraph, limit));
|
||||
return;
|
||||
}
|
||||
|
||||
const candidate = `${currentChunk}${separatorBefore ?? "\n\n"}${paragraph}`;
|
||||
if (candidate.length <= limit) {
|
||||
currentChunk = candidate;
|
||||
return;
|
||||
}
|
||||
|
||||
chunks.push(currentChunk);
|
||||
currentChunk = "";
|
||||
pushParagraph(paragraph);
|
||||
};
|
||||
|
||||
for (const [index, part] of parts.entries()) {
|
||||
const paragraph = part.replace(/\s+$/g, "");
|
||||
if (!paragraph.trim()) {
|
||||
continue;
|
||||
}
|
||||
if (paragraph.length <= limit) {
|
||||
chunks.push(paragraph);
|
||||
} else if (!splitLongParagraphs) {
|
||||
chunks.push(paragraph);
|
||||
} else {
|
||||
chunks.push(...chunkText(paragraph, limit));
|
||||
}
|
||||
pushParagraph(paragraph, separators[index - 1]);
|
||||
}
|
||||
|
||||
if (currentChunk) {
|
||||
chunks.push(currentChunk);
|
||||
}
|
||||
|
||||
return chunks;
|
||||
@@ -266,13 +293,12 @@ export function chunkMarkdownTextWithMode(text: string, limit: number, mode: Chu
|
||||
// If a paragraph must be split by length, defer to the markdown-aware chunker.
|
||||
const paragraphChunks = chunkByParagraph(text, limit, { splitLongParagraphs: false });
|
||||
const out: string[] = [];
|
||||
for (const chunk of paragraphChunks) {
|
||||
const nested = chunkMarkdownText(chunk, limit);
|
||||
if (!nested.length && chunk) {
|
||||
out.push(chunk);
|
||||
} else {
|
||||
out.push(...nested);
|
||||
}
|
||||
for (const chunk of paragraphChunks.flatMap((paragraphChunk) =>
|
||||
paragraphChunk.length > limit
|
||||
? splitPackedFenceParagraphChunk(paragraphChunk)
|
||||
: paragraphChunk,
|
||||
)) {
|
||||
out.push(...chunkMarkdownText(chunk, limit));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@@ -295,6 +321,34 @@ function splitByNewline(
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
 * Breaks a packed paragraph chunk apart directly after fenced spans.
 *
 * A cut is made after a fence span only when the span is immediately followed
 * by a paragraph break (newline, optional tabs/spaces, one or more newlines)
 * and there is non-whitespace text after that break. The paragraph break
 * itself is dropped: it belongs to neither the piece before nor the piece
 * after the cut.
 *
 * NOTE(review): assumes `parseFenceSpans` yields spans in document order and
 * that `end` is the offset just past the fence's closing line — confirm
 * against its definition.
 *
 * @param chunk - Text that may contain one or more fenced spans.
 * @returns The pieces in original order, or `[chunk]` when no cut was made.
 */
function splitPackedFenceParagraphChunk(chunk: string): string[] {
  const pieces: string[] = [];
  let cursor = 0;

  for (const { end: fenceEnd } of parseFenceSpans(chunk)) {
    // Only spans ending beyond the last cut point can produce a new piece.
    if (fenceEnd > cursor) {
      const gap = /^\n[\t ]*\n+/.exec(chunk.slice(fenceEnd))?.[0];
      if (gap) {
        const resumeAt = fenceEnd + gap.length;
        // Never cut when nothing but whitespace would follow the fence.
        if (chunk.slice(resumeAt).trim()) {
          pieces.push(chunk.slice(cursor, fenceEnd));
          cursor = resumeAt;
        }
      }
    }
  }

  if (pieces.length === 0) {
    return [chunk];
  }

  const remainder = chunk.slice(cursor);
  if (remainder) {
    pieces.push(remainder);
  }
  return pieces;
}
|
||||
|
||||
function resolveChunkEarlyReturn(text: string, limit: number): string[] | undefined {
|
||||
if (!text) {
|
||||
return [];
|
||||
|
||||
@@ -2233,7 +2233,7 @@ describe("deliverOutboundPayloads", () => {
|
||||
expect(results.map((r) => r.messageId)).toEqual(["m1", "m2"]);
|
||||
});
|
||||
|
||||
it("respects newline chunk mode for plugin text", async () => {
|
||||
it("respects newline chunk mode for plugin text without splitting short messages", async () => {
|
||||
const sendMatrix = vi.fn().mockResolvedValue({ messageId: "m1", roomId: "!room:example" });
|
||||
const cfg: OpenClawConfig = {
|
||||
channels: {
|
||||
@@ -2249,14 +2249,40 @@ describe("deliverOutboundPayloads", () => {
|
||||
deps: { matrix: sendMatrix },
|
||||
});
|
||||
|
||||
expect(sendMatrix).toHaveBeenCalledTimes(1);
|
||||
const firstChunkCall = requireMatrixSendCall(sendMatrix);
|
||||
expect(firstChunkCall?.[0]).toBe("!room:example");
|
||||
expect(firstChunkCall?.[1]).toBe("Line one\n\nLine two");
|
||||
expect((firstChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg);
|
||||
});
|
||||
|
||||
it("splits long plugin text on packed paragraph boundaries in newline mode", async () => {
|
||||
const sendMatrix = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({ messageId: "m1", roomId: "!room:example" })
|
||||
.mockResolvedValueOnce({ messageId: "m2", roomId: "!room:example" });
|
||||
const cfg: OpenClawConfig = {
|
||||
channels: {
|
||||
matrix: { textChunkLimit: 14, chunkMode: "newline" },
|
||||
} as OpenClawConfig["channels"],
|
||||
};
|
||||
|
||||
await deliverOutboundPayloads({
|
||||
cfg,
|
||||
channel: "matrix",
|
||||
to: "!room:example",
|
||||
payloads: [{ text: "Alpha\n\nBeta\n\nGamma" }],
|
||||
deps: { matrix: sendMatrix },
|
||||
});
|
||||
|
||||
expect(sendMatrix).toHaveBeenCalledTimes(2);
|
||||
const firstChunkCall = requireMatrixSendCall(sendMatrix);
|
||||
expect(firstChunkCall?.[0]).toBe("!room:example");
|
||||
expect(firstChunkCall?.[1]).toBe("Line one");
|
||||
expect(firstChunkCall?.[1]).toBe("Alpha\n\nBeta");
|
||||
expect((firstChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg);
|
||||
const secondChunkCall = sendMatrix.mock.calls[1];
|
||||
expect(secondChunkCall?.[0]).toBe("!room:example");
|
||||
expect(secondChunkCall?.[1]).toBe("Line two");
|
||||
expect(secondChunkCall?.[1]).toBe("Gamma");
|
||||
expect((secondChunkCall?.[2] as { cfg?: unknown } | undefined)?.cfg).toBe(cfg);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user