diff --git a/extensions/feishu/src/docx-batch-insert.test.ts b/extensions/feishu/src/docx-batch-insert.test.ts new file mode 100644 index 00000000000..239e46738b4 --- /dev/null +++ b/extensions/feishu/src/docx-batch-insert.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, it, vi } from "vitest"; +import { BATCH_SIZE, insertBlocksInBatches } from "./docx-batch-insert.js"; + +function createCountingIterable(values: T[]) { + let iterations = 0; + return { + values: { + [Symbol.iterator]: function* () { + iterations += 1; + yield* values; + }, + }, + getIterations: () => iterations, + }; +} + +describe("insertBlocksInBatches", () => { + it("builds the source block map once for large flat trees", async () => { + const blockCount = BATCH_SIZE + 200; + const blocks = Array.from({ length: blockCount }, (_, index) => ({ + block_id: `block_${index}`, + block_type: 2, + })); + const counting = createCountingIterable(blocks); + const createMock = vi.fn(async ({ data }: { data: { children_id: string[] } }) => ({ + code: 0, + data: { + children: data.children_id.map((id) => ({ block_id: id })), + }, + })); + const client = { + docx: { + documentBlockDescendant: { + create: createMock, + }, + }, + } as any; + + const result = await insertBlocksInBatches( + client, + "doc_1", + counting.values as any[], + blocks.map((block) => block.block_id), + ); + + expect(counting.getIterations()).toBe(1); + expect(createMock).toHaveBeenCalledTimes(2); + expect(createMock.mock.calls[0]?.[0]?.data.children_id).toHaveLength(BATCH_SIZE); + expect(createMock.mock.calls[1]?.[0]?.data.children_id).toHaveLength(200); + expect(result.children).toHaveLength(blockCount); + }); + + it("keeps nested descendants grouped with their root blocks", async () => { + const createMock = vi.fn( + async ({ + data, + }: { + data: { children_id: string[]; descendants: Array<{ block_id: string }> }; + }) => ({ + code: 0, + data: { + children: data.children_id.map((id) => ({ block_id: id })), + }, + }), + ); + const client = { + docx: { + documentBlockDescendant: { + create: createMock, + }, + }, + } as any; + const blocks = [ + { block_id: "root_a", block_type: 1, children: ["child_a"] }, + { block_id: "child_a", block_type: 2 }, + { block_id: "root_b", block_type: 1, children: ["child_b"] }, + { block_id: "child_b", block_type: 2 }, + ]; + + await insertBlocksInBatches(client, "doc_1", blocks as any[], ["root_a", "root_b"]); + + expect(createMock).toHaveBeenCalledTimes(1); + expect(createMock.mock.calls[0]?.[0]?.data.children_id).toEqual(["root_a", "root_b"]); + expect( + createMock.mock.calls[0]?.[0]?.data.descendants.map( + (block: { block_id: string }) => block.block_id, + ), + ).toEqual(["root_a", "child_a", "root_b", "child_b"]); + }); +}); diff --git a/extensions/feishu/src/docx-batch-insert.ts b/extensions/feishu/src/docx-batch-insert.ts index e38552a4857..b855e53a4a9 100644 --- a/extensions/feishu/src/docx-batch-insert.ts +++ b/extensions/feishu/src/docx-batch-insert.ts @@ -14,16 +14,11 @@ export const BATCH_SIZE = 1000; // Feishu API limit per request type Logger = { info?: (msg: string) => void }; /** - * Collect all descendant blocks for a given set of first-level block IDs. + * Collect all descendant blocks for a given first-level block ID. * Recursively traverses the block tree to gather all children. */ // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types -function collectDescendants(blocks: any[], firstLevelIds: string[]): any[] { - const blockMap = new Map(); - for (const block of blocks) { - blockMap.set(block.block_id, block); - } - +function collectDescendants(blockMap: Map, rootId: string): any[] { const result: any[] = []; const visited = new Set(); @@ -47,9 +42,7 @@ function collectDescendants(blocks: any[], firstLevelIds: string[]): any[] { } } - for (const id of firstLevelIds) { - collect(id); - } + collect(rootId); return result; } @@ -123,9 +116,13 @@ export async function insertBlocksInBatches( const batches: { firstLevelIds: string[]; blocks: any[] }[] = []; let currentBatch: { firstLevelIds: string[]; blocks: any[] } = { firstLevelIds: [], blocks: [] }; const usedBlockIds = new Set(); + const blockMap = new Map(); + for (const block of blocks) { + blockMap.set(block.block_id, block); + } for (const firstLevelId of firstLevelBlockIds) { - const descendants = collectDescendants(blocks, [firstLevelId]); + const descendants = collectDescendants(blockMap, firstLevelId); const newBlocks = descendants.filter((b) => !usedBlockIds.has(b.block_id)); // A single block whose subtree exceeds the API limit cannot be split diff --git a/src/agents/pi-embedded-block-chunker.test.ts b/src/agents/pi-embedded-block-chunker.test.ts index 0b6c858ef95..c8b1f5dda55 100644 --- a/src/agents/pi-embedded-block-chunker.test.ts +++ b/src/agents/pi-embedded-block-chunker.test.ts @@ -1,4 +1,5 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; +import * as fences from "../markdown/fences.js"; import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js"; function createFlushOnParagraphChunker(params: { minChars: number; maxChars: number }) { @@ -120,4 +121,20 @@ describe("EmbeddedBlockChunker", () => { expect(chunks).toEqual(["Intro\n```js\nconst a = 1;\n\nconst b = 2;\n```"]); expect(chunker.bufferedText).toBe("After fence"); }); + + it("parses fence spans once per drain call for long fenced buffers", () => { + const parseSpy = vi.spyOn(fences, "parseFenceSpans"); + const chunker = new EmbeddedBlockChunker({ + minChars: 20, + maxChars: 80, + breakPreference: "paragraph", + }); + + chunker.append(`\`\`\`txt\n${"line\n".repeat(600)}\`\`\``); + const chunks = drainChunks(chunker); + + expect(chunks.length).toBeGreaterThan(2); + expect(parseSpy).toHaveBeenCalledTimes(1); + parseSpy.mockRestore(); + }); }); diff --git a/src/agents/pi-embedded-block-chunker.ts b/src/agents/pi-embedded-block-chunker.ts index b1266a1557a..11eddc2d190 100644 --- a/src/agents/pi-embedded-block-chunker.ts +++ b/src/agents/pi-embedded-block-chunker.ts @@ -12,6 +12,7 @@ export type BlockReplyChunking = { type FenceSplit = { closeFenceLine: string; reopenFenceLine: string; + fence: FenceSpan; }; type BreakResult = { @@ -28,6 +29,7 @@ function findSafeSentenceBreakIndex( text: string, fenceSpans: FenceSpan[], minChars: number, + offset = 0, ): number { const matches = text.matchAll(/[.!?](?=\s|$)/g); let sentenceIdx = -1; @@ -37,7 +39,7 @@ function findSafeSentenceBreakIndex( continue; } const candidate = at + 1; - if (isSafeFenceBreak(fenceSpans, candidate)) { + if (isSafeFenceBreak(fenceSpans, offset + candidate)) { sentenceIdx = candidate; } } @@ -49,8 +51,9 @@ function findSafeParagraphBreakIndex(params: { fenceSpans: FenceSpan[]; minChars: number; reverse: boolean; + offset?: number; }): number { - const { text, fenceSpans, minChars, reverse } = params; + const { text, fenceSpans, minChars, reverse, offset = 0 } = params; let paragraphIdx = reverse ? text.lastIndexOf("\n\n") : text.indexOf("\n\n"); while (reverse ? paragraphIdx >= minChars : paragraphIdx !== -1) { const candidates = [paragraphIdx, paragraphIdx + 1]; @@ -61,7 +64,7 @@ function findSafeParagraphBreakIndex(params: { if (candidate < 0 || candidate >= text.length) { continue; } - if (isSafeFenceBreak(fenceSpans, candidate)) { + if (isSafeFenceBreak(fenceSpans, offset + candidate)) { return candidate; } } @@ -77,11 +80,12 @@ function findSafeNewlineBreakIndex(params: { fenceSpans: FenceSpan[]; minChars: number; reverse: boolean; + offset?: number; }): number { - const { text, fenceSpans, minChars, reverse } = params; + const { text, fenceSpans, minChars, reverse, offset = 0 } = params; let newlineIdx = reverse ? text.lastIndexOf("\n") : text.indexOf("\n"); while (reverse ? newlineIdx >= minChars : newlineIdx !== -1) { - if (newlineIdx >= minChars && isSafeFenceBreak(fenceSpans, newlineIdx)) { + if (newlineIdx >= minChars && isSafeFenceBreak(fenceSpans, offset + newlineIdx)) { return newlineIdx; } newlineIdx = reverse @@ -125,14 +129,7 @@ export class EmbeddedBlockChunker { const minChars = Math.max(1, Math.floor(this.#chunking.minChars)); const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars)); - // When flushOnParagraph is set (chunkMode="newline"), eagerly split on \n\n - // boundaries regardless of minChars so each paragraph is sent immediately. - if (this.#chunking.flushOnParagraph && !force) { - this.#drainParagraphs(emit, maxChars); - return; - } - - if (this.#buffer.length < minChars && !force) { + if (this.#buffer.length < minChars && !force && !this.#chunking.flushOnParagraph) { return; } @@ -144,108 +141,132 @@ export class EmbeddedBlockChunker { return; } - while (this.#buffer.length >= minChars || (force && this.#buffer.length > 0)) { + const source = this.#buffer; + const fenceSpans = parseFenceSpans(source); + let start = 0; + let reopenFence: FenceSpan | undefined; + + while (start < source.length) { + const reopenPrefix = reopenFence ? `${reopenFence.openLine}\n` : ""; + const remainingLength = reopenPrefix.length + (source.length - start); + + if (!force && !this.#chunking.flushOnParagraph && remainingLength < minChars) { + break; + } + + if (this.#chunking.flushOnParagraph && !force) { + const paragraphBreak = findNextParagraphBreak(source, fenceSpans, start); + const paragraphLimit = Math.max(1, maxChars - reopenPrefix.length); + if (paragraphBreak && paragraphBreak.index - start <= paragraphLimit) { + const chunk = `${reopenPrefix}${source.slice(start, paragraphBreak.index)}`; + if (chunk.trim().length > 0) { + emit(chunk); + } + start = skipLeadingNewlines(source, paragraphBreak.index + paragraphBreak.length); + reopenFence = undefined; + continue; + } + if (remainingLength < maxChars) { + break; + } + } + + const view = source.slice(start); const breakResult = - force && this.#buffer.length <= maxChars - ? this.#pickSoftBreakIndex(this.#buffer, 1) - : this.#pickBreakIndex(this.#buffer, force ? 1 : undefined); + force && remainingLength <= maxChars + ? this.#pickSoftBreakIndex(view, fenceSpans, 1, start) + : this.#pickBreakIndex( + view, + fenceSpans, + force || this.#chunking.flushOnParagraph ? 1 : undefined, + start, + ); if (breakResult.index <= 0) { if (force) { - emit(this.#buffer); - this.#buffer = ""; + emit(`${reopenPrefix}${source.slice(start)}`); + start = source.length; + reopenFence = undefined; } - return; + break; } - if (!this.#emitBreakResult(breakResult, emit)) { + const consumed = this.#emitBreakResult({ + breakResult, + emit, + reopenPrefix, + source, + start, + }); + if (consumed === null) { continue; } + start = consumed.start; + reopenFence = consumed.reopenFence; - if (this.#buffer.length < minChars && !force) { - return; + const nextLength = + (reopenFence ? `${reopenFence.openLine}\n`.length : 0) + (source.length - start); + if (nextLength < minChars && !force && !this.#chunking.flushOnParagraph) { + break; } - if (this.#buffer.length < maxChars && !force) { - return; + if (nextLength < maxChars && !force && !this.#chunking.flushOnParagraph) { + break; } } + this.#buffer = reopenFence + ? `${reopenFence.openLine}\n${source.slice(start)}` + : stripLeadingNewlines(source.slice(start)); } - /** Eagerly emit complete paragraphs (text before \n\n) regardless of minChars. */ - #drainParagraphs(emit: (chunk: string) => void, maxChars: number) { - while (this.#buffer.length > 0) { - const fenceSpans = parseFenceSpans(this.#buffer); - const paragraphBreak = findNextParagraphBreak(this.#buffer, fenceSpans); - if (!paragraphBreak || paragraphBreak.index > maxChars) { - // No paragraph boundary yet (or the next boundary is too far). If the - // buffer exceeds maxChars, fall back to normal break logic to avoid - // oversized chunks or unbounded accumulation. - if (this.#buffer.length >= maxChars) { - const breakResult = this.#pickBreakIndex(this.#buffer, 1); - if (breakResult.index > 0) { - this.#emitBreakResult(breakResult, emit); - continue; - } - } - return; - } - - const chunk = this.#buffer.slice(0, paragraphBreak.index); - if (chunk.trim().length > 0) { - emit(chunk); - } - this.#buffer = stripLeadingNewlines( - this.#buffer.slice(paragraphBreak.index + paragraphBreak.length), - ); - } - } - - #emitBreakResult(breakResult: BreakResult, emit: (chunk: string) => void): boolean { + #emitBreakResult(params: { + breakResult: BreakResult; + emit: (chunk: string) => void; + reopenPrefix: string; + source: string; + start: number; + }): { start: number; reopenFence?: FenceSpan } | null { + const { breakResult, emit, reopenPrefix, source, start } = params; const breakIdx = breakResult.index; if (breakIdx <= 0) { - return false; + return null; } - let rawChunk = this.#buffer.slice(0, breakIdx); + const absoluteBreakIdx = start + breakIdx; + let rawChunk = `${reopenPrefix}${source.slice(start, absoluteBreakIdx)}`; if (rawChunk.trim().length === 0) { - this.#buffer = stripLeadingNewlines(this.#buffer.slice(breakIdx)).trimStart(); - return false; + return { start: skipLeadingNewlines(source, absoluteBreakIdx), reopenFence: undefined }; } - let nextBuffer = this.#buffer.slice(breakIdx); const fenceSplit = breakResult.fenceSplit; if (fenceSplit) { const closeFence = rawChunk.endsWith("\n") ? `${fenceSplit.closeFenceLine}\n` : `\n${fenceSplit.closeFenceLine}\n`; rawChunk = `${rawChunk}${closeFence}`; - - const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n") - ? fenceSplit.reopenFenceLine - : `${fenceSplit.reopenFenceLine}\n`; - nextBuffer = `${reopenFence}${nextBuffer}`; } emit(rawChunk); if (fenceSplit) { - this.#buffer = nextBuffer; - } else { - const nextStart = - breakIdx < this.#buffer.length && /\s/.test(this.#buffer[breakIdx]) - ? breakIdx + 1 - : breakIdx; - this.#buffer = stripLeadingNewlines(this.#buffer.slice(nextStart)); + return { start: absoluteBreakIdx, reopenFence: fenceSplit.fence }; } - return true; + const nextStart = + absoluteBreakIdx < source.length && /\s/.test(source[absoluteBreakIdx]) + ? absoluteBreakIdx + 1 + : absoluteBreakIdx; + return { start: skipLeadingNewlines(source, nextStart), reopenFence: undefined }; } - #pickSoftBreakIndex(buffer: string, minCharsOverride?: number): BreakResult { + #pickSoftBreakIndex( + buffer: string, + fenceSpans: FenceSpan[], + minCharsOverride?: number, + offset = 0, + ): BreakResult { const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars)); if (buffer.length < minChars) { return { index: -1 }; } - const fenceSpans = parseFenceSpans(buffer); const preference = this.#chunking.breakPreference ?? "paragraph"; if (preference === "paragraph") { @@ -254,6 +275,7 @@ export class EmbeddedBlockChunker { fenceSpans, minChars, reverse: false, + offset, }); if (paragraphIdx !== -1) { return { index: paragraphIdx }; @@ -266,6 +288,7 @@ export class EmbeddedBlockChunker { fenceSpans, minChars, reverse: false, + offset, }); if (newlineIdx !== -1) { return { index: newlineIdx }; @@ -273,7 +296,7 @@ export class EmbeddedBlockChunker { } if (preference !== "newline") { - const sentenceIdx = findSafeSentenceBreakIndex(buffer, fenceSpans, minChars); + const sentenceIdx = findSafeSentenceBreakIndex(buffer, fenceSpans, minChars, offset); if (sentenceIdx !== -1) { return { index: sentenceIdx }; } @@ -282,14 +305,18 @@ export class EmbeddedBlockChunker { return { index: -1 }; } - #pickBreakIndex(buffer: string, minCharsOverride?: number): BreakResult { + #pickBreakIndex( + buffer: string, + fenceSpans: FenceSpan[], + minCharsOverride?: number, + offset = 0, + ): BreakResult { const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars)); const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars)); if (buffer.length < minChars) { return { index: -1 }; } const window = buffer.slice(0, Math.min(maxChars, buffer.length)); - const fenceSpans = parseFenceSpans(buffer); const preference = this.#chunking.breakPreference ?? "paragraph"; if (preference === "paragraph") { @@ -298,6 +325,7 @@ export class EmbeddedBlockChunker { fenceSpans, minChars, reverse: true, + offset, }); if (paragraphIdx !== -1) { return { index: paragraphIdx }; @@ -310,6 +338,7 @@ export class EmbeddedBlockChunker { fenceSpans, minChars, reverse: true, + offset, }); if (newlineIdx !== -1) { return { index: newlineIdx }; @@ -317,7 +346,7 @@ export class EmbeddedBlockChunker { } if (preference !== "newline") { - const sentenceIdx = findSafeSentenceBreakIndex(window, fenceSpans, minChars); + const sentenceIdx = findSafeSentenceBreakIndex(window, fenceSpans, minChars, offset); if (sentenceIdx !== -1) { return { index: sentenceIdx }; } @@ -328,22 +357,23 @@ export class EmbeddedBlockChunker { } for (let i = window.length - 1; i >= minChars; i--) { - if (/\s/.test(window[i]) && isSafeFenceBreak(fenceSpans, i)) { + if (/\s/.test(window[i]) && isSafeFenceBreak(fenceSpans, offset + i)) { return { index: i }; } } if (buffer.length >= maxChars) { - if (isSafeFenceBreak(fenceSpans, maxChars)) { + if (isSafeFenceBreak(fenceSpans, offset + maxChars)) { return { index: maxChars }; } - const fence = findFenceSpanAt(fenceSpans, maxChars); + const fence = findFenceSpanAt(fenceSpans, offset + maxChars); if (fence) { return { index: maxChars, fenceSplit: { closeFenceLine: `${fence.indent}${fence.marker}`, reopenFenceLine: fence.openLine, + fence, }, }; } @@ -354,12 +384,17 @@ export class EmbeddedBlockChunker { } } -function stripLeadingNewlines(value: string): string { - let i = 0; +function skipLeadingNewlines(value: string, start = 0): number { + let i = start; while (i < value.length && value[i] === "\n") { i++; } - return i > 0 ? value.slice(i) : value; + return i; +} + +function stripLeadingNewlines(value: string): string { + const start = skipLeadingNewlines(value); + return start > 0 ? value.slice(start) : value; } function findNextParagraphBreak( diff --git a/src/auto-reply/chunk.test.ts b/src/auto-reply/chunk.test.ts index f6ae74d909d..07b40069d57 100644 --- a/src/auto-reply/chunk.test.ts +++ b/src/auto-reply/chunk.test.ts @@ -1,4 +1,5 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; +import * as fences from "../markdown/fences.js"; import { hasBalancedFences } from "../test-utils/chunk-test-helpers.js"; import { chunkByNewline, @@ -217,6 +218,17 @@ describe("chunkMarkdownText", () => { expect(chunks[0]?.length).toBe(20); expect(chunks.join("")).toBe(text); }); + + it("parses fence spans once for long fenced payloads", () => { + const parseSpy = vi.spyOn(fences, "parseFenceSpans"); + const text = `\`\`\`txt\n${"line\n".repeat(600)}\`\`\``; + + const chunks = chunkMarkdownText(text, 80); + + expect(chunks.length).toBeGreaterThan(2); + expect(parseSpy).toHaveBeenCalledTimes(1); + parseSpy.mockRestore(); + }); }); describe("chunkByNewline", () => { diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts index 780d57a1f5b..9d16f36d532 100644 --- a/src/auto-reply/chunk.ts +++ b/src/auto-reply/chunk.ts @@ -306,7 +306,7 @@ export function chunkText(text: string, limit: number): string[] { } return chunkTextByBreakResolver(text, limit, (window) => { // 1) Prefer a newline break inside the window (outside parentheses). - const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window); + const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window, 0, window.length); // 2) Otherwise prefer the last whitespace (word boundary) inside the window. return lastNewline > 0 ? lastNewline : lastWhitespace; }); @@ -319,14 +319,24 @@ export function chunkMarkdownText(text: string, limit: number): string[] { } const chunks: string[] = []; - let remaining = text; + const spans = parseFenceSpans(text); + let start = 0; + let reopenFence: ReturnType | undefined; - while (remaining.length > limit) { - const spans = parseFenceSpans(remaining); - const window = remaining.slice(0, limit); + while (start < text.length) { + const reopenPrefix = reopenFence ? `${reopenFence.openLine}\n` : ""; + const contentLimit = Math.max(1, limit - reopenPrefix.length); + if (text.length - start <= contentLimit) { + const finalChunk = `${reopenPrefix}${text.slice(start)}`; + if (finalChunk.length > 0) { + chunks.push(finalChunk); + } + break; + } - const softBreak = pickSafeBreakIndex(window, spans); - let breakIdx = softBreak > 0 ? softBreak : limit; + const windowEnd = Math.min(text.length, start + contentLimit); + const softBreak = pickSafeBreakIndex(text, start, windowEnd, spans); + let breakIdx = softBreak > start ? softBreak : windowEnd; const initialFence = isSafeFenceBreak(spans, breakIdx) ? undefined @@ -335,38 +345,38 @@ export function chunkMarkdownText(text: string, limit: number): string[] { let fenceToSplit = initialFence; if (initialFence) { const closeLine = `${initialFence.indent}${initialFence.marker}`; - const maxIdxIfNeedNewline = limit - (closeLine.length + 1); + const maxIdxIfNeedNewline = start + (contentLimit - (closeLine.length + 1)); - if (maxIdxIfNeedNewline <= 0) { + if (maxIdxIfNeedNewline <= start) { fenceToSplit = undefined; - breakIdx = limit; + breakIdx = windowEnd; } else { const minProgressIdx = Math.min( - remaining.length, - initialFence.start + initialFence.openLine.length + 2, + text.length, + Math.max(start + 1, initialFence.start + initialFence.openLine.length + 2), ); - const maxIdxIfAlreadyNewline = limit - closeLine.length; + const maxIdxIfAlreadyNewline = start + (contentLimit - closeLine.length); let pickedNewline = false; - let lastNewline = remaining.lastIndexOf("\n", Math.max(0, maxIdxIfAlreadyNewline - 1)); - while (lastNewline !== -1) { + let lastNewline = text.lastIndexOf("\n", Math.max(start, maxIdxIfAlreadyNewline - 1)); + while (lastNewline >= start) { const candidateBreak = lastNewline + 1; if (candidateBreak < minProgressIdx) { break; } const candidateFence = findFenceSpanAt(spans, candidateBreak); if (candidateFence && candidateFence.start === initialFence.start) { - breakIdx = Math.max(1, candidateBreak); + breakIdx = candidateBreak; pickedNewline = true; break; } - lastNewline = remaining.lastIndexOf("\n", lastNewline - 1); + lastNewline = text.lastIndexOf("\n", lastNewline - 1); } if (!pickedNewline) { if (minProgressIdx > maxIdxIfAlreadyNewline) { fenceToSplit = undefined; - breakIdx = limit; + breakIdx = windowEnd; } else { breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline); } @@ -378,68 +388,72 @@ export function chunkMarkdownText(text: string, limit: number): string[] { fenceAtBreak && fenceAtBreak.start === initialFence.start ? fenceAtBreak : undefined; } - let rawChunk = remaining.slice(0, breakIdx); - if (!rawChunk) { + const rawContent = text.slice(start, breakIdx); + if (!rawContent) { break; } - const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]); - const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0)); - let next = remaining.slice(nextStart); + let rawChunk = `${reopenPrefix}${rawContent}`; + const brokeOnSeparator = breakIdx < text.length && /\s/.test(text[breakIdx]); + let nextStart = Math.min(text.length, breakIdx + (brokeOnSeparator ? 1 : 0)); if (fenceToSplit) { const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`; rawChunk = rawChunk.endsWith("\n") ? `${rawChunk}${closeLine}` : `${rawChunk}\n${closeLine}`; - next = `${fenceToSplit.openLine}\n${next}`; + reopenFence = fenceToSplit; } else { - next = stripLeadingNewlines(next); + nextStart = skipLeadingNewlines(text, nextStart); + reopenFence = undefined; } chunks.push(rawChunk); - remaining = next; - } - - if (remaining.length) { - chunks.push(remaining); + start = nextStart; } return chunks; } -function stripLeadingNewlines(value: string): string { - let i = 0; +function skipLeadingNewlines(value: string, start = 0): number { + let i = start; while (i < value.length && value[i] === "\n") { i++; } - return i > 0 ? value.slice(i) : value; + return i; } -function pickSafeBreakIndex(window: string, spans: ReturnType): number { - const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window, (index) => +function pickSafeBreakIndex( + text: string, + start: number, + end: number, + spans: ReturnType, +): number { + const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(text, start, end, (index) => isSafeFenceBreak(spans, index), ); - if (lastNewline > 0) { + if (lastNewline > start) { return lastNewline; } - if (lastWhitespace > 0) { + if (lastWhitespace > start) { return lastWhitespace; } return -1; } function scanParenAwareBreakpoints( - window: string, + text: string, + start: number, + end: number, isAllowed: (index: number) => boolean = () => true, ): { lastNewline: number; lastWhitespace: number } { let lastNewline = -1; let lastWhitespace = -1; let depth = 0; - for (let i = 0; i < window.length; i++) { + for (let i = start; i < end; i++) { if (!isAllowed(i)) { continue; } - const char = window[i]; + const char = text[i]; if (char === "(") { depth += 1; continue; diff --git a/src/markdown/fences.ts b/src/markdown/fences.ts index d3cbbced1c6..282b6ecc296 100644 --- a/src/markdown/fences.ts +++ b/src/markdown/fences.ts @@ -73,7 +73,27 @@ export function parseFenceSpans(buffer: string): FenceSpan[] { } export function findFenceSpanAt(spans: FenceSpan[], index: number): FenceSpan | undefined { - return spans.find((span) => index > span.start && index < span.end); + let low = 0; + let high = spans.length - 1; + + while (low <= high) { + const mid = Math.floor((low + high) / 2); + const span = spans[mid]; + if (!span) { + break; + } + if (index <= span.start) { + high = mid - 1; + continue; + } + if (index >= span.end) { + low = mid + 1; + continue; + } + return span; + } + + return undefined; } export function isSafeFenceBreak(spans: FenceSpan[], index: number): boolean {