diff --git a/CHANGELOG.md b/CHANGELOG.md index fa651fe784d..6eb2028903d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai - Feishu/Local media sends: propagate `mediaLocalRoots` through Feishu outbound media sending into `loadWebMedia` so local path attachments work with post-CVE local-root enforcement. (#27884) Thanks @joelnishanth. - Feishu/Group sender allowlist fallback: add global `channels.feishu.groupSenderAllowFrom` sender authorization for group chats, with per-group `groups.<id>.allowFrom` precedence and regression coverage for allow/block/precedence behavior. (#29174) Thanks @1MoreBuild. - Feishu/Docx append/write ordering: insert converted Docx blocks sequentially (single-block creates) so Feishu append/write preserves markdown block order instead of returning shuffled sections in asynchronous batch inserts. (#26172, #26022) Thanks @echoVic. +- Feishu/Docx convert fallback chunking: recursively split oversized markdown chunks (including long no-heading sections) when `document.convert` hits content limits, while keeping fenced-code-aware split boundaries whenever possible. (#14402) Thanks @lml2468. - Feishu/Inbound media regression coverage: add explicit tests for message resource type mapping (`image` stays `image`, non-image maps to `file`) to prevent reintroducing unsupported Feishu `type=audio` fetches. (#16311, #8746) Thanks @Yaxuan42. - Feishu/API quota controls: add `typingIndicator` and `resolveSenderNames` config flags (top-level and per-account) so operators can disable typing reactions and sender-name lookup requests while keeping default behavior unchanged. (#10513) Thanks @BigUncle. - Security/Feishu webhook ingress: bound unauthenticated webhook rate-limit state with stale-window pruning and a hard key cap to prevent unbounded pre-auth memory growth from rotating source keys. (#26050) Thanks @bmendonca3. 
diff --git a/extensions/feishu/src/docx.test.ts b/extensions/feishu/src/docx.test.ts index 740872a61b8..532a6728984 100644 --- a/extensions/feishu/src/docx.test.ts +++ b/extensions/feishu/src/docx.test.ts @@ -147,7 +147,7 @@ describe("feishu_doc image fetch hardening", () => { const result = await feishuDocTool.execute("tool-call", { action: "append", doc_token: "doc_1", - content: "## H1\ntext\n## H2", + content: "plain text body", }); // Verify sequential insertion: one call per block @@ -163,6 +163,135 @@ describe("feishu_doc image fetch hardening", () => { expect(result.details.blocks_added).toBe(3); }); + it("falls back to size-based convert chunking for long no-heading markdown", async () => { + let successChunkCount = 0; + convertMock.mockImplementation(async ({ data }) => { + const content = data.content as string; + if (content.length > 280) { + return { code: 999, msg: "content too large" }; + } + successChunkCount++; + const blockId = `b_${successChunkCount}`; + return { + code: 0, + data: { + blocks: [{ block_type: 2, block_id: blockId }], + first_level_block_ids: [blockId], + }, + }; + }); + + blockChildrenCreateMock.mockImplementation(async ({ data }) => ({ + code: 0, + data: { children: data.children }, + })); + + const registerTool = vi.fn(); + registerFeishuDocTools({ + config: { + channels: { + feishu: { appId: "app_id", appSecret: "app_secret" }, + }, + } as any, + logger: { debug: vi.fn(), info: vi.fn() } as any, + registerTool, + } as any); + + const feishuDocTool = registerTool.mock.calls + .map((call) => call[0]) + .map((tool) => (typeof tool === "function" ? 
tool({}) : tool)) + .find((tool) => tool.name === "feishu_doc"); + expect(feishuDocTool).toBeDefined(); + + const longMarkdown = Array.from( + { length: 120 }, + (_, i) => `line ${i} with enough content to trigger fallback chunking`, + ).join("\n"); + + const result = await feishuDocTool.execute("tool-call", { + action: "append", + doc_token: "doc_1", + content: longMarkdown, + }); + + expect(convertMock.mock.calls.length).toBeGreaterThan(1); + expect(successChunkCount).toBeGreaterThan(1); + expect(result.details.blocks_added).toBe(successChunkCount); + }); + + it("keeps fenced code blocks balanced when size fallback split is needed", async () => { + const convertedChunks: string[] = []; + let successChunkCount = 0; + let failFirstConvert = true; + convertMock.mockImplementation(async ({ data }) => { + const content = data.content as string; + convertedChunks.push(content); + if (failFirstConvert) { + failFirstConvert = false; + return { code: 999, msg: "content too large" }; + } + successChunkCount++; + const blockId = `c_${successChunkCount}`; + return { + code: 0, + data: { + blocks: [{ block_type: 2, block_id: blockId }], + first_level_block_ids: [blockId], + }, + }; + }); + + blockChildrenCreateMock.mockImplementation(async ({ data }) => ({ + code: 0, + data: { children: data.children }, + })); + + const registerTool = vi.fn(); + registerFeishuDocTools({ + config: { + channels: { + feishu: { appId: "app_id", appSecret: "app_secret" }, + }, + } as any, + logger: { debug: vi.fn(), info: vi.fn() } as any, + registerTool, + } as any); + + const feishuDocTool = registerTool.mock.calls + .map((call) => call[0]) + .map((tool) => (typeof tool === "function" ? 
tool({}) : tool)) + .find((tool) => tool.name === "feishu_doc"); + expect(feishuDocTool).toBeDefined(); + + const fencedMarkdown = [ + "## Section", + "```ts", + "const alpha = 1;", + "const beta = 2;", + "const gamma = alpha + beta;", + "console.log(gamma);", + "```", + "", + "Tail paragraph one with enough text to exceed API limits when combined. ".repeat(8), + "Tail paragraph two with enough text to exceed API limits when combined. ".repeat(8), + "Tail paragraph three with enough text to exceed API limits when combined. ".repeat(8), + ].join("\n"); + + const result = await feishuDocTool.execute("tool-call", { + action: "append", + doc_token: "doc_1", + content: fencedMarkdown, + }); + + expect(convertMock.mock.calls.length).toBeGreaterThan(1); + expect(successChunkCount).toBeGreaterThan(1); + for (const chunk of convertedChunks) { + const fenceCount = chunk.match(/```/g)?.length ?? 0; + expect(fenceCount % 2).toBe(0); + } + expect(result.details.blocks_added).toBe(successChunkCount); + }); + it("skips image upload when markdown image URL is blocked", async () => { const consoleErrorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); fetchRemoteMediaMock.mockRejectedValueOnce( diff --git a/extensions/feishu/src/docx.ts b/extensions/feishu/src/docx.ts index 8d3385aa4c1..7a126717d01 100644 --- a/extensions/feishu/src/docx.ts +++ b/extensions/feishu/src/docx.ts @@ -85,6 +85,10 @@ function cleanBlocksForInsert(blocks: any[]): { cleaned: any[]; skipped: string[ // ============ Core Functions ============ +/** Max blocks per documentBlockChildren.create request */ +const MAX_BLOCKS_PER_INSERT = 50; +const MAX_CONVERT_RETRY_DEPTH = 8; + async function convertMarkdown(client: Lark.Client, markdown: string) { const res = await client.docx.document.convert({ data: { content_type: "markdown", content: markdown }, @@ -143,6 +147,138 @@ async function insertBlocks( return { children: allInserted, skipped }; } +/** Split markdown into chunks at top-level headings (# 
/** State of the fenced code block that is currently open while scanning markdown line by line. */
interface OpenFence {
  /** Fence character of the opening line: a backtick or a tilde. */
  marker: string;
  /** Number of fence characters in the opening run; a closing run must be at least this long. */
  length: number;
}

/**
 * A candidate fence line: up to three spaces of indent, then a run of 3+ backticks or tildes.
 * The tail is captured with `[\s\S]*` so a trailing "\r" (CRLF input split on "\n") still matches.
 */
const FENCE_LINE = /^ {0,3}(`{3,}|~{3,})([\s\S]*)$/;

/**
 * Advance the fenced-code-block state across one line.
 *
 * Outside a fence, any fence line opens one. Inside a fence, only a line made of the same
 * marker character, at least as long as the opener and followed by nothing but whitespace,
 * closes it — so "~~~" inside a backtick fence, or "```" inside a four-backtick fence, is
 * treated as code content rather than flipping the state.
 *
 * @param open - Fence that is open before `line`, or `null` when outside any fence.
 * @param line - One line of markdown without its trailing "\n".
 * @returns The fence that is open after `line`, or `null` when outside any fence.
 */
function advanceFence(open: OpenFence | null, line: string): OpenFence | null {
  const match = FENCE_LINE.exec(line);
  if (match === null) {
    return open;
  }

  const run = match[1] ?? "";
  const rest = match[2] ?? "";
  const marker = run.charAt(0);

  if (open === null) {
    // A backtick fence's info string cannot contain backticks; such a line is inline code.
    if (marker === "`" && rest.includes("`")) {
      return null;
    }
    return { marker, length: run.length };
  }

  const closes = marker === open.marker && run.length >= open.length && rest.trim() === "";
  return closes ? null : open;
}

/**
 * Split markdown into chunks at top-level headings (`#` or `##`) to stay within API content
 * limits.
 *
 * Heading-looking lines inside fenced code blocks are ignored. Every chunk except possibly
 * the first starts with a heading line; joining the chunks with "\n" reproduces the input.
 *
 * @param markdown - Full markdown source.
 * @returns One chunk per top-level section, in document order (a single chunk when the input
 *   has no top-level heading after its first line).
 */
function splitMarkdownByHeadings(markdown: string): string[] {
  const chunks: string[] = [];
  let current: string[] = [];
  let fence: OpenFence | null = null;

  for (const line of markdown.split("\n")) {
    fence = advanceFence(fence, line);
    const startsSection = fence === null && /^#{1,2}\s/.test(line);
    if (startsSection && current.length > 0) {
      chunks.push(current.join("\n"));
      current = [];
    }
    current.push(line);
  }

  if (current.length > 0) {
    chunks.push(current.join("\n"));
  }
  return chunks;
}

/**
 * Split markdown by size, preferring to break outside fenced code blocks when possible.
 *
 * Lines are accumulated until adding the next one would push the chunk past `maxChars`; the
 * chunk is then flushed, but only if that next line starts outside a fence. Deciding on the
 * state *before* the line means a break may land directly in front of an opening fence, and
 * never in front of a closing fence (which would leave one chunk with an unclosed fence and
 * the next starting with a stray closer). A fenced block larger than `maxChars` is therefore
 * kept whole and may exceed the limit.
 *
 * If no safe boundary exists at all, the text is cut at the middle line as a last resort;
 * that cut can fall inside a fence. Input consisting of a single line is returned unsplit.
 *
 * @param markdown - Markdown source to split.
 * @param maxChars - Target maximum chunk length, counting one character per line break.
 * @returns Chunks in document order; joining them with "\n" reproduces the input.
 */
function splitMarkdownBySize(markdown: string, maxChars: number): string[] {
  if (markdown.length <= maxChars) {
    return [markdown];
  }

  const lines = markdown.split("\n");
  const chunks: string[] = [];
  let current: string[] = [];
  let currentLength = 0;
  let fence: OpenFence | null = null;

  for (const line of lines) {
    const startsInsideFence = fence !== null;
    fence = advanceFence(fence, line);

    const lineLength = line.length + 1; // +1 for the "\n" that join() puts back
    const wouldExceed = currentLength + lineLength > maxChars;
    if (current.length > 0 && wouldExceed && !startsInsideFence) {
      chunks.push(current.join("\n"));
      current = [];
      currentLength = 0;
    }

    current.push(line);
    currentLength += lineLength;
  }

  if (current.length > 0) {
    chunks.push(current.join("\n"));
  }

  if (chunks.length > 1) {
    return chunks;
  }

  // Degenerate case: no safe boundary outside fenced content, so halve by line count.
  const midpoint = Math.floor(lines.length / 2);
  if (midpoint <= 0 || midpoint >= lines.length) {
    return [markdown];
  }
  return [lines.slice(0, midpoint).join("\n"), lines.slice(midpoint).join("\n")];
}

/**
 * Convert markdown, and when conversion throws, retry on progressively smaller pieces.
 *
 * Any error thrown by `convertMarkdown` triggers the fallback (the error is not inspected).
 * The text is split to roughly half its length, never targeting below 256 characters, each
 * piece is converted recursively in order, and the results are concatenated. The original
 * error is rethrown when the retry depth reaches `MAX_CONVERT_RETRY_DEPTH`, when the text is
 * shorter than two characters, or when it cannot be split any further.
 *
 * @param client - Lark SDK client passed through to `convertMarkdown`.
 * @param markdown - Markdown to convert.
 * @param depth - Current recursion depth; callers normally omit it.
 * @returns Converted blocks and first-level block ids, in document order.
 * @throws The error from `convertMarkdown` once no further fallback is possible.
 */
async function convertMarkdownWithFallback(client: Lark.Client, markdown: string, depth = 0) {
  try {
    return await convertMarkdown(client, markdown);
  } catch (error) {
    if (depth >= MAX_CONVERT_RETRY_DEPTH || markdown.length < 2) {
      throw error;
    }

    // Halve the text per retry, but do not aim for pieces smaller than 256 characters.
    const splitTarget = Math.max(256, Math.floor(markdown.length / 2));
    const chunks = splitMarkdownBySize(markdown, splitTarget);
    if (chunks.length <= 1) {
      throw error;
    }

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
    const blocks: any[] = [];
    const firstLevelBlockIds: string[] = [];

    // Sequential on purpose: results are appended in document order.
    for (const chunk of chunks) {
      const converted = await convertMarkdownWithFallback(client, chunk, depth + 1);
      blocks.push(...converted.blocks);
      firstLevelBlockIds.push(...converted.firstLevelBlockIds);
    }

    return { blocks, firstLevelBlockIds };
  }
}

/**
 * Convert markdown in chunks to avoid document.convert content size limits.
 *
 * The text is first split at top-level headings; each section is converted (with the
 * size-based fallback), ordered via `sortBlocksByFirstLevel`, and appended to the result.
 *
 * @param client - Lark SDK client.
 * @param markdown - Full markdown source.
 * @returns All converted blocks in document order.
 */
async function chunkedConvertMarkdown(client: Lark.Client, markdown: string) {
  const chunks = splitMarkdownByHeadings(markdown);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
  const allBlocks: any[] = [];
  for (const chunk of chunks) {
    const { blocks, firstLevelBlockIds } = await convertMarkdownWithFallback(client, chunk);
    const sorted = sortBlocksByFirstLevel(blocks, firstLevelBlockIds);
    allBlocks.push(...sorted);
  }
  return allBlocks;
}

/**
 * Insert blocks in batches of MAX_BLOCKS_PER_INSERT to avoid API 400 errors.
 *
 * Batches are sent one after another through `insertBlocks`, and the inserted children and
 * skipped entries of every batch are concatenated in order.
 *
 * @param client - Lark SDK client.
 * @param docToken - Token of the target document.
 * @param blocks - Blocks to insert, already in final order.
 * @param parentBlockId - Optional parent block, forwarded unchanged to `insertBlocks`.
 * @returns Inserted children and skipped entries accumulated over all batches.
 */
/* eslint-disable @typescript-eslint/no-explicit-any -- SDK block types */
async function chunkedInsertBlocks(
  client: Lark.Client,
  docToken: string,
  blocks: any[],
  parentBlockId?: string,
): Promise<{ children: any[]; skipped: string[] }> {
  const allChildren: any[] = [];
  const allSkipped: string[] = [];

  for (let i = 0; i < blocks.length; i += MAX_BLOCKS_PER_INSERT) {
    const batch = blocks.slice(i, i + MAX_BLOCKS_PER_INSERT);
    const { children, skipped } = await insertBlocks(client, docToken, batch, parentBlockId);
    allChildren.push(...children);
    allSkipped.push(...skipped);
  }

  return { children: allChildren, skipped: allSkipped };
}
/* eslint-enable @typescript-eslint/no-explicit-any */
insertBlocks(client, docToken, sortedBlocks); + const { children: inserted, skipped } = await chunkedInsertBlocks(client, docToken, blocks); const imagesProcessed = await processImages(client, docToken, markdown, inserted, maxBytes); return {