fix(feishu): chunk large documents for write/append to avoid API 400 errors (#14402)

* fix(feishu): chunk large documents for write/append to avoid API 400 errors

The Feishu API limits documentBlockChildren.create to 50 blocks per
request, and document.convert enforces content-size limits on large
markdown payloads.

Previously, writeDoc and appendDoc would send the entire content in a
single API call, causing HTTP 400 errors for long documents.

This commit adds:
- splitMarkdownByHeadings(): splits markdown at # or ## headings
- chunkedConvertMarkdown(): converts each chunk independently
- chunkedInsertBlocks(): batches blocks into groups of ≤50

Both writeDoc and appendDoc now use the chunked helpers while
preserving backward compatibility for small documents. Image
processing correctly receives all inserted blocks across batches.

* fix(feishu): skip heading detection inside fenced code blocks

Addresses review feedback: splitMarkdownByHeadings() now tracks
fenced code blocks (``` or ~~~) and skips heading-based splitting
when inside one, preventing corruption of code block content.

* Feishu/Docx: add convert fallback chunking + tests

---------

Co-authored-by: lml2468 <lml2468@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Menglin Li
2026-02-28 13:11:12 +08:00
committed by GitHub
parent 27882dc73e
commit 4dc55ea88d
3 changed files with 271 additions and 7 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
- Feishu/Local media sends: propagate `mediaLocalRoots` through Feishu outbound media sending into `loadWebMedia` so local path attachments work with post-CVE local-root enforcement. (#27884) Thanks @joelnishanth.
- Feishu/Group sender allowlist fallback: add global `channels.feishu.groupSenderAllowFrom` sender authorization for group chats, with per-group `groups.<id>.allowFrom` precedence and regression coverage for allow/block/precedence behavior. (#29174) Thanks @1MoreBuild.
- Feishu/Docx append/write ordering: insert converted Docx blocks sequentially (single-block creates) so Feishu append/write preserves markdown block order instead of returning shuffled sections in asynchronous batch inserts. (#26172, #26022) Thanks @echoVic.
- Feishu/Docx convert fallback chunking: recursively split oversized markdown chunks (including long no-heading sections) when `document.convert` hits content limits, while keeping fenced-code-aware split boundaries whenever possible. (#14402) Thanks @lml2468.
- Feishu/Inbound media regression coverage: add explicit tests for message resource type mapping (`image` stays `image`, non-image maps to `file`) to prevent reintroducing unsupported Feishu `type=audio` fetches. (#16311, #8746) Thanks @Yaxuan42.
- Feishu/API quota controls: add `typingIndicator` and `resolveSenderNames` config flags (top-level and per-account) so operators can disable typing reactions and sender-name lookup requests while keeping default behavior unchanged. (#10513) Thanks @BigUncle.
- Security/Feishu webhook ingress: bound unauthenticated webhook rate-limit state with stale-window pruning and a hard key cap to prevent unbounded pre-auth memory growth from rotating source keys. (#26050) Thanks @bmendonca3.

View File

@@ -147,7 +147,7 @@ describe("feishu_doc image fetch hardening", () => {
const result = await feishuDocTool.execute("tool-call", {
action: "append",
doc_token: "doc_1",
content: "## H1\ntext\n## H2",
content: "plain text body",
});
// Verify sequential insertion: one call per block
@@ -163,6 +163,135 @@ describe("feishu_doc image fetch hardening", () => {
expect(result.details.blocks_added).toBe(3);
});
// Regression test for #14402: when document.convert rejects an oversized
// payload and the markdown has no headings to split on, the tool should
// recursively fall back to size-based splitting and convert each piece.
it("falls back to size-based convert chunking for long no-heading markdown", async () => {
let successChunkCount = 0;
convertMock.mockImplementation(async ({ data }) => {
const content = data.content as string;
// Simulate the API's content-size limit: reject any chunk over 280 chars.
if (content.length > 280) {
return { code: 999, msg: "content too large" };
}
successChunkCount++;
const blockId = `b_${successChunkCount}`;
// Each successful convert yields one text block (block_type 2) whose id
// encodes the success ordinal, so insert counts can be checked below.
return {
code: 0,
data: {
blocks: [{ block_type: 2, block_id: blockId }],
first_level_block_ids: [blockId],
},
};
});
// Echo back whatever children were sent so blocks_added reflects inserts.
blockChildrenCreateMock.mockImplementation(async ({ data }) => ({
code: 0,
data: { children: data.children },
}));
const registerTool = vi.fn();
registerFeishuDocTools({
config: {
channels: {
feishu: { appId: "app_id", appSecret: "app_secret" },
},
} as any,
logger: { debug: vi.fn(), info: vi.fn() } as any,
registerTool,
} as any);
// Registered tools may be factories; resolve before searching by name.
const feishuDocTool = registerTool.mock.calls
.map((call) => call[0])
.map((tool) => (typeof tool === "function" ? tool({}) : tool))
.find((tool) => tool.name === "feishu_doc");
expect(feishuDocTool).toBeDefined();
// 120 heading-free lines — large enough to trip the mocked 280-char limit.
const longMarkdown = Array.from(
{ length: 120 },
(_, i) => `line ${i} with enough content to trigger fallback chunking`,
).join("\n");
const result = await feishuDocTool.execute("tool-call", {
action: "append",
doc_token: "doc_1",
content: longMarkdown,
});
// The fallback must have split and retried: multiple convert calls, more
// than one success, and every successful chunk's block actually inserted.
expect(convertMock.mock.calls.length).toBeGreaterThan(1);
expect(successChunkCount).toBeGreaterThan(1);
expect(result.details.blocks_added).toBe(successChunkCount);
});
// Regression test: when the size-based fallback split is triggered, chunk
// boundaries must not land inside a fenced code block — every chunk sent to
// document.convert should contain an even number of ``` fence markers.
it("keeps fenced code blocks balanced when size fallback split is needed", async () => {
const convertedChunks: string[] = [];
let successChunkCount = 0;
let failFirstConvert = true;
convertMock.mockImplementation(async ({ data }) => {
const content = data.content as string;
// Record every payload that reaches convert so fence balance is auditable.
convertedChunks.push(content);
// Fail only the first (full-document) convert to force one fallback split.
if (failFirstConvert) {
failFirstConvert = false;
return { code: 999, msg: "content too large" };
}
successChunkCount++;
const blockId = `c_${successChunkCount}`;
return {
code: 0,
data: {
blocks: [{ block_type: 2, block_id: blockId }],
first_level_block_ids: [blockId],
},
};
});
// Echo back whatever children were sent so blocks_added reflects inserts.
blockChildrenCreateMock.mockImplementation(async ({ data }) => ({
code: 0,
data: { children: data.children },
}));
const registerTool = vi.fn();
registerFeishuDocTools({
config: {
channels: {
feishu: { appId: "app_id", appSecret: "app_secret" },
},
} as any,
logger: { debug: vi.fn(), info: vi.fn() } as any,
registerTool,
} as any);
// Registered tools may be factories; resolve before searching by name.
const feishuDocTool = registerTool.mock.calls
.map((call) => call[0])
.map((tool) => (typeof tool === "function" ? tool({}) : tool))
.find((tool) => tool.name === "feishu_doc");
expect(feishuDocTool).toBeDefined();
// A fenced TS block followed by long tail paragraphs: the split point should
// fall in the tail text, never inside the fence.
const fencedMarkdown = [
"## Section",
"```ts",
"const alpha = 1;",
"const beta = 2;",
"const gamma = alpha + beta;",
"console.log(gamma);",
"```",
"",
"Tail paragraph one with enough text to exceed API limits when combined. ".repeat(8),
"Tail paragraph two with enough text to exceed API limits when combined. ".repeat(8),
"Tail paragraph three with enough text to exceed API limits when combined. ".repeat(8),
].join("\n");
const result = await feishuDocTool.execute("tool-call", {
action: "append",
doc_token: "doc_1",
content: fencedMarkdown,
});
expect(convertMock.mock.calls.length).toBeGreaterThan(1);
expect(successChunkCount).toBeGreaterThan(1);
// Every chunk that reached the convert API must have balanced fences.
for (const chunk of convertedChunks) {
const fenceCount = chunk.match(/```/g)?.length ?? 0;
expect(fenceCount % 2).toBe(0);
}
expect(result.details.blocks_added).toBe(successChunkCount);
});
it("skips image upload when markdown image URL is blocked", async () => {
const consoleErrorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
fetchRemoteMediaMock.mockRejectedValueOnce(

View File

@@ -85,6 +85,10 @@ function cleanBlocksForInsert(blocks: any[]): { cleaned: any[]; skipped: string[
// ============ Core Functions ============
/** Max blocks per documentBlockChildren.create request */
const MAX_BLOCKS_PER_INSERT = 50;
const MAX_CONVERT_RETRY_DEPTH = 8;
async function convertMarkdown(client: Lark.Client, markdown: string) {
const res = await client.docx.document.convert({
data: { content_type: "markdown", content: markdown },
@@ -143,6 +147,138 @@ async function insertBlocks(
return { children: allInserted, skipped };
}
/**
 * Split markdown into chunks at top-level headings (# or ##) to stay within
 * API content limits.
 *
 * Heading lines inside fenced code blocks (``` or ~~~) never start a new
 * chunk, so code content is not corrupted. The opening fence character is
 * tracked so a `~~~` line inside an open ``` fence (or vice versa) is treated
 * as literal fence content rather than incorrectly closing the fence.
 */
function splitMarkdownByHeadings(markdown: string): string[] {
  const lines = markdown.split("\n");
  const chunks: string[] = [];
  let current: string[] = [];
  // Character of the currently-open fence ("`" or "~"), or null when outside.
  let openFenceChar: string | null = null;
  for (const line of lines) {
    const fenceMatch = /^(`{3,}|~{3,})/.exec(line);
    if (fenceMatch) {
      const fenceChar = fenceMatch[1][0];
      if (openFenceChar === null) {
        openFenceChar = fenceChar; // opening fence
      } else if (openFenceChar === fenceChar) {
        openFenceChar = null; // matching closing fence
      }
      // A mismatched fence char inside an open fence is literal content.
    }
    if (openFenceChar === null && /^#{1,2}\s/.test(line) && current.length > 0) {
      chunks.push(current.join("\n"));
      current = [];
    }
    current.push(line);
  }
  if (current.length > 0) {
    chunks.push(current.join("\n"));
  }
  return chunks;
}
/**
 * Split markdown into chunks of at most roughly `maxChars` characters,
 * preferring to break outside fenced code blocks when possible.
 *
 * Fence state tracks the opening fence character so a `~~~` line inside an
 * open ``` fence (or vice versa) does not create a false-safe boundary.
 * When no safe boundary exists, falls back to a line-midpoint split (which
 * may land inside a fence), and finally to a raw character split for a
 * single oversized line, so recursive callers can always make progress.
 */
function splitMarkdownBySize(markdown: string, maxChars: number): string[] {
  if (markdown.length <= maxChars) {
    return [markdown];
  }
  const lines = markdown.split("\n");
  const chunks: string[] = [];
  let current: string[] = [];
  let currentLength = 0;
  // Character of the currently-open fence ("`" or "~"), or null when outside.
  let openFenceChar: string | null = null;
  for (const line of lines) {
    const fenceMatch = /^(`{3,}|~{3,})/.exec(line);
    if (fenceMatch) {
      const fenceChar = fenceMatch[1][0];
      if (openFenceChar === null) {
        openFenceChar = fenceChar; // opening fence
      } else if (openFenceChar === fenceChar) {
        openFenceChar = null; // matching closing fence
      }
    }
    const lineLength = line.length + 1; // +1 for the joining newline
    const wouldExceed = currentLength + lineLength > maxChars;
    if (current.length > 0 && wouldExceed && openFenceChar === null) {
      chunks.push(current.join("\n"));
      current = [];
      currentLength = 0;
    }
    current.push(line);
    currentLength += lineLength;
  }
  if (current.length > 0) {
    chunks.push(current.join("\n"));
  }
  if (chunks.length > 1) {
    return chunks;
  }
  // Degenerate case: no safe boundary outside fenced content. Split the line
  // list at its midpoint even if that lands inside a fence.
  const midpoint = Math.floor(lines.length / 2);
  if (midpoint > 0 && midpoint < lines.length) {
    return [lines.slice(0, midpoint).join("\n"), lines.slice(midpoint).join("\n")];
  }
  // Single oversized line with no newline to break on: fall back to a raw
  // character split so convertMarkdownWithFallback can still make progress.
  const cut = Math.floor(markdown.length / 2);
  if (cut > 0 && cut < markdown.length) {
    return [markdown.slice(0, cut), markdown.slice(cut)];
  }
  return [markdown];
}
/**
 * Convert markdown via the Feishu API, recursively halving the content
 * (fence-aware, via splitMarkdownBySize) and retrying when the convert call
 * fails, so oversized payloads still produce blocks. Rethrows the original
 * error once MAX_CONVERT_RETRY_DEPTH is exceeded or no further split is
 * possible.
 */
async function convertMarkdownWithFallback(client: Lark.Client, markdown: string, depth = 0) {
  try {
    return await convertMarkdown(client, markdown);
  } catch (error) {
    const canRetry = depth < MAX_CONVERT_RETRY_DEPTH && markdown.length >= 2;
    if (!canRetry) {
      throw error;
    }
    // Aim for half the current size, but never below 256 chars per chunk.
    const targetSize = Math.max(256, Math.floor(markdown.length / 2));
    const parts = splitMarkdownBySize(markdown, targetSize);
    if (parts.length <= 1) {
      throw error; // unsplittable — surface the original failure
    }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
    const mergedBlocks: any[] = [];
    const mergedFirstLevelIds: string[] = [];
    for (const part of parts) {
      const result = await convertMarkdownWithFallback(client, part, depth + 1);
      mergedBlocks.push(...result.blocks);
      mergedFirstLevelIds.push(...result.firstLevelBlockIds);
    }
    return { blocks: mergedBlocks, firstLevelBlockIds: mergedFirstLevelIds };
  }
}
/** Convert markdown in chunks to avoid document.convert content size limits */
async function chunkedConvertMarkdown(client: Lark.Client, markdown: string) {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
  const collected: any[] = [];
  for (const section of splitMarkdownByHeadings(markdown)) {
    const converted = await convertMarkdownWithFallback(client, section);
    // Preserve document order within each converted section.
    collected.push(...sortBlocksByFirstLevel(converted.blocks, converted.firstLevelBlockIds));
  }
  return collected;
}
/** Insert blocks in batches of MAX_BLOCKS_PER_INSERT to avoid API 400 errors */
/* eslint-disable @typescript-eslint/no-explicit-any -- SDK block types */
async function chunkedInsertBlocks(
  client: Lark.Client,
  docToken: string,
  blocks: any[],
  parentBlockId?: string,
): Promise<{ children: any[]; skipped: string[] }> {
  /* eslint-enable @typescript-eslint/no-explicit-any */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
  const insertedChildren: any[] = [];
  const skippedAll: string[] = [];
  let offset = 0;
  while (offset < blocks.length) {
    const batch = blocks.slice(offset, offset + MAX_BLOCKS_PER_INSERT);
    const result = await insertBlocks(client, docToken, batch, parentBlockId);
    insertedChildren.push(...result.children);
    skippedAll.push(...result.skipped);
    offset += MAX_BLOCKS_PER_INSERT;
  }
  return { children: insertedChildren, skipped: skippedAll };
}
async function clearDocumentContent(client: Lark.Client, docToken: string) {
const existing = await client.docx.documentBlock.list({
path: { document_id: docToken },
@@ -499,13 +635,12 @@ async function createDoc(
async function writeDoc(client: Lark.Client, docToken: string, markdown: string, maxBytes: number) {
const deleted = await clearDocumentContent(client, docToken);
const { blocks, firstLevelBlockIds } = await convertMarkdown(client, markdown);
const blocks = await chunkedConvertMarkdown(client, markdown);
if (blocks.length === 0) {
return { success: true, blocks_deleted: deleted, blocks_added: 0, images_processed: 0 };
}
const sortedBlocks = sortBlocksByFirstLevel(blocks, firstLevelBlockIds);
const { children: inserted, skipped } = await insertBlocks(client, docToken, sortedBlocks);
const { children: inserted, skipped } = await chunkedInsertBlocks(client, docToken, blocks);
const imagesProcessed = await processImages(client, docToken, markdown, inserted, maxBytes);
return {
@@ -525,13 +660,12 @@ async function appendDoc(
markdown: string,
maxBytes: number,
) {
const { blocks, firstLevelBlockIds } = await convertMarkdown(client, markdown);
const blocks = await chunkedConvertMarkdown(client, markdown);
if (blocks.length === 0) {
throw new Error("Content is empty");
}
const sortedBlocks = sortBlocksByFirstLevel(blocks, firstLevelBlockIds);
const { children: inserted, skipped } = await insertBlocks(client, docToken, sortedBlocks);
const { children: inserted, skipped } = await chunkedInsertBlocks(client, docToken, blocks);
const imagesProcessed = await processImages(client, docToken, markdown, inserted, maxBytes);
return {