Files
openclaw/extensions/feishu/src/docx-batch-insert.ts
2026-03-07 16:50:35 +00:00

188 lines
5.9 KiB
TypeScript

/**
* Batch insertion for large Feishu documents (>1000 blocks).
*
* The Feishu Descendant API has a limit of 1000 blocks per request.
* This module handles splitting large documents into batches while
* preserving parent-child relationships between blocks.
*/
import type * as Lark from "@larksuiteoapi/node-sdk";
import { cleanBlocksForDescendant } from "./docx-table-ops.js";
export const BATCH_SIZE = 1000; // Feishu API limit per request
// Minimal progress-logger shape; `info` is optional so callers can pass any partial logger.
type Logger = { info?: (msg: string) => void };
/**
 * Gather a block and its entire subtree in preorder, starting from rootId.
 *
 * Follows `children` references (either an array of IDs or a single ID
 * string); a visited set guards against cycles and repeated IDs so each
 * block appears at most once. IDs missing from the map are silently skipped.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
function collectDescendants(blockMap: Map<string, any>, rootId: string): any[] {
  const seen = new Set<string>();
  const collected: any[] = [];
  const walk = (id: string): void => {
    if (seen.has(id)) return;
    seen.add(id);
    const node = blockMap.get(id);
    if (!node) return;
    collected.push(node);
    const kids = node.children;
    if (Array.isArray(kids)) {
      kids.forEach((childId) => walk(childId));
    } else if (typeof kids === "string") {
      // Some blocks reference a single child by ID rather than an array.
      walk(kids);
    }
  };
  walk(rootId);
  return collected;
}
/**
 * Send one batch of blocks to the Descendant create endpoint.
 *
 * @param client - Feishu API client
 * @param docToken - Document ID
 * @param blocks - Raw blocks for this batch (cleaned before sending)
 * @param firstLevelBlockIds - IDs of this batch's top-level blocks
 * @param parentBlockId - Parent block to insert into (defaults to docToken)
 * @param index - Position within parent's children (-1 = end)
 * @returns The created children blocks reported by the API
 * @throws Error when the API responds with a non-zero code
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
async function insertBatch(
  client: Lark.Client,
  docToken: string,
  blocks: any[],
  firstLevelBlockIds: string[],
  parentBlockId: string = docToken,
  index: number = -1,
): Promise<any[]> {
  const cleaned = cleanBlocksForDescendant(blocks);
  // Nothing survived cleaning — skip the API round-trip entirely.
  if (!cleaned.length) {
    return [];
  }
  const response = await client.docx.documentBlockDescendant.create({
    path: { document_id: docToken, block_id: parentBlockId },
    data: {
      children_id: firstLevelBlockIds,
      descendants: cleaned,
      index,
    },
  });
  if (response.code !== 0) {
    throw new Error(`${response.msg} (code: ${response.code})`);
  }
  return response.data?.children ?? [];
}
/**
 * Insert blocks in batches for large documents (>1000 blocks).
 *
 * Each first-level block's full subtree is kept atomic; subtrees are packed
 * greedily into batches so that both the children_id and descendants arrays
 * stay within the 1000-block API limit per request.
 *
 * @param client - Feishu API client
 * @param docToken - Document ID
 * @param blocks - All blocks from Convert API
 * @param firstLevelBlockIds - IDs of top-level blocks to insert
 * @param logger - Optional logger for progress updates
 * @param parentBlockId - Parent block to insert into (defaults to docToken = document root)
 * @param startIndex - Starting position within parent (-1 = end). For multi-batch inserts,
 *   each batch advances this by the number of first-level IDs inserted so far.
 * @returns Inserted children blocks and any skipped block IDs
 * @throws Error when a single subtree alone exceeds the API limit
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
export async function insertBlocksInBatches(
  client: Lark.Client,
  docToken: string,
  blocks: any[],
  firstLevelBlockIds: string[],
  logger?: Logger,
  parentBlockId: string = docToken,
  startIndex: number = -1,
): Promise<{ children: any[]; skipped: string[] }> {
  type Batch = { firstLevelIds: string[]; blocks: any[] };

  // Index every block by ID so subtrees can be resolved quickly.
  const byId = new Map<string, any>();
  for (const b of blocks) {
    byId.set(b.block_id, b);
  }

  // Greedily pack first-level subtrees into batches of ≤ BATCH_SIZE blocks.
  const batches: Batch[] = [];
  let pending: Batch = { firstLevelIds: [], blocks: [] };
  // Blocks already assigned to an earlier batch are not sent twice.
  const claimed = new Set<string>();

  for (const rootId of firstLevelBlockIds) {
    const subtree = collectDescendants(byId, rootId).filter(
      (b) => !claimed.has(b.block_id),
    );
    // A single block whose subtree exceeds the API limit cannot be split
    // (a table or other compound block must be inserted atomically).
    if (subtree.length > BATCH_SIZE) {
      throw new Error(
        `Block "${rootId}" has ${subtree.length} descendants, which exceeds the ` +
          `Feishu API limit of ${BATCH_SIZE} blocks per request. ` +
          `Please split the content into smaller sections.`,
      );
    }
    // Close the in-progress batch when this subtree would overflow it.
    if (
      pending.blocks.length > 0 &&
      pending.blocks.length + subtree.length > BATCH_SIZE
    ) {
      batches.push(pending);
      pending = { firstLevelIds: [], blocks: [] };
    }
    pending.firstLevelIds.push(rootId);
    for (const b of subtree) {
      pending.blocks.push(b);
      claimed.add(b.block_id);
    }
  }
  // Flush the final, partially-filled batch.
  if (pending.blocks.length > 0) {
    batches.push(pending);
  }

  // Insert sequentially, advancing the index for position-aware inserts.
  // When startIndex == -1 (append to end), each batch appends after the previous.
  // When startIndex >= 0, each batch starts at startIndex + count of first-level IDs already inserted.
  const inserted: any[] = [];
  let position = startIndex;
  for (const [i, batch] of batches.entries()) {
    logger?.info?.(
      `feishu_doc: Inserting batch ${i + 1}/${batches.length} (${batch.blocks.length} blocks)...`,
    );
    const created = await insertBatch(
      client,
      docToken,
      batch.blocks,
      batch.firstLevelIds,
      parentBlockId,
      position,
    );
    inserted.push(...created);
    // Advance only explicit positions; -1 always means "after last inserted".
    if (position !== -1) {
      position += batch.firstLevelIds.length;
    }
  }
  return { children: inserted, skipped: [] };
}