mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-14 19:40:40 +00:00
188 lines
5.9 KiB
TypeScript
188 lines
5.9 KiB
TypeScript
/**
|
|
* Batch insertion for large Feishu documents (>1000 blocks).
|
|
*
|
|
* The Feishu Descendant API has a limit of 1000 blocks per request.
|
|
* This module handles splitting large documents into batches while
|
|
* preserving parent-child relationships between blocks.
|
|
*/
|
|
|
|
import type * as Lark from "@larksuiteoapi/node-sdk";
|
|
import { cleanBlocksForDescendant } from "./docx-table-ops.js";
|
|
|
|
/** Maximum number of blocks sent in a single insert request. */
export const BATCH_SIZE = 1000; // Feishu API limit per request
|
|
|
|
/** Minimal logger shape used for progress messages; `info` is optional. */
type Logger = { info?: (msg: string) => void };
|
|
|
|
/**
 * Gather a first-level block together with every block reachable from it.
 *
 * Follows the `children` references depth-first: a parent is emitted before its
 * children, and siblings are emitted in the order they are listed. Each block ID
 * is visited at most once, and IDs that are missing from `blockMap` are ignored.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
function collectDescendants(blockMap: Map<string, any>, rootId: string): any[] {
  const ordered: any[] = [];
  const seen = new Set<string>();
  // Explicit stack instead of recursion; the top of the stack is visited next.
  const pending: string[] = [rootId];

  while (pending.length > 0) {
    const currentId = pending.pop() as string;
    if (seen.has(currentId)) continue;
    seen.add(currentId);

    const current = blockMap.get(currentId);
    if (!current) continue;
    ordered.push(current);

    const kids = current.children;
    if (Array.isArray(kids)) {
      // Push in reverse so the first listed child is popped (and visited) first.
      for (let i = kids.length - 1; i >= 0; i--) {
        pending.push(kids[i]);
      }
    } else if (typeof kids === "string") {
      pending.push(kids);
    }
  }

  return ordered;
}
|
|
|
|
/**
 * Send one batch of blocks to the Descendant API and return the created children.
 *
 * The blocks are passed through `cleanBlocksForDescendant` first; when nothing
 * remains, no request is made and an empty array is returned.
 *
 * @param client - Feishu API client
 * @param docToken - Document ID
 * @param blocks - Blocks that make up this batch
 * @param firstLevelBlockIds - IDs sent to the API as `children_id`
 * @param parentBlockId - Parent block to insert into (defaults to docToken)
 * @param index - Position within parent's children (-1 = end)
 * @returns The `children` of the API response, or an empty array when absent
 * @throws Error with the API message and code when the response code is non-zero
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
async function insertBatch(
  client: Lark.Client,
  docToken: string,
  blocks: any[],
  firstLevelBlockIds: string[],
  parentBlockId: string = docToken,
  index: number = -1,
): Promise<any[]> {
  const cleaned = cleanBlocksForDescendant(blocks);
  if (cleaned.length === 0) return [];

  const response = await client.docx.documentBlockDescendant.create({
    path: { document_id: docToken, block_id: parentBlockId },
    data: { children_id: firstLevelBlockIds, descendants: cleaned, index },
  });

  const succeeded = response.code === 0;
  if (!succeeded) {
    throw new Error(`${response.msg} (code: ${response.code})`);
  }

  const created = response.data?.children;
  return created ?? [];
}
|
|
|
|
/**
 * Insert blocks in batches for large documents (>1000 blocks).
 *
 * Batches are split to ensure BOTH children_id AND descendants
 * arrays stay under the 1000 block API limit. A first-level block and its
 * whole subtree always travel in the same batch.
 *
 * First-level IDs that contribute no new blocks are not sent to the API and are
 * reported in `skipped` instead. This happens when the ID is absent from
 * `blocks`, or when its block was already included under an earlier first-level
 * block. Sending such an ID would put an entry in `children_id` that has no
 * matching entry in that request's `descendants`.
 *
 * @param client - Feishu API client
 * @param docToken - Document ID
 * @param blocks - All blocks from Convert API
 * @param firstLevelBlockIds - IDs of top-level blocks to insert
 * @param logger - Optional logger for progress updates
 * @param parentBlockId - Parent block to insert into (defaults to docToken = document root)
 * @param startIndex - Starting position within parent (-1 = end). For multi-batch inserts,
 *   each batch advances this by the number of first-level IDs inserted so far.
 * @returns Inserted children blocks and the first-level block IDs that were skipped
 * @throws Error when a single first-level block's subtree exceeds the per-request limit,
 *   or when a batch request fails
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
export async function insertBlocksInBatches(
  client: Lark.Client,
  docToken: string,
  blocks: any[],
  firstLevelBlockIds: string[],
  logger?: Logger,
  parentBlockId: string = docToken,
  startIndex: number = -1,
): Promise<{ children: any[]; skipped: string[] }> {
  const allChildren: any[] = [];
  const skipped: string[] = [];

  // Index blocks by ID so subtrees can be resolved from `children` references.
  const blockMap = new Map<string, any>();
  for (const block of blocks) {
    blockMap.set(block.block_id, block);
  }

  // Build batches ensuring each batch has ≤1000 total descendants
  const batches: { firstLevelIds: string[]; blocks: any[] }[] = [];
  let currentBatch: { firstLevelIds: string[]; blocks: any[] } = { firstLevelIds: [], blocks: [] };
  const usedBlockIds = new Set<string>();

  for (const firstLevelId of firstLevelBlockIds) {
    const descendants = collectDescendants(blockMap, firstLevelId);
    const newBlocks = descendants.filter((b) => !usedBlockIds.has(b.block_id));

    // Nothing new to insert for this ID: it is either unknown or was already
    // placed as part of an earlier subtree. Listing it in children_id would
    // reference a block that is missing from this request's descendants.
    if (newBlocks.length === 0) {
      skipped.push(firstLevelId);
      continue;
    }

    // A single block whose subtree exceeds the API limit cannot be split
    // (a table or other compound block must be inserted atomically).
    if (newBlocks.length > BATCH_SIZE) {
      throw new Error(
        `Block "${firstLevelId}" has ${newBlocks.length} descendants, which exceeds the ` +
          `Feishu API limit of ${BATCH_SIZE} blocks per request. ` +
          `Please split the content into smaller sections.`,
      );
    }

    // If adding this first-level block would exceed limit, start new batch
    if (
      currentBatch.blocks.length + newBlocks.length > BATCH_SIZE &&
      currentBatch.blocks.length > 0
    ) {
      batches.push(currentBatch);
      currentBatch = { firstLevelIds: [], blocks: [] };
    }

    // Add to current batch
    currentBatch.firstLevelIds.push(firstLevelId);
    for (const block of newBlocks) {
      currentBatch.blocks.push(block);
      usedBlockIds.add(block.block_id);
    }
  }

  // Don't forget the last batch
  if (currentBatch.blocks.length > 0) {
    batches.push(currentBatch);
  }

  // Insert each batch, advancing index for position-aware inserts.
  // When startIndex == -1 (append to end), each batch appends after the previous.
  // When startIndex >= 0, each batch starts at startIndex + count of first-level IDs already inserted.
  let currentIndex = startIndex;
  for (let i = 0; i < batches.length; i++) {
    const batch = batches[i];
    logger?.info?.(
      `feishu_doc: Inserting batch ${i + 1}/${batches.length} (${batch.blocks.length} blocks)...`,
    );

    const children = await insertBatch(
      client,
      docToken,
      batch.blocks,
      batch.firstLevelIds,
      parentBlockId,
      currentIndex,
    );
    allChildren.push(...children);

    // Advance index only for explicit positions; -1 always means "after last inserted"
    if (currentIndex !== -1) {
      currentIndex += batch.firstLevelIds.length;
    }
  }

  return { children: allChildren, skipped };
}
|