mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-28 09:33:06 +00:00
fix: remap session JSONL chunk line numbers to original source positions (#12102)
* fix: remap session JSONL chunk line numbers to original source positions

  buildSessionEntry() flattens JSONL messages into plain text before chunkMarkdown() assigns line numbers. The stored startLine/endLine values therefore reference positions in the flattened text, not in the original JSONL file.

  - Add lineMap to SessionFileEntry, tracking which JSONL line each extracted message came from
  - Add remapChunkLines() to translate chunk positions back to original JSONL lines after chunking
  - Guard the remap with source === "sessions" to prevent misapplication
  - Include lineMap in the content hash so existing sessions get re-indexed

  Fixes #12044

* memory: dedupe session JSONL parsing

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -2,7 +2,12 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { chunkMarkdown, listMemoryFiles, normalizeExtraMemoryPaths } from "./internal.js";
|
||||
import {
|
||||
chunkMarkdown,
|
||||
listMemoryFiles,
|
||||
normalizeExtraMemoryPaths,
|
||||
remapChunkLines,
|
||||
} from "./internal.js";
|
||||
|
||||
describe("normalizeExtraMemoryPaths", () => {
|
||||
it("trims, resolves, and dedupes paths", () => {
|
||||
@@ -123,3 +128,65 @@ describe("chunkMarkdown", () => {
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("remapChunkLines", () => {
  // These tests chunk plain text with chunkMarkdown() and then call
  // remapChunkLines(), which is expected to rewrite startLine/endLine on the
  // chunk objects in place (its return value is never used here).

  it("remaps chunk line numbers using a lineMap", () => {
    // Five flattened content lines; entry i holds the 1-indexed JSONL line
    // that content line i + 1 was extracted from.
    const lineMap = [4, 6, 7, 10, 13];
    const content = "User: Hello\nAssistant: Hi\nUser: Question\nAssistant: Answer\nUser: Thanks";

    const chunks = chunkMarkdown(content, { tokens: 400, overlap: 0 });
    expect(chunks.length).toBeGreaterThan(0);

    // Prior to remapping, positions are relative to the flattened content.
    const [firstChunk] = chunks;
    expect(firstChunk.startLine).toBe(1);

    remapChunkLines(chunks, lineMap);

    // Content line 1 maps to JSONL line 4; content line 5 maps to JSONL line 13.
    expect(firstChunk.startLine).toBe(4);
    const finalChunk = chunks[chunks.length - 1];
    expect(finalChunk.endLine).toBe(13);
  });

  it("preserves original line numbers when lineMap is undefined", () => {
    const content = "Line one\nLine two\nLine three";
    const chunks = chunkMarkdown(content, { tokens: 400, overlap: 0 });

    // Snapshot the boundaries so they can be compared after the call.
    const { startLine: startBefore } = chunks[0];
    const { endLine: endBefore } = chunks[chunks.length - 1];

    remapChunkLines(chunks, undefined);

    expect(chunks[0].startLine).toBe(startBefore);
    expect(chunks[chunks.length - 1].endLine).toBe(endBefore);
  });

  it("handles multi-chunk content with correct remapping", () => {
    // Ten content lines that originate from every third JSONL line, 2 through 29.
    const lineMap = [2, 5, 8, 11, 14, 17, 20, 23, 26, 29];

    // Alternate user / assistant turns, one per lineMap entry.
    const content = lineMap
      .map((_, index) => {
        const isUserTurn = index % 2 === 0;
        return isUserTurn ? `User: Message ${index}` : `Assistant: Reply ${index}`;
      })
      .join("\n");

    // A tiny token budget forces the content to be split into several chunks.
    const chunks = chunkMarkdown(content, { tokens: 10, overlap: 0 });
    expect(chunks.length).toBeGreaterThan(1);

    remapChunkLines(chunks, lineMap);

    // The outer boundaries must land on the first and last JSONL lines.
    expect(chunks[0].startLine).toBe(2);
    expect(chunks[chunks.length - 1].endLine).toBe(29);

    // No chunk may end before it starts once remapped.
    chunks.forEach(({ startLine, endLine }) => {
      expect(startLine).toBeLessThanOrEqual(endLine);
    });
  });
});
|
||||
|
||||
Reference in New Issue
Block a user