mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:20:43 +00:00
test: streamline memory and tts suites
This commit is contained in:
@@ -1,10 +1,23 @@
|
||||
import fsSync from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Replace the real `detectMime` export of the mime module with a stub that
// decides purely from the file extension: `.png` -> "image/png",
// `.wav` -> "audio/wav", anything else (or a missing filePath) -> undefined.
// NOTE(review): presumably this keeps the suite from running real content
// sniffing on the tiny placeholder buffers the tests write — confirm.
vi.mock("../../../../src/media/mime.js", () => ({
  detectMime: async (opts: { filePath?: string }) => {
    if (opts.filePath?.endsWith(".png")) {
      return "image/png";
    }
    if (opts.filePath?.endsWith(".wav")) {
      return "audio/wav";
    }
    // Unknown extension: report no MIME type.
    return undefined;
  },
}));
|
||||
|
||||
import {
|
||||
buildMultimodalChunkForIndexing,
|
||||
buildFileEntry,
|
||||
buildMultimodalChunkForIndexing,
|
||||
chunkMarkdown,
|
||||
isMemoryPath,
|
||||
listMemoryFiles,
|
||||
@@ -20,7 +33,7 @@ let sharedTempRoot = "";
|
||||
let sharedTempId = 0;
|
||||
|
||||
beforeAll(() => {
|
||||
sharedTempRoot = fsSync.mkdtempSync(path.join(os.tmpdir(), "memory-host-sdk-tests-"));
|
||||
sharedTempRoot = fsSync.mkdtempSync(path.join(os.tmpdir(), "memory-host-sdk-package-tests-"));
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
@@ -38,210 +51,63 @@ function setupTempDirLifecycle(prefix: string): () => string {
|
||||
return () => tmpDir;
|
||||
}
|
||||
|
||||
describe("normalizeExtraMemoryPaths", () => {
|
||||
it("trims, resolves, and dedupes paths", () => {
|
||||
const multimodal: MemoryMultimodalSettings = {
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"],
|
||||
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||
};
|
||||
|
||||
describe("memory host SDK package internals", () => {
|
||||
const getTmpDir = setupTempDirLifecycle("memory-package-");
|
||||
|
||||
it("normalizes additional memory paths", () => {
|
||||
const workspaceDir = path.join(os.tmpdir(), "memory-test-workspace");
|
||||
const absPath = path.resolve(path.sep, "shared-notes");
|
||||
const result = normalizeExtraMemoryPaths(workspaceDir, [
|
||||
" notes ",
|
||||
"./notes",
|
||||
absPath,
|
||||
absPath,
|
||||
"",
|
||||
]);
|
||||
expect(result).toEqual([path.resolve(workspaceDir, "notes"), absPath]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("listMemoryFiles", () => {
|
||||
const getTmpDir = setupTempDirLifecycle("memory-test-");
|
||||
const multimodal: MemoryMultimodalSettings = {
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"],
|
||||
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||
};
|
||||
|
||||
it("includes files from additional paths (directory)", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
const extraDir = path.join(tmpDir, "extra-notes");
|
||||
fsSync.mkdirSync(extraDir, { recursive: true });
|
||||
fsSync.writeFileSync(path.join(extraDir, "note1.md"), "# Note 1");
|
||||
fsSync.writeFileSync(path.join(extraDir, "note2.md"), "# Note 2");
|
||||
fsSync.writeFileSync(path.join(extraDir, "ignore.txt"), "Not a markdown file");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [extraDir]);
|
||||
expect(files).toHaveLength(3);
|
||||
expect(files.some((file) => file.endsWith("MEMORY.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("note1.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("note2.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("ignore.txt"))).toBe(false);
|
||||
expect(
|
||||
normalizeExtraMemoryPaths(workspaceDir, [" notes ", "./notes", absPath, absPath, ""]),
|
||||
).toEqual([path.resolve(workspaceDir, "notes"), absPath]);
|
||||
});
|
||||
|
||||
it("includes files from additional paths (single file)", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
const singleFile = path.join(tmpDir, "standalone.md");
|
||||
fsSync.writeFileSync(singleFile, "# Standalone");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [singleFile]);
|
||||
expect(files).toHaveLength(2);
|
||||
expect(files.some((file) => file.endsWith("standalone.md"))).toBe(true);
|
||||
});
|
||||
|
||||
it("ignores lowercase root memory.md when canonical MEMORY.md is absent", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "memory.md"), "# Legacy memory");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [path.join(tmpDir, "memory.md")]);
|
||||
|
||||
expect(files).toEqual([]);
|
||||
});
|
||||
|
||||
it("prefers MEMORY.md when both root files exist", async () => {
|
||||
it("lists canonical markdown and enabled multimodal files", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
fsSync.writeFileSync(path.join(tmpDir, "memory.md"), "# Legacy memory");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [path.join(tmpDir, "memory.md"), tmpDir]);
|
||||
|
||||
expect(files).toEqual([path.join(tmpDir, "MEMORY.md")]);
|
||||
});
|
||||
|
||||
it("skips root-memory repair backups from extra workspace paths", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
const repairDir = path.join(tmpDir, ".openclaw-repair", "root-memory", "2026-04-23");
|
||||
fsSync.mkdirSync(repairDir, { recursive: true });
|
||||
fsSync.writeFileSync(path.join(repairDir, "memory.md"), "# Archived legacy memory");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [tmpDir]);
|
||||
|
||||
expect(files).toHaveLength(1);
|
||||
expect(files[0]).toBe(path.join(tmpDir, "MEMORY.md"));
|
||||
});
|
||||
|
||||
it("handles relative paths in additional paths", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
const extraDir = path.join(tmpDir, "subdir");
|
||||
fsSync.mkdirSync(extraDir, { recursive: true });
|
||||
fsSync.writeFileSync(path.join(extraDir, "nested.md"), "# Nested");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, ["subdir"]);
|
||||
expect(files).toHaveLength(2);
|
||||
expect(files.some((file) => file.endsWith("nested.md"))).toBe(true);
|
||||
});
|
||||
|
||||
it("ignores non-existent additional paths", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, ["/does/not/exist"]);
|
||||
expect(files).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("ignores symlinked files and directories", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
const extraDir = path.join(tmpDir, "extra");
|
||||
fsSync.mkdirSync(extraDir, { recursive: true });
|
||||
fsSync.writeFileSync(path.join(extraDir, "note.md"), "# Note");
|
||||
|
||||
const targetFile = path.join(tmpDir, "target.md");
|
||||
fsSync.writeFileSync(targetFile, "# Target");
|
||||
const linkFile = path.join(extraDir, "linked.md");
|
||||
|
||||
const targetDir = path.join(tmpDir, "target-dir");
|
||||
fsSync.mkdirSync(targetDir, { recursive: true });
|
||||
fsSync.writeFileSync(path.join(targetDir, "nested.md"), "# Nested");
|
||||
const linkDir = path.join(tmpDir, "linked-dir");
|
||||
|
||||
let symlinksOk = true;
|
||||
try {
|
||||
fsSync.symlinkSync(targetFile, linkFile, "file");
|
||||
fsSync.symlinkSync(targetDir, linkDir, "dir");
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException).code;
|
||||
if (code === "EPERM" || code === "EACCES") {
|
||||
symlinksOk = false;
|
||||
} else {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [extraDir, linkDir]);
|
||||
expect(files.some((file) => file.endsWith("note.md"))).toBe(true);
|
||||
if (symlinksOk) {
|
||||
expect(files.some((file) => file.endsWith("linked.md"))).toBe(false);
|
||||
expect(files.some((file) => file.endsWith("nested.md"))).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it("dedupes overlapping extra paths that resolve to the same file", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
fsSync.writeFileSync(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
const files = await listMemoryFiles(tmpDir, [tmpDir, ".", path.join(tmpDir, "MEMORY.md")]);
|
||||
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
|
||||
expect(memoryMatches).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("includes image and audio files from extra paths when multimodal is enabled", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const extraDir = path.join(tmpDir, "media");
|
||||
fsSync.mkdirSync(extraDir, { recursive: true });
|
||||
fsSync.writeFileSync(path.join(extraDir, "diagram.png"), Buffer.from("png"));
|
||||
fsSync.writeFileSync(path.join(extraDir, "note.wav"), Buffer.from("wav"));
|
||||
fsSync.writeFileSync(path.join(extraDir, "ignore.bin"), Buffer.from("bin"));
|
||||
fsSync.writeFileSync(path.join(extraDir, "ignore.txt"), "ignored");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [extraDir], multimodal);
|
||||
expect(files.some((file) => file.endsWith("diagram.png"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("note.wav"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("ignore.bin"))).toBe(false);
|
||||
const files = await listMemoryFiles(
|
||||
tmpDir,
|
||||
[path.join(tmpDir, "memory.md"), extraDir],
|
||||
multimodal,
|
||||
);
|
||||
|
||||
expect(files.map((file) => path.relative(tmpDir, file)).toSorted()).toEqual([
|
||||
"MEMORY.md",
|
||||
path.join("extra", "diagram.png"),
|
||||
path.join("extra", "note.md"),
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isMemoryPath", () => {
|
||||
it("allows explicit access to top-level dreams.md", () => {
|
||||
it("keeps package-specific dreams path casing", () => {
|
||||
expect(isMemoryPath("dreams.md")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildFileEntry", () => {
|
||||
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
|
||||
const multimodal: MemoryMultimodalSettings = {
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"],
|
||||
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||
};
|
||||
|
||||
it("returns null when the file disappears before reading", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "ghost.md");
|
||||
fsSync.writeFileSync(target, "ghost", "utf-8");
|
||||
fsSync.rmSync(target);
|
||||
const entry = await buildFileEntry(target, tmpDir);
|
||||
expect(entry).toBeNull();
|
||||
expect(isMemoryPath("DREAMS.md")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns metadata when the file exists", async () => {
|
||||
it("builds markdown and multimodal file entries", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "note.md");
|
||||
fsSync.writeFileSync(target, "hello", "utf-8");
|
||||
const entry = await buildFileEntry(target, tmpDir);
|
||||
expect(entry).not.toBeNull();
|
||||
expect(entry?.path).toBe("note.md");
|
||||
expect(entry?.size).toBeGreaterThan(0);
|
||||
});
|
||||
const notePath = path.join(tmpDir, "note.md");
|
||||
const imagePath = path.join(tmpDir, "diagram.png");
|
||||
fsSync.writeFileSync(notePath, "hello", "utf-8");
|
||||
fsSync.writeFileSync(imagePath, Buffer.from("png"));
|
||||
|
||||
it("returns multimodal metadata for eligible image files", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "diagram.png");
|
||||
fsSync.writeFileSync(target, Buffer.from("png"));
|
||||
const note = await buildFileEntry(notePath, tmpDir);
|
||||
const image = await buildFileEntry(imagePath, tmpDir, multimodal);
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
|
||||
expect(entry).toMatchObject({
|
||||
expect(note).toMatchObject({ path: "note.md", kind: "markdown" });
|
||||
expect(image).toMatchObject({
|
||||
path: "diagram.png",
|
||||
kind: "multimodal",
|
||||
modality: "image",
|
||||
@@ -250,228 +116,51 @@ describe("buildFileEntry", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("builds a multimodal chunk lazily for indexing", async () => {
|
||||
it("builds multimodal chunks lazily and rejects changed files", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "diagram.png");
|
||||
fsSync.writeFileSync(target, Buffer.from("png"));
|
||||
const imagePath = path.join(tmpDir, "diagram.png");
|
||||
fsSync.writeFileSync(imagePath, Buffer.from("png"));
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
const entry = await buildFileEntry(imagePath, tmpDir, multimodal);
|
||||
const built = await buildMultimodalChunkForIndexing(entry!);
|
||||
|
||||
expect(built?.chunk.embeddingInput?.parts).toEqual([
|
||||
{ type: "text", text: "Image file: diagram.png" },
|
||||
expect.objectContaining({ type: "inline-data", mimeType: "image/png" }),
|
||||
]);
|
||||
expect(built?.structuredInputBytes).toBeGreaterThan(0);
|
||||
|
||||
fsSync.writeFileSync(imagePath, Buffer.alloc(entry!.size + 32, 1));
|
||||
await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it("skips lazy multimodal indexing when file state changes after discovery", async () => {
|
||||
for (const testCase of [
|
||||
{
|
||||
name: "grows",
|
||||
mutate: (target: string, entrySize: number) => {
|
||||
fsSync.writeFileSync(target, Buffer.alloc(entrySize + 32, 1));
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bytes change",
|
||||
mutate: (target: string) => {
|
||||
fsSync.writeFileSync(target, Buffer.from("gif"));
|
||||
},
|
||||
},
|
||||
] as const) {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, `${testCase.name}.png`);
|
||||
fsSync.writeFileSync(target, Buffer.from("png"));
|
||||
it("chunks mixed text and preserves surrogate pairs", () => {
|
||||
const mixed = Array.from(
|
||||
{ length: 30 },
|
||||
(_, index) => `Line ${index}: 这是中英文混合的测试内容 with English`,
|
||||
).join("\n");
|
||||
const mixedChunks = chunkMarkdown(mixed, { tokens: 50, overlap: 0 });
|
||||
expect(mixedChunks.length).toBeGreaterThan(1);
|
||||
expect(mixedChunks.map((chunk) => chunk.text).join("\n")).toContain("Line 29");
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
expect(entry, testCase.name).not.toBeNull();
|
||||
testCase.mutate(target, entry!.size);
|
||||
|
||||
await expect(buildMultimodalChunkForIndexing(entry!), testCase.name).resolves.toBeNull();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkMarkdown", () => {
|
||||
it("splits overly long lines into max-sized chunks", () => {
|
||||
const chunkTokens = 400;
|
||||
const maxChars = chunkTokens * 4;
|
||||
const content = "a".repeat(maxChars * 3 + 25);
|
||||
const chunks = chunkMarkdown(content, { tokens: chunkTokens, overlap: 0 });
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(maxChars);
|
||||
}
|
||||
});
|
||||
|
||||
it("produces more chunks for CJK text than for equal-length ASCII text", () => {
|
||||
// CJK chars ≈ 1 token each; ASCII chars ≈ 0.25 tokens each.
|
||||
// For the same raw character count, CJK content should produce more chunks
|
||||
// because each character "weighs" ~4× more in token estimation.
|
||||
const chunkTokens = 50;
|
||||
|
||||
// 400 ASCII chars → ~100 tokens → fits in ~2 chunks
|
||||
const asciiLines = Array.from({ length: 20 }, () => "a".repeat(20)).join("\n");
|
||||
const asciiChunks = chunkMarkdown(asciiLines, { tokens: chunkTokens, overlap: 0 });
|
||||
|
||||
// 400 CJK chars → ~400 tokens → needs ~8 chunks
|
||||
const cjkLines = Array.from({ length: 20 }, () => "你".repeat(20)).join("\n");
|
||||
const cjkChunks = chunkMarkdown(cjkLines, { tokens: chunkTokens, overlap: 0 });
|
||||
|
||||
expect(cjkChunks.length).toBeGreaterThan(asciiChunks.length);
|
||||
});
|
||||
|
||||
it("respects token budget for Chinese text", () => {
|
||||
// With tokens=100, each CJK char ≈ 1 token, so chunks should hold ~100 CJK chars.
|
||||
const chunkTokens = 100;
|
||||
const lines: string[] = [];
|
||||
for (let i = 0; i < 50; i++) {
|
||||
lines.push("这是一个测试句子用来验证分块逻辑是否正确处理中文文本内容");
|
||||
}
|
||||
const content = lines.join("\n");
|
||||
const chunks = chunkMarkdown(content, { tokens: chunkTokens, overlap: 0 });
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
// Each chunk's CJK content should not vastly exceed the token budget.
|
||||
// With CJK-aware estimation, each char ≈ 1 token, so chunk text length
|
||||
// (in CJK chars) should be roughly <= tokens budget (with some tolerance
|
||||
// for line boundaries).
|
||||
for (const chunk of chunks) {
|
||||
// Count actual CJK characters in the chunk
|
||||
const cjkCount = (chunk.text.match(/[\u4e00-\u9fff]/g) ?? []).length;
|
||||
// Allow 2× tolerance for line-boundary rounding
|
||||
expect(cjkCount).toBeLessThanOrEqual(chunkTokens * 2);
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps English chunking behavior unchanged", () => {
|
||||
const chunkTokens = 100;
|
||||
const maxChars = chunkTokens * 4; // 400 chars
|
||||
const content = "hello world this is a test. ".repeat(50);
|
||||
const chunks = chunkMarkdown(content, { tokens: chunkTokens, overlap: 0 });
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(maxChars);
|
||||
}
|
||||
});
|
||||
|
||||
it("handles mixed CJK and ASCII content correctly", () => {
|
||||
const chunkTokens = 50;
|
||||
const lines: string[] = [];
|
||||
for (let i = 0; i < 30; i++) {
|
||||
lines.push(`Line ${i}: 这是中英文混合的测试内容 with some English text`);
|
||||
}
|
||||
const content = lines.join("\n");
|
||||
const chunks = chunkMarkdown(content, { tokens: chunkTokens, overlap: 0 });
|
||||
// Should produce multiple chunks and not crash
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
// Verify all content is preserved
|
||||
const reconstructed = chunks.map((c) => c.text).join("\n");
|
||||
// Due to overlap=0, the concatenated chunks should cover all lines
|
||||
expect(reconstructed).toContain("Line 0");
|
||||
expect(reconstructed).toContain("Line 29");
|
||||
});
|
||||
|
||||
it("splits very long CJK lines into budget-sized segments", () => {
|
||||
// A single line of 2000 CJK characters (no newlines).
|
||||
// With tokens=200, each CJK char ≈ 1 token.
|
||||
const longCjkLine = "中".repeat(2000);
|
||||
const chunks = chunkMarkdown(longCjkLine, { tokens: 200, overlap: 0 });
|
||||
expect(chunks.length).toBeGreaterThanOrEqual(8);
|
||||
for (const chunk of chunks) {
|
||||
const cjkCount = (chunk.text.match(/[\u4E00-\u9FFF]/g) ?? []).length;
|
||||
expect(cjkCount).toBeLessThanOrEqual(200 * 2);
|
||||
}
|
||||
});
|
||||
it("does not break surrogate pairs when splitting long CJK lines", () => {
|
||||
// "𠀀" (U+20000) is a surrogate pair: 2 UTF-16 code units per character.
|
||||
// With an odd token budget, the fine-split must not cut inside a pair.
|
||||
const surrogateChar = "\u{20000}"; // 𠀀
|
||||
const longLine = surrogateChar.repeat(120);
|
||||
const chunks = chunkMarkdown(longLine, { tokens: 31, overlap: 0 });
|
||||
for (const chunk of chunks) {
|
||||
// No chunk should contain the Unicode replacement character U+FFFD,
|
||||
// which would indicate a broken surrogate pair.
|
||||
const surrogateChar = "\u{20000}";
|
||||
const surrogateChunks = chunkMarkdown(surrogateChar.repeat(120), {
|
||||
tokens: 31,
|
||||
overlap: 0,
|
||||
});
|
||||
for (const chunk of surrogateChunks) {
|
||||
expect(chunk.text).not.toContain("\uFFFD");
|
||||
// Every character in the chunk should be a valid string (no lone surrogates).
|
||||
for (let i = 0; i < chunk.text.length; i += 1) {
|
||||
const code = chunk.text.charCodeAt(i);
|
||||
if (code >= 0xd800 && code <= 0xdbff) {
|
||||
// High surrogate must be followed by a low surrogate
|
||||
const next = chunk.text.charCodeAt(i + 1);
|
||||
expect(next).toBeGreaterThanOrEqual(0xdc00);
|
||||
expect(next).toBeLessThanOrEqual(0xdfff);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
it("does not over-split long Latin lines (backward compat)", () => {
|
||||
// 2000 ASCII chars / 800 maxChars -> about 3 segments, not 10 tiny ones.
|
||||
const longLatinLine = "a".repeat(2000);
|
||||
const chunks = chunkMarkdown(longLatinLine, { tokens: 200, overlap: 0 });
|
||||
expect(chunks.length).toBeLessThanOrEqual(5);
|
||||
});
|
||||
});
|
||||
|
||||
describe("remapChunkLines", () => {
|
||||
it("remaps chunk line numbers using a lineMap", () => {
|
||||
// Simulate 5 content lines that came from JSONL lines [4, 6, 7, 10, 13] (1-indexed)
|
||||
it("remaps chunk lines using JSONL source line maps", () => {
|
||||
const lineMap = [4, 6, 7, 10, 13];
|
||||
|
||||
// Create chunks from content that has 5 lines
|
||||
const content = "User: Hello\nAssistant: Hi\nUser: Question\nAssistant: Answer\nUser: Thanks";
|
||||
const chunks = chunkMarkdown(content, { tokens: 400, overlap: 0 });
|
||||
expect(chunks.length).toBeGreaterThan(0);
|
||||
|
||||
// Before remapping, startLine/endLine reference content line numbers (1-indexed)
|
||||
expect(chunks[0].startLine).toBe(1);
|
||||
|
||||
// Remap
|
||||
remapChunkLines(chunks, lineMap);
|
||||
|
||||
// After remapping, line numbers should reference original JSONL lines
|
||||
// Content line 1 → JSONL line 4, content line 5 → JSONL line 13
|
||||
expect(chunks[0].startLine).toBe(4);
|
||||
const lastChunk = chunks[chunks.length - 1];
|
||||
expect(lastChunk.endLine).toBe(13);
|
||||
});
|
||||
|
||||
it("preserves original line numbers when lineMap is undefined", () => {
|
||||
const content = "Line one\nLine two\nLine three";
|
||||
const chunks = chunkMarkdown(content, { tokens: 400, overlap: 0 });
|
||||
const originalStart = chunks[0].startLine;
|
||||
const originalEnd = chunks[chunks.length - 1].endLine;
|
||||
|
||||
remapChunkLines(chunks, undefined);
|
||||
|
||||
expect(chunks[0].startLine).toBe(originalStart);
|
||||
expect(chunks[chunks.length - 1].endLine).toBe(originalEnd);
|
||||
});
|
||||
|
||||
it("handles multi-chunk content with correct remapping", () => {
|
||||
// Use small chunk size to force multiple chunks
|
||||
// lineMap: 10 content lines from JSONL lines [2, 5, 8, 11, 14, 17, 20, 23, 26, 29]
|
||||
const lineMap = [2, 5, 8, 11, 14, 17, 20, 23, 26, 29];
|
||||
const contentLines = lineMap.map((_, i) =>
|
||||
i % 2 === 0 ? `User: Message ${i}` : `Assistant: Reply ${i}`,
|
||||
const chunks = chunkMarkdown(
|
||||
"User: Hello\nAssistant: Hi\nUser: Question\nAssistant: Answer\nUser: Thanks",
|
||||
{ tokens: 400, overlap: 0 },
|
||||
);
|
||||
const content = contentLines.join("\n");
|
||||
|
||||
// Use very small chunk size to force splitting
|
||||
const chunks = chunkMarkdown(content, { tokens: 10, overlap: 0 });
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
|
||||
remapChunkLines(chunks, lineMap);
|
||||
|
||||
// First chunk should start at JSONL line 2
|
||||
expect(chunks[0].startLine).toBe(2);
|
||||
// Last chunk should end at JSONL line 29
|
||||
expect(chunks[chunks.length - 1].endLine).toBe(29);
|
||||
|
||||
// Each chunk's startLine should be ≤ its endLine
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.startLine).toBeLessThanOrEqual(chunk.endLine);
|
||||
}
|
||||
expect(chunks[0].startLine).toBe(4);
|
||||
expect(chunks[chunks.length - 1].endLine).toBe(13);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -3,6 +3,19 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Replace the real `detectMime` export of the mime module with a stub that
// decides purely from the file extension: `.png` -> "image/png",
// `.wav` -> "audio/wav", anything else (or a missing filePath) -> undefined.
// NOTE(review): presumably this keeps the suite from running real content
// sniffing on the tiny placeholder buffers the tests write — confirm.
vi.mock("../../media/mime.js", () => ({
  detectMime: async (opts: { filePath?: string }) => {
    if (opts.filePath?.endsWith(".png")) {
      return "image/png";
    }
    if (opts.filePath?.endsWith(".wav")) {
      return "audio/wav";
    }
    // Unknown extension: report no MIME type.
    return undefined;
  },
}));
|
||||
|
||||
import {
|
||||
buildMultimodalChunkForIndexing,
|
||||
buildFileEntry,
|
||||
|
||||
@@ -1,13 +1,55 @@
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { withTempHome } from "../../test/helpers/temp-home.js";
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { resolveStatusTtsSnapshot } from "./status-config.js";
|
||||
|
||||
// Root temp directory shared by every case in this file; assigned in beforeAll.
let fixtureRoot = "";
// Counter that withStatusTempHome increments to name each case's subdirectory.
let fixtureId = 0;

beforeAll(() => {
  // One mkdtemp for the whole file; individual cases create subdirectories under it.
  fixtureRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-tts-status-"));
});

afterAll(() => {
  // fixtureRoot is still "" if beforeAll never assigned it; skip removal then.
  if (fixtureRoot) {
    fs.rmSync(fixtureRoot, { recursive: true, force: true });
  }
});
|
||||
|
||||
/**
 * Runs `run` with the home-related environment variables pointed at a fresh
 * per-case directory under `fixtureRoot`, then restores the previous values.
 *
 * While `run` executes:
 * - `HOME` and `USERPROFILE` are set to the case directory,
 * - `OPENCLAW_HOME` is unset,
 * - `OPENCLAW_STATE_DIR` is set to `<case directory>/.openclaw`.
 *
 * @param run - Callback that receives the absolute path of the case directory.
 * @returns A promise that settles after `run` settles and the environment has
 *   been restored; a rejection from `run` propagates to the caller.
 */
async function withStatusTempHome(run: (home: string) => Promise<void>): Promise<void> {
  const home = path.join(fixtureRoot, `case-${fixtureId++}`);

  // Snapshot every variable touched below (and any the callback may change
  // among these keys) before mutating anything.
  const trackedKeys = ["HOME", "USERPROFILE", "OPENCLAW_HOME", "OPENCLAW_STATE_DIR"] as const;
  const snapshot = trackedKeys.map((key) => [key, process.env[key]] as const);

  fs.mkdirSync(home, { recursive: true });
  process.env.HOME = home;
  process.env.USERPROFILE = home;
  delete process.env.OPENCLAW_HOME;
  process.env.OPENCLAW_STATE_DIR = path.join(home, ".openclaw");

  try {
    await run(home);
  } finally {
    // Restore in the same order the values were captured, even if `run` threw.
    for (const [key, value] of snapshot) {
      restoreEnv(key, value);
    }
  }
}
|
||||
|
||||
/**
 * Puts a single environment variable back to a previously captured state.
 *
 * @param key - Name of the variable in `process.env`.
 * @param value - The captured value. `undefined` means the variable was not
 *   set when captured, so it is removed again; any string (including "") is
 *   written back as-is.
 */
function restoreEnv(key: string, value: string | undefined): void {
  if (value === undefined) {
    // Node coerces assigned env values to strings, so assigning `undefined`
    // would leave the literal text "undefined" behind; delete the key instead.
    delete process.env[key];
    return;
  }
  process.env[key] = value;
}
|
||||
|
||||
describe("resolveStatusTtsSnapshot", () => {
|
||||
it("uses prefs overrides without loading speech providers", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
await withStatusTempHome(async (home) => {
|
||||
const prefsPath = path.join(home, ".openclaw", "settings", "tts.json");
|
||||
fs.mkdirSync(path.dirname(prefsPath), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
@@ -42,7 +84,7 @@ describe("resolveStatusTtsSnapshot", () => {
|
||||
});
|
||||
|
||||
it("reports auto provider when tts is on without an explicit provider", async () => {
|
||||
await withTempHome(async () => {
|
||||
await withStatusTempHome(async () => {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
@@ -63,42 +105,40 @@ describe("resolveStatusTtsSnapshot", () => {
|
||||
});
|
||||
|
||||
it("derives the default prefs path from OPENCLAW_CONFIG_PATH when set", async () => {
|
||||
await withTempHome(
|
||||
async (home) => {
|
||||
const stateDir = path.join(home, ".openclaw-dev");
|
||||
const prefsPath = path.join(stateDir, "settings", "tts.json");
|
||||
fs.mkdirSync(path.dirname(prefsPath), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
prefsPath,
|
||||
JSON.stringify({
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openai",
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
vi.stubEnv("OPENCLAW_CONFIG_PATH", path.join(stateDir, "openclaw.json"));
|
||||
try {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
await withStatusTempHome(async (home) => {
|
||||
const stateDir = path.join(home, ".openclaw-dev");
|
||||
const prefsPath = path.join(stateDir, "settings", "tts.json");
|
||||
fs.mkdirSync(path.dirname(prefsPath), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
prefsPath,
|
||||
JSON.stringify({
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openai",
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
} finally {
|
||||
vi.unstubAllEnvs();
|
||||
}
|
||||
},
|
||||
{ env: { OPENCLAW_STATE_DIR: undefined } },
|
||||
);
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
delete process.env.OPENCLAW_STATE_DIR;
|
||||
vi.stubEnv("OPENCLAW_CONFIG_PATH", path.join(stateDir, "openclaw.json"));
|
||||
try {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
provider: "openai",
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
} finally {
|
||||
vi.unstubAllEnvs();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,18 +1,31 @@
|
||||
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { afterAll, beforeAll, afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { shouldAttemptTtsPayload } from "./tts-config.js";
|
||||
|
||||
describe("shouldAttemptTtsPayload", () => {
|
||||
let originalPrefsPath: string | undefined;
|
||||
let root = "";
|
||||
let dir: string;
|
||||
let prefsPath: string;
|
||||
let caseId = 0;
|
||||
|
||||
beforeAll(() => {
|
||||
root = mkdtempSync(path.join(tmpdir(), "openclaw-tts-config-"));
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
if (root) {
|
||||
rmSync(root, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
originalPrefsPath = process.env.OPENCLAW_TTS_PREFS;
|
||||
dir = mkdtempSync(path.join(tmpdir(), "openclaw-tts-config-"));
|
||||
dir = path.join(root, `case-${caseId++}`);
|
||||
mkdirSync(dir, { recursive: true });
|
||||
prefsPath = path.join(dir, "tts.json");
|
||||
process.env.OPENCLAW_TTS_PREFS = prefsPath;
|
||||
});
|
||||
@@ -23,7 +36,6 @@ describe("shouldAttemptTtsPayload", () => {
|
||||
} else {
|
||||
process.env.OPENCLAW_TTS_PREFS = originalPrefsPath;
|
||||
}
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("skips TTS when config, prefs, and session state leave auto mode off", () => {
|
||||
|
||||
Reference in New Issue
Block a user