diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a4fab28a7c..02c0ca00893 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
 - Dreaming/cron: wake managed dreaming jobs immediately instead of waiting for the next heartbeat, so scheduled dreaming runs start when the cron fires. (#65053) Thanks @l0cka and @vincentkoc.
 - QA/packaging: stop packaged QA helpers from crashing when optional scenario execution config is unavailable, so npm distributions can skip the repo-only scenario pack without breaking completion-cache and startup paths. (#65118) Thanks @EdderTalmor and @vincentkoc.
 - Media/audio transcription: surface the real provider failure when every audio transcription attempt fails, so status output and the CLI stop collapsing those errors into generic skips. (#65096) Thanks @l0cka and @vincentkoc.
+- Memory/wiki: preserve Unicode letters, digits, and combining marks in wiki slugs and contradiction clustering, and cap Unicode filename segments to safe byte lengths so non-ASCII titles stop collapsing or overflowing path limits. (#64742) Thanks @zhouhe-xydt and @vincentkoc.
 
 ## 2026.4.11
 
diff --git a/extensions/memory-wiki/src/bridge.test.ts b/extensions/memory-wiki/src/bridge.test.ts
index f6e3b888c86..d56b5240f60 100644
--- a/extensions/memory-wiki/src/bridge.test.ts
+++ b/extensions/memory-wiki/src/bridge.test.ts
@@ -299,4 +299,47 @@ describe("syncMemoryWikiBridgeSources", () => {
       code: "ENOENT",
     });
   });
+
+  it("caps composed bridge source filenames to the filesystem component limit", async () => {
+    const workspaceDir = await createBridgeWorkspace(`${"漢".repeat(50)}-workspace`);
+    const { rootDir: vaultDir, config } = await createVault({
+      rootDir: nextCaseRoot("long-bridge-vault"),
+      config: {
+        vaultMode: "bridge",
+        bridge: {
+          enabled: true,
+          readMemoryArtifacts: true,
+          indexDailyNotes: true,
+        },
+      },
+    });
+
+    const relativePath = `${"語".repeat(50)}/${"録".repeat(50)}.md`;
+    const absolutePath = path.join(workspaceDir, relativePath);
+    await fs.mkdir(path.dirname(absolutePath), { recursive: true });
+    await fs.writeFile(absolutePath, "# Deep Unicode Note\n", "utf8");
+    registerBridgeArtifacts([
+      {
+        kind: "daily-note",
+        workspaceDir,
+        relativePath,
+        absolutePath,
+        agentIds: ["main"],
+        contentType: "markdown",
+      },
+    ]);
+
+    const appConfig: OpenClawConfig = {
+      agents: {
+        list: [{ id: "main", default: true, workspace: workspaceDir }],
+      },
+    };
+
+    const result = await syncMemoryWikiBridgeSources({ config, appConfig });
+    const pagePath = result.pagePaths[0] ?? "";
+
+    expect(result.importedCount).toBe(1);
+    expect(Buffer.byteLength(path.basename(pagePath))).toBeLessThanOrEqual(255);
+    await expect(fs.stat(path.join(vaultDir, pagePath))).resolves.toBeTruthy();
+  });
 });
diff --git a/extensions/memory-wiki/src/bridge.ts b/extensions/memory-wiki/src/bridge.ts
index e1c40e8e964..b81731ddcc9 100644
--- a/extensions/memory-wiki/src/bridge.ts
+++ b/extensions/memory-wiki/src/bridge.ts
@@ -8,7 +8,12 @@ import {
 import type { OpenClawConfig } from "../api.js";
 import type { ResolvedMemoryWikiConfig } from "./config.js";
 import { appendMemoryWikiLog } from "./log.js";
-import { renderMarkdownFence, renderWikiMarkdown, slugifyWikiSegment } from "./markdown.js";
+import {
+  createWikiPageFilename,
+  renderMarkdownFence,
+  renderWikiMarkdown,
+  slugifyWikiSegment,
+} from "./markdown.js";
 import { writeImportedSourcePage } from "./source-page-shared.js";
 import { resolveArtifactKey } from "./source-path-shared.js";
 import {
@@ -110,11 +115,10 @@ function resolveBridgePagePath(params: { workspaceDir: string; relativePath: str
   const artifactHash = createHash("sha1").update(params.relativePath).digest("hex");
   const workspaceSlug = `${workspaceBaseSlug}-${workspaceHash.slice(0, 8)}`;
   const artifactSlug = `${artifactBaseSlug}-${artifactHash.slice(0, 8)}`;
+  const fileName = createWikiPageFilename(`bridge-${workspaceSlug}-${artifactSlug}`);
   return {
     pageId: `source.bridge.${workspaceSlug}.${artifactSlug}`,
-    pagePath: path
-      .join("sources", `bridge-${workspaceSlug}-${artifactSlug}.md`)
-      .replace(/\\/g, "/"),
+    pagePath: path.join("sources", fileName).replace(/\\/g, "/"),
     workspaceSlug,
     artifactSlug,
   };
diff --git a/extensions/memory-wiki/src/claim-health.test.ts b/extensions/memory-wiki/src/claim-health.test.ts
new file mode 100644
index 00000000000..521b6f9985e
--- /dev/null
+++ b/extensions/memory-wiki/src/claim-health.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vitest";
+import { buildPageContradictionClusters } from "./claim-health.js";
+import type { WikiPageSummary } from "./markdown.js";
+
+function createPage(params: {
+  relativePath: string;
+  title: string;
+  contradictions: string[];
+}): WikiPageSummary {
+  return {
+    absolutePath: `/tmp/${params.relativePath}`,
+    relativePath: params.relativePath,
+    kind: "entity",
+    title: params.title,
+    sourceIds: [],
+    linkTargets: [],
+    claims: [],
+    contradictions: params.contradictions,
+    questions: [],
+  };
+}
+
+describe("buildPageContradictionClusters", () => {
+  it("clusters Unicode contradiction notes that differ only by punctuation", () => {
+    const clusters = buildPageContradictionClusters([
+      createPage({
+        relativePath: "entities/alpha.md",
+        title: "Alpha",
+        contradictions: ["模型冲突:版本 A"],
+      }),
+      createPage({
+        relativePath: "entities/beta.md",
+        title: "Beta",
+        contradictions: ["模型冲突 版本 A"],
+      }),
+    ]);
+
+    expect(clusters).toHaveLength(1);
+    expect(clusters[0]?.entries).toHaveLength(2);
+  });
+
+  it("keeps combining-mark contradiction notes in separate clusters", () => {
+    const clusters = buildPageContradictionClusters([
+      createPage({
+        relativePath: "entities/alpha.md",
+        title: "Alpha",
+        contradictions: ["किताब"],
+      }),
+      createPage({
+        relativePath: "entities/beta.md",
+        title: "Beta",
+        contradictions: ["कीताब"],
+      }),
+    ]);
+
+    expect(clusters).toHaveLength(2);
+    expect(clusters.map((cluster) => cluster.key).toSorted()).toEqual(["किताब", "कीताब"]);
+    expect(clusters.every((cluster) => cluster.entries.length === 1)).toBe(true);
+  });
+});
diff --git a/extensions/memory-wiki/src/claim-health.ts b/extensions/memory-wiki/src/claim-health.ts
index 223c2dc4acd..bf016268381 100644
--- a/extensions/memory-wiki/src/claim-health.ts
+++ b/extensions/memory-wiki/src/claim-health.ts
@@ -66,7 +66,8 @@ function normalizeClaimTextKey(text: string): string {
 
 function normalizeTextKey(text: string): string {
   return normalizeLowercaseStringOrEmpty(text)
-    .replace(/[^a-z0-9]+/g, " ")
+    // Keep Unicode letters, numbers, and combining marks; every other run becomes one space.
+    .replace(/[^\p{L}\p{N}\p{M}]+/gu, " ")
     .replace(/\s+/g, " ");
 }
 
diff --git a/extensions/memory-wiki/src/markdown.test.ts b/extensions/memory-wiki/src/markdown.test.ts
new file mode 100644
index 00000000000..39d5f95f838
--- /dev/null
+++ b/extensions/memory-wiki/src/markdown.test.ts
@@ -0,0 +1,42 @@
+import { createHash } from "node:crypto";
+import { describe, expect, it } from "vitest";
+import { createWikiPageFilename, slugifyWikiSegment } from "./markdown.js";
+
+describe("slugifyWikiSegment", () => {
+  it("preserves Unicode letters and numbers in wiki slugs", () => {
+    expect(slugifyWikiSegment("大语言模型概述")).toBe("大语言模型概述");
+    expect(slugifyWikiSegment("LLM 架构分析")).toBe("llm-架构分析");
+    expect(slugifyWikiSegment("Circuit Breaker 自動恢復")).toBe("circuit-breaker-自動恢復");
+  });
+
+  it("keeps ASCII behavior unchanged", () => {
+    expect(slugifyWikiSegment("hello world")).toBe("hello-world");
+    expect(slugifyWikiSegment("")).toBe("page");
+  });
+
+  it("retains combining marks so distinct titles do not collapse", () => {
+    expect(slugifyWikiSegment("किताब")).toBe("किताब");
+    expect(slugifyWikiSegment("कुतुब")).toBe("कुतुब");
+    expect(slugifyWikiSegment("कीताब")).toBe("कीताब");
+  });
+
+  it("caps long Unicode slugs to a safe filename byte length", () => {
+    const title = "漢".repeat(90);
+    const slug = slugifyWikiSegment(title);
+
+    expect(slug.endsWith(`-${createHash("sha1").update(title).digest("hex").slice(0, 12)}`)).toBe(
+      true,
+    );
+    expect(Buffer.byteLength(slug)).toBeLessThanOrEqual(240);
+    expect(slugifyWikiSegment(title)).toBe(slug);
+  });
+
+  it("caps composed wiki page filenames to a safe path-component length", () => {
+    const stem = `bridge-${"漢".repeat(45)}-${"語".repeat(45)}`;
+    const fileName = createWikiPageFilename(stem);
+
+    expect(fileName.endsWith(".md")).toBe(true);
+    expect(Buffer.byteLength(fileName)).toBeLessThanOrEqual(255);
+    expect(createWikiPageFilename(stem)).toBe(fileName);
+  });
+});
diff --git a/extensions/memory-wiki/src/markdown.ts b/extensions/memory-wiki/src/markdown.ts
index e152a244b99..73755711fab 100644
--- a/extensions/memory-wiki/src/markdown.ts
+++ b/extensions/memory-wiki/src/markdown.ts
@@ -1,3 +1,4 @@
+import { createHash } from "node:crypto";
 import path from "node:path";
 import {
   normalizeLowercaseStringOrEmpty,
@@ -65,13 +66,71 @@ const RELATED_BLOCK_PATTERN = new RegExp(
   `${WIKI_RELATED_START_MARKER}[\\s\\S]*?${WIKI_RELATED_END_MARKER}`,
   "g",
 );
+// Caps are counted in UTF-8 bytes (Buffer.byteLength), not in string length.
+const MAX_WIKI_SEGMENT_BYTES = 240;
+const MAX_WIKI_FILENAME_COMPONENT_BYTES = 255;
+// Number of hex digest characters appended to a capped value.
+const WIKI_SEGMENT_HASH_BYTES = 12;
+
+/** Longest prefix of `value` that fits in `maxBytes` UTF-8 bytes, cut only between code points. */
+function truncateUtf8CodePointSafe(value: string, maxBytes: number): string {
+  let result = "";
+  let bytes = 0;
+  for (const char of value) {
+    const nextBytes = Buffer.byteLength(char);
+    if (bytes + nextBytes > maxBytes) {
+      break;
+    }
+    result += char;
+    bytes += nextBytes;
+  }
+  return result;
+}
+
+/**
+ * Returns `raw` unchanged when it fits in `maxBytes`; otherwise truncates it and appends a
+ * `-<sha1 prefix>` suffix derived from the full value, so the result is stable for a given input.
+ * `fallback` replaces the stem if truncation (and trailing-dash trimming) leaves nothing.
+ */
+function capWikiValueWithHash(raw: string, maxBytes: number, fallback: string): string {
+  if (Buffer.byteLength(raw) <= maxBytes) {
+    return raw;
+  }
+  const suffix = createHash("sha1").update(raw).digest("hex").slice(0, WIKI_SEGMENT_HASH_BYTES);
+  const truncated = truncateUtf8CodePointSafe(
+    raw,
+    maxBytes - Buffer.byteLength(`-${suffix}`),
+  ).replace(/-+$/g, "");
+  return `${truncated || fallback}-${suffix}`;
+}
 
+/**
+ * Builds a slug from `raw`: normalized by `normalizeLowercaseStringOrEmpty`, with every run of
+ * characters other than Unicode letters, numbers, and marks replaced by a single `-`.
+ * Returns "page" for an empty slug and caps long slugs at MAX_WIKI_SEGMENT_BYTES.
+ */
 export function slugifyWikiSegment(raw: string): string {
   const slug = normalizeLowercaseStringOrEmpty(raw)
-    .replace(/[^a-z0-9]+/g, "-")
+    .replace(/[^\p{L}\p{N}\p{M}]+/gu, "-")
     .replace(/-+/g, "-")
     .replace(/^-+|-+$/g, "");
-  return slug || "page";
+  if (!slug) {
+    return "page";
+  }
+  return capWikiValueWithHash(slug, MAX_WIKI_SEGMENT_BYTES, "page");
+}
+
+/**
+ * Appends `extension` (a leading "." is added when missing) to `stem`. The stem is capped to
+ * MAX_WIKI_FILENAME_COMPONENT_BYTES minus the extension's byte length (never less than 1).
+ */
+export function createWikiPageFilename(stem: string, extension = ".md"): string {
+  const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
+  const maxStemBytes = Math.max(
+    1,
+    MAX_WIKI_FILENAME_COMPONENT_BYTES - Buffer.byteLength(normalizedExtension),
+  );
+  return `${capWikiValueWithHash(stem, maxStemBytes, "page")}${normalizedExtension}`;
 }
 
 export function parseWikiMarkdown(content: string): ParsedWikiMarkdown {
diff --git a/extensions/memory-wiki/src/unsafe-local.test.ts b/extensions/memory-wiki/src/unsafe-local.test.ts
index d62089de0c4..133bced73f6 100644
--- a/extensions/memory-wiki/src/unsafe-local.test.ts
+++ b/extensions/memory-wiki/src/unsafe-local.test.ts
@@ -103,4 +103,30 @@ describe("syncMemoryWikiUnsafeLocalSources", () => {
       code: "ENOENT",
     });
   });
+
+  it("caps composed unsafe-local filenames to the filesystem component limit", async () => {
+    const privateDir = await createPrivateDir(`${"漢".repeat(50)}-private`);
+    const nestedDir = path.join(privateDir, `${"語".repeat(50)}-nested`);
+    const secretPath = path.join(nestedDir, `${"録".repeat(50)}.md`);
+    await fs.mkdir(nestedDir, { recursive: true });
+    await fs.writeFile(secretPath, "# very private\n", "utf8");
+
+    const { rootDir: vaultDir, config } = await createVault({
+      rootDir: nextCaseRoot("long-unsafe-vault"),
+      config: {
+        vaultMode: "unsafe-local",
+        unsafeLocal: {
+          allowPrivateMemoryCoreAccess: true,
+          paths: [privateDir],
+        },
+      },
+    });
+
+    const result = await syncMemoryWikiUnsafeLocalSources(config);
+    const pagePath = result.pagePaths[0] ?? "";
+
+    expect(result.importedCount).toBe(1);
+    expect(Buffer.byteLength(path.basename(pagePath))).toBeLessThanOrEqual(255);
+    await expect(fs.stat(path.join(vaultDir, pagePath))).resolves.toBeTruthy();
+  });
 });
diff --git a/extensions/memory-wiki/src/unsafe-local.ts b/extensions/memory-wiki/src/unsafe-local.ts
index fd98f0af740..dc1e363b3c2 100644
--- a/extensions/memory-wiki/src/unsafe-local.ts
+++ b/extensions/memory-wiki/src/unsafe-local.ts
@@ -5,7 +5,12 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtim
 import type { BridgeMemoryWikiResult } from "./bridge.js";
 import type { ResolvedMemoryWikiConfig } from "./config.js";
 import { appendMemoryWikiLog } from "./log.js";
-import { renderMarkdownFence, renderWikiMarkdown, slugifyWikiSegment } from "./markdown.js";
+import {
+  createWikiPageFilename,
+  renderMarkdownFence,
+  renderWikiMarkdown,
+  slugifyWikiSegment,
+} from "./markdown.js";
 import { writeImportedSourcePage } from "./source-page-shared.js";
 import { resolveArtifactKey } from "./source-path-shared.js";
 import {
@@ -113,7 +118,9 @@ function resolveUnsafeLocalPagePath(params: { configuredPath: string; absolutePa
   const pageSlug = `${configuredBaseSlug}-${configuredHash}-${artifactBaseSlug}-${artifactHash}`;
   return {
     pageId: `source.unsafe-local.${pageSlug}`,
-    pagePath: path.join("sources", `unsafe-local-${pageSlug}.md`).replace(/\\/g, "/"),
+    pagePath: path
+      .join("sources", createWikiPageFilename(`unsafe-local-${pageSlug}`))
+      .replace(/\\/g, "/"),
   };
 }
 