mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:20:43 +00:00
fix(memory-wiki): support Unicode characters in slugifyWikiSegment (#64742)
* fix(memory-wiki): support Unicode characters in slugifyWikiSegment

  Replace the ASCII-only regex with a Unicode-aware regex to preserve CJK,
  Cyrillic, Arabic, and other non-ASCII characters in wiki slugs. Fixes #64620

* test(memory-wiki): cover Unicode slug regressions

* fix(memory-wiki): preserve combining marks in slugs

* fix(memory-wiki): cap composed source filenames

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Dreaming/cron: wake managed dreaming jobs immediately instead of waiting for the next heartbeat, so scheduled dreaming runs start when the cron fires. (#65053) Thanks @l0cka and @vincentkoc.
|
||||
- QA/packaging: stop packaged QA helpers from crashing when optional scenario execution config is unavailable, so npm distributions can skip the repo-only scenario pack without breaking completion-cache and startup paths. (#65118) Thanks @EdderTalmor and @vincentkoc.
|
||||
- Media/audio transcription: surface the real provider failure when every audio transcription attempt fails, so status output and the CLI stop collapsing those errors into generic skips. (#65096) Thanks @l0cka and @vincentkoc.
|
||||
- Memory/wiki: preserve Unicode letters, digits, and combining marks in wiki slugs and contradiction clustering, and cap Unicode filename segments to safe byte lengths so non-ASCII titles stop collapsing or overflowing path limits. (#64742) Thanks @zhouhe-xydt and @vincentkoc.
|
||||
|
||||
## 2026.4.11
|
||||
|
||||
|
||||
@@ -299,4 +299,47 @@ describe("syncMemoryWikiBridgeSources", () => {
|
||||
code: "ENOENT",
|
||||
});
|
||||
});
|
||||
|
||||
it("caps composed bridge source filenames to the filesystem component limit", async () => {
|
||||
const workspaceDir = await createBridgeWorkspace(`${"漢".repeat(50)}-workspace`);
|
||||
const { rootDir: vaultDir, config } = await createVault({
|
||||
rootDir: nextCaseRoot("long-bridge-vault"),
|
||||
config: {
|
||||
vaultMode: "bridge",
|
||||
bridge: {
|
||||
enabled: true,
|
||||
readMemoryArtifacts: true,
|
||||
indexDailyNotes: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const relativePath = `${"語".repeat(50)}/${"録".repeat(50)}.md`;
|
||||
const absolutePath = path.join(workspaceDir, relativePath);
|
||||
await fs.mkdir(path.dirname(absolutePath), { recursive: true });
|
||||
await fs.writeFile(absolutePath, "# Deep Unicode Note\n", "utf8");
|
||||
registerBridgeArtifacts([
|
||||
{
|
||||
kind: "daily-note",
|
||||
workspaceDir,
|
||||
relativePath,
|
||||
absolutePath,
|
||||
agentIds: ["main"],
|
||||
contentType: "markdown",
|
||||
},
|
||||
]);
|
||||
|
||||
const appConfig: OpenClawConfig = {
|
||||
agents: {
|
||||
list: [{ id: "main", default: true, workspace: workspaceDir }],
|
||||
},
|
||||
};
|
||||
|
||||
const result = await syncMemoryWikiBridgeSources({ config, appConfig });
|
||||
const pagePath = result.pagePaths[0] ?? "";
|
||||
|
||||
expect(result.importedCount).toBe(1);
|
||||
expect(Buffer.byteLength(path.basename(pagePath))).toBeLessThanOrEqual(255);
|
||||
await expect(fs.stat(path.join(vaultDir, pagePath))).resolves.toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -8,7 +8,12 @@ import {
|
||||
import type { OpenClawConfig } from "../api.js";
|
||||
import type { ResolvedMemoryWikiConfig } from "./config.js";
|
||||
import { appendMemoryWikiLog } from "./log.js";
|
||||
import { renderMarkdownFence, renderWikiMarkdown, slugifyWikiSegment } from "./markdown.js";
|
||||
import {
|
||||
createWikiPageFilename,
|
||||
renderMarkdownFence,
|
||||
renderWikiMarkdown,
|
||||
slugifyWikiSegment,
|
||||
} from "./markdown.js";
|
||||
import { writeImportedSourcePage } from "./source-page-shared.js";
|
||||
import { resolveArtifactKey } from "./source-path-shared.js";
|
||||
import {
|
||||
@@ -110,11 +115,10 @@ function resolveBridgePagePath(params: { workspaceDir: string; relativePath: str
|
||||
const artifactHash = createHash("sha1").update(params.relativePath).digest("hex");
|
||||
const workspaceSlug = `${workspaceBaseSlug}-${workspaceHash.slice(0, 8)}`;
|
||||
const artifactSlug = `${artifactBaseSlug}-${artifactHash.slice(0, 8)}`;
|
||||
const fileName = createWikiPageFilename(`bridge-${workspaceSlug}-${artifactSlug}`);
|
||||
return {
|
||||
pageId: `source.bridge.${workspaceSlug}.${artifactSlug}`,
|
||||
pagePath: path
|
||||
.join("sources", `bridge-${workspaceSlug}-${artifactSlug}.md`)
|
||||
.replace(/\\/g, "/"),
|
||||
pagePath: path.join("sources", fileName).replace(/\\/g, "/"),
|
||||
workspaceSlug,
|
||||
artifactSlug,
|
||||
};
|
||||
|
||||
60 lines — extensions/memory-wiki/src/claim-health.test.ts (new file)
@@ -0,0 +1,60 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { buildPageContradictionClusters } from "./claim-health.js";
|
||||
import type { WikiPageSummary } from "./markdown.js";
|
||||
|
||||
function createPage(params: {
|
||||
relativePath: string;
|
||||
title: string;
|
||||
contradictions: string[];
|
||||
}): WikiPageSummary {
|
||||
return {
|
||||
absolutePath: `/tmp/${params.relativePath}`,
|
||||
relativePath: params.relativePath,
|
||||
kind: "entity",
|
||||
title: params.title,
|
||||
sourceIds: [],
|
||||
linkTargets: [],
|
||||
claims: [],
|
||||
contradictions: params.contradictions,
|
||||
questions: [],
|
||||
};
|
||||
}
|
||||
|
||||
describe("buildPageContradictionClusters", () => {
|
||||
it("clusters Unicode contradiction notes that differ only by punctuation", () => {
|
||||
const clusters = buildPageContradictionClusters([
|
||||
createPage({
|
||||
relativePath: "entities/alpha.md",
|
||||
title: "Alpha",
|
||||
contradictions: ["模型冲突:版本 A"],
|
||||
}),
|
||||
createPage({
|
||||
relativePath: "entities/beta.md",
|
||||
title: "Beta",
|
||||
contradictions: ["模型冲突 版本 A"],
|
||||
}),
|
||||
]);
|
||||
|
||||
expect(clusters).toHaveLength(1);
|
||||
expect(clusters[0]?.entries).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("keeps combining-mark contradiction notes in separate clusters", () => {
|
||||
const clusters = buildPageContradictionClusters([
|
||||
createPage({
|
||||
relativePath: "entities/alpha.md",
|
||||
title: "Alpha",
|
||||
contradictions: ["किताब"],
|
||||
}),
|
||||
createPage({
|
||||
relativePath: "entities/beta.md",
|
||||
title: "Beta",
|
||||
contradictions: ["कीताब"],
|
||||
}),
|
||||
]);
|
||||
|
||||
expect(clusters).toHaveLength(2);
|
||||
expect(clusters.map((cluster) => cluster.key).toSorted()).toEqual(["किताब", "कीताब"]);
|
||||
expect(clusters.every((cluster) => cluster.entries)).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -66,7 +66,7 @@ function normalizeClaimTextKey(text: string): string {
|
||||
|
||||
function normalizeTextKey(text: string): string {
|
||||
return normalizeLowercaseStringOrEmpty(text)
|
||||
.replace(/[^a-z0-9]+/g, " ")
|
||||
.replace(/[^\p{L}\p{N}\p{M}]+/gu, " ")
|
||||
.replace(/\s+/g, " ");
|
||||
}
|
||||
|
||||
|
||||
42 lines — extensions/memory-wiki/src/markdown.test.ts (new file)
@@ -0,0 +1,42 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createWikiPageFilename, slugifyWikiSegment } from "./markdown.js";
|
||||
|
||||
describe("slugifyWikiSegment", () => {
|
||||
it("preserves Unicode letters and numbers in wiki slugs", () => {
|
||||
expect(slugifyWikiSegment("大语言模型概述")).toBe("大语言模型概述");
|
||||
expect(slugifyWikiSegment("LLM 架构分析")).toBe("llm-架构分析");
|
||||
expect(slugifyWikiSegment("Circuit Breaker 自動恢復")).toBe("circuit-breaker-自動恢復");
|
||||
});
|
||||
|
||||
it("keeps ASCII behavior unchanged", () => {
|
||||
expect(slugifyWikiSegment("hello world")).toBe("hello-world");
|
||||
expect(slugifyWikiSegment("")).toBe("page");
|
||||
});
|
||||
|
||||
it("retains combining marks so distinct titles do not collapse", () => {
|
||||
expect(slugifyWikiSegment("किताब")).toBe("किताब");
|
||||
expect(slugifyWikiSegment("कुतुब")).toBe("कुतुब");
|
||||
expect(slugifyWikiSegment("कीताब")).toBe("कीताब");
|
||||
});
|
||||
|
||||
it("caps long Unicode slugs to a safe filename byte length", () => {
|
||||
const title = "漢".repeat(90);
|
||||
const slug = slugifyWikiSegment(title);
|
||||
|
||||
expect(slug.endsWith(`-${createHash("sha1").update(title).digest("hex").slice(0, 12)}`)).toBe(
|
||||
true,
|
||||
);
|
||||
expect(Buffer.byteLength(slug)).toBeLessThanOrEqual(240);
|
||||
expect(slugifyWikiSegment(title)).toBe(slug);
|
||||
});
|
||||
|
||||
it("caps composed wiki page filenames to a safe path-component length", () => {
|
||||
const stem = `bridge-${"漢".repeat(45)}-${"語".repeat(45)}`;
|
||||
const fileName = createWikiPageFilename(stem);
|
||||
|
||||
expect(fileName.endsWith(".md")).toBe(true);
|
||||
expect(Buffer.byteLength(fileName)).toBeLessThanOrEqual(255);
|
||||
expect(createWikiPageFilename(stem)).toBe(fileName);
|
||||
});
|
||||
});
|
||||
@@ -1,3 +1,4 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import path from "node:path";
|
||||
import {
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
@@ -65,13 +66,54 @@ const RELATED_BLOCK_PATTERN = new RegExp(
|
||||
`${WIKI_RELATED_START_MARKER}[\\s\\S]*?${WIKI_RELATED_END_MARKER}`,
|
||||
"g",
|
||||
);
|
||||
const MAX_WIKI_SEGMENT_BYTES = 240;
|
||||
const MAX_WIKI_FILENAME_COMPONENT_BYTES = 255;
|
||||
const WIKI_SEGMENT_HASH_BYTES = 12;
|
||||
|
||||
function truncateUtf8CodePointSafe(value: string, maxBytes: number): string {
|
||||
let result = "";
|
||||
let bytes = 0;
|
||||
for (const char of value) {
|
||||
const nextBytes = Buffer.byteLength(char);
|
||||
if (bytes + nextBytes > maxBytes) {
|
||||
break;
|
||||
}
|
||||
result += char;
|
||||
bytes += nextBytes;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function capWikiValueWithHash(raw: string, maxBytes: number, fallback: string): string {
|
||||
if (Buffer.byteLength(raw) <= maxBytes) {
|
||||
return raw;
|
||||
}
|
||||
const suffix = createHash("sha1").update(raw).digest("hex").slice(0, WIKI_SEGMENT_HASH_BYTES);
|
||||
const truncated = truncateUtf8CodePointSafe(
|
||||
raw,
|
||||
maxBytes - Buffer.byteLength(`-${suffix}`),
|
||||
).replace(/-+$/g, "");
|
||||
return `${truncated || fallback}-${suffix}`;
|
||||
}
|
||||
|
||||
export function slugifyWikiSegment(raw: string): string {
|
||||
const slug = normalizeLowercaseStringOrEmpty(raw)
|
||||
.replace(/[^a-z0-9]+/g, "-")
|
||||
.replace(/[^\p{L}\p{N}\p{M}]+/gu, "-")
|
||||
.replace(/-+/g, "-")
|
||||
.replace(/^-+|-+$/g, "");
|
||||
return slug || "page";
|
||||
if (!slug) {
|
||||
return "page";
|
||||
}
|
||||
return capWikiValueWithHash(slug, MAX_WIKI_SEGMENT_BYTES, "page");
|
||||
}
|
||||
|
||||
export function createWikiPageFilename(stem: string, extension = ".md"): string {
|
||||
const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
|
||||
const maxStemBytes = Math.max(
|
||||
1,
|
||||
MAX_WIKI_FILENAME_COMPONENT_BYTES - Buffer.byteLength(normalizedExtension),
|
||||
);
|
||||
return `${capWikiValueWithHash(stem, maxStemBytes, "page")}${normalizedExtension}`;
|
||||
}
|
||||
|
||||
export function parseWikiMarkdown(content: string): ParsedWikiMarkdown {
|
||||
|
||||
@@ -103,4 +103,30 @@ describe("syncMemoryWikiUnsafeLocalSources", () => {
|
||||
code: "ENOENT",
|
||||
});
|
||||
});
|
||||
|
||||
it("caps composed unsafe-local filenames to the filesystem component limit", async () => {
|
||||
const privateDir = await createPrivateDir(`${"漢".repeat(50)}-private`);
|
||||
const nestedDir = path.join(privateDir, `${"語".repeat(50)}-nested`);
|
||||
const secretPath = path.join(nestedDir, `${"録".repeat(50)}.md`);
|
||||
await fs.mkdir(nestedDir, { recursive: true });
|
||||
await fs.writeFile(secretPath, "# very private\n", "utf8");
|
||||
|
||||
const { rootDir: vaultDir, config } = await createVault({
|
||||
rootDir: nextCaseRoot("long-unsafe-vault"),
|
||||
config: {
|
||||
vaultMode: "unsafe-local",
|
||||
unsafeLocal: {
|
||||
allowPrivateMemoryCoreAccess: true,
|
||||
paths: [privateDir],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const result = await syncMemoryWikiUnsafeLocalSources(config);
|
||||
const pagePath = result.pagePaths[0] ?? "";
|
||||
|
||||
expect(result.importedCount).toBe(1);
|
||||
expect(Buffer.byteLength(path.basename(pagePath))).toBeLessThanOrEqual(255);
|
||||
await expect(fs.stat(path.join(vaultDir, pagePath))).resolves.toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,7 +5,12 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtim
|
||||
import type { BridgeMemoryWikiResult } from "./bridge.js";
|
||||
import type { ResolvedMemoryWikiConfig } from "./config.js";
|
||||
import { appendMemoryWikiLog } from "./log.js";
|
||||
import { renderMarkdownFence, renderWikiMarkdown, slugifyWikiSegment } from "./markdown.js";
|
||||
import {
|
||||
createWikiPageFilename,
|
||||
renderMarkdownFence,
|
||||
renderWikiMarkdown,
|
||||
slugifyWikiSegment,
|
||||
} from "./markdown.js";
|
||||
import { writeImportedSourcePage } from "./source-page-shared.js";
|
||||
import { resolveArtifactKey } from "./source-path-shared.js";
|
||||
import {
|
||||
@@ -113,7 +118,9 @@ function resolveUnsafeLocalPagePath(params: { configuredPath: string; absolutePa
|
||||
const pageSlug = `${configuredBaseSlug}-${configuredHash}-${artifactBaseSlug}-${artifactHash}`;
|
||||
return {
|
||||
pageId: `source.unsafe-local.${pageSlug}`,
|
||||
pagePath: path.join("sources", `unsafe-local-${pageSlug}.md`).replace(/\\/g, "/"),
|
||||
pagePath: path
|
||||
.join("sources", createWikiPageFilename(`unsafe-local-${pageSlug}`))
|
||||
.replace(/\\/g, "/"),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user