From e3bc985a6ea2d8ab89c08ce9f64ae25fdef876f2 Mon Sep 17 00:00:00 2001 From: Ke He Date: Wed, 29 Apr 2026 12:56:56 +0800 Subject: [PATCH] fix(shared): preserve unicode slug labels Fixes #58932. Carries forward #58942 and #58995. Thanks @fengqing-git, @Starhappysh, and @koen666. --- CHANGELOG.md | 1 + src/shared/string-normalization.test.ts | 41 +++++++++++++++++++++++++ src/shared/string-normalization.ts | 12 +++++--- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 000aa66e78c..d58e0a29a44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Plugin SDK: add tracked Discord component-message helpers and a Telegram account-resolution compatibility facade, so existing plugins using those subpaths resolve while new plugins stay on generic channel SDK contracts. Thanks @vincentkoc. +- Shared labels: preserve Unicode combining marks and NFC-equivalent accented text in group/channel slug normalization so non-Latin labels no longer lose meaningful characters. Fixes #58932; carries forward #58942 and #58995. Thanks @fengqing-git, @Starhappysh, and @koen666. - Docs/Hetzner: clarify that SSH tunnel access requires `AllowTcpForwarding local` before running `ssh -L`, so hardened VPS sshd configs do not block loopback Gateway access. Fixes #54557; carries forward #54564; refs #54954. Thanks @satishkc7, @blackstrype, and @Aftabbs. - Gateway/shutdown: report structured shutdown warnings and HTTP close timeout warnings through `ShutdownResult` while preserving lifecycle hook hardening. Carries forward #41296. Thanks @edenfunf. - Plugins/QA: prebuild the private QA channel runtime before plugin gauntlet source runs so wrapper CPU/RSS measurements are not polluted by private QA dist rebuild work. Thanks @vincentkoc. diff --git a/src/shared/string-normalization.test.ts b/src/shared/string-normalization.test.ts index e0f8c8ae900..5e49c36ed14 100644 --- a/src/shared/string-normalization.test.ts +++ b/src/shared/string-normalization.test.ts @@ -45,4 +45,45 @@ describe("shared/string-normalization", () => { expect(normalizeAtHashSlug("###__Room Name__")).toBe("room-name"); expect(normalizeAtHashSlug("@@@___")).toBe(""); }); + + it.each([ + ["技术讨论组", "技术讨论组"], + [" AI 助手群 ", "ai-助手群"], + ["友達グループ", "友達グループ"], + ["개발자 모임", "개발자-모임"], + ["Team 技术讨论", "team-技术讨论"], + ["#OpenClaw中文群", "#openclaw中文群"], + ["Команда разработки", "команда-разработки"], + ["فريق التطوير", "فريق-التطوير"], + ])("preserves Unicode letters in normalizeHyphenSlug: %s", (input, expected) => { + expect(normalizeHyphenSlug(input)).toBe(expected); + }); + + it.each([ + ["Cafe\u0301 Team", "café-team"], + ["हिन्दी चर्चा", "हिन्दी-चर्चा"], + ["ห้อง แช็ต", "ห้อง-แช็ต"], + ])("preserves combining marks in normalizeHyphenSlug: %s", (input, expected) => { + expect(normalizeHyphenSlug(input)).toBe(expected); + }); + + it.each([ + ["#技术频道", "技术频道"], + ["@中文群组", "中文群组"], + ["#日本語チャンネル", "日本語チャンネル"], + ["#한국어채널", "한국어채널"], + ["#Команда разработки", "команда-разработки"], + ["@فريق التطوير", "فريق-التطوير"], + ["#OpenClaw中文群", "openclaw中文群"], + ])("preserves Unicode letters in normalizeAtHashSlug: %s", (input, expected) => { + expect(normalizeAtHashSlug(input)).toBe(expected); + }); + + it.each([ + ["#Cafe\u0301_Team", "café-team"], + ["@हिन्दी चर्चा", "हिन्दी-चर्चा"], + ["#ห้อง แช็ต", "ห้อง-แช็ต"], + ])("preserves combining marks in normalizeAtHashSlug: %s", (input, expected) => { + expect(normalizeAtHashSlug(input)).toBe(expected); + }); }); diff --git a/src/shared/string-normalization.ts b/src/shared/string-normalization.ts index f9e8eec03ec..ea09ebdf0a5 100644 --- a/src/shared/string-normalization.ts +++ b/src/shared/string-normalization.ts @@ -51,23 +51,27 @@ export function normalizeCsvOrLooseStringList(value: unknown): string[] { return []; } +function normalizeSlugInput(raw?: string | null) { + return (normalizeOptionalLowercaseString(raw) ?? "").normalize("NFC"); +} + export function normalizeHyphenSlug(raw?: string | null) { - const trimmed = normalizeOptionalLowercaseString(raw) ?? ""; + const trimmed = normalizeSlugInput(raw); if (!trimmed) { return ""; } const dashed = trimmed.replace(/\s+/g, "-"); - const cleaned = dashed.replace(/[^a-z0-9#@._+-]+/g, "-"); + const cleaned = dashed.replace(/[^\p{L}\p{M}\p{N}#@._+-]+/gu, "-"); return cleaned.replace(/-{2,}/g, "-").replace(/^[-.]+|[-.]+$/g, ""); } export function normalizeAtHashSlug(raw?: string | null) { - const trimmed = normalizeOptionalLowercaseString(raw) ?? ""; + const trimmed = normalizeSlugInput(raw); if (!trimmed) { return ""; } const withoutPrefix = trimmed.replace(/^[@#]+/, ""); const dashed = withoutPrefix.replace(/[\s_]+/g, "-"); - const cleaned = dashed.replace(/[^a-z0-9-]+/g, "-"); + const cleaned = dashed.replace(/[^\p{L}\p{M}\p{N}-]+/gu, "-"); return cleaned.replace(/-{2,}/g, "-").replace(/^-+|-+$/g, ""); }