Memory: add Arabic query expansion stop words (#23717)

2026-05-06 20:10:42 +00:00 · 2026-02-22 12:17:47 -05:00
parent 8c71bbe1e1
commit 9ae08ce205
3 changed files with 80 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
 - Memory/FTS: add Korean stop-word filtering and particle-aware keyword extraction (including mixed Korean/English stems) for query expansion in FTS-only search mode. (#18899) Thanks @ruypang.
 - Memory/FTS: add Japanese-aware query expansion tokenization and stop-word filtering (including mixed-script terms like ASCII + katakana) for FTS-only search mode. Thanks @vincentkoc.
 - Memory/FTS: add Spanish and Portuguese stop-word filtering for query expansion in FTS-only search mode, improving conversational recall for both languages. Thanks @vincentkoc.
+- Memory/FTS: add Arabic stop-word filtering for query expansion in FTS-only search mode to reduce conversational filler in Arabic memory searches. Thanks @vincentkoc.
 - iOS/Talk: prefetch TTS segments and suppress expected speech-cancellation errors for smoother talk playback. (#22833) Thanks @ngutman.

 ### Breaking
--- a/src/memory/query-expansion.test.ts
+++ b/src/memory/query-expansion.test.ts
@@ -143,6 +143,22 @@ describe("extractKeywords", () => {
    expect(keywords).not.toContain("onde");
  });

+  it("extracts keywords from Arabic conversational query", () => {
+    const extracted = extractKeywords("بالأمس ناقشنا استراتيجية النشر");
+    for (const kept of ["ناقشنا", "استراتيجية", "النشر"]) {
+      expect(extracted).toContain(kept);
+    }
+    expect(extracted).not.toContain("بالأمس"); // vague time reference is a stop word
+  });
+
+  it("filters Arabic question stop words", () => {
+    // Every interrogative in the query is a stop word and must be dropped.
+    const extracted = extractKeywords("كيف متى أين ماذا");
+    for (const questionWord of ["كيف", "متى", "أين", "ماذا"]) {
+      expect(extracted).not.toContain(questionWord);
+    }
+  });
+
  it("handles empty query", () => {
    expect(extractKeywords("")).toEqual([]);
    expect(extractKeywords("   ")).toEqual([]);
--- a/src/memory/query-expansion.ts
+++ b/src/memory/query-expansion.ts
@@ -262,6 +262,68 @@ const STOP_WORDS_PT = new Set([
  "ajuda",
 ]);

+const STOP_WORDS_AR = new Set([
+  // Arabic stop words, looked up per token via .has(token), so each entry is one token. Articles and connectors:
+  "ال",
+  "و",
+  "أو",
+  "لكن",
+  "ثم",
+  "بل",
+  // Pronouns, demonstratives and place references
+  "أنا",
+  "نحن",
+  "هو",
+  "هي",
+  "هم",
+  "هذا",
+  "هذه",
+  "ذلك",
+  "تلك",
+  "هنا",
+  "هناك",
+  // Common prepositions
+  "من",
+  "إلى",
+  "الى", // hamza-less spelling of the entry above
+  "في",
+  "على",
+  "عن",
+  "مع",
+  "بين",
+  "ل",
+  "ب",
+  "ك",
+  // Common auxiliaries / vague verbs
+  "كان",
+  "كانت",
+  "يكون",
+  "تكون",
+  "صار",
+  "أصبح",
+  "يمكن",
+  "ممكن",
+  // Time references (vague)
+  "بالأمس",
+  "امس",
+  "اليوم",
+  "غدا",
+  "الآن",
+  "قبل",
+  "بعد",
+  "مؤخرا",
+  // Question/request words
+  "لماذا",
+  "كيف",
+  "ماذا",
+  "متى",
+  "أين",
+  "هل",
+  "فضلك", // second word of "please"; a two-word entry could never equal a single token, and "من" is listed above
+  "فضلا",
+  "ساعد",
+]);
+
 const STOP_WORDS_KO = new Set([
  // Particles (조사)
  "은",
@@ -669,6 +731,7 @@ export function extractKeywords(query: string): string[] {
      STOP_WORDS_EN.has(token) ||
      STOP_WORDS_ES.has(token) ||
      STOP_WORDS_PT.has(token) ||
+      STOP_WORDS_AR.has(token) ||
      STOP_WORDS_ZH.has(token) ||
      STOP_WORDS_KO.has(token) ||
      STOP_WORDS_JA.has(token)