Memory: add Spanish and Portuguese query expansion stop words (#23710)

This commit is contained in:
Vincent Koc
2026-02-22 11:26:12 -05:00
committed by GitHub
parent f14ebd743c
commit 35b162af76
3 changed files with 173 additions and 0 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
- Discord/Allowlist: canonicalize resolved Discord allowlist names to IDs and split resolution flow for clearer fail-closed behavior.
- Memory/FTS: add Korean stop-word filtering and particle-aware keyword extraction (including mixed Korean/English stems) for query expansion in FTS-only search mode. (#18899) Thanks @ruypang.
- Memory/FTS: add Japanese-aware query expansion tokenization and stop-word filtering (including mixed-script terms like ASCII + katakana) for FTS-only search mode. Thanks @vincentkoc.
- Memory/FTS: add Spanish and Portuguese stop-word filtering for query expansion in FTS-only search mode, improving conversational recall for both languages. Thanks @vincentkoc.
- iOS/Talk: prefetch TTS segments and suppress expected speech-cancellation errors for smoother talk playback. (#22833) Thanks @ngutman.
### Breaking

View File

@@ -117,6 +117,32 @@ describe("extractKeywords", () => {
expect(keywords).not.toContain("どう");
});
it("extracts keywords from Spanish conversational query", () => {
  // Content-bearing nouns survive; conversational filler is dropped.
  const extracted = extractKeywords("ayer hablamos sobre la estrategia de despliegue");
  for (const contentWord of ["estrategia", "despliegue"]) {
    expect(extracted).toContain(contentWord);
  }
  for (const stopWord of ["ayer", "sobre"]) {
    expect(extracted).not.toContain(stopWord);
  }
});
it("extracts keywords from Portuguese conversational query", () => {
  // Content-bearing nouns survive; conversational filler is dropped.
  const extracted = extractKeywords("ontem falamos sobre a estratégia de implantação");
  for (const contentWord of ["estratégia", "implantação"]) {
    expect(extracted).toContain(contentWord);
  }
  for (const stopWord of ["ontem", "sobre"]) {
    expect(extracted).not.toContain(stopWord);
  }
});
it("filters Spanish and Portuguese question stop words", () => {
  const extracted = extractKeywords("cómo cuando donde porquê quando onde");
  // Every token in the query is a question word in ES or PT; none may leak through.
  const questionWords = ["cómo", "cuando", "donde", "porquê", "quando", "onde"];
  for (const word of questionWords) {
    expect(extracted).not.toContain(word);
  }
});
it("handles empty query", () => {
expect(extractKeywords("")).toEqual([]);
expect(extractKeywords(" ")).toEqual([]);

View File

@@ -118,6 +118,150 @@ const STOP_WORDS_EN = new Set([
"give",
]);
// Spanish stop words removed from queries before FTS keyword expansion,
// grouped by grammatical role. Matched against already-lowercased tokens
// (both accented and unaccented spellings are listed where users commonly
// type either).
const STOP_WORDS_ES = new Set([
  // Articles and determiners
  "el",
  "la",
  "los",
  "las",
  "un",
  "una",
  "unos",
  "unas",
  "este",
  "esta",
  "ese",
  "esa",
  // Pronouns
  "yo",
  "me",
  "mi",
  "se", // reflexive/impersonal clitic ("se habló de…")
  "nosotros",
  "nosotras",
  "tu",
  "tus",
  "usted",
  "ustedes",
  "ellos",
  "ellas",
  // Prepositions, conjunctions, and contractions
  "de",
  "del", // de + el
  "a",
  "al", // a + el; parallels "del"
  "en",
  "con",
  "por",
  "para",
  "sobre",
  "entre",
  "y",
  "o",
  "pero",
  "si",
  "porque",
  "como",
  // Common verbs / auxiliaries
  "es",
  "son",
  "fue",
  "fueron",
  "ser",
  "estar",
  "haber",
  "tener",
  "hacer",
  // Time references (vague)
  "ayer",
  "hoy",
  "mañana",
  "antes",
  "despues", // unaccented variant of "después"
  "después",
  "ahora",
  "recientemente",
  // Question/request words
  "que",
  "qué",
  "cómo",
  "cuando",
  "cuándo",
  "donde",
  "dónde",
  "porqué",
  "favor",
  "ayuda",
]);
// Portuguese stop words removed from queries before FTS keyword expansion,
// grouped by grammatical role. Matched against already-lowercased tokens.
// Includes preposition + article contractions, which are among the most
// frequent Portuguese tokens.
const STOP_WORDS_PT = new Set([
  // Articles and determiners
  "o",
  "a",
  "os",
  "as",
  "um",
  "uma",
  "uns",
  "umas",
  "este",
  "esta",
  "esse",
  "essa",
  // Pronouns
  "eu",
  "me",
  "meu",
  "minha",
  "nos", // also covers the em + os contraction
  "nós",
  "você",
  "vocês",
  "ele",
  "ela",
  "eles",
  "elas",
  // Prepositions, conjunctions, and contractions
  "de",
  "do", // de + o
  "da", // de + a
  "dos", // de + os; parallels "do"/"da"
  "das", // de + as
  "em",
  "na", // em + a ("no" deliberately omitted: collides with the English negation)
  "nas", // em + as
  "ao", // a + o
  "aos", // a + os
  "à", // a + a
  "às", // a + as
  "pelo", // por + o
  "pela", // por + a
  "num", // em + um
  "numa", // em + uma
  "com",
  "por",
  "para",
  "sobre",
  "entre",
  "e",
  "ou",
  "mas",
  "se",
  "porque",
  "como",
  // Common verbs / auxiliaries
  "é",
  "são",
  "foi",
  "foram",
  "ser",
  "estar",
  "ter",
  "fazer",
  // Time references (vague)
  "ontem",
  "hoje",
  "amanhã",
  "antes",
  "depois",
  "agora",
  "recentemente",
  // Question/request words
  "que",
  "quê",
  "quando",
  "onde",
  "porquê",
  "favor",
  "ajuda",
]);
const STOP_WORDS_KO = new Set([
// Particles (조사)
"은",
@@ -523,6 +667,8 @@ export function extractKeywords(query: string): string[] {
// Skip stop words
if (
STOP_WORDS_EN.has(token) ||
STOP_WORDS_ES.has(token) ||
STOP_WORDS_PT.has(token) ||
STOP_WORDS_ZH.has(token) ||
STOP_WORDS_KO.has(token) ||
STOP_WORDS_JA.has(token)