From ec7ad3b4ac71fa944d2db22776940ba3565c5d97 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 25 May 2026 23:35:52 +0100 Subject: [PATCH] perf: reduce fuzzy matching allocations --- .../reply/model-selection-directive.ts | 16 ++-- src/talk/activation-name.ts | 73 +++++++++++++------ 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/auto-reply/reply/model-selection-directive.ts b/src/auto-reply/reply/model-selection-directive.ts index 5f2a94fb1eb..ca409c9c1be 100644 --- a/src/auto-reply/reply/model-selection-directive.ts +++ b/src/auto-reply/reply/model-selection-directive.ts @@ -116,9 +116,13 @@ function boundedLevenshteinDistance(a: string, b: string, maxDistance: number): return null; } - // Standard DP with early exit. O(maxDistance * minLen) in common cases. - const prev = Array.from({ length: bLen + 1 }, (_, idx) => idx); - const curr = Array.from({ length: bLen + 1 }, () => 0); + // Standard DP with early exit. Reuse fixed-size numeric buffers so fuzzy + // matching large model catalogs does not allocate a row per candidate. + const prev = new Uint32Array(bLen + 1); + const curr = new Uint32Array(bLen + 1); + for (let index = 0; index <= bLen; index += 1) { + prev[index] = index; + } for (let i = 1; i <= aLen; i++) { curr[0] = i; @@ -138,12 +142,12 @@ function boundedLevenshteinDistance(a: string, b: string, maxDistance: number): } for (let j = 0; j <= bLen; j++) { - prev[j] = curr[j] ?? 0; + prev[j] = curr[j]; } } - const dist = prev[bLen] ?? null; - if (dist == null || dist > maxDistance) { + const dist = prev[bLen]; + if (dist > maxDistance) { return null; } return dist; diff --git a/src/talk/activation-name.ts b/src/talk/activation-name.ts index aa15cae09fc..444e2815972 100644 --- a/src/talk/activation-name.ts +++ b/src/talk/activation-name.ts @@ -22,6 +22,16 @@ type EdgeActivationNameCandidate = { strongBoundary: boolean; }; +type PreparedActivationName = { + activationName: string; + compact: string; +}; + +type PreparedEdgeActivationNameCandidate = { + candidate: EdgeActivationNameCandidate; + compact: string; +}; + export function realtimeVoiceActivationNameWordCount(value: string): number { return Array.from(value.matchAll(/[a-z0-9]+/gi)).length; } @@ -72,25 +82,39 @@ export function matchRealtimeVoiceActivationName( activationNames: string[], maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS, ): Extract | undefined { + const preparedActivationNames: PreparedActivationName[] = []; + for (const activationName of activationNames) { + const normalizedActivationName = normalizeActivationNameCandidate(activationName); + if (!normalizedActivationName) { + continue; + } + preparedActivationNames.push({ + activationName, + compact: compactActivationName(normalizedActivationName), + }); + } + if (preparedActivationNames.length === 0) { + return undefined; + } + const candidates = [ ...leadingActivationNameCandidates(text, maxWords), ...trailingActivationNameCandidates(text, maxWords), - ].toSorted( - (left, right) => - compactActivationName(right.heardName).length - compactActivationName(left.heardName).length, - ); + ] + .map( + (candidate): PreparedEdgeActivationNameCandidate => ({ + candidate, + compact: compactActivationName(candidate.heardName), + }), + ) + .toSorted((left, right) => right.compact.length - left.compact.length); - for (const candidate of candidates) { - for (const activationName of activationNames) { - const normalizedActivationName = normalizeActivationNameCandidate(activationName); - if (!normalizedActivationName) { - continue; - } - const heardCompact = compactActivationName(candidate.heardName); - const activationCompact = compactActivationName(normalizedActivationName); + for (const { candidate, compact: heardCompact } of candidates) { + for (const { activationName, compact: activationCompact } of preparedActivationNames) { const exactMatch = heardCompact === activationCompact; const fuzzyMatch = - candidate.edge === "leading" && isFuzzyActivationNameMatch(candidate, activationName); + candidate.edge === "leading" && + isFuzzyActivationNameMatch(candidate, heardCompact, activationCompact); if (exactMatch || fuzzyMatch) { return { allowed: true, @@ -223,9 +247,13 @@ function levenshteinDistance(left: string, right: string): number { return left.length; } - let previous = Array.from({ length: right.length + 1 }, (_, index) => index); + let previous = new Uint32Array(right.length + 1); + let current = new Uint32Array(right.length + 1); + for (let index = 0; index <= right.length; index += 1) { + previous[index] = index; + } for (let leftIndex = 0; leftIndex < left.length; leftIndex += 1) { - const current = [leftIndex + 1]; + current[0] = leftIndex + 1; for (let rightIndex = 0; rightIndex < right.length; rightIndex += 1) { const cost = left[leftIndex] === right[rightIndex] ? 0 : 1; current[rightIndex + 1] = Math.min( @@ -234,9 +262,11 @@ function levenshteinDistance(left: string, right: string): number { previous[rightIndex] + cost, ); } - previous = current; + const nextPrevious = current; + current = previous; + previous = nextPrevious; } - return previous[right.length] ?? Math.max(left.length, right.length); + return previous[right.length]; } function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean { @@ -274,14 +304,9 @@ function commonPrefixLength(left: string, right: string): number { function isFuzzyActivationNameMatch( candidate: EdgeActivationNameCandidate, - activationName: string, + heardCompact: string, + activationCompact: string, ): boolean { - const normalizedActivationName = normalizeActivationNameCandidate(activationName); - if (!normalizedActivationName) { - return false; - } - const heardCompact = compactActivationName(candidate.heardName); - const activationCompact = compactActivationName(normalizedActivationName); if (!heardCompact || !activationCompact || activationCompact.length < 5) { return false; }