perf: reduce fuzzy matching allocations

This commit is contained in:
Peter Steinberger
2026-05-25 23:35:52 +01:00
parent 1531fe2525
commit ec7ad3b4ac
2 changed files with 59 additions and 30 deletions

View File

@@ -116,9 +116,13 @@ function boundedLevenshteinDistance(a: string, b: string, maxDistance: number):
return null;
}
// Standard DP with early exit. O(maxDistance * minLen) in common cases.
const prev = Array.from({ length: bLen + 1 }, (_, idx) => idx);
const curr = Array.from({ length: bLen + 1 }, () => 0);
// Standard DP with early exit. Reuse fixed-size numeric buffers so fuzzy
// matching large model catalogs does not allocate a row per candidate.
const prev = new Uint32Array(bLen + 1);
const curr = new Uint32Array(bLen + 1);
for (let index = 0; index <= bLen; index += 1) {
prev[index] = index;
}
for (let i = 1; i <= aLen; i++) {
curr[0] = i;
@@ -138,12 +142,12 @@ function boundedLevenshteinDistance(a: string, b: string, maxDistance: number):
}
for (let j = 0; j <= bLen; j++) {
prev[j] = curr[j] ?? 0;
prev[j] = curr[j];
}
}
const dist = prev[bLen] ?? null;
if (dist == null || dist > maxDistance) {
const dist = prev[bLen];
if (dist > maxDistance) {
return null;
}
return dist;

View File

@@ -22,6 +22,16 @@ type EdgeActivationNameCandidate = {
strongBoundary: boolean;
};
/**
 * A configured activation name paired with its precomputed compact form, so
 * the matching loop can compare against it without normalizing and compacting
 * the same name again for every heard candidate.
 */
type PreparedActivationName = {
// The activation name exactly as it was supplied by the caller.
activationName: string;
// compactActivationName() applied to the normalized activation name.
compact: string;
};
/**
 * An edge (leading or trailing) name candidate taken from the input text,
 * paired with the compact form of its heard name. The compact form is computed
 * once and then reused both for ordering candidates by length and for
 * comparing them against activation names.
 */
type PreparedEdgeActivationNameCandidate = {
// The original candidate this entry was derived from.
candidate: EdgeActivationNameCandidate;
// compactActivationName() applied to candidate.heardName.
compact: string;
};
/**
 * Counts the words in `value`, where a word is a maximal run of ASCII letters
 * and digits (case-insensitive). Any other character acts as a separator.
 *
 * @param value - Text whose alphanumeric words should be counted.
 * @returns The number of alphanumeric runs found; `0` when there are none.
 */
export function realtimeVoiceActivationNameWordCount(value: string): number {
  // With the global flag, match() yields every run at once, or null for none.
  const words = value.match(/[a-z0-9]+/gi);
  return words === null ? 0 : words.length;
}
@@ -72,25 +82,39 @@ export function matchRealtimeVoiceActivationName(
activationNames: string[],
maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
): Extract<RealtimeVoiceActivationNameTranscriptResult, { allowed: true }> | undefined {
const preparedActivationNames: PreparedActivationName[] = [];
for (const activationName of activationNames) {
const normalizedActivationName = normalizeActivationNameCandidate(activationName);
if (!normalizedActivationName) {
continue;
}
preparedActivationNames.push({
activationName,
compact: compactActivationName(normalizedActivationName),
});
}
if (preparedActivationNames.length === 0) {
return undefined;
}
const candidates = [
...leadingActivationNameCandidates(text, maxWords),
...trailingActivationNameCandidates(text, maxWords),
].toSorted(
(left, right) =>
compactActivationName(right.heardName).length - compactActivationName(left.heardName).length,
);
]
.map(
(candidate): PreparedEdgeActivationNameCandidate => ({
candidate,
compact: compactActivationName(candidate.heardName),
}),
)
.toSorted((left, right) => right.compact.length - left.compact.length);
for (const candidate of candidates) {
for (const activationName of activationNames) {
const normalizedActivationName = normalizeActivationNameCandidate(activationName);
if (!normalizedActivationName) {
continue;
}
const heardCompact = compactActivationName(candidate.heardName);
const activationCompact = compactActivationName(normalizedActivationName);
for (const { candidate, compact: heardCompact } of candidates) {
for (const { activationName, compact: activationCompact } of preparedActivationNames) {
const exactMatch = heardCompact === activationCompact;
const fuzzyMatch =
candidate.edge === "leading" && isFuzzyActivationNameMatch(candidate, activationName);
candidate.edge === "leading" &&
isFuzzyActivationNameMatch(candidate, heardCompact, activationCompact);
if (exactMatch || fuzzyMatch) {
return {
allowed: true,
@@ -223,9 +247,13 @@ function levenshteinDistance(left: string, right: string): number {
return left.length;
}
let previous = Array.from({ length: right.length + 1 }, (_, index) => index);
let previous = new Uint32Array(right.length + 1);
let current = new Uint32Array(right.length + 1);
for (let index = 0; index <= right.length; index += 1) {
previous[index] = index;
}
for (let leftIndex = 0; leftIndex < left.length; leftIndex += 1) {
const current = [leftIndex + 1];
current[0] = leftIndex + 1;
for (let rightIndex = 0; rightIndex < right.length; rightIndex += 1) {
const cost = left[leftIndex] === right[rightIndex] ? 0 : 1;
current[rightIndex + 1] = Math.min(
@@ -234,9 +262,11 @@ function levenshteinDistance(left: string, right: string): number {
previous[rightIndex] + cost,
);
}
previous = current;
const nextPrevious = current;
current = previous;
previous = nextPrevious;
}
return previous[right.length] ?? Math.max(left.length, right.length);
return previous[right.length];
}
function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean {
@@ -274,14 +304,9 @@ function commonPrefixLength(left: string, right: string): number {
function isFuzzyActivationNameMatch(
candidate: EdgeActivationNameCandidate,
activationName: string,
heardCompact: string,
activationCompact: string,
): boolean {
const normalizedActivationName = normalizeActivationNameCandidate(activationName);
if (!normalizedActivationName) {
return false;
}
const heardCompact = compactActivationName(candidate.heardName);
const activationCompact = compactActivationName(normalizedActivationName);
if (!heardCompact || !activationCompact || activationCompact.length < 5) {
return false;
}