mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-28 09:22:14 +00:00
refactor: share realtime voice activation helpers (#86615)
This commit is contained in:
committed by
GitHub
parent
170e0aac2a
commit
d0ab0d9922
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- Voice: share activation-name matching and consult-transcript screening through the realtime voice SDK so Discord, browser voice, and meeting surfaces can reuse one implementation.
|
||||
- Cron: default `cron.maxConcurrentRuns` to 8 so scheduled automations and their isolated agent turns can make progress in parallel without explicit configuration.
|
||||
- QA-Lab: add `qa coverage --match <query>` so focused proof selection can discover matching scenarios from existing metadata before running live or remote lanes.
|
||||
- Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev.
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
390681a3d97af8c004db89ead136bd6cff693af5a0ddfe86a8e3c55a29a077eb plugin-sdk-api-baseline.json
|
||||
8dfaf69ee3d0a946bfdd1d8d97ef85262824d52c20854249f900db61f2a7f7b4 plugin-sdk-api-baseline.jsonl
|
||||
1d3e6177eeac57fc43736f7d5f76d8f825e1859ca625d268e97dc30b5567ea34 plugin-sdk-api-baseline.json
|
||||
6c093ff7c10bd81ee9d2c4fc5d07b206bc3a1f5acd0bad491cfc9e0df6689f6b plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -3,11 +3,14 @@ import type {
|
||||
ChannelDoctorLegacyConfigRule,
|
||||
} from "openclaw/plugin-sdk/channel-contract";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts";
|
||||
import {
|
||||
isSupportedRealtimeVoiceActivationName,
|
||||
normalizeRealtimeVoiceActivationNamePrefix,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { asObjectRecord, normalizeLegacyChannelAliases } from "openclaw/plugin-sdk/runtime-doctor";
|
||||
import { resolveDiscordPreviewStreamMode } from "./preview-streaming.js";
|
||||
|
||||
const LEGACY_TTS_PROVIDER_KEYS = ["openai", "elevenlabs", "microsoft", "edge"] as const;
|
||||
const DISCORD_REALTIME_WAKE_NAME_MAX_WORDS = 2;
|
||||
type AgentBindingConfig = NonNullable<OpenClawConfig["bindings"]>[number];
|
||||
|
||||
function hasLegacyTtsProviderKeys(value: unknown): boolean {
|
||||
@@ -78,23 +81,6 @@ function hasLegacyDiscordAccountGuildChannelAgentId(value: unknown): boolean {
|
||||
return Object.values(accounts).some((account) => hasLegacyDiscordGuildChannelAgentId(account));
|
||||
}
|
||||
|
||||
/** Counts the words in a wake name, where a word is a run of ASCII letters or digits. */
function realtimeWakeNameWordCount(value: string): number {
  const words = value.match(/[a-z0-9]+/gi);
  return words === null ? 0 : words.length;
}
|
||||
|
||||
/**
 * Rebuilds a wake name from its leading words, joined by single spaces.
 *
 * Keeps at most DISCORD_REALTIME_WAKE_NAME_MAX_WORDS words (original casing is
 * preserved) and returns undefined when the value contains no ASCII
 * alphanumeric word at all.
 */
function normalizeRealtimeWakeName(value: string): string | undefined {
  const words = value.match(/[a-z0-9]+/gi);
  if (words === null) {
    return undefined;
  }
  const kept = words.slice(0, DISCORD_REALTIME_WAKE_NAME_MAX_WORDS);
  return kept.join(" ");
}
|
||||
|
||||
/** True when the wake name has at least one word and no more than the Discord word limit. */
function isSupportedRealtimeWakeName(value: string): boolean {
  const words = realtimeWakeNameWordCount(value);
  if (words < 1) {
    return false;
  }
  return words <= DISCORD_REALTIME_WAKE_NAME_MAX_WORDS;
}
|
||||
|
||||
function hasUnsupportedRealtimeWakeNamesInVoice(value: unknown): boolean {
|
||||
const voice = asObjectRecord(value);
|
||||
const realtime = asObjectRecord(voice?.realtime);
|
||||
@@ -102,7 +88,8 @@ function hasUnsupportedRealtimeWakeNamesInVoice(value: unknown): boolean {
|
||||
return Array.isArray(wakeNames)
|
||||
? wakeNames.length === 0 ||
|
||||
wakeNames.some(
|
||||
(wakeName) => typeof wakeName === "string" && !isSupportedRealtimeWakeName(wakeName),
|
||||
(wakeName) =>
|
||||
typeof wakeName === "string" && !isSupportedRealtimeVoiceActivationName(wakeName),
|
||||
)
|
||||
: false;
|
||||
}
|
||||
@@ -231,10 +218,10 @@ function normalizeUnsupportedRealtimeWakeNames(
|
||||
let normalized = 0;
|
||||
let removed = 0;
|
||||
const nextWakeNames = wakeNames.flatMap((wakeName) => {
|
||||
if (typeof wakeName !== "string" || isSupportedRealtimeWakeName(wakeName)) {
|
||||
if (typeof wakeName !== "string" || isSupportedRealtimeVoiceActivationName(wakeName)) {
|
||||
return [wakeName];
|
||||
}
|
||||
const nextWakeName = normalizeRealtimeWakeName(wakeName);
|
||||
const nextWakeName = normalizeRealtimeVoiceActivationNamePrefix(wakeName);
|
||||
if (!nextWakeName) {
|
||||
removed += 1;
|
||||
return [];
|
||||
|
||||
@@ -3,9 +3,12 @@ import type { DiscordAccountConfig, OpenClawConfig } from "openclaw/plugin-sdk/c
|
||||
import {
|
||||
buildRealtimeVoiceAgentConsultChatMessage,
|
||||
buildRealtimeVoiceAgentConsultPolicyInstructions,
|
||||
classifySkippableRealtimeVoiceConsultTranscript,
|
||||
controlRealtimeVoiceAgentRun,
|
||||
createRealtimeVoiceAgentTalkbackQueue,
|
||||
createRealtimeVoiceBridgeSession,
|
||||
matchRealtimeVoiceActivationName,
|
||||
normalizeSupportedRealtimeVoiceActivationName,
|
||||
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
||||
REALTIME_VOICE_AGENT_CONTROL_TOOL,
|
||||
REALTIME_VOICE_AGENT_CONTROL_TOOL_NAME,
|
||||
@@ -22,6 +25,8 @@ import {
|
||||
type RealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceProviderConfig,
|
||||
type RealtimeVoiceToolCallEvent,
|
||||
sortRealtimeVoiceActivationNames,
|
||||
type RealtimeVoiceActivationNameTranscriptResult,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
@@ -65,35 +70,11 @@ const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
|
||||
const DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS = 60_000;
|
||||
const DISCORD_REALTIME_CONTROL_SPEECH_DEDUPE_MS = 5_000;
|
||||
const DISCORD_REALTIME_OUTPUT_PLAYBACK_WATCHDOG_MARGIN_MS = 1_500;
|
||||
const DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS = 2;
|
||||
const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
|
||||
const DISCORD_RAW_PCM_FRAME_BYTES = 3_840;
|
||||
const DISCORD_REALTIME_OUTPUT_PREROLL_FRAMES = 25;
|
||||
const DISCORD_REALTIME_TRAILING_SILENCE_MIN_MS = 700;
|
||||
const DISCORD_REALTIME_TRAILING_SILENCE_MAX_MS = 3_000;
|
||||
const DISCORD_REALTIME_FORCED_CONSULT_TRAILING_FRAGMENT_WORDS = new Set([
|
||||
"a",
|
||||
"about",
|
||||
"an",
|
||||
"and",
|
||||
"as",
|
||||
"at",
|
||||
"because",
|
||||
"but",
|
||||
"by",
|
||||
"for",
|
||||
"from",
|
||||
"in",
|
||||
"of",
|
||||
"on",
|
||||
"or",
|
||||
"so",
|
||||
"that",
|
||||
"the",
|
||||
"then",
|
||||
"to",
|
||||
"with",
|
||||
]);
|
||||
const DISCORD_REALTIME_FORCED_CONSULT_REASON =
|
||||
"provider_final_transcript_without_openclaw_agent_consult";
|
||||
const DISCORD_REALTIME_VERBOSE_OMITTED_EVENTS = new Set([
|
||||
@@ -204,28 +185,6 @@ function shouldLogRealtimeVerboseEvent(event: RealtimeVoiceBridgeEvent): boolean
|
||||
return !DISCORD_REALTIME_VERBOSE_OMITTED_EVENTS.has(event.type);
|
||||
}
|
||||
|
||||
/**
 * Classifies a transcript that should not be forwarded as a forced consult.
 *
 * The text is whitespace-collapsed, trimmed and lowercased first. Returns a
 * skip reason ("empty", "incomplete-transcript", "trailing-fragment" or
 * "non-actionable-closing"), or undefined when no skip rule applies.
 */
function classifySkippableForcedAgentProxyTranscript(text: string): string | undefined {
  const transcript = text.replace(/\s+/g, " ").trim().toLowerCase();
  if (transcript.length === 0) {
    return "empty";
  }

  // A trailing ellipsis marks a transcript that was cut off.
  if (/(\.\.\.|…)\s*$/.test(transcript)) {
    return "incomplete-transcript";
  }

  // Only a transcript whose very last characters are letters/apostrophes has a
  // "last word" here; surrounding apostrophes are stripped before the lookup.
  const tail = transcript.match(/[a-z']+$/);
  const finalWord = tail ? tail[0].replace(/^'+|'+$/g, "") : "";
  if (finalWord !== "" && DISCORD_REALTIME_FORCED_CONSULT_TRAILING_FRAGMENT_WORDS.has(finalWord)) {
    return "trailing-fragment";
  }

  // Anything containing a question mark is never treated as a closing remark.
  if (transcript.includes("?")) {
    return undefined;
  }
  const announcesReturn = /^(i'?ll|i will) be (right )?back\b/.test(transcript);
  const saysGoodbye = /\b(see you|bye(?:-bye)?|goodbye)\b/.test(transcript);
  return announcesReturn || saysGoodbye ? "non-actionable-closing" : undefined;
}
|
||||
|
||||
function readProviderConfigString(
|
||||
config: RealtimeVoiceProviderConfig,
|
||||
key: string,
|
||||
@@ -355,283 +314,6 @@ function normalizeControlSpeechText(text: string): string {
|
||||
return text.toLowerCase().replace(/\s+/g, " ").trim();
|
||||
}
|
||||
|
||||
/**
 * Lowercases a wake name, collapses whitespace runs to single spaces and trims it.
 * Returns undefined when nothing but whitespace remains.
 */
function normalizeWakeName(value: string): string | undefined {
  const parts = value
    .toLowerCase()
    .split(/\s+/g)
    .filter((part) => part.length > 0);
  return parts.length > 0 ? parts.join(" ") : undefined;
}
|
||||
|
||||
/**
 * Normalizes a configured wake name and rejects unsupported ones.
 *
 * Returns the normalized name only when it contains between one and
 * DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS ASCII alphanumeric words; non-string
 * input, blank input and longer names all yield undefined.
 */
function normalizeSupportedWakeName(value: string | undefined): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const name = normalizeWakeName(value);
  if (!name) {
    return undefined;
  }
  const words = name.match(/[a-z0-9]+/gi)?.length ?? 0;
  if (words < 1 || words > DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS) {
    return undefined;
  }
  return name;
}
|
||||
|
||||
/**
 * Reduces heard or configured text to lowercase ASCII alphanumeric words
 * separated by single spaces; every other character acts as a separator.
 * Returns undefined when no word is left.
 */
function normalizeWakeNameCandidate(value: string): string | undefined {
  const words = value
    .toLowerCase()
    .split(/[^a-z0-9]+/g)
    .filter((word) => word.length > 0);
  return words.length > 0 ? words.join(" ") : undefined;
}
|
||||
|
||||
/**
 * Drops every character that is not a lowercase ASCII letter or a digit.
 * Uppercase letters are dropped as well, so callers pass already-lowercased text.
 */
function compactWakeName(value: string): string {
  return value.split(/[^a-z0-9]+/g).join("");
}
|
||||
|
||||
type EdgeWakeNameCandidate = {
|
||||
edge: "leading" | "trailing";
|
||||
heardName: string;
|
||||
startIndex: number;
|
||||
endIndex: number;
|
||||
strongBoundary: boolean;
|
||||
};
|
||||
|
||||
type WakeNameTranscriptResult =
|
||||
| { allowed: true; text: string; wakeName: string; heardName: string; match: "exact" | "fuzzy" }
|
||||
| { allowed: false; text: string };
|
||||
type AllowedWakeNameTranscriptResult = Extract<WakeNameTranscriptResult, { allowed: true }>;
|
||||
|
||||
/**
 * Collects wake-name candidates from the start of a transcript.
 *
 * Candidates are built twice when the text opens with whitespace or a
 * "hey"/"ok"/"okay" opener: once from index 0 and once from just after the
 * opener. From each start, names of 1..maxWords consecutive words are emitted;
 * words only chain when separated purely by whitespace, apostrophes or hyphens.
 * `strongBoundary` records whether the name is directly followed by
 * punctuation (`,.:;!?-`) or the end of the text.
 */
function leadingWakeNameCandidates(text: string, maxWords: number): EdgeWakeNameCandidate[] {
  const openerLength =
    /^\s*(?:(?:hey|ok|okay)(?:\s*[-,:;]+\s*|\s+))?/i.exec(text)?.[0].length ?? 0;
  const starts = openerLength > 0 ? [0, openerLength] : [0];

  return starts.flatMap((startIndex) => {
    const wordPattern = /[a-z0-9]+/gi;
    wordPattern.lastIndex = startIndex;
    const found: EdgeWakeNameCandidate[] = [];
    let previousEnd = startIndex;

    while (found.length < maxWords) {
      const word = wordPattern.exec(text);
      if (word === null) {
        break;
      }
      // The gap before the first word is unconstrained; later gaps must be soft separators.
      if (found.length > 0 && !/^[\s'-]+$/.test(text.slice(previousEnd, word.index))) {
        break;
      }
      const endIndex = word.index + word[0].length;
      const heardName = normalizeWakeNameCandidate(text.slice(startIndex, endIndex));
      if (!heardName) {
        break;
      }
      found.push({
        edge: "leading",
        heardName,
        startIndex,
        endIndex,
        strongBoundary: /^\s*([,.:;!?-]|$)/.test(text.slice(endIndex)),
      });
      previousEnd = endIndex;
    }

    return found;
  });
}
|
||||
|
||||
/**
 * Collects wake-name candidates from the end of a transcript.
 *
 * Only the final 1..maxWords words are considered, and only when nothing but
 * optional whitespace and punctuation follows the last word. Each candidate
 * must start at the beginning of the text or right after whitespace or
 * punctuation, and multi-word names must be joined purely by whitespace,
 * apostrophes or hyphens. Trailing candidates always have a strong boundary.
 */
function trailingWakeNameCandidates(text: string, maxWords: number): EdgeWakeNameCandidate[] {
  const words = Array.from(text.matchAll(/[a-z0-9]+/gi));
  const finalWord = words[words.length - 1];
  if (!finalWord) {
    return [];
  }
  // Every candidate ends at the last word, so the tail check is done once.
  const endIndex = (finalWord.index ?? 0) + finalWord[0].length;
  if (!/^\s*(?:[,.:;!?-]+\s*)?$/.test(text.slice(endIndex))) {
    return [];
  }

  const found: EdgeWakeNameCandidate[] = [];
  const longest = Math.min(words.length, maxWords);
  for (let wordCount = 1; wordCount <= longest; wordCount += 1) {
    const firstWord = words[words.length - wordCount];
    if (!firstWord) {
      break;
    }
    const startIndex = firstWord.index ?? 0;
    if (!/(^|[\s,.:;!?-])$/.test(text.slice(0, startIndex))) {
      break;
    }
    if (wordCount > 1) {
      // Gap between the newly added first word and the word that follows it.
      const nextWord = words[words.length - wordCount + 1];
      const gap = nextWord ? text.slice(startIndex + firstWord[0].length, nextWord.index) : "";
      if (!/^[\s'-]+$/.test(gap)) {
        break;
      }
    }
    const heardName = normalizeWakeNameCandidate(text.slice(startIndex, endIndex));
    if (!heardName) {
      break;
    }
    found.push({ edge: "trailing", heardName, startIndex, endIndex, strongBoundary: true });
  }

  return found;
}
|
||||
|
||||
/**
 * Edit distance between two strings (insertions, deletions and substitutions
 * each cost 1), computed row by row over UTF-16 code units.
 */
function levenshteinDistance(left: string, right: string): number {
  if (left === right) {
    return 0;
  }
  if (left.length === 0) {
    return right.length;
  }
  if (right.length === 0) {
    return left.length;
  }

  // `above` holds the distances for the previous prefix of `left`.
  let above: number[] = [];
  for (let column = 0; column <= right.length; column += 1) {
    above.push(column);
  }
  for (let row = 1; row <= left.length; row += 1) {
    const here: number[] = [row];
    for (let column = 1; column <= right.length; column += 1) {
      const substitution = above[column - 1] + (left[row - 1] === right[column - 1] ? 0 : 1);
      const insertion = here[column - 1] + 1;
      const deletion = above[column] + 1;
      here.push(Math.min(insertion, deletion, substitution));
    }
    above = here;
  }
  return above[right.length] ?? Math.max(left.length, right.length);
}
|
||||
|
||||
/**
 * True when two equal-length strings differ in at least one position and every
 * differing position swaps a vowel (a, e, i, o, u, y) for another vowel or
 * swaps "l" and "r".
 */
function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean {
  if (left.length !== right.length) {
    return false;
  }
  const interchangeableGroups = ["aeiouy", "lr"];
  let sawSubstitution = false;
  for (let index = 0; index < left.length; index += 1) {
    const leftChar = left.charAt(index);
    const rightChar = right.charAt(index);
    if (leftChar === rightChar) {
      continue;
    }
    const interchangeable = interchangeableGroups.some(
      (group) => group.includes(leftChar) && group.includes(rightChar),
    );
    if (!interchangeable) {
      return false;
    }
    sawSubstitution = true;
  }
  return sawSubstitution;
}
|
||||
|
||||
/** Number of leading UTF-16 code units the two strings have in common. */
function commonPrefixLength(left: string, right: string): number {
  const limit = Math.min(left.length, right.length);
  let shared = 0;
  while (shared < limit && left[shared] === right[shared]) {
    shared += 1;
  }
  return shared;
}
|
||||
|
||||
/**
 * Decides whether a heard edge candidate is close enough to a wake name.
 *
 * Comparison is done on the compacted (separator-free) forms. A fuzzy match
 * requires a wake name of at least 5 characters, a strong boundary on the
 * candidate and an identical first character. Distance 0-1 always matches;
 * distances 2 and 3 match only under the extra length, phonetic and shared
 * prefix conditions below.
 */
function isFuzzyWakeNameMatch(candidate: EdgeWakeNameCandidate, wakeName: string): boolean {
  const canonical = normalizeWakeNameCandidate(wakeName);
  if (!canonical) {
    return false;
  }
  const heard = compactWakeName(candidate.heardName);
  const wanted = compactWakeName(canonical);
  if (!heard || !wanted || wanted.length < 5) {
    return false;
  }
  if (!candidate.strongBoundary || heard[0] !== wanted[0]) {
    return false;
  }

  const distance = levenshteinDistance(heard, wanted);
  if (distance <= 1) {
    return true;
  }
  const sameLength = heard.length === wanted.length;
  switch (distance) {
    case 2:
      return (
        heard.length >= 4 &&
        wanted.length >= 5 &&
        (!sameLength ||
          hasOnlyPhoneticSubstitutions(heard, wanted) ||
          commonPrefixLength(heard, wanted) >= 6)
      );
    case 3:
      return (
        heard.length >= 7 &&
        wanted.length >= 7 &&
        !sameLength &&
        commonPrefixLength(heard, wanted) >= 5
      );
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Removes a matched wake name from the transcript edge it was found on,
 * together with the whitespace/punctuation that separated it from the rest,
 * and returns the trimmed remainder.
 */
function stripEdgeWakeNameCandidate(text: string, candidate: EdgeWakeNameCandidate): string {
  const isLeading = candidate.edge === "leading";
  const remainder = isLeading
    ? text.slice(candidate.endIndex)
    : text.slice(0, candidate.startIndex);
  // Leading names leave their separator at the front of the remainder, trailing names at the end.
  const separator = isLeading ? /^\s*(?:[-,:;.!?]+\s*)?/ : /\s*(?:[-,:;.!?]+\s*)?$/;
  return remainder.replace(separator, "").trim();
}
|
||||
|
||||
/**
 * Looks for a configured wake name at the start or end of a transcript.
 *
 * Edge candidates are tried longest (compacted) heard name first, and for each
 * candidate the wake names are tried in the order given. The first exact or
 * fuzzy hit wins and is returned with the wake name stripped from the text;
 * undefined means no wake name was heard.
 */
function matchEdgeWakeName(
  text: string,
  wakeNames: string[],
): AllowedWakeNameTranscriptResult | undefined {
  const candidates = leadingWakeNameCandidates(text, DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS).concat(
    trailingWakeNameCandidates(text, DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS),
  );
  // `concat` returned a fresh array, so the in-place (stable) sort touches nothing shared.
  candidates.sort(
    (left, right) =>
      compactWakeName(right.heardName).length - compactWakeName(left.heardName).length,
  );

  for (const candidate of candidates) {
    const heardCompact = compactWakeName(candidate.heardName);
    for (const wakeName of wakeNames) {
      const canonical = normalizeWakeNameCandidate(wakeName);
      if (!canonical) {
        continue;
      }
      const exact = heardCompact === compactWakeName(canonical);
      if (!exact && !isFuzzyWakeNameMatch(candidate, wakeName)) {
        continue;
      }
      return {
        allowed: true,
        text: stripEdgeWakeNameCandidate(text, candidate),
        wakeName,
        heardName: candidate.heardName,
        match: exact ? "exact" : "fuzzy",
      };
    }
  }
  return undefined;
}
|
||||
|
||||
function resolveDiscordRealtimeWakeNames(params: {
|
||||
config: DiscordRealtimeVoiceConfig;
|
||||
cfg: OpenClawConfig;
|
||||
@@ -640,30 +322,24 @@ function resolveDiscordRealtimeWakeNames(params: {
|
||||
const rawConfigured = params.config?.wakeNames;
|
||||
if (rawConfigured) {
|
||||
const configured = rawConfigured
|
||||
.map((name) => normalizeSupportedWakeName(name))
|
||||
.map((name) => normalizeSupportedRealtimeVoiceActivationName(name))
|
||||
.filter((name): name is string => Boolean(name));
|
||||
return sortWakeNames(Array.from(new Set(configured)));
|
||||
return sortRealtimeVoiceActivationNames(Array.from(new Set(configured)));
|
||||
}
|
||||
const agent = params.cfg.agents?.list?.find((candidate) => candidate.id === params.agentId);
|
||||
const configuredAgentNames = [agent?.name, agent?.identity?.name]
|
||||
.map((name) => normalizeSupportedWakeName(name))
|
||||
.map((name) => normalizeSupportedRealtimeVoiceActivationName(name))
|
||||
.filter((name): name is string => Boolean(name));
|
||||
const productWakeNames = [normalizeSupportedWakeName("OpenClaw")].filter((name): name is string =>
|
||||
Boolean(name),
|
||||
const productWakeNames = [normalizeSupportedRealtimeVoiceActivationName("OpenClaw")].filter(
|
||||
(name): name is string => Boolean(name),
|
||||
);
|
||||
const defaults =
|
||||
configuredAgentNames.length > 0
|
||||
? [...configuredAgentNames, ...productWakeNames]
|
||||
: [normalizeSupportedWakeName(params.agentId), ...productWakeNames].filter(
|
||||
: [normalizeSupportedRealtimeVoiceActivationName(params.agentId), ...productWakeNames].filter(
|
||||
(name): name is string => Boolean(name),
|
||||
);
|
||||
return sortWakeNames(Array.from(new Set(defaults)));
|
||||
}
|
||||
|
||||
function sortWakeNames(wakeNames: string[]): string[] {
|
||||
return wakeNames.toSorted(
|
||||
(left, right) => right.length - left.length || left.localeCompare(right),
|
||||
);
|
||||
return sortRealtimeVoiceActivationNames(Array.from(new Set(defaults)));
|
||||
}
|
||||
|
||||
function matchesPendingAgentProxyQuestion(consultMessage: string, question: string): boolean {
|
||||
@@ -1524,14 +1200,21 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
this.talkback.enqueue(acceptedText, this.consumePendingSpeakerContext());
|
||||
}
|
||||
|
||||
private resolveWakeNameTranscript(text: string): WakeNameTranscriptResult {
|
||||
private resolveWakeNameTranscript(text: string): RealtimeVoiceActivationNameTranscriptResult {
|
||||
if (!this.requireWakeName) {
|
||||
return { allowed: true, text, wakeName: "", heardName: "", match: "exact" };
|
||||
return {
|
||||
allowed: true,
|
||||
text,
|
||||
activationName: "",
|
||||
heardName: "",
|
||||
match: "exact",
|
||||
edge: "leading",
|
||||
};
|
||||
}
|
||||
const wakeNameResult = matchEdgeWakeName(text, this.wakeNames);
|
||||
const wakeNameResult = matchRealtimeVoiceActivationName(text, this.wakeNames);
|
||||
if (wakeNameResult) {
|
||||
logger.info(
|
||||
`discord voice: realtime wake-name gate matched canonical=${wakeNameResult.wakeName} heard=${wakeNameResult.heardName} match=${wakeNameResult.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
|
||||
`discord voice: realtime wake-name gate matched canonical=${wakeNameResult.activationName} heard=${wakeNameResult.heardName} match=${wakeNameResult.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
|
||||
);
|
||||
return wakeNameResult;
|
||||
}
|
||||
@@ -1585,7 +1268,7 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
if (!question) {
|
||||
return undefined;
|
||||
}
|
||||
const skipReason = classifySkippableForcedAgentProxyTranscript(question);
|
||||
const skipReason = classifySkippableRealtimeVoiceConsultTranscript(question);
|
||||
if (skipReason) {
|
||||
const context = this.consumePendingSpeakerContext();
|
||||
logger.info(
|
||||
|
||||
@@ -50,6 +50,23 @@ export {
|
||||
type TalkTurnResult,
|
||||
type TalkTurnSuccess,
|
||||
} from "../talk/talk-session-controller.js";
|
||||
export {
|
||||
REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
|
||||
isSupportedRealtimeVoiceActivationName,
|
||||
matchRealtimeVoiceActivationName,
|
||||
normalizeRealtimeVoiceActivationName,
|
||||
normalizeRealtimeVoiceActivationNamePrefix,
|
||||
normalizeSupportedRealtimeVoiceActivationName,
|
||||
realtimeVoiceActivationNameWordCount,
|
||||
sortRealtimeVoiceActivationNames,
|
||||
type RealtimeVoiceActivationNameEdge,
|
||||
type RealtimeVoiceActivationNameMatchKind,
|
||||
type RealtimeVoiceActivationNameTranscriptResult,
|
||||
} from "../talk/activation-name.js";
|
||||
export {
|
||||
classifySkippableRealtimeVoiceConsultTranscript,
|
||||
type SkippableRealtimeVoiceConsultTranscriptReason,
|
||||
} from "../talk/consult-transcript.js";
|
||||
export {
|
||||
buildRealtimeVoiceAgentConsultChatMessage,
|
||||
buildRealtimeVoiceAgentConsultPolicyInstructions,
|
||||
|
||||
74
src/talk/activation-name.test.ts
Normal file
74
src/talk/activation-name.test.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
isSupportedRealtimeVoiceActivationName,
|
||||
matchRealtimeVoiceActivationName,
|
||||
normalizeRealtimeVoiceActivationNamePrefix,
|
||||
normalizeSupportedRealtimeVoiceActivationName,
|
||||
sortRealtimeVoiceActivationNames,
|
||||
} from "./activation-name.js";
|
||||
|
||||
// Unit tests for the shared activation-name helpers: normalization and
// validation, edge matching (exact and fuzzy) and longest-name preference.
describe("realtime voice activation names", () => {
  it("normalizes and validates one- or two-word activation names", () => {
    expect(normalizeSupportedRealtimeVoiceActivationName(" OpenClaw ")).toBe("openclaw");
    expect(normalizeSupportedRealtimeVoiceActivationName("Open Claw")).toBe("open claw");
    expect(normalizeSupportedRealtimeVoiceActivationName("Claw Bot Helper")).toBeUndefined();
    expect(isSupportedRealtimeVoiceActivationName("Claw Bot")).toBe(true);
    expect(isSupportedRealtimeVoiceActivationName("Claw Bot Helper")).toBe(false);
    // The prefix helper keeps the original casing and only truncates.
    expect(normalizeRealtimeVoiceActivationNamePrefix("Claw Bot Helper")).toBe("Claw Bot");
  });

  it("matches and strips leading exact activation names", () => {
    expect(matchRealtimeVoiceActivationName("Hey, Molty, ship it", ["molty"])).toEqual({
      allowed: true,
      activationName: "molty",
      edge: "leading",
      heardName: "molty",
      match: "exact",
      text: "ship it",
    });
  });

  it("matches and strips trailing exact activation names", () => {
    expect(matchRealtimeVoiceActivationName("ship it, Claw Bot", ["claw bot"])).toEqual({
      allowed: true,
      activationName: "claw bot",
      edge: "trailing",
      heardName: "claw bot",
      match: "exact",
      text: "ship it",
    });
  });

  it("accepts bounded fuzzy matches at the transcript edge", () => {
    expect(matchRealtimeVoiceActivationName("Malty, what changed?", ["molty"])).toMatchObject({
      allowed: true,
      activationName: "molty",
      edge: "leading",
      heardName: "malty",
      match: "fuzzy",
      text: "what changed?",
    });
  });

  it("does not fuzzy match inside a larger phrase without an edge boundary", () => {
    // Same matcher as the other "no result" assertion in this suite.
    expect(
      matchRealtimeVoiceActivationName("maltiness is not a wake name", ["molty"]),
    ).toBeUndefined();
  });

  it("prefers longer activation names first", () => {
    expect(sortRealtimeVoiceActivationNames(["claw", "claw bot", "openclaw"])).toEqual([
      "claw bot",
      "openclaw",
      "claw",
    ]);
    expect(matchRealtimeVoiceActivationName("Claw Bot, status", ["claw", "claw bot"])).toEqual({
      allowed: true,
      activationName: "claw bot",
      edge: "leading",
      heardName: "claw bot",
      match: "exact",
      text: "status",
    });
  });
});
|
||||
334
src/talk/activation-name.ts
Normal file
334
src/talk/activation-name.ts
Normal file
@@ -0,0 +1,334 @@
|
||||
export const REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS = 2;
|
||||
|
||||
export type RealtimeVoiceActivationNameEdge = "leading" | "trailing";
|
||||
export type RealtimeVoiceActivationNameMatchKind = "exact" | "fuzzy";
|
||||
|
||||
export type RealtimeVoiceActivationNameTranscriptResult =
|
||||
| {
|
||||
allowed: true;
|
||||
text: string;
|
||||
activationName: string;
|
||||
heardName: string;
|
||||
match: RealtimeVoiceActivationNameMatchKind;
|
||||
edge: RealtimeVoiceActivationNameEdge;
|
||||
}
|
||||
| { allowed: false; text: string };
|
||||
|
||||
type EdgeActivationNameCandidate = {
|
||||
edge: RealtimeVoiceActivationNameEdge;
|
||||
heardName: string;
|
||||
startIndex: number;
|
||||
endIndex: number;
|
||||
strongBoundary: boolean;
|
||||
};
|
||||
|
||||
/** Counts the words in an activation name, where a word is a run of ASCII letters or digits. */
export function realtimeVoiceActivationNameWordCount(value: string): number {
  const words = value.match(/[a-z0-9]+/gi);
  return words === null ? 0 : words.length;
}
|
||||
|
||||
/**
 * Lowercases an activation name, collapses whitespace runs to single spaces
 * and trims it. Returns undefined when nothing but whitespace remains.
 */
export function normalizeRealtimeVoiceActivationName(value: string): string | undefined {
  const parts = value
    .toLowerCase()
    .split(/\s+/g)
    .filter((part) => part.length > 0);
  return parts.length > 0 ? parts.join(" ") : undefined;
}
|
||||
|
||||
/**
 * Truncates an activation name to its first `maxWords` words.
 *
 * Words are runs of ASCII letters or digits; they keep their original casing
 * and are joined by single spaces.
 *
 * @param value - Raw activation name.
 * @param maxWords - Maximum number of words to keep (defaults to the SDK limit).
 * @returns The truncated name, or undefined when the value has no word or
 *   when `maxWords` allows no word at all.
 */
export function normalizeRealtimeVoiceActivationNamePrefix(
  value: string,
  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
): string | undefined {
  // Reject limits below one word (and NaN): `slice(0, 0)` would yield "" and a
  // negative limit would silently drop words from the END of the name instead.
  if (!(maxWords >= 1)) {
    return undefined;
  }
  const words = Array.from(value.matchAll(/[a-z0-9]+/gi), (match) => match[0]);
  if (words.length === 0) {
    return undefined;
  }
  return words.slice(0, maxWords).join(" ");
}
|
||||
|
||||
/**
 * True when the activation name has at least one word and at most `maxWords`
 * words (defaults to the SDK limit).
 */
export function isSupportedRealtimeVoiceActivationName(
  value: string,
  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
): boolean {
  const words = realtimeVoiceActivationNameWordCount(value);
  if (words < 1) {
    return false;
  }
  return words <= maxWords;
}
|
||||
|
||||
/**
 * Normalizes an activation name and rejects unsupported ones.
 *
 * Returns the lowercased, whitespace-collapsed name only when it passes the
 * word-count check for `maxWords`; non-string input, blank input and names
 * with too many words all yield undefined.
 */
export function normalizeSupportedRealtimeVoiceActivationName(
  value: string | undefined,
  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const name = normalizeRealtimeVoiceActivationName(value);
  if (!name) {
    return undefined;
  }
  return isSupportedRealtimeVoiceActivationName(name, maxWords) ? name : undefined;
}
|
||||
|
||||
/**
 * Returns a sorted copy of the names: longest first, ties broken with
 * `localeCompare`. The input array is left untouched.
 */
export function sortRealtimeVoiceActivationNames(names: string[]): string[] {
  const ordered = [...names];
  ordered.sort((left, right) => {
    const byLength = right.length - left.length;
    return byLength !== 0 ? byLength : left.localeCompare(right);
  });
  return ordered;
}
|
||||
|
||||
/**
 * Looks for one of the activation names at the start or end of a transcript.
 *
 * Edge candidates of up to `maxWords` words are tried longest (compacted)
 * heard name first, and for each candidate the activation names are tried in
 * the order given. The first exact or fuzzy hit wins and is returned with the
 * name stripped from the text; undefined means no activation name was heard.
 */
export function matchRealtimeVoiceActivationName(
  text: string,
  activationNames: string[],
  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
): Extract<RealtimeVoiceActivationNameTranscriptResult, { allowed: true }> | undefined {
  const candidates = leadingActivationNameCandidates(text, maxWords).concat(
    trailingActivationNameCandidates(text, maxWords),
  );
  // `concat` returned a fresh array, so the in-place (stable) sort touches nothing shared.
  candidates.sort(
    (left, right) =>
      compactActivationName(right.heardName).length - compactActivationName(left.heardName).length,
  );

  for (const candidate of candidates) {
    const heardCompact = compactActivationName(candidate.heardName);
    for (const activationName of activationNames) {
      const canonical = normalizeActivationNameCandidate(activationName);
      if (!canonical) {
        continue;
      }
      const exact = heardCompact === compactActivationName(canonical);
      if (!exact && !isFuzzyActivationNameMatch(candidate, activationName)) {
        continue;
      }
      return {
        allowed: true,
        text: stripEdgeActivationNameCandidate(text, candidate),
        activationName,
        heardName: candidate.heardName,
        match: exact ? "exact" : "fuzzy",
        edge: candidate.edge,
      };
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Reduces heard or configured text to lowercase ASCII alphanumeric words
 * separated by single spaces; every other character acts as a separator.
 * Returns undefined when no word is left.
 */
function normalizeActivationNameCandidate(value: string): string | undefined {
  const words = value
    .toLowerCase()
    .split(/[^a-z0-9]+/g)
    .filter((word) => word.length > 0);
  return words.length > 0 ? words.join(" ") : undefined;
}
|
||||
|
||||
/**
 * Drops every character that is not a lowercase ASCII letter or a digit.
 * Uppercase letters are dropped as well, so callers pass already-lowercased text.
 */
function compactActivationName(value: string): string {
  return value.split(/[^a-z0-9]+/g).join("");
}
|
||||
|
||||
/**
 * Collects activation-name candidates from the start of a transcript.
 *
 * Candidates are built twice when the text opens with whitespace or a
 * "hey"/"ok"/"okay" opener: once from index 0 and once from just after the
 * opener. From each start, names of 1..maxWords consecutive words are emitted;
 * words only chain when separated purely by whitespace, apostrophes or hyphens.
 * `strongBoundary` records whether the name is directly followed by
 * punctuation (`,.:;!?-`) or the end of the text.
 */
function leadingActivationNameCandidates(
  text: string,
  maxWords: number,
): EdgeActivationNameCandidate[] {
  const openerLength =
    /^\s*(?:(?:hey|ok|okay)(?:\s*[-,:;]+\s*|\s+))?/i.exec(text)?.[0].length ?? 0;
  const starts = openerLength > 0 ? [0, openerLength] : [0];

  return starts.flatMap((startIndex) => {
    const wordPattern = /[a-z0-9]+/gi;
    wordPattern.lastIndex = startIndex;
    const found: EdgeActivationNameCandidate[] = [];
    let previousEnd = startIndex;

    while (found.length < maxWords) {
      const word = wordPattern.exec(text);
      if (word === null) {
        break;
      }
      // The gap before the first word is unconstrained; later gaps must be soft separators.
      if (found.length > 0 && !/^[\s'-]+$/.test(text.slice(previousEnd, word.index))) {
        break;
      }
      const endIndex = word.index + word[0].length;
      const heardName = normalizeActivationNameCandidate(text.slice(startIndex, endIndex));
      if (!heardName) {
        break;
      }
      found.push({
        edge: "leading",
        heardName,
        startIndex,
        endIndex,
        strongBoundary: /^\s*([,.:;!?-]|$)/.test(text.slice(endIndex)),
      });
      previousEnd = endIndex;
    }

    return found;
  });
}
|
||||
|
||||
/**
 * Collects activation-name candidates from the end of a transcript.
 *
 * Only the final 1..maxWords words are considered, and only when nothing but
 * optional whitespace and punctuation follows the last word. Each candidate
 * must start at the beginning of the text or right after whitespace or
 * punctuation, and multi-word names must be joined purely by whitespace,
 * apostrophes or hyphens. Trailing candidates always have a strong boundary.
 */
function trailingActivationNameCandidates(
  text: string,
  maxWords: number,
): EdgeActivationNameCandidate[] {
  const words = Array.from(text.matchAll(/[a-z0-9]+/gi));
  const finalWord = words[words.length - 1];
  if (!finalWord) {
    return [];
  }
  // Every candidate ends at the last word, so the tail check is done once.
  const endIndex = (finalWord.index ?? 0) + finalWord[0].length;
  if (!/^\s*(?:[,.:;!?-]+\s*)?$/.test(text.slice(endIndex))) {
    return [];
  }

  const found: EdgeActivationNameCandidate[] = [];
  const longest = Math.min(words.length, maxWords);
  for (let wordCount = 1; wordCount <= longest; wordCount += 1) {
    const firstWord = words[words.length - wordCount];
    if (!firstWord) {
      break;
    }
    const startIndex = firstWord.index ?? 0;
    if (!/(^|[\s,.:;!?-])$/.test(text.slice(0, startIndex))) {
      break;
    }
    if (wordCount > 1) {
      // Gap between the newly added first word and the word that follows it.
      const nextWord = words[words.length - wordCount + 1];
      const gap = nextWord ? text.slice(startIndex + firstWord[0].length, nextWord.index) : "";
      if (!/^[\s'-]+$/.test(gap)) {
        break;
      }
    }
    const heardName = normalizeActivationNameCandidate(text.slice(startIndex, endIndex));
    if (!heardName) {
      break;
    }
    found.push({ edge: "trailing", heardName, startIndex, endIndex, strongBoundary: true });
  }

  return found;
}
|
||||
|
||||
/**
 * Computes the Levenshtein edit distance between two strings, counting
 * single-character insertions, deletions and substitutions (compared per
 * UTF-16 code unit).
 *
 * @param left - First string.
 * @param right - Second string.
 * @returns The minimum number of edits turning `left` into `right`.
 */
function levenshteinDistance(left: string, right: string): number {
  if (left === right) {
    return 0;
  }
  if (left.length === 0) {
    return right.length;
  }
  if (right.length === 0) {
    return left.length;
  }

  // Single rolling row: row[j] is the distance between the processed prefix
  // of `left` and the first j characters of `right`.
  const row: number[] = [];
  for (let column = 0; column <= right.length; column += 1) {
    row.push(column);
  }

  for (let i = 1; i <= left.length; i += 1) {
    // Value of the cell diagonally up-left; before this pass row[0] was i - 1.
    let diagonal = i - 1;
    row[0] = i;
    for (let j = 1; j <= right.length; j += 1) {
      const above = row[j];
      const substitution = diagonal + (left[i - 1] === right[j - 1] ? 0 : 1);
      row[j] = Math.min(row[j - 1] + 1, above + 1, substitution);
      diagonal = above;
    }
  }

  return row[right.length] ?? Math.max(left.length, right.length);
}
|
||||
|
||||
/**
 * Reports whether two equal-length strings differ only by "sound-alike"
 * substitutions: a vowel (`a e i o u y`) swapped for another vowel, or a
 * liquid (`l r`) swapped for the other liquid.
 *
 * @param left - First string.
 * @param right - Second string.
 * @returns `true` when the lengths match, at least one position differs, and
 *   every differing position is a vowel/vowel or liquid/liquid swap.
 */
function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean {
  if (left.length !== right.length) {
    return false;
  }

  const soundClassOf = (char: string): "vowel" | "liquid" | undefined => {
    switch (char) {
      case "a":
      case "e":
      case "i":
      case "o":
      case "u":
      case "y":
        return "vowel";
      case "l":
      case "r":
        return "liquid";
      default:
        return undefined;
    }
  };

  let sawDifference = false;
  for (let position = 0; position < left.length; position += 1) {
    const leftChar = left.charAt(position);
    const rightChar = right.charAt(position);
    if (leftChar === rightChar) {
      continue;
    }
    const leftClass = soundClassOf(leftChar);
    // A differing pair is only tolerated when both sides share a sound class.
    if (leftClass === undefined || leftClass !== soundClassOf(rightChar)) {
      return false;
    }
    sawDifference = true;
  }
  return sawDifference;
}
|
||||
|
||||
/**
 * Counts how many leading UTF-16 code units two strings have in common.
 *
 * @param left - First string.
 * @param right - Second string.
 * @returns Length of the shared prefix (0 when the first characters differ
 *   or either string is empty).
 */
function commonPrefixLength(left: string, right: string): number {
  const shorter = Math.min(left.length, right.length);
  let shared = 0;
  while (shared < shorter && left.charCodeAt(shared) === right.charCodeAt(shared)) {
    shared += 1;
  }
  return shared;
}
|
||||
|
||||
/**
 * Decides whether a heard edge candidate is a close-enough misspelling of the
 * configured activation name.
 *
 * Both names are compared in compact form (lowercase alphanumerics only).
 * A match requires a compact activation name of at least 5 characters, a
 * strong boundary on the candidate, and an identical first character. Beyond
 * that, the accepted edit distance grows with the evidence available:
 * - distance 0-1: always accepted;
 * - distance 2: heard name has >= 4 characters and either the lengths differ,
 *   the differences are only vowel/liquid swaps, or the shared prefix is >= 6;
 * - distance 3: both names have >= 7 characters, the lengths differ, and the
 *   shared prefix is >= 5.
 *
 * @param candidate - Candidate extracted from the edge of a transcript.
 * @param activationName - Configured activation name to compare against.
 * @returns `true` when the candidate is accepted as a fuzzy match.
 */
function isFuzzyActivationNameMatch(
  candidate: EdgeActivationNameCandidate,
  activationName: string,
): boolean {
  const target = normalizeActivationNameCandidate(activationName);
  if (!target) {
    return false;
  }

  const heard = compactActivationName(candidate.heardName);
  const wanted = compactActivationName(target);
  const eligible =
    heard.length > 0 &&
    wanted.length >= 5 &&
    candidate.strongBoundary &&
    heard[0] === wanted[0];
  if (!eligible) {
    return false;
  }

  const distance = levenshteinDistance(heard, wanted);
  if (distance <= 1) {
    return true;
  }

  const lengthsDiffer = heard.length !== wanted.length;
  if (distance === 2) {
    // `wanted.length >= 5` is already guaranteed by the eligibility guard above.
    return (
      heard.length >= 4 &&
      (lengthsDiffer ||
        hasOnlyPhoneticSubstitutions(heard, wanted) ||
        commonPrefixLength(heard, wanted) >= 6)
    );
  }
  if (distance === 3) {
    return (
      heard.length >= 7 &&
      wanted.length >= 7 &&
      lengthsDiffer &&
      commonPrefixLength(heard, wanted) >= 5
    );
  }
  return false;
}
|
||||
|
||||
/**
 * Removes a matched activation-name candidate from the edge of a transcript.
 *
 * For a leading candidate the text after the candidate is kept; otherwise the
 * text before it is kept. Whitespace and one run of separator punctuation
 * (`- , : ; . ! ?`) adjacent to the removed name are stripped as well.
 *
 * @param text - Transcript the candidate was extracted from.
 * @param candidate - Candidate whose span should be removed.
 * @returns The remaining transcript, trimmed.
 */
function stripEdgeActivationNameCandidate(
  text: string,
  candidate: EdgeActivationNameCandidate,
): string {
  const isLeading = candidate.edge === "leading";
  const remainder = isLeading
    ? text.slice(candidate.endIndex)
    : text.slice(0, candidate.startIndex);
  // Separator sits at the start of the remainder for leading names, at the end otherwise.
  const adjacentSeparator = isLeading
    ? /^\s*(?:[-,:;.!?]+\s*)?/
    : /\s*(?:[-,:;.!?]+\s*)?$/;
  return remainder.replace(adjacentSeparator, "").trim();
}
|
||||
35
src/talk/consult-transcript.test.ts
Normal file
35
src/talk/consult-transcript.test.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { classifySkippableRealtimeVoiceConsultTranscript } from "./consult-transcript.js";
|
||||
|
||||
// Covers each skip reason returned by the consult-transcript classifier, plus
// the pass-through case where a transcript should be acted on.
describe("realtime voice consult transcript classification", () => {
  const classify = classifySkippableRealtimeVoiceConsultTranscript;

  it("skips empty and incomplete transcripts", () => {
    expect(classify(" ")).toBe("empty");
    // Both the three-dot and the single-character ellipsis mark a cut-off transcript.
    for (const transcript of ["can you check...", "can you check…"]) {
      expect(classify(transcript)).toBe("incomplete-transcript");
    }
  });

  it("skips likely trailing fragments", () => {
    for (const transcript of ["tell me about", "ship it so"]) {
      expect(classify(transcript)).toBe("trailing-fragment");
    }
  });

  it("skips non-actionable closings unless phrased as a question", () => {
    for (const transcript of ["I'll be right back", "goodbye for now"]) {
      expect(classify(transcript)).toBe("non-actionable-closing");
    }
    expect(classify("can you say goodbye?")).toBeUndefined();
  });

  it("keeps actionable transcripts", () => {
    expect(classify("what changed in CI?")).toBeUndefined();
  });
});
|
||||
53
src/talk/consult-transcript.ts
Normal file
53
src/talk/consult-transcript.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
 * Function words (articles, prepositions, conjunctions and similar) that, when
 * they are the last word of a transcript, mark it as a likely unfinished
 * fragment.
 */
const REALTIME_VOICE_CONSULT_TRAILING_FRAGMENT_WORDS = new Set([
  "a",
  "about",
  "an",
  "and",
  "as",
  "at",
  "because",
  "but",
  "by",
  "for",
  "from",
  "in",
  "of",
  "on",
  "or",
  "so",
  "that",
  "the",
  "then",
  "to",
  "with",
]);

/** Reason why a realtime voice transcript is classified as skippable. */
export type SkippableRealtimeVoiceConsultTranscriptReason =
  | "empty"
  | "incomplete-transcript"
  | "trailing-fragment"
  | "non-actionable-closing";

/**
 * Classifies a transcript that matches one of the skip heuristics.
 *
 * Checks run in this order on a normalized copy of `text` (typographic
 * apostrophes mapped to `'`, whitespace collapsed, trimmed, lowercased):
 * 1. `"empty"` - nothing is left after normalization;
 * 2. `"incomplete-transcript"` - the text ends with `...` or `…`;
 * 3. `"trailing-fragment"` - the final word is a dangling function word;
 * 4. `"non-actionable-closing"` - no `?` appears and the text either starts
 *    with "I'll / I will be (right) back" or contains "see you", "bye",
 *    "bye-bye" or "goodbye" as a whole word.
 *
 * @param text - Raw transcript text.
 * @returns The skip reason, or `undefined` when no heuristic matches.
 */
export function classifySkippableRealtimeVoiceConsultTranscript(
  text: string,
): SkippableRealtimeVoiceConsultTranscriptReason | undefined {
  const normalized = text
    // Transcripts may carry typographic apostrophes ("I’ll"); fold them to
    // ASCII so the contraction and trailing-word patterns below still apply.
    .replace(/[\u2018\u2019\u02bc\uff07]/g, "'")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
  if (!normalized) {
    return "empty";
  }
  if (/(\.\.\.|…)\s*$/.test(normalized)) {
    return "incomplete-transcript";
  }
  // Final run of letters/apostrophes, with surrounding apostrophes removed.
  const lastWord = normalized.match(/[a-z']+$/)?.[0]?.replace(/^'+|'+$/g, "");
  if (lastWord && REALTIME_VOICE_CONSULT_TRAILING_FRAGMENT_WORDS.has(lastWord)) {
    return "trailing-fragment";
  }
  if (
    // A question mark anywhere keeps the transcript actionable.
    !normalized.includes("?") &&
    (/^(i'?ll|i will) be (right )?back\b/.test(normalized) ||
      /\b(see you|bye(?:-bye)?|goodbye)\b/.test(normalized))
  ) {
    return "non-actionable-closing";
  }
  return undefined;
}
|
||||
Reference in New Issue
Block a user