From e7056272bc53c6897e83936dfe5817e47e0ff2f6 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 8 Mar 2026 02:03:37 +0000 Subject: [PATCH] refactor(telegram): centralize text parsing helpers --- src/telegram/bot-handlers.ts | 3 +- src/telegram/bot-message-context.ts | 19 +++++---- src/telegram/bot/helpers.test.ts | 60 +++++++++++++++++++++++++++++ src/telegram/bot/helpers.ts | 44 ++++++++++++++++++--- 4 files changed, 110 insertions(+), 16 deletions(-) diff --git a/src/telegram/bot-handlers.ts b/src/telegram/bot-handlers.ts index a83e99d41c5..e46e0c43fb8 100644 --- a/src/telegram/bot-handlers.ts +++ b/src/telegram/bot-handlers.ts @@ -48,6 +48,7 @@ import { } from "./bot-updates.js"; import { resolveMedia } from "./bot/delivery.js"; import { + getTelegramTextParts, buildTelegramGroupPeerId, buildTelegramParentPeer, resolveTelegramForumThreadId, @@ -1008,7 +1009,7 @@ export const registerTelegramHandlers = ({ // Skip sticker-only messages where the sticker was skipped (animated/video) // These have no media and no text content to process. - const hasText = Boolean((msg.text ?? msg.caption ?? "").trim()); + const hasText = Boolean(getTelegramTextParts(msg).text.trim()); if (msg.sticker && !media && !hasText) { logVerbose("telegram: skipping sticker-only message (unsupported sticker type)"); return; diff --git a/src/telegram/bot-message-context.ts b/src/telegram/bot-message-context.ts index 674c01b89f3..4b5dae6789c 100644 --- a/src/telegram/bot-message-context.ts +++ b/src/telegram/bot-message-context.ts @@ -60,13 +60,14 @@ import { buildTelegramGroupFrom, buildTelegramGroupPeerId, buildTypingThreadParams, - resolveTelegramDirectPeerId, - resolveTelegramMediaPlaceholder, expandTextLinks, - normalizeForwardedContext, describeReplyTarget, extractTelegramLocation, + getTelegramTextParts, hasBotMention, + normalizeForwardedContext, + resolveTelegramDirectPeerId, + resolveTelegramMediaPlaceholder, resolveTelegramThreadSpec, } from "./bot/helpers.js"; import type { StickerMetadata, TelegramContext } from "./bot/types.js"; @@ -397,6 +398,7 @@ export const buildTelegramMessageContext = async ({ }); const botUsername = primaryCtx.me?.username?.toLowerCase(); + const messageTextParts = getTelegramTextParts(msg); const allowForCommands = isGroup ? effectiveGroupAllow : effectiveDmAllow; const senderAllowedForCommands = isSenderAllowed({ allow: allowForCommands, @@ -404,7 +406,7 @@ export const buildTelegramMessageContext = async ({ senderUsername, }); const useAccessGroups = cfg.commands?.useAccessGroups !== false; - const hasControlCommandInMessage = hasControlCommand(msg.text ?? msg.caption ?? "", cfg, { + const hasControlCommandInMessage = hasControlCommand(messageTextParts.text, cfg, { botUsername, }); const commandGate = resolveControlCommandGate({ @@ -434,8 +436,7 @@ export const buildTelegramMessageContext = async ({ const locationData = extractTelegramLocation(msg); const locationText = locationData ? formatLocationText(locationData) : undefined; - const rawTextSource = msg.text ?? msg.caption ?? ""; - const rawText = expandTextLinks(rawTextSource, msg.entities ?? msg.caption_entities).trim(); + const rawText = expandTextLinks(messageTextParts.text, messageTextParts.entities).trim(); const hasUserText = Boolean(rawText || locationText); let rawBody = [rawText, locationText].filter(Boolean).join("\n").trim(); if (!rawBody) { @@ -500,13 +501,11 @@ export const buildTelegramMessageContext = async ({ } } - const hasAnyMention = (msg.entities ?? msg.caption_entities ?? []).some( - (ent) => ent.type === "mention", - ); + const hasAnyMention = messageTextParts.entities.some((ent) => ent.type === "mention"); const explicitlyMentioned = botUsername ? hasBotMention(msg, botUsername) : false; const computedWasMentioned = matchesMentionWithExplicit({ - text: msg.text ?? msg.caption ?? "", + text: messageTextParts.text, mentionRegexes, explicit: { hasAnyMention, diff --git a/src/telegram/bot/helpers.test.ts b/src/telegram/bot/helpers.test.ts index c83311980b2..fe30465b40c 100644 --- a/src/telegram/bot/helpers.test.ts +++ b/src/telegram/bot/helpers.test.ts @@ -4,6 +4,8 @@ import { buildTypingThreadParams, describeReplyTarget, expandTextLinks, + getTelegramTextParts, + hasBotMention, normalizeForwardedContext, resolveTelegramDirectPeerId, resolveTelegramForumThreadId, @@ -346,6 +348,64 @@ describe("describeReplyTarget", () => { }); }); +describe("hasBotMention", () => { + it("prefers caption text and caption entities when message text is absent", () => { + expect( + getTelegramTextParts({ + caption: "@gaian hello", + caption_entities: [{ type: "mention", offset: 0, length: 6 }], + chat: { id: 1, type: "private" }, + date: 1, + message_id: 1, + // oxlint-disable-next-line typescript/no-explicit-any + } as any), + ).toEqual({ + text: "@gaian hello", + entities: [{ type: "mention", offset: 0, length: 6 }], + }); + }); + + it("matches exact username mentions from plain text", () => { + expect( + hasBotMention( + { + text: "@gaian what is the group id?", + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(true); + }); + + it("does not match mention prefixes from longer bot usernames", () => { + expect( + hasBotMention( + { + text: "@GaianChat_Bot what is the group id?", + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(false); + }); + + it("still matches exact mention entities", () => { + expect( + hasBotMention( + { + text: "@GaianChat_Bot hi @gaian", + entities: [{ type: "mention", offset: 18, length: 6 }], + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(true); + }); +}); + describe("expandTextLinks", () => { it("returns text unchanged when no entities are provided", () => { expect(expandTextLinks("Hello world")).toBe("Hello world"); diff --git a/src/telegram/bot/helpers.ts b/src/telegram/bot/helpers.ts index 1f078c94c35..2d1cd9ef7a1 100644 --- a/src/telegram/bot/helpers.ts +++ b/src/telegram/bot/helpers.ts @@ -280,18 +280,52 @@ export function buildGroupLabel(msg: Message, chatId: number | string, messageTh return `group:${chatId}${topicSuffix}`; } +export type TelegramTextEntity = NonNullable[number]; + +export function getTelegramTextParts( + msg: Pick, +): { + text: string; + entities: TelegramTextEntity[]; +} { + const text = msg.text ?? msg.caption ?? ""; + const entities = msg.entities ?? msg.caption_entities ?? []; + return { text, entities }; +} + +function isTelegramMentionWordChar(char: string | undefined): boolean { + return char != null && /[a-z0-9_]/i.test(char); +} + +function hasStandaloneTelegramMention(text: string, mention: string): boolean { + let startIndex = 0; + while (startIndex < text.length) { + const idx = text.indexOf(mention, startIndex); + if (idx === -1) { + return false; + } + const prev = idx > 0 ? text[idx - 1] : undefined; + const next = text[idx + mention.length]; + if (!isTelegramMentionWordChar(prev) && !isTelegramMentionWordChar(next)) { + return true; + } + startIndex = idx + 1; + } + return false; +} + export function hasBotMention(msg: Message, botUsername: string) { - const text = (msg.text ?? msg.caption ?? "").toLowerCase(); - if (text.includes(`@${botUsername}`)) { + const { text, entities } = getTelegramTextParts(msg); + const mention = `@${botUsername}`.toLowerCase(); + if (hasStandaloneTelegramMention(text.toLowerCase(), mention)) { return true; } - const entities = msg.entities ?? msg.caption_entities ?? []; for (const ent of entities) { if (ent.type !== "mention") { continue; } - const slice = (msg.text ?? msg.caption ?? "").slice(ent.offset, ent.offset + ent.length); - if (slice.toLowerCase() === `@${botUsername}`) { + const slice = text.slice(ent.offset, ent.offset + ent.length); + if (slice.toLowerCase() === mention) { return true; } }