refactor(telegram): centralize text parsing helpers

This commit is contained in:
Peter Steinberger
2026-03-08 02:03:37 +00:00
parent 6a8081a7f3
commit e7056272bc
4 changed files with 110 additions and 16 deletions

View File

@@ -48,6 +48,7 @@ import {
} from "./bot-updates.js";
import { resolveMedia } from "./bot/delivery.js";
import {
getTelegramTextParts,
buildTelegramGroupPeerId,
buildTelegramParentPeer,
resolveTelegramForumThreadId,
@@ -1008,7 +1009,7 @@ export const registerTelegramHandlers = ({
// Skip sticker-only messages where the sticker was skipped (animated/video)
// These have no media and no text content to process.
const hasText = Boolean((msg.text ?? msg.caption ?? "").trim());
const hasText = Boolean(getTelegramTextParts(msg).text.trim());
if (msg.sticker && !media && !hasText) {
logVerbose("telegram: skipping sticker-only message (unsupported sticker type)");
return;

View File

@@ -60,13 +60,14 @@ import {
buildTelegramGroupFrom,
buildTelegramGroupPeerId,
buildTypingThreadParams,
resolveTelegramDirectPeerId,
resolveTelegramMediaPlaceholder,
expandTextLinks,
normalizeForwardedContext,
describeReplyTarget,
extractTelegramLocation,
getTelegramTextParts,
hasBotMention,
normalizeForwardedContext,
resolveTelegramDirectPeerId,
resolveTelegramMediaPlaceholder,
resolveTelegramThreadSpec,
} from "./bot/helpers.js";
import type { StickerMetadata, TelegramContext } from "./bot/types.js";
@@ -397,6 +398,7 @@ export const buildTelegramMessageContext = async ({
});
const botUsername = primaryCtx.me?.username?.toLowerCase();
const messageTextParts = getTelegramTextParts(msg);
const allowForCommands = isGroup ? effectiveGroupAllow : effectiveDmAllow;
const senderAllowedForCommands = isSenderAllowed({
allow: allowForCommands,
@@ -404,7 +406,7 @@ export const buildTelegramMessageContext = async ({
senderUsername,
});
const useAccessGroups = cfg.commands?.useAccessGroups !== false;
const hasControlCommandInMessage = hasControlCommand(msg.text ?? msg.caption ?? "", cfg, {
const hasControlCommandInMessage = hasControlCommand(messageTextParts.text, cfg, {
botUsername,
});
const commandGate = resolveControlCommandGate({
@@ -434,8 +436,7 @@ export const buildTelegramMessageContext = async ({
const locationData = extractTelegramLocation(msg);
const locationText = locationData ? formatLocationText(locationData) : undefined;
const rawTextSource = msg.text ?? msg.caption ?? "";
const rawText = expandTextLinks(rawTextSource, msg.entities ?? msg.caption_entities).trim();
const rawText = expandTextLinks(messageTextParts.text, messageTextParts.entities).trim();
const hasUserText = Boolean(rawText || locationText);
let rawBody = [rawText, locationText].filter(Boolean).join("\n").trim();
if (!rawBody) {
@@ -500,13 +501,11 @@ export const buildTelegramMessageContext = async ({
}
}
const hasAnyMention = (msg.entities ?? msg.caption_entities ?? []).some(
(ent) => ent.type === "mention",
);
const hasAnyMention = messageTextParts.entities.some((ent) => ent.type === "mention");
const explicitlyMentioned = botUsername ? hasBotMention(msg, botUsername) : false;
const computedWasMentioned = matchesMentionWithExplicit({
text: msg.text ?? msg.caption ?? "",
text: messageTextParts.text,
mentionRegexes,
explicit: {
hasAnyMention,

View File

@@ -4,6 +4,8 @@ import {
buildTypingThreadParams,
describeReplyTarget,
expandTextLinks,
getTelegramTextParts,
hasBotMention,
normalizeForwardedContext,
resolveTelegramDirectPeerId,
resolveTelegramForumThreadId,
@@ -346,6 +348,64 @@ describe("describeReplyTarget", () => {
});
});
// The text/entity extraction helper gets its own suite so that a failure is
// reported under the function that is actually being exercised.
describe("getTelegramTextParts", () => {
  it("prefers caption text and caption entities when message text is absent", () => {
    expect(
      getTelegramTextParts({
        caption: "@gaian hello",
        caption_entities: [{ type: "mention", offset: 0, length: 6 }],
        chat: { id: 1, type: "private" },
        date: 1,
        message_id: 1,
        // oxlint-disable-next-line typescript/no-explicit-any
      } as any),
    ).toEqual({
      text: "@gaian hello",
      entities: [{ type: "mention", offset: 0, length: 6 }],
    });
  });
});

describe("hasBotMention", () => {
  it("matches exact username mentions from plain text", () => {
    expect(
      hasBotMention(
        {
          text: "@gaian what is the group id?",
          chat: { id: 1, type: "supergroup" },
          // oxlint-disable-next-line typescript/no-explicit-any
        } as any,
        "gaian",
      ),
    ).toBe(true);
  });

  // "@GaianChat_Bot" starts with "@gaian" (case-insensitively) but is a
  // different username, so it must not count as a mention of "gaian".
  it("does not match mention prefixes from longer bot usernames", () => {
    expect(
      hasBotMention(
        {
          text: "@GaianChat_Bot what is the group id?",
          chat: { id: 1, type: "supergroup" },
          // oxlint-disable-next-line typescript/no-explicit-any
        } as any,
        "gaian",
      ),
    ).toBe(false);
  });

  // The entity at offset 18 covers the trailing "@gaian" exactly.
  it("still matches exact mention entities", () => {
    expect(
      hasBotMention(
        {
          text: "@GaianChat_Bot hi @gaian",
          entities: [{ type: "mention", offset: 18, length: 6 }],
          chat: { id: 1, type: "supergroup" },
          // oxlint-disable-next-line typescript/no-explicit-any
        } as any,
        "gaian",
      ),
    ).toBe(true);
  });
});
describe("expandTextLinks", () => {
it("returns text unchanged when no entities are provided", () => {
expect(expandTextLinks("Hello world")).toBe("Hello world");

View File

@@ -280,18 +280,52 @@ export function buildGroupLabel(msg: Message, chatId: number | string, messageTh
return `group:${chatId}${topicSuffix}`;
}
/** A single entity (mention, link, ...) attached to a Telegram message's text or caption. */
export type TelegramTextEntity = NonNullable<Message["entities"]>[number];

/**
 * Extracts the textual content of a Telegram message together with the
 * entities that annotate that same string.
 *
 * The message `text` (with `entities`) takes precedence; when `text` is absent
 * the `caption` (with `caption_entities`) is used instead.
 *
 * @param msg - The message, or just its text-related fields.
 * @returns The chosen text (empty string when the message has neither text nor
 *   caption) and its entities (empty array when there are none).
 */
export function getTelegramTextParts(
  msg: Pick<Message, "text" | "caption" | "entities" | "caption_entities">,
): {
  text: string;
  entities: TelegramTextEntity[];
} {
  // Entity offsets index into the string they were attached to, so text and
  // entities must always be taken from the same source field. Resolving them
  // with two independent fallbacks could pair `text` with `caption_entities`.
  if (msg.text != null) {
    return { text: msg.text, entities: msg.entities ?? [] };
  }
  return { text: msg.caption ?? "", entities: msg.caption_entities ?? [] };
}
/**
 * Reports whether `char` contains an ASCII letter, digit, or underscore.
 *
 * @param char - The character to inspect; `undefined` stands for "no character".
 * @returns `false` when there is no character, otherwise whether it contains a
 *   letter, digit, or underscore.
 */
function isTelegramMentionWordChar(char: string | undefined): boolean {
  if (char == null) {
    return false;
  }
  // Without the `u` flag, `\w` is exactly [A-Za-z0-9_].
  return /\w/.test(char);
}
/**
 * Searches `text` for an occurrence of `mention` that is not directly preceded
 * or followed by a mention word character (as decided by
 * `isTelegramMentionWordChar`).
 *
 * @param text - The string to search.
 * @param mention - The exact substring to look for; matching is case-sensitive.
 * @returns `true` as soon as one occurrence with non-word (or missing)
 *   neighbours on both sides is found, `false` otherwise.
 */
function hasStandaloneTelegramMention(text: string, mention: string): boolean {
  for (let cursor = 0; cursor < text.length; ) {
    const hit = text.indexOf(mention, cursor);
    if (hit < 0) {
      break;
    }
    // Neighbours on either side of the hit; undefined at the string edges.
    const before = hit === 0 ? undefined : text[hit - 1];
    const after = text[hit + mention.length];
    const touchesWordChar =
      isTelegramMentionWordChar(before) || isTelegramMentionWordChar(after);
    if (!touchesWordChar) {
      return true;
    }
    // Resume one position later so overlapping occurrences are still examined.
    cursor = hit + 1;
  }
  return false;
}
export function hasBotMention(msg: Message, botUsername: string) {
const text = (msg.text ?? msg.caption ?? "").toLowerCase();
if (text.includes(`@${botUsername}`)) {
const { text, entities } = getTelegramTextParts(msg);
const mention = `@${botUsername}`.toLowerCase();
if (hasStandaloneTelegramMention(text.toLowerCase(), mention)) {
return true;
}
const entities = msg.entities ?? msg.caption_entities ?? [];
for (const ent of entities) {
if (ent.type !== "mention") {
continue;
}
const slice = (msg.text ?? msg.caption ?? "").slice(ent.offset, ent.offset + ent.length);
if (slice.toLowerCase() === `@${botUsername}`) {
const slice = text.slice(ent.offset, ent.offset + ent.length);
if (slice.toLowerCase() === mention) {
return true;
}
}