fix: wire telegram disableAudioPreflight config validation and precedence tests (#23067) (thanks @yangnim21029)

2026-03-12 07:20:45 +00:00 · 2026-03-02 22:26:30 +00:00
parent d3cb85eaf5
commit 1fa2488db1
6 changed files with 138 additions and 28 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
 - Tools/PDF analysis: add a first-class `pdf` tool with native Anthropic and Google PDF provider support, extraction fallback for non-native models, configurable defaults (`agents.defaults.pdfModel`, `pdfMaxBytesMb`, `pdfMaxPages`), and docs/tests covering routing, validation, and registration. (#31319) Thanks @tyler6204.
 - Zalo Personal plugin (`@openclaw/zalouser`): rebuilt channel runtime to use native `zca-js` integration in-process, removing external CLI transport usage and keeping QR/login + send/listen flows fully inside OpenClaw.
 - Telegram/DM streaming: use `sendMessageDraft` for private preview streaming, keep reasoning/answer preview lanes separated in DM reasoning-stream mode. (#31824) Thanks @obviyus.
+- Telegram/voice mention gating: add optional `disableAudioPreflight` on group/topic config to skip mention-detection preflight transcription for inbound voice notes where operators want text-only mention checks. (#23067) Thanks @yangnim21029.
 - CLI/Config validation: add `openclaw config validate` (with `--json`) to validate config files before gateway startup, and include detailed invalid-key paths in startup invalid-config errors. (#31220) thanks @Sid-Qin.
 - Tools/Diffs: add PDF file output support and rendering quality customization controls (`fileQuality`, `fileScale`, `fileMaxWidth`) for generated diff artifacts, and document PDF as the preferred option when messaging channels compress images. (#31342) Thanks @gumadeiras.
 - README/Contributors: rank contributor avatars by composite score (commits + merged PRs + code LOC), excluding docs-only LOC to prevent bulk-generated files from inflating rankings. (#23970) Thanks @tyler6204.
--- a/docs/nodes/audio.md
+++ b/docs/nodes/audio.md
@@ -170,6 +170,12 @@ When `requireMention: true` is set for a group chat, OpenClaw now transcribes au
 - If transcription fails during preflight (timeout, API error, etc.), the message is processed based on text-only mention detection.
 - This ensures that mixed messages (text + audio) are never incorrectly dropped.

+**Opt-out per Telegram group/topic:**
+
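+- Set `channels.telegram.groups.<chatId>.disableAudioPreflight: true` to skip the preflight transcription used for mention detection in that group; voice notes are then gated on text-only mention checks.
+- Set `channels.telegram.groups.<chatId>.topics.<threadId>.disableAudioPreflight` to override the group setting for a single topic (`true` to skip, `false` to force-enable); a topic that leaves it unset inherits the group value.
+- Default is `false`: preflight transcription stays enabled and runs whenever the mention-gating conditions apply.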
+
 **Example:** A user sends a voice note saying "Hey @Claude, what's the weather?" in a Telegram group with `requireMention: true`. The voice note is transcribed, the mention is detected, and the agent replies.

 ## Gotchas
--- a/src/config/config.telegram-audio-preflight.test.ts
+++ b/src/config/config.telegram-audio-preflight.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it } from "vitest";
+import { OpenClawSchema } from "./zod-schema.js";
+
+describe("telegram disableAudioPreflight schema", () => {
+  it("accepts disableAudioPreflight for groups and topics", () => {
+    const res = OpenClawSchema.safeParse({
+      channels: {
+        telegram: {
+          groups: {
+            "*": {
+              requireMention: true,
+              disableAudioPreflight: true,
+              topics: {
+                "123": {
+                  disableAudioPreflight: false,
+                },
+              },
+            },
+          },
+        },
+      },
+    });
+
+    expect(res.success).toBe(true);
+    if (!res.success) {
+      return;
+    }
+
+    const group = res.data.channels?.telegram?.groups?.["*"];
+    expect(group?.disableAudioPreflight).toBe(true);
+    expect(group?.topics?.["123"]?.disableAudioPreflight).toBe(false);
+  });
+
+  it("rejects non-boolean disableAudioPreflight values", () => {
+    const res = OpenClawSchema.safeParse({
+      channels: {
+        telegram: {
+          groups: {
+            "*": {
+              disableAudioPreflight: "yes",
+            },
+          },
+        },
+      },
+    });
+
+    expect(res.success).toBe(false);
+  });
+});
--- a/src/config/zod-schema.providers-core.ts
+++ b/src/config/zod-schema.providers-core.ts
@@ -57,6 +57,7 @@ const TelegramCapabilitiesSchema = z.union([
 export const TelegramTopicSchema = z
  .object({
    requireMention: z.boolean().optional(),
+    disableAudioPreflight: z.boolean().optional(),
    groupPolicy: GroupPolicySchema.optional(),
    skills: z.array(z.string()).optional(),
    enabled: z.boolean().optional(),
@@ -68,6 +69,7 @@ export const TelegramTopicSchema = z
 export const TelegramGroupSchema = z
  .object({
    requireMention: z.boolean().optional(),
+    disableAudioPreflight: z.boolean().optional(),
    groupPolicy: GroupPolicySchema.optional(),
    tools: ToolPolicySchema,
    toolsBySender: ToolPolicyBySenderSchema,
--- a/src/telegram/bot-message-context.audio-transcript.test.ts
+++ b/src/telegram/bot-message-context.audio-transcript.test.ts
@@ -45,39 +45,22 @@ describe("buildTelegramMessageContext audio transcript body", () => {
  it("skips preflight transcription when disableAudioPreflight is true", async () => {
    transcribeFirstAudioMock.mockClear();

-    const ctx = await buildTelegramMessageContext({
-      primaryCtx: {
-        message: {
-          message_id: 2,
-          chat: { id: -1001234567891, type: "supergroup", title: "Test Group 2" },
-          date: 1700000100,
-          from: { id: 43, first_name: "Bob" },
-          voice: { file_id: "voice-2" },
-        },
-        me: { id: 7, username: "bot" },
-      } as never,
+    const ctx = await buildTelegramMessageContextForTest({
+      message: {
+        message_id: 2,
+        chat: { id: -1001234567891, type: "supergroup", title: "Test Group 2" },
+        date: 1700000100,
+        text: undefined,
+        from: { id: 43, first_name: "Bob" },
+        voice: { file_id: "voice-2" },
+      },
      allMedia: [{ path: "/tmp/voice2.ogg", contentType: "audio/ogg" }],
-      storeAllowFrom: [],
      options: { forceWasMentioned: true },
-      bot: {
-        api: {
-          sendChatAction: vi.fn(),
-          setMessageReaction: vi.fn(),
-        },
-      } as never,
      cfg: {
        agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/openclaw" } },
        channels: { telegram: {} },
        messages: { groupChat: { mentionPatterns: ["\\bbot\\b"] } },
-      } as never,
-      account: { accountId: "default" } as never,
-      historyLimit: 0,
-      groupHistories: new Map(),
-      dmPolicy: "open",
-      allowFrom: [],
-      groupAllowFrom: [],
-      ackReactionScope: "off",
-      logger: { info: vi.fn() },
+      },
      resolveGroupActivation: () => true,
      resolveGroupRequireMention: () => true,
      resolveTelegramGroupConfig: () => ({
@@ -90,4 +73,70 @@ describe("buildTelegramMessageContext audio transcript body", () => {
    expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
    expect(ctx?.ctxPayload?.Body).toContain("<media:audio>");
  });
+
+  it("uses topic disableAudioPreflight=false to override group disableAudioPreflight=true", async () => {
+    transcribeFirstAudioMock.mockResolvedValueOnce("topic override transcript");
+
+    const ctx = await buildTelegramMessageContextForTest({
+      message: {
+        message_id: 3,
+        chat: { id: -1001234567892, type: "supergroup", title: "Test Group 3" },
+        date: 1700000200,
+        text: undefined,
+        from: { id: 44, first_name: "Cara" },
+        voice: { file_id: "voice-3" },
+      },
+      allMedia: [{ path: "/tmp/voice3.ogg", contentType: "audio/ogg" }],
+      options: { forceWasMentioned: true },
+      cfg: {
+        agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/openclaw" } },
+        channels: { telegram: {} },
+        messages: { groupChat: { mentionPatterns: ["\\bbot\\b"] } },
+      },
+      resolveGroupActivation: () => true,
+      resolveGroupRequireMention: () => true,
+      resolveTelegramGroupConfig: () => ({
+        groupConfig: { requireMention: true, disableAudioPreflight: true },
+        topicConfig: { disableAudioPreflight: false },
+      }),
+    });
+
+    expect(ctx).not.toBeNull();
+    expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1);
+    expect(ctx?.ctxPayload?.BodyForAgent).toBe("topic override transcript");
+    expect(ctx?.ctxPayload?.Body).toContain("topic override transcript");
+    expect(ctx?.ctxPayload?.Body).not.toContain("<media:audio>");
+  });
+
+  it("uses topic disableAudioPreflight=true to override group disableAudioPreflight=false", async () => {
+    transcribeFirstAudioMock.mockClear();
+
+    const ctx = await buildTelegramMessageContextForTest({
+      message: {
+        message_id: 4,
+        chat: { id: -1001234567893, type: "supergroup", title: "Test Group 4" },
+        date: 1700000300,
+        text: undefined,
+        from: { id: 45, first_name: "Dan" },
+        voice: { file_id: "voice-4" },
+      },
+      allMedia: [{ path: "/tmp/voice4.ogg", contentType: "audio/ogg" }],
+      options: { forceWasMentioned: true },
+      cfg: {
+        agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/openclaw" } },
+        channels: { telegram: {} },
+        messages: { groupChat: { mentionPatterns: ["\\bbot\\b"] } },
+      },
+      resolveGroupActivation: () => true,
+      resolveGroupRequireMention: () => true,
+      resolveTelegramGroupConfig: () => ({
+        groupConfig: { requireMention: true, disableAudioPreflight: false },
+        topicConfig: { disableAudioPreflight: true },
+      }),
+    });
+
+    expect(ctx).not.toBeNull();
+    expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
+    expect(ctx?.ctxPayload?.Body).toContain("<media:audio>");
+  });
 });
--- a/src/telegram/bot-message-context.ts
+++ b/src/telegram/bot-message-context.ts
@@ -394,7 +394,10 @@ export const buildTelegramMessageContext = async ({
  const hasAudio = allMedia.some((media) => media.contentType?.startsWith("audio/"));

  const disableAudioPreflight =
-    firstDefined(topicConfig?.disableAudioPreflight, groupConfig?.disableAudioPreflight) === true;
+    firstDefined(
+      topicConfig?.disableAudioPreflight,
+      (groupConfig as TelegramGroupConfig | undefined)?.disableAudioPreflight,
+    ) === true;

  // Preflight audio transcription for mention detection in groups
  // This allows voice notes to be checked for mentions before being dropped