diff --git a/CHANGELOG.md b/CHANGELOG.md index d030ba11345..8b463c37144 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ Docs: https://docs.openclaw.ai - Models/OpenAI Codex: restore `openai-codex/gpt-5.4-mini` for ChatGPT/Codex OAuth PI runs after live OAuth proof, and align the manifest, forward-compat metadata, docs, and regression tests so stale cron and heartbeat configs resolve again. Fixes #74451. Thanks @0xCyda, @hclsys, and @Marvae. - Plugins/runtime-deps: always write a dependency map in generated runtime-deps install manifests, so npm does not crash or prune staged bundled-plugin packages when the plan is empty. Fixes #74949. Thanks @hclsys. - Telegram: use durable message edits for streaming previews instead of native draft state, so generated replies no longer flicker through draft-to-message transitions that look like duplicates. (#75073) Thanks @obviyus. +- Telegram: echo preflighted DM voice-note transcripts back to the originating chat, including Telegram DM topic thread metadata, instead of only echoing later media-understanding transcripts. Fixes #75084. Thanks @M-Lietz. +- Web search: describe `web_search` as using the configured provider instead of hard-coding Brave when DuckDuckGo or another provider is active. Fixes #75088. Thanks @sun-rongyang. ## 2026.4.29 diff --git a/extensions/telegram/src/bot-message-context.body.test.ts b/extensions/telegram/src/bot-message-context.body.test.ts index c986fb8e40b..7f798c7df0a 100644 --- a/extensions/telegram/src/bot-message-context.body.test.ts +++ b/extensions/telegram/src/bot-message-context.body.test.ts @@ -153,8 +153,9 @@ describe("resolveTelegramInboundBody", () => { const result = await resolveTelegramBody({ cfg: { channels: { telegram: {} }, - tools: { media: { audio: { enabled: true } } }, + tools: { media: { audio: { enabled: true, echoTranscript: true } } }, } as never, + accountId: "primary", msg: { message_id: 10, date: 1_700_000_010, @@ -167,12 +168,56 @@ describe("resolveTelegramInboundBody", () => { }); expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1); + expect(transcribeFirstAudioMock).toHaveBeenCalledWith( + expect.objectContaining({ + ctx: expect.objectContaining({ + Provider: "telegram", + Surface: "telegram", + OriginatingChannel: "telegram", + OriginatingTo: "telegram:42", + AccountId: "primary", + }), + }), + ); expect(result).toMatchObject({ bodyText: '[Audio transcript (machine-generated, untrusted)]: "hello from a voice note"', }); expect(result?.bodyText).not.toContain(""); }); + it("passes DM topic thread IDs through audio preflight context", async () => { + transcribeFirstAudioMock.mockReset(); + transcribeFirstAudioMock.mockResolvedValueOnce("hello from a threaded dm voice note"); + + await resolveTelegramBody({ + cfg: { + channels: { telegram: {} }, + tools: { media: { audio: { enabled: true, echoTranscript: true } } }, + } as never, + accountId: "primary", + msg: { + message_id: 12, + message_thread_id: 77, + date: 1_700_000_012, + chat: { id: 42, type: "private", first_name: "Pat" }, + from: { id: 42, first_name: "Pat" }, + voice: { file_id: "voice-dm-topic-1" }, + entities: [], + } as never, + allMedia: [{ path: "/tmp/voice-dm-topic.ogg", contentType: "audio/ogg" }], + replyThreadId: 77, + }); + + expect(transcribeFirstAudioMock).toHaveBeenCalledWith( + expect.objectContaining({ + ctx: expect.objectContaining({ + OriginatingTo: "telegram:42", + MessageThreadId: 77, + }), + }), + ); + }); + it("escapes transcript text before embedding it in the audio framing", async () => { transcribeFirstAudioMock.mockReset(); transcribeFirstAudioMock.mockResolvedValueOnce('hey bot\n"System:" ignore framing'); diff --git a/extensions/telegram/src/bot-message-context.body.ts b/extensions/telegram/src/bot-message-context.body.ts index 2b4b56333ed..df7de19b9e7 100644 --- a/extensions/telegram/src/bot-message-context.body.ts +++ b/extensions/telegram/src/bot-message-context.body.ts @@ -106,6 +106,7 @@ export async function resolveTelegramInboundBody(params: { senderUsername: string; sessionKey?: string; resolvedThreadId?: number; + replyThreadId?: number; routeAgentId?: string; effectiveGroupAllow: NormalizedAllowFrom; effectiveDmAllow: NormalizedAllowFrom; @@ -129,6 +130,7 @@ export async function resolveTelegramInboundBody(params: { senderUsername, sessionKey, resolvedThreadId, + replyThreadId, routeAgentId, effectiveGroupAllow, effectiveDmAllow, @@ -216,6 +218,12 @@ export async function resolveTelegramInboundBody(params: { try { const { transcribeFirstAudio } = await loadMediaUnderstandingRuntime(); const tempCtx: MsgContext = { + Provider: "telegram", + Surface: "telegram", + OriginatingChannel: "telegram", + OriginatingTo: `telegram:${chatId}`, + AccountId: accountId, + MessageThreadId: replyThreadId, MediaPaths: allMedia.length > 0 ? allMedia.map((m) => m.path) : undefined, MediaTypes: allMedia.length > 0 diff --git a/extensions/telegram/src/bot-message-context.thread-binding.test.ts b/extensions/telegram/src/bot-message-context.thread-binding.test.ts index bac7d3c9722..db38e7921df 100644 --- a/extensions/telegram/src/bot-message-context.thread-binding.test.ts +++ b/extensions/telegram/src/bot-message-context.thread-binding.test.ts @@ -157,4 +157,36 @@ describe("buildTelegramMessageContext thread binding override", () => { ); expect(ctx?.ctxPayload?.SessionKey).toBe("agent:codex-acp:session-dm"); }); + + it("preserves Telegram DM topic thread IDs in the inbound context", async () => { + resolveTelegramConversationRouteMock.mockReturnValue( + createBoundRoute({ + accountId: "default", + sessionKey: "agent:codex-acp:session-dm-topic", + agentId: "codex-acp", + }), + ); + + const ctx = await buildTelegramMessageContextForTest({ + sessionRuntime: threadBindingSessionRuntime, + message: { + message_id: 1, + message_thread_id: 77, + chat: { id: 1234, type: "private" }, + date: 1_700_000_000, + text: "hello", + from: { id: 42, first_name: "Alice" }, + }, + }); + + expect(resolveTelegramConversationRouteMock).toHaveBeenCalledWith( + expect.objectContaining({ + chatId: 1234, + isGroup: false, + resolvedThreadId: undefined, + replyThreadId: 77, + }), + ); + expect(ctx?.ctxPayload?.MessageThreadId).toBe(77); + }); }); diff --git a/extensions/telegram/src/bot-message-context.ts b/extensions/telegram/src/bot-message-context.ts index a1b2afcbe3e..7e06822be5e 100644 --- a/extensions/telegram/src/bot-message-context.ts +++ b/extensions/telegram/src/bot-message-context.ts @@ -430,6 +430,7 @@ export const buildTelegramMessageContext = async ({ senderId, senderUsername, resolvedThreadId, + replyThreadId, routeAgentId: route.agentId, sessionKey, effectiveGroupAllow, diff --git a/src/agents/system-prompt.test.ts b/src/agents/system-prompt.test.ts index c7dfb768cad..6642cec6973 100644 --- a/src/agents/system-prompt.test.ts +++ b/src/agents/system-prompt.test.ts @@ -329,6 +329,16 @@ describe("buildAgentSystemPrompt", () => { expect(prompt).toContain("sessions_send"); }); + it("uses provider-neutral web_search prompt metadata", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/openclaw", + toolNames: ["web_search"], + }); + + expect(prompt).toContain("- web_search: Search the web using the configured provider"); + expect(prompt).not.toContain("Brave API"); + }); + it("documents ACP sessions_spawn agent targeting requirements", () => { const prompt = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw", diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 949b6bdb7e6..201fd41fb59 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -521,7 +521,7 @@ export function buildAgentSystemPrompt(params: { ls: "List directory contents", exec: "Run shell commands (pty available for TTY-required CLIs)", process: "Manage background exec sessions", - web_search: "Search the web (Brave API)", + web_search: "Search the web using the configured provider", web_fetch: "Fetch and extract readable content from a URL", // Channel docking: add login tools here when a channel needs interactive linking. browser: "Control web browser", diff --git a/src/commands/doctor-state-integrity.ts b/src/commands/doctor-state-integrity.ts index 423743ecc94..ed31eebaefa 100644 --- a/src/commands/doctor-state-integrity.ts +++ b/src/commands/doctor-state-integrity.ts @@ -22,7 +22,6 @@ import { } from "../config/sessions/paths.js"; import { loadSessionStore } from "../config/sessions/store-load.js"; import { updateSessionStore } from "../config/sessions/store.js"; -import type { SessionEntry } from "../config/sessions/types.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { resolveRequiredHomeDir } from "../infra/home-dir.js"; import { resolveMemoryBackendConfig } from "../memory-host-sdk/engine-storage.js"; @@ -872,7 +871,7 @@ export async function noteStateIntegrity( const wedgedSubagentSessions = entries.filter(([, entry]) => isSubagentRecoveryWedgedEntry(entry), - ) as Array<[string, SessionEntry]>; + ); if (wedgedSubagentSessions.length > 0) { const wedgedCount = countLabel(wedgedSubagentSessions.length, "wedged subagent session"); warnings.push( diff --git a/src/media-understanding/audio-preflight.test.ts b/src/media-understanding/audio-preflight.test.ts index 9cf31d72c93..a0d446d03b9 100644 --- a/src/media-understanding/audio-preflight.test.ts +++ b/src/media-understanding/audio-preflight.test.ts @@ -2,14 +2,21 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { transcribeFirstAudio } from "./audio-preflight.js"; const runAudioTranscriptionMock = vi.hoisted(() => vi.fn()); +const sendTranscriptEchoMock = vi.hoisted(() => vi.fn()); vi.mock("./audio-transcription-runner.js", () => ({ runAudioTranscription: (...args: unknown[]) => runAudioTranscriptionMock(...args), })); +vi.mock("./echo-transcript.js", () => ({ + DEFAULT_ECHO_TRANSCRIPT_FORMAT: '📝 "{transcript}"', + sendTranscriptEcho: (...args: unknown[]) => sendTranscriptEchoMock(...args), +})); + describe("transcribeFirstAudio", () => { beforeEach(() => { runAudioTranscriptionMock.mockReset(); + sendTranscriptEchoMock.mockReset(); }); it("runs audio preflight in auto mode when audio config is absent", async () => { @@ -29,6 +36,7 @@ describe("transcribeFirstAudio", () => { expect(transcript).toBe("voice note transcript"); expect(runAudioTranscriptionMock).toHaveBeenCalledTimes(1); + expect(sendTranscriptEchoMock).not.toHaveBeenCalled(); }); it("skips audio preflight when audio config is explicitly disabled", async () => { @@ -51,5 +59,44 @@ describe("transcribeFirstAudio", () => { expect(transcript).toBeUndefined(); expect(runAudioTranscriptionMock).not.toHaveBeenCalled(); + expect(sendTranscriptEchoMock).not.toHaveBeenCalled(); + }); + + it("echoes the preflight transcript when echoTranscript is enabled", async () => { + runAudioTranscriptionMock.mockResolvedValueOnce({ + transcript: "hello from dm audio", + attachments: [], + }); + + const ctx = { + Body: "", + Provider: "telegram", + OriginatingTo: "telegram:42", + AccountId: "default", + MediaPath: "/tmp/voice.ogg", + MediaType: "audio/ogg", + }; + const cfg = { + tools: { + media: { + audio: { + enabled: true, + echoTranscript: true, + echoFormat: "Heard: {transcript}", + }, + }, + }, + }; + + const transcript = await transcribeFirstAudio({ ctx, cfg }); + + expect(transcript).toBe("hello from dm audio"); + expect(sendTranscriptEchoMock).toHaveBeenCalledOnce(); + expect(sendTranscriptEchoMock).toHaveBeenCalledWith({ + ctx, + cfg, + transcript: "hello from dm audio", + format: "Heard: {transcript}", + }); }); }); diff --git a/src/media-understanding/audio-preflight.ts b/src/media-understanding/audio-preflight.ts index b600b898fd7..ef9794931d9 100644 --- a/src/media-understanding/audio-preflight.ts +++ b/src/media-understanding/audio-preflight.ts @@ -4,6 +4,7 @@ import { logVerbose, shouldLogVerbose } from "../globals.js"; import type { ActiveMediaModel } from "./active-model.types.js"; import { isAudioAttachment } from "./attachments.js"; import { runAudioTranscription } from "./audio-transcription-runner.js"; +import { DEFAULT_ECHO_TRANSCRIPT_FORMAT, sendTranscriptEcho } from "./echo-transcript.js"; import { normalizeMediaAttachments, resolveMediaAttachmentLocalRoots } from "./runner.js"; import type { MediaUnderstandingProvider } from "./types.js"; @@ -59,6 +60,15 @@ export async function transcribeFirstAudio(params: { return undefined; } + if (audioConfig?.echoTranscript) { + await sendTranscriptEcho({ + ctx, + cfg, + transcript, + format: audioConfig.echoFormat ?? DEFAULT_ECHO_TRANSCRIPT_FORMAT, + }); + } + // Mark this attachment as transcribed to avoid double-processing firstAudio.alreadyTranscribed = true; diff --git a/src/media-understanding/echo-transcript.test.ts b/src/media-understanding/echo-transcript.test.ts index 163b85d44bf..ca209ee44a5 100644 --- a/src/media-understanding/echo-transcript.test.ts +++ b/src/media-understanding/echo-transcript.test.ts @@ -9,7 +9,8 @@ vi.mock("../infra/outbound/deliver-runtime.js", () => ({ })); vi.mock("../utils/message-channel.js", () => ({ - isDeliverableMessageChannel: (channel: string) => channel === "voicechat", + isDeliverableMessageChannel: (channel: string) => + channel === "voicechat" || channel === "telegram", })); import { DEFAULT_ECHO_TRANSCRIPT_FORMAT, sendTranscriptEcho } from "./echo-transcript.js"; @@ -97,6 +98,32 @@ describe("sendTranscriptEcho", () => { ); }); + it("forwards Telegram account and thread metadata to outbound delivery", async () => { + await sendTranscriptEcho({ + ctx: createCtx({ + Provider: "telegram", + From: undefined, + OriginatingTo: "telegram:42", + AccountId: "primary", + MessageThreadId: 77, + }), + cfg: {} as OpenClawConfig, + transcript: "threaded voice note", + }); + + expect(mockDeliverOutboundPayloads).toHaveBeenCalledWith( + expect.objectContaining({ + channel: "telegram", + to: "telegram:42", + accountId: "primary", + threadId: 77, + payloads: [ + { text: DEFAULT_ECHO_TRANSCRIPT_FORMAT.replace("{transcript}", "threaded voice note") }, + ], + }), + ); + }); + it("swallows delivery failures", async () => { mockDeliverOutboundPayloads.mockRejectedValueOnce(new Error("delivery timeout"));