diff --git a/CHANGELOG.md b/CHANGELOG.md index 15041d99a95..287f30a2025 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,7 +147,7 @@ Docs: https://docs.openclaw.ai - Providers/catalogs: reject malformed successful LM Studio, GitHub Copilot, DeepInfra, Vercel AI Gateway, and Kilocode model-list responses with provider-owned errors instead of raw parser/type failures or silent fallback catalogs. - Providers/polling: reject array, null, or scalar successful operation status responses with provider-owned malformed JSON errors instead of waiting until timeout. - ACPX/Codex: reap plugin-local Codex ACP adapter orphans on startup after wrapper crashes while keeping direct adapter commands out of launch-lease injection. Fixes #82364. (#82459) Thanks @joshavant. -- Agents/model fallback: periodically probe the configured primary for auto-pinned fallback sessions and clear the pin when it recovers, preventing sessions from staying on a fallback model indefinitely. Fixes #82544. Thanks @crpol. +- Agents/model fallback: periodically probe the configured primary for auto-pinned fallback sessions, announce fallback/recovery transitions, and clear the pin when it recovers, preventing sessions from staying on a fallback model indefinitely. Fixes #82544. Thanks @crpol. - Telegram: send presentation-only payloads by rendering fallback text and inline buttons instead of treating them as empty. Fixes #82404. (#82449) Thanks @joshavant. - Providers/Kimi: preserve Kimi Coding `reasoning_content` replay and backfill assistant tool-call placeholders when thinking is enabled, so `kimi-for-coding` follow-up tool turns no longer fail after prior tool use. Fixes #82161. Thanks @amknight. - Providers/search tools: reject malformed successful xAI, Gemini, and Kimi web/code search responses with provider-owned errors instead of silent `No response` payloads or ungrounded fallback state. diff --git a/docs/concepts/model-failover.md b/docs/concepts/model-failover.md index 9283c3d0310..51f0ed65e79 100644 --- a/docs/concepts/model-failover.md +++ b/docs/concepts/model-failover.md @@ -65,6 +65,24 @@ OpenClaw separates the selected provider/model from why it was selected. That so - **Legacy session override**: older session entries may have `modelOverride` without `modelOverrideSource`. OpenClaw treats those as user overrides so an explicit old selection is not silently converted into fallback behavior. - **Cron payload model**: a cron job `payload.model` / `--model` is a job primary, not a user session override. It uses configured fallbacks unless the job provides `payload.fallbacks`; `payload.fallbacks: []` makes the cron run strict. +The auto fallback primary-probe interval is five minutes and is not configurable. OpenClaw remembers recent probes per session and primary model so a failing primary is not retried on every turn. OpenClaw sends a visible notice when a session moves onto fallback and another notice when it returns to the selected primary; it does not repeat the notice on every sticky fallback turn. + +## User-visible fallback notices + +When a session moves onto an auto-selected fallback, OpenClaw sends a status notice in the same reply surface: + +```text +↪️ Model Fallback: (selected ; ) +``` + +When a later probe succeeds and the session returns to the selected primary, OpenClaw sends: + +```text +↪️ Model Fallback cleared: (was ) +``` + +These notices are operational messages, not assistant content. They are delivered once per state change, including side-effect-only turns when feasible, but sticky fallback turns do not repeat them. Delivery bypasses normal source-reply suppression, the notice does not consume the first assistant reply slot for threaded channels, and it is excluded from text-to-speech and commitment extraction. + ## Auth storage (keys + OAuth) OpenClaw uses **auth profiles** for both API keys and OAuth tokens. @@ -306,6 +324,7 @@ That means fallback retries have to coordinate with live model switching: - User-driven model overrides are treated as exact selections for fallback policy, so an unreachable selected provider surfaces as a failure instead of being masked by `agents.defaults.model.fallbacks`. - Before a fallback retry starts, the reply runner persists the selected fallback override fields to the session entry. - Auto fallback overrides remain selected on subsequent turns so OpenClaw does not probe a known-bad primary on every message. OpenClaw periodically probes the configured origin again and clears the auto override when it recovers; `/new`, `/reset`, and `sessions.reset` clear auto-sourced overrides immediately. +- User replies announce fallback transitions and fallback-cleared recovery once per state change. Sticky fallback turns do not repeat the notice. - `/status` shows the selected model and, when fallback state differs, the active fallback model and reason. - Live-session reconciliation prefers persisted session overrides over stale runtime model fields. - If a live-switch error points at a later candidate in the active fallback chain, OpenClaw jumps directly to that selected model instead of walking unrelated candidates first. diff --git a/docs/concepts/models.md b/docs/concepts/models.md index 83e686bff76..4336fbbe6d5 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -59,7 +59,7 @@ OpenClaw selects models in this order: The same `provider/model` can mean different things depending on where it came from: - Configured defaults (`agents.defaults.model.primary` and agent-specific primaries) are the normal starting point and use `agents.defaults.model.fallbacks`. -- Auto fallback selections are temporary recovery state. They are stored with `modelOverrideSource: "auto"` so later turns can keep using the fallback chain without probing a known-bad primary every time; OpenClaw periodically probes the original primary again and clears the auto selection when it recovers. +- Auto fallback selections are temporary recovery state. They are stored with `modelOverrideSource: "auto"` so later turns can keep using the fallback chain without probing a known-bad primary every time; OpenClaw periodically probes the original primary again, clears the auto selection when it recovers, and announces fallback/recovery transitions once per state change. - User session selections are exact. `/model`, the model picker, `session_status(model=...)`, and `sessions.patch` store `modelOverrideSource: "user"`; if that selected provider/model is unreachable, OpenClaw fails visibly instead of falling through to another configured model. - Cron `--model` / payload `model` is a per-job primary. It still uses configured fallbacks unless the job supplies explicit payload `fallbacks` (use `fallbacks: []` for a strict cron run). - CLI default-model and allowlist pickers respect `models.mode: "replace"` by listing explicit `models.providers.*.models` instead of loading the full built-in catalog. diff --git a/src/agents/agent-scope.ts b/src/agents/agent-scope.ts index b3aca0ba0ce..08f35253649 100644 --- a/src/agents/agent-scope.ts +++ b/src/agents/agent-scope.ts @@ -46,7 +46,7 @@ function stripNullBytes(s: string): string { return s.replace(/\0/g, ""); } -const AUTO_FALLBACK_PRIMARY_PROBE_INTERVAL_MS = 15 * 60 * 1000; +const AUTO_FALLBACK_PRIMARY_PROBE_INTERVAL_MS = 5 * 60 * 1000; const AUTO_FALLBACK_PRIMARY_PROBE_MAX_KEYS = 4096; const autoFallbackPrimaryProbeState = new Map(); diff --git a/src/agents/runtime-plan/types.ts b/src/agents/runtime-plan/types.ts index 32e0b3d15a0..27a9f3e00f8 100644 --- a/src/agents/runtime-plan/types.ts +++ b/src/agents/runtime-plan/types.ts @@ -178,6 +178,7 @@ export type AgentRuntimeReplyPayload = { isError?: boolean; isReasoning?: boolean; isCompactionNotice?: boolean; + isFallbackNotice?: boolean; channelData?: Record; }; diff --git a/src/auto-reply/reply-payload.ts b/src/auto-reply/reply-payload.ts index 1223cc36bb2..9a45212dc53 100644 --- a/src/auto-reply/reply-payload.ts +++ b/src/auto-reply/reply-payload.ts @@ -44,6 +44,8 @@ export type ReplyPayload = { * Should be excluded from TTS transcript accumulation so compaction * status lines are not synthesised into the spoken assistant reply. */ isCompactionNotice?: boolean; + /** Marks this payload as a model-fallback transition/recovery notice. */ + isFallbackNotice?: boolean; /** Channel-specific payload data (per-channel envelope). */ channelData?: Record; }; diff --git a/src/auto-reply/reply/agent-runner-payloads.ts b/src/auto-reply/reply/agent-runner-payloads.ts index d6fc2197ed2..0ef83994a7f 100644 --- a/src/auto-reply/reply/agent-runner-payloads.ts +++ b/src/auto-reply/reply/agent-runner-payloads.ts @@ -324,7 +324,7 @@ export async function buildReplyPayloads(params: { const isDirectlySentBlockPayload = (payload: ReplyPayload) => Boolean(params.directlySentBlockKeys?.has(createBlockReplyContentKey(payload))); const preserveUnsentMediaAfterBlockStream = (payload: ReplyPayload): ReplyPayload | null => { - if (payload.isError) { + if (payload.isError || payload.isFallbackNotice) { return payload; } const reply = resolveSendableOutboundReplyParts(payload); diff --git a/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts b/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts index 1b0f490f063..32518ca1ba1 100644 --- a/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts +++ b/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts @@ -917,10 +917,10 @@ describe("runReplyAgent typing (heartbeat)", () => { vi.useRealTimers(); }); - it("announces model fallback only when verbose mode is enabled", async () => { + it("announces model fallback transitions across verbose levels", async () => { const cases = [ - { name: "verbose on", verbose: "on" as const, expectNotice: true }, - { name: "verbose off", verbose: "off" as const, expectNotice: false }, + { name: "verbose on", verbose: "on" as const }, + { name: "verbose off", verbose: "off" as const }, ] as const; for (const testCase of cases) { const sessionEntry: SessionEntry = { @@ -974,13 +974,9 @@ describe("runReplyAgent typing (heartbeat)", () => { const payload = Array.isArray(res) ? (res[0] as { text?: string }) : (res as { text?: string }); - if (testCase.expectNotice) { - expect(payload.text, testCase.name).toContain("Model Fallback:"); - expect(payload.text, testCase.name).toContain("deepinfra/moonshotai/Kimi-K2.5"); - expect(sessionEntry.fallbackNoticeReason, testCase.name).toBe("rate limit"); - continue; - } - expect(payload.text, testCase.name).not.toContain("Model Fallback:"); + expect(payload.text, testCase.name).toContain("Model Fallback:"); + expect(payload.text, testCase.name).toContain("deepinfra/moonshotai/Kimi-K2.5"); + expect(sessionEntry.fallbackNoticeReason, testCase.name).toBe("rate limit"); expect( phases.filter((phase) => phase === "fallback"), testCase.name, @@ -989,6 +985,111 @@ describe("runReplyAgent typing (heartbeat)", () => { } }); + it("keeps fallback transition notices when block streaming has no final text", async () => { + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + }; + const sessionStore = { main: sessionEntry }; + const onBlockReply = vi.fn(); + + state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: AgentRunParams) => { + await params.onBlockReply?.({ text: "streamed answer" }); + return { payloads: [], meta: {} }; + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("deepinfra", "moonshotai/Kimi-K2.5"), + provider: "deepinfra", + model: "moonshotai/Kimi-K2.5", + attempts: [ + { + provider: "fireworks", + model: "fireworks/accounts/fireworks/routers/kimi-k2p5-turbo", + error: "Provider fireworks is in cooldown (all profiles unavailable)", + reason: "rate_limit", + }, + ], + }), + ); + try { + const { run } = createMinimalRun({ + blockStreamingEnabled: true, + opts: { onBlockReply }, + sessionEntry, + sessionStore, + sessionKey: "main", + }); + const res = await run(); + const payloads = Array.isArray(res) ? res : res ? [res] : []; + + expect(onBlockReply).toHaveBeenCalled(); + expect(payloads).toHaveLength(1); + expect(payloads[0]?.text).toContain("Model Fallback:"); + expect(payloads[0]?.text).not.toContain("streamed answer"); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("threads fallback notices without consuming the first assistant reply slot", async () => { + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + }; + const sessionStore = { main: sessionEntry }; + + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "final" }], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("deepinfra", "moonshotai/Kimi-K2.5"), + provider: "deepinfra", + model: "moonshotai/Kimi-K2.5", + attempts: [ + { + provider: "fireworks", + model: "fireworks/accounts/fireworks/routers/kimi-k2p5-turbo", + error: "Provider fireworks is in cooldown (all profiles unavailable)", + reason: "rate_limit", + }, + ], + }), + ); + try { + const { run } = createMinimalRun({ + sessionEntry, + sessionStore, + sessionKey: "main", + runOverrides: { + config: { + channels: { + whatsapp: { + replyToMode: "first", + }, + }, + }, + }, + }); + const res = await run(); + const payloads = Array.isArray(res) ? res : res ? [res] : []; + + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toContain("Model Fallback:"); + expect(payloads[0]?.replyToId).toBe("msg"); + expect(payloads[1]?.text).toBe("final"); + expect(payloads[1]?.replyToId).toBe("msg"); + } finally { + fallbackSpy.mockRestore(); + } + }); + it("surfaces a configured backend failure when fallback produces no visible reply", async () => { state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ payloads: [{ text: "NO_REPLY" }], @@ -1121,7 +1222,7 @@ describe("runReplyAgent typing (heartbeat)", () => { expect(payload?.text).toContain("no visible reply"); }); - it("does not surface fallback silence when fallback already replied through a messaging tool", async () => { + it("announces fallback without silence failure when fallback already replied through a messaging tool", async () => { state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ payloads: [{ text: "already sent" }], messagingToolSentTexts: ["already sent"], @@ -1162,7 +1263,12 @@ describe("runReplyAgent typing (heartbeat)", () => { }, }); - await expect(run()).resolves.toBeUndefined(); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).not.toBe(true); + expect(payload?.text).toContain("Model Fallback:"); + expect(payload?.text).not.toContain("no visible reply"); } finally { fallbackSpy.mockRestore(); } @@ -1222,7 +1328,7 @@ describe("runReplyAgent typing (heartbeat)", () => { } }); - it("does not surface fallback silence when fallback already completed a cron side effect", async () => { + it("announces fallback without silence failure when fallback already completed a cron side effect", async () => { state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ payloads: [{ text: "NO_REPLY" }], successfulCronAdds: 1, @@ -1262,13 +1368,18 @@ describe("runReplyAgent typing (heartbeat)", () => { }, }); - await expect(run()).resolves.toBeUndefined(); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).not.toBe(true); + expect(payload?.text).toContain("Model Fallback:"); + expect(payload?.text).not.toContain("no visible reply"); } finally { fallbackSpy.mockRestore(); } }); - it("does not surface fallback silence when fallback committed target-only messaging delivery", async () => { + it("announces fallback without silence failure when fallback committed target-only messaging delivery", async () => { state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ payloads: [{ text: "NO_REPLY" }], messagingToolSentTargets: [{ tool: "message", provider: "discord", to: "channel:C1" }], @@ -1308,13 +1419,18 @@ describe("runReplyAgent typing (heartbeat)", () => { }, }); - await expect(run()).resolves.toBeUndefined(); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).not.toBe(true); + expect(payload?.text).toContain("Model Fallback:"); + expect(payload?.text).not.toContain("no visible reply"); } finally { fallbackSpy.mockRestore(); } }); - it("does not surface fallback silence when fallback already delivered an approval prompt", async () => { + it("announces fallback without silence failure when fallback already delivered an approval prompt", async () => { state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ payloads: [], didSendDeterministicApprovalPrompt: true, @@ -1351,7 +1467,12 @@ describe("runReplyAgent typing (heartbeat)", () => { }, }); - await expect(run()).resolves.toBeUndefined(); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).not.toBe(true); + expect(payload?.text).toContain("Model Fallback:"); + expect(payload?.text).not.toContain("no visible reply"); } finally { fallbackSpy.mockRestore(); } @@ -1608,7 +1729,7 @@ describe("runReplyAgent typing (heartbeat)", () => { } }); - it("emits fallback lifecycle events while verbose is off", async () => { + it("announces fallback transitions and emits lifecycle events while verbose is off", async () => { const sessionEntry: SessionEntry = { sessionId: "session", updatedAt: Date.now(), @@ -1676,8 +1797,8 @@ describe("runReplyAgent typing (heartbeat)", () => { const firstText = Array.isArray(first) ? first[0]?.text : first?.text; const secondText = Array.isArray(second) ? second[0]?.text : second?.text; - expect(firstText).not.toContain("Model Fallback:"); - expect(secondText).not.toContain("Model Fallback cleared:"); + expect(firstText).toContain("Model Fallback:"); + expect(secondText).toContain("Model Fallback cleared:"); expect(countMatching(phases, (phase) => phase === "fallback")).toBe(1); expect(countMatching(phases, (phase) => phase === "fallback_cleared")).toBe(1); } finally { diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 1a073416d14..23653db7cb7 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -921,7 +921,13 @@ function buildInlineRawTracePayload(params: { function joinCommitmentAssistantText(payloads: ReplyPayload[]): string { return payloads - .filter((payload) => !payload.isError && !payload.isReasoning && !payload.isCompactionNotice) + .filter( + (payload) => + !payload.isError && + !payload.isReasoning && + !payload.isCompactionNotice && + !payload.isFallbackNotice, + ) .map((payload) => payload.text?.trim()) .filter((text): text is string => Boolean(text)) .join("\n") @@ -1618,21 +1624,22 @@ export async function runReplyAgent(params: { preserveFreshTotalTokensOnStaleUsage: preflightCompactionApplied, }); + const successfulSideEffectDelivery = hasSuccessfulSideEffectDelivery({ + blockReplyPipeline, + directlySentBlockKeys, + messagingToolSentTexts: runResult.messagingToolSentTexts, + messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls, + messagingToolSentTargets: runResult.messagingToolSentTargets, + successfulCronAdds: runResult.successfulCronAdds, + didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt, + }); const returnSilentFallbackFailureIfNeeded = async (): Promise => { const silentFallbackFailurePayload = buildSilentFallbackFailurePayload({ fallbackTransition, fallbackFailureKnown: fallbackAttempts.length > 0 || configuredFallbackModel.persistedAutoFallback, isHeartbeat, - hasSuccessfulSideEffectDelivery: hasSuccessfulSideEffectDelivery({ - blockReplyPipeline, - directlySentBlockKeys, - messagingToolSentTexts: runResult.messagingToolSentTexts, - messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls, - messagingToolSentTargets: runResult.messagingToolSentTargets, - successfulCronAdds: runResult.successfulCronAdds, - didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt, - }), + hasSuccessfulSideEffectDelivery: successfulSideEffectDelivery, allowEmptyAssistantReplyAsSilent: followupRun.run.allowEmptyAssistantReplyAsSilent, silentExpected: followupRun.run.silentExpected, }); @@ -1649,10 +1656,69 @@ export async function runReplyAgent(params: { return returnWithQueuedFollowupDrain(silentFallbackFailurePayload); }; + const fallbackNoticePayloads: ReplyPayload[] = []; + if (fallbackTransition.fallbackTransitioned) { + emitAgentEvent({ + runId, + sessionKey, + stream: "lifecycle", + data: { + phase: "fallback", + selectedProvider, + selectedModel, + activeProvider: providerUsed, + activeModel: modelUsed, + reasonSummary: fallbackTransition.reasonSummary, + attemptSummaries: fallbackTransition.attemptSummaries, + attempts: fallbackAttempts, + }, + }); + const fallbackNotice = buildFallbackNotice({ + selectedProvider, + selectedModel, + activeProvider: providerUsed, + activeModel: modelUsed, + attempts: fallbackAttempts, + }); + if (fallbackNotice) { + fallbackNoticePayloads.push( + markReplyPayloadForSourceSuppressionDelivery({ + text: fallbackNotice, + isFallbackNotice: true, + }), + ); + } + } + if (fallbackTransition.fallbackCleared) { + emitAgentEvent({ + runId, + sessionKey, + stream: "lifecycle", + data: { + phase: "fallback_cleared", + selectedProvider, + selectedModel, + activeProvider: providerUsed, + activeModel: modelUsed, + previousActiveModel: fallbackTransition.previousState.activeModel, + }, + }); + fallbackNoticePayloads.push( + markReplyPayloadForSourceSuppressionDelivery({ + text: buildFallbackClearedNotice({ + selectedProvider, + selectedModel, + previousActiveModel: fallbackTransition.previousState.activeModel, + }), + isFallbackNotice: true, + }), + ); + } + // Drain any late tool/block deliveries before deciding there's "nothing to send". // Otherwise, a late typing trigger (e.g. from a tool callback) can outlive the run and // keep the typing indicator stuck. - if (payloadArray.length === 0) { + if (payloadArray.length === 0 && fallbackNoticePayloads.length === 0) { const silentFallbackFailurePayload = await returnSilentFallbackFailureIfNeeded(); if (silentFallbackFailurePayload) { return silentFallbackFailurePayload; @@ -1662,7 +1728,10 @@ export async function runReplyAgent(params: { const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; const payloadResult = await buildReplyPayloads({ - payloads: payloadArray, + payloads: + fallbackNoticePayloads.length > 0 + ? [...fallbackNoticePayloads, ...payloadArray] + : payloadArray, isHeartbeat, didLogHeartbeatStrip, silentExpected: followupRun.run.silentExpected, @@ -1688,7 +1757,18 @@ export async function runReplyAgent(params: { const { replyPayloads } = payloadResult; didLogHeartbeatStrip = payloadResult.didLogHeartbeatStrip; - if (replyPayloads.length === 0) { + const hasReplyPayloadBeyondFallbackNotice = replyPayloads.some( + (payload) => !payload.isFallbackNotice, + ); + const hasDeliveredBlockStream = Boolean( + blockReplyPipeline?.didStream() && !blockReplyPipeline.isAborted(), + ); + const canDeliverStandaloneFallbackNotice = + hasDeliveredBlockStream || successfulSideEffectDelivery; + if ( + replyPayloads.length === 0 || + (!hasReplyPayloadBeyondFallbackNotice && !canDeliverStandaloneFallbackNotice) + ) { const silentFallbackFailurePayload = await returnSilentFallbackFailureIfNeeded(); if (silentFallbackFailurePayload) { return silentFallbackFailurePayload; @@ -1700,6 +1780,7 @@ export async function runReplyAgent(params: { const hasReminderCommitment = replyPayloads.some( (payload) => !payload.isError && + !payload.isFallbackNotice && typeof payload.text === "string" && hasUnbackedReminderCommitment(payload.text), ); @@ -1820,66 +1901,13 @@ export async function runReplyAgent(params: { }); } - // If verbose is enabled, prepend operational run notices. + // Prepend verbose operational notices. Model fallback notices are prepared + // earlier so they pass through normal reply threading and stream-dedupe. let finalPayloads = guardedReplyPayloads; - const verboseNotices: ReplyPayload[] = []; + const prefixNotices: ReplyPayload[] = []; if (verboseEnabled && activeIsNewSession) { - verboseNotices.push({ text: `🧭 New session: ${followupRun.run.sessionId}` }); - } - - if (fallbackTransition.fallbackTransitioned) { - emitAgentEvent({ - runId, - sessionKey, - stream: "lifecycle", - data: { - phase: "fallback", - selectedProvider, - selectedModel, - activeProvider: providerUsed, - activeModel: modelUsed, - reasonSummary: fallbackTransition.reasonSummary, - attemptSummaries: fallbackTransition.attemptSummaries, - attempts: fallbackAttempts, - }, - }); - if (verboseEnabled) { - const fallbackNotice = buildFallbackNotice({ - selectedProvider, - selectedModel, - activeProvider: providerUsed, - activeModel: modelUsed, - attempts: fallbackAttempts, - }); - if (fallbackNotice) { - verboseNotices.push({ text: fallbackNotice }); - } - } - } - if (fallbackTransition.fallbackCleared) { - emitAgentEvent({ - runId, - sessionKey, - stream: "lifecycle", - data: { - phase: "fallback_cleared", - selectedProvider, - selectedModel, - activeProvider: providerUsed, - activeModel: modelUsed, - previousActiveModel: fallbackTransition.previousState.activeModel, - }, - }); - if (verboseEnabled) { - verboseNotices.push({ - text: buildFallbackClearedNotice({ - selectedProvider, - selectedModel, - previousActiveModel: fallbackTransition.previousState.activeModel, - }), - }); - } + prefixNotices.push({ text: `🧭 New session: ${followupRun.run.sessionId}` }); } if (autoCompactionCount > 0) { @@ -1927,10 +1955,10 @@ export async function runReplyAgent(params: { if (verboseEnabled) { const suffix = typeof count === "number" ? ` (count ${count})` : ""; - verboseNotices.push({ text: `🧹 Auto-compaction complete${suffix}.` }); + prefixNotices.push({ text: `🧹 Auto-compaction complete${suffix}.` }); } } - const prefixPayloads = [...verboseNotices]; + const prefixPayloads = [...prefixNotices]; const isHookBlockedRun = runResult.meta?.error?.kind === "hook_block"; const rawUserText = isHookBlockedRun ? runResult.meta?.finalPromptText diff --git a/src/auto-reply/reply/block-reply-coalescer.ts b/src/auto-reply/reply/block-reply-coalescer.ts index ecb50508d69..c5ffbb1ce67 100644 --- a/src/auto-reply/reply/block-reply-coalescer.ts +++ b/src/auto-reply/reply/block-reply-coalescer.ts @@ -26,6 +26,7 @@ export function createBlockReplyCoalescer(params: { let bufferAudioAsVoice: ReplyPayload["audioAsVoice"]; let bufferIsReasoning: ReplyPayload["isReasoning"]; let bufferIsCompactionNotice: ReplyPayload["isCompactionNotice"]; + let bufferIsFallbackNotice: ReplyPayload["isFallbackNotice"]; let idleTimer: NodeJS.Timeout | undefined; const clearIdleTimer = () => { @@ -42,6 +43,7 @@ export function createBlockReplyCoalescer(params: { bufferAudioAsVoice = undefined; bufferIsReasoning = undefined; bufferIsCompactionNotice = undefined; + bufferIsFallbackNotice = undefined; }; const scheduleIdleFlush = () => { @@ -73,6 +75,7 @@ export function createBlockReplyCoalescer(params: { audioAsVoice: bufferAudioAsVoice, isReasoning: bufferIsReasoning, isCompactionNotice: bufferIsCompactionNotice, + isFallbackNotice: bufferIsFallbackNotice, }; resetBuffer(); await onFlush(payload); @@ -105,6 +108,7 @@ export function createBlockReplyCoalescer(params: { bufferAudioAsVoice = payload.audioAsVoice; bufferIsReasoning = payload.isReasoning; bufferIsCompactionNotice = payload.isCompactionNotice; + bufferIsFallbackNotice = payload.isFallbackNotice; bufferText = text; void flush({ force: true }); return; @@ -118,7 +122,8 @@ export function createBlockReplyCoalescer(params: { const visibilityConflict = bufferText && (bufferIsReasoning !== payload.isReasoning || - bufferIsCompactionNotice !== payload.isCompactionNotice); + bufferIsCompactionNotice !== payload.isCompactionNotice || + bufferIsFallbackNotice !== payload.isFallbackNotice); if ( bufferText && (replyToConflict || bufferAudioAsVoice !== payload.audioAsVoice || visibilityConflict) @@ -131,6 +136,7 @@ export function createBlockReplyCoalescer(params: { bufferAudioAsVoice = payload.audioAsVoice; bufferIsReasoning = payload.isReasoning; bufferIsCompactionNotice = payload.isCompactionNotice; + bufferIsFallbackNotice = payload.isFallbackNotice; } const nextText = bufferText ? `${bufferText}${joiner}${text}` : text; @@ -141,6 +147,7 @@ export function createBlockReplyCoalescer(params: { bufferAudioAsVoice = payload.audioAsVoice; bufferIsReasoning = payload.isReasoning; bufferIsCompactionNotice = payload.isCompactionNotice; + bufferIsFallbackNotice = payload.isFallbackNotice; if (text.length >= maxChars) { void onFlush(payload); return; diff --git a/src/auto-reply/reply/dispatch-acp-delivery.test.ts b/src/auto-reply/reply/dispatch-acp-delivery.test.ts index 276a91f0e54..e53873214cf 100644 --- a/src/auto-reply/reply/dispatch-acp-delivery.test.ts +++ b/src/auto-reply/reply/dispatch-acp-delivery.test.ts @@ -178,6 +178,30 @@ describe("createAcpDispatchDeliveryCoordinator", () => { expect(dispatcher.sendFinalReply).toHaveBeenCalledWith({ text: "hello" }); }); + it("bypasses TTS for final status notices", async () => { + const dispatcher = createDispatcher(); + const coordinator = createAcpDispatchDeliveryCoordinator({ + cfg: createAcpTestConfig({ + messages: { tts: { enabled: true } }, + }), + ctx: buildTestCtx({ + Provider: "visiblechat", + Surface: "visiblechat", + SessionKey: "agent:codex-acp:session-1", + }), + dispatcher, + inboundAudio: false, + shouldRouteToOriginating: false, + }); + + const notice = { text: "Model Fallback: openai/gpt-5.5", isFallbackNotice: true }; + await coordinator.deliver("final", notice); + await coordinator.settleVisibleText(); + + expect(ttsMocks.maybeApplyTtsToPayload).not.toHaveBeenCalled(); + expect(dispatcher.sendFinalReply).toHaveBeenCalledWith(notice); + }); + it("tracks successful final delivery separately from routed counters", async () => { const coordinator = createCoordinator(); @@ -247,6 +271,65 @@ describe("createAcpDispatchDeliveryCoordinator", () => { ); }); + it("keeps status notices out of ACP block TTS accumulation", async () => { + const dispatcher = createDispatcher(); + const coordinator = createAcpDispatchDeliveryCoordinator({ + cfg: createAcpTestConfig({ + messages: { tts: { enabled: true } }, + }), + ctx: buildTestCtx({ + Provider: "visiblechat", + Surface: "visiblechat", + SessionKey: "agent:codex-acp:session-1", + }), + dispatcher, + inboundAudio: false, + shouldRouteToOriginating: false, + }); + + await coordinator.deliver("block", { + text: "Model Fallback: openai/gpt-5.5", + isFallbackNotice: true, + }); + await coordinator.deliver("block", { text: "Visible answer" }); + + expect(dispatcher.sendBlockReply).toHaveBeenNthCalledWith(1, { + text: "Model Fallback: openai/gpt-5.5", + isFallbackNotice: true, + }); + expect(dispatcher.sendBlockReply).toHaveBeenNthCalledWith(2, { text: "Visible answer" }); + expect(coordinator.getAccumulatedBlockText()).toBe("Visible answer"); + expect(coordinator.getAccumulatedBlockTtsText()).toBe("Visible answer"); + expect(coordinator.getBlockCount()).toBe(1); + }); + + it("keeps final fallback notices out of ACP transcript accumulation", async () => { + const dispatcher = createDispatcher(); + const coordinator = createAcpDispatchDeliveryCoordinator({ + cfg: createAcpTestConfig(), + ctx: buildTestCtx({ + Provider: "visiblechat", + Surface: "visiblechat", + SessionKey: "agent:codex-acp:session-1", + }), + dispatcher, + inboundAudio: false, + shouldRouteToOriginating: false, + }); + + const delivered = await coordinator.deliver("final", { + text: "Model Fallback: openai/gpt-5.5", + isFallbackNotice: true, + }); + + expect(delivered).toBe(true); + expect(dispatcher.sendFinalReply).toHaveBeenCalledWith({ + text: "Model Fallback: openai/gpt-5.5", + isFallbackNotice: true, + }); + expect(coordinator.getAccumulatedFinalText()).toBe(""); + }); + it("prefers provider over surface when detecting direct channel visibility", async () => { const coordinator = createAcpDispatchDeliveryCoordinator({ cfg: createAcpTestConfig(), diff --git a/src/auto-reply/reply/dispatch-acp-delivery.ts b/src/auto-reply/reply/dispatch-acp-delivery.ts index d566ad74888..ff95d0f7601 100644 --- a/src/auto-reply/reply/dispatch-acp-delivery.ts +++ b/src/auto-reply/reply/dispatch-acp-delivery.ts @@ -100,6 +100,9 @@ async function maybeApplyAcpTts(params: { if (params.skipTts) { return params.payload; } + if (params.payload.isCompactionNotice || params.payload.isFallbackNotice) { + return params.payload; + } const ttsStatus = resolveStatusTtsSnapshot({ cfg: params.cfg, sessionAuto: params.ttsAuto, @@ -311,19 +314,22 @@ export function createAcpDispatchDeliveryCoordinator(params: { let visiblePayload = payload; const rawBlockText = kind === "block" ? normalizeOptionalString(payload.text) : undefined; if (rawBlockText) { + const isStatusNotice = payload.isCompactionNotice || payload.isFallbackNotice; const joinsBufferedTtsDirective = state.cleanBlockTtsDirectiveText?.hasBufferedDirectiveText() === true; - if (state.accumulatedBlockText.length > 0) { - state.accumulatedBlockText += "\n"; + if (!isStatusNotice) { + if (state.accumulatedBlockText.length > 0) { + state.accumulatedBlockText += "\n"; + } + state.accumulatedBlockText += rawBlockText; + if (state.accumulatedBlockTtsText.length > 0 && !joinsBufferedTtsDirective) { + state.accumulatedBlockTtsText += "\n"; + } + state.accumulatedBlockTtsText += rawBlockText; + state.blockCount += 1; } - state.accumulatedBlockText += rawBlockText; - if (state.accumulatedBlockTtsText.length > 0 && !joinsBufferedTtsDirective) { - state.accumulatedBlockTtsText += "\n"; - } - state.accumulatedBlockTtsText += rawBlockText; - state.blockCount += 1; - if (state.cleanBlockTtsDirectiveText && !payload.isCompactionNotice) { + if (state.cleanBlockTtsDirectiveText && !isStatusNotice) { const text = state.cleanBlockTtsDirectiveText.push(rawBlockText); visiblePayload = { ...payload, text: text.trim() ? text : undefined }; } @@ -334,7 +340,9 @@ export function createAcpDispatchDeliveryCoordinator(params: { state.accumulatedVisibleBlockText += visiblePayload.text; } } - const rawFinalText = kind === "final" ? normalizeOptionalString(payload.text) : undefined; + const isStatusNotice = payload.isCompactionNotice || payload.isFallbackNotice; + const rawFinalText = + kind === "final" && !isStatusNotice ? normalizeOptionalString(payload.text) : undefined; if (rawFinalText) { if (state.accumulatedFinalText.length > 0) { state.accumulatedFinalText += "\n"; diff --git a/src/auto-reply/reply/dispatch-from-config.test.ts b/src/auto-reply/reply/dispatch-from-config.test.ts index 37aa52e98aa..cc46c8c04b0 100644 --- a/src/auto-reply/reply/dispatch-from-config.test.ts +++ b/src/auto-reply/reply/dispatch-from-config.test.ts @@ -1648,6 +1648,31 @@ describe("dispatchReplyFromConfig", () => { expect(dispatcher.sendFinalReply).toHaveBeenCalledTimes(1); }); + it("bypasses final TTS for status notices", async () => { + setNoAbort(); + ttsMocks.state.synthesizeFinalAudio = true; + const cfg = emptyConfig; + const dispatcher = createDispatcher(); + const ctx = buildTestCtx({ + Provider: "telegram", + ChatType: "direct", + }); + const notice = { + text: "Model Fallback: openai/gpt-5.5", + isFallbackNotice: true, + } satisfies ReplyPayload; + + await dispatchReplyFromConfig({ + ctx, + cfg, + dispatcher, + replyResolver: async () => notice, + }); + + expect(ttsMocks.maybeApplyTtsToPayload).not.toHaveBeenCalled(); + expect(dispatcher.sendFinalReply).toHaveBeenCalledWith(notice); + }); + it("renders plain-text plan updates and concise approval progress when verbose is enabled", async () => { setNoAbort(); const cfg = { diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts index 0bcbe5232bf..471069f05b1 100644 --- a/src/auto-reply/reply/dispatch-from-config.ts +++ b/src/auto-reply/reply/dispatch-from-config.ts @@ -173,6 +173,9 @@ function formatSuppressedReplyPayloadForLog(reply: ReplyPayload): string { async function maybeApplyTtsToReplyPayload( params: Parameters>["maybeApplyTtsToPayload"]>[0], ) { + if (params.payload.isCompactionNotice || params.payload.isFallbackNotice) { + return params.payload; + } if ( !shouldAttemptTtsPayload({ cfg: params.cfg, @@ -1507,9 +1510,10 @@ export async function dispatchReplyFromConfig( return; } // Accumulate block text for TTS generation after streaming. - // Exclude compaction status notices — they are informational UI - // signals and must not be synthesised into the spoken reply. - if (payload.text && !payload.isCompactionNotice) { + // Exclude status notices — they are informational UI signals + // and must not be synthesised into the spoken reply. + const isStatusNotice = payload.isCompactionNotice || payload.isFallbackNotice; + if (payload.text && !isStatusNotice) { const joinsBufferedTtsDirective = cleanBlockTtsDirectiveText?.hasBufferedDirectiveText() === true; if (accumulatedBlockText.length > 0) { @@ -1523,7 +1527,7 @@ export async function dispatchReplyFromConfig( blockCount++; } const visiblePayload = - payload.text && cleanBlockTtsDirectiveText && !payload.isCompactionNotice + payload.text && cleanBlockTtsDirectiveText && !isStatusNotice ? (() => { const text = cleanBlockTtsDirectiveText.push(payload.text); return { ...payload, text: text.trim() ? text : undefined }; diff --git a/src/auto-reply/reply/reply-threading.ts b/src/auto-reply/reply/reply-threading.ts index c784c100055..656776471fc 100644 --- a/src/auto-reply/reply/reply-threading.ts +++ b/src/auto-reply/reply/reply-threading.ts @@ -95,17 +95,18 @@ export function createReplyToModeFilter( ) { let hasThreaded = false; return (payload: ReplyPayload): ReplyPayload => { + const isStatusNotice = payload.isCompactionNotice || payload.isFallbackNotice; if (!payload.replyToId) { return payload; } if (mode === "off") { const isExplicit = Boolean(payload.replyToTag) || Boolean(payload.replyToCurrent); - // Compaction notices must never be threaded when replyToMode=off — even + // Status notices must never be threaded when replyToMode=off — even // if they carry explicit reply tags (replyToCurrent). Honouring the // explicit tag here would make status notices appear in-thread while // normal assistant replies stay off-thread, contradicting the off-mode // expectation. Strip replyToId unconditionally for compaction payloads. - if (opts.allowExplicitReplyTagsWhenOff && isExplicit && !payload.isCompactionNotice) { + if (opts.allowExplicitReplyTagsWhenOff && isExplicit && !isStatusNotice) { return payload; } return copyReplyPayloadMetadata(payload, { ...payload, replyToId: undefined }); @@ -114,19 +115,19 @@ export function createReplyToModeFilter( return payload; } if (isSingleUseReplyToMode(mode) && hasThreaded) { - // Compaction notices are transient status messages that should always + // Status notices are transient messages that should always // appear in-thread, even after the first assistant block has already // consumed the "first" slot. Let them keep their replyToId. - if (payload.isCompactionNotice) { + if (isStatusNotice) { return payload; } return copyReplyPayloadMetadata(payload, { ...payload, replyToId: undefined }); } - // Compaction notices are transient status messages — they should be + // Status notices are transient messages — they should be // threaded (so they appear in-context), but they must not consume the // "first" slot of the replyToMode=first|batched filter. Skip advancing // hasThreaded so the real assistant reply still gets replyToId. - if (isSingleUseReplyToMode(mode) && !payload.isCompactionNotice) { + if (isSingleUseReplyToMode(mode) && !isStatusNotice) { hasThreaded = true; } return payload; diff --git a/src/auto-reply/reply/reply-utils.test.ts b/src/auto-reply/reply/reply-utils.test.ts index 30407333c59..6bed15d7110 100644 --- a/src/auto-reply/reply/reply-utils.test.ts +++ b/src/auto-reply/reply/reply-utils.test.ts @@ -865,7 +865,11 @@ describe("block reply coalescer", () => { }); it("preserves compaction notice markers across flushes", async () => { - const flushes: Array<{ text?: string; isCompactionNotice?: boolean }> = []; + const flushes: Array<{ + text?: string; + isCompactionNotice?: boolean; + isFallbackNotice?: boolean; + }> = []; const coalescer = createBlockReplyCoalescer({ config: { minChars: 1, maxChars: 200, idleMs: 0, joiner: "\n\n" }, shouldAbort: () => false, @@ -873,14 +877,27 @@ describe("block reply coalescer", () => { flushes.push({ text: payload.text, isCompactionNotice: payload.isCompactionNotice, + isFallbackNotice: payload.isFallbackNotice, }); }, }); coalescer.enqueue({ text: "Compacting context...", isCompactionNotice: true }); + coalescer.enqueue({ text: "Model Fallback: openai/gpt-5.5", isFallbackNotice: true }); await coalescer.flush({ force: true }); - expect(flushes).toEqual([{ text: "Compacting context...", isCompactionNotice: true }]); + expect(flushes).toEqual([ + { + text: "Compacting context...", + isCompactionNotice: true, + isFallbackNotice: undefined, + }, + { + text: "Model Fallback: openai/gpt-5.5", + isCompactionNotice: undefined, + isFallbackNotice: true, + }, + ]); coalescer.stop(); });