From 05c9492bff0f57e65fbb27b9b251cb586802ba2d Mon Sep 17 00:00:00 2001 From: Val Alexander Date: Sat, 2 May 2026 18:39:06 -0500 Subject: [PATCH] fix: reduce WebUI session latency churn (#76277) thanks @BunsDev Reduce WebUI/Gateway latency churn by avoiding redundant session reloads, carrying session keys through transcript update events, and deferring explicit media provider discovery. Includes changelog attribution and closes the referenced runtime latency issues. --- CHANGELOG.md | 1 + docs/nodes/audio.md | 1 - docs/providers/arcee.md | 24 +- docs/web/control-ui.md | 2 - docs/web/webchat.md | 3 +- extensions/arcee/index.test.ts | 176 --------------- extensions/arcee/index.ts | 19 +- extensions/arcee/models.ts | 9 +- extensions/arcee/provider-catalog.ts | 36 ++- extensions/arcee/provider-policy-api.test.ts | 73 ------- extensions/arcee/provider-policy-api.ts | 11 - extensions/arcee/provider-policy.ts | 132 ----------- scripts/openclaw-npm-release-check.ts | 18 +- src/agents/command/attempt-execution.ts | 2 +- .../pi-embedded-runner/compact.hooks.test.ts | 15 +- .../pi-embedded-runner/compaction-hooks.ts | 2 +- .../tool-result-truncation.test.ts | 6 + .../tool-result-truncation.ts | 10 +- .../transcript-rewrite.test.ts | 2 +- .../pi-embedded-runner/transcript-rewrite.ts | 5 +- src/agents/tools/image-generate-tool.test.ts | 57 ++++- src/agents/tools/image-generate-tool.ts | 14 +- src/agents/tools/media-tool-shared.ts | 14 +- src/agents/tools/music-generate-tool.test.ts | 57 +++++ src/agents/tools/music-generate-tool.ts | 38 +++- src/agents/tools/video-generate-tool.test.ts | 53 +++++ src/agents/tools/video-generate-tool.ts | 17 +- .../channel-setup/plugin-install.test.ts | 4 +- .../onboarding-plugin-install.test.ts | 138 +----------- src/commands/onboarding-plugin-install.ts | 11 +- src/config/sessions/transcript.test.ts | 5 +- src/config/sessions/transcript.ts | 2 +- src/flows/channel-setup.status.test.ts | 107 +-------- src/flows/channel-setup.status.ts | 18 +- src/flows/channel-setup.test.ts | 78 +------ src/flows/channel-setup.ts | 9 +- src/gateway/method-scopes.ts | 1 - src/gateway/server-methods-list.ts | 1 - src/gateway/server-methods.ts | 2 - .../chat-transcribe-audio.runtime.ts | 1 - .../chat-transcribe-audio.test.ts | 123 ----------- .../server-methods/chat-transcribe-audio.ts | 125 ----------- src/image-generation/runtime-types.ts | 1 + src/image-generation/runtime.test.ts | 40 ++++ src/image-generation/runtime.ts | 1 + src/music-generation/runtime-types.ts | 1 + src/music-generation/runtime.test.ts | 37 ++++ src/music-generation/runtime.ts | 1 + src/plugins/install.npm-spec.test.ts | 55 ----- src/plugins/install.ts | 25 +-- src/plugins/tool-descriptor-cache.test.ts | 48 +++- src/plugins/tool-descriptor-cache.ts | 17 +- src/plugins/tools.optional.test.ts | 49 +++++ src/video-generation/runtime-types.ts | 1 + src/video-generation/runtime.test.ts | 37 ++++ src/video-generation/runtime.ts | 1 + ui/src/styles/chat/layout.css | 9 - ui/src/ui/app-chat.test.ts | 63 ------ ui/src/ui/app-chat.ts | 67 ------ ui/src/ui/app-gateway.sessions.node.test.ts | 65 +++++- ui/src/ui/app-gateway.ts | 20 +- ui/src/ui/app-lifecycle.node.test.ts | 3 - ui/src/ui/app-lifecycle.ts | 2 - ui/src/ui/app-render.ts | 3 - ui/src/ui/app-view-state.ts | 5 +- ui/src/ui/app.test.ts | 205 ------------------ ui/src/ui/app.ts | 132 ----------- ui/src/ui/controllers/sessions.test.ts | 64 +++++- ui/src/ui/controllers/sessions.ts | 22 +- ui/src/ui/views/chat.test.ts | 41 +--- ui/src/ui/views/chat.ts | 60 +---- 71 files changed, 767 insertions(+), 1730 deletions(-) delete mode 100644 extensions/arcee/provider-policy-api.test.ts delete mode 100644 extensions/arcee/provider-policy-api.ts delete mode 100644 extensions/arcee/provider-policy.ts delete mode 100644 src/gateway/server-methods/chat-transcribe-audio.runtime.ts delete mode 100644 src/gateway/server-methods/chat-transcribe-audio.test.ts delete mode 100644 src/gateway/server-methods/chat-transcribe-audio.ts delete mode 100644 ui/src/ui/app.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 59de65de876..d165d3c116b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Control UI/Gateway: avoid full session-list reloads for locally applied message-phase session updates, carry known session keys through transcript-file update events, and defer media provider listing when explicit generation model config is present. Refs #76236, #76203, #76188, #76107, and #76166. Thanks @BunsDev. - Gateway: keep directly requested plugin tools invokable under restrictive tool profiles while preserving explicit deny lists and the HTTP safety deny list, preventing catalog/invoke mismatches that surface as "Tool not available". Thanks @BunsDev. - Gateway/update: allow beta binaries to refresh gateway services when the config was last written by the matching stable release version, avoiding false newer-config downgrade blocks during beta channel updates. - Channels: keep Matrix and Mattermost bundled in the core package instead of advertising external npm installs before those channels are cut over. Thanks @vincentkoc. diff --git a/docs/nodes/audio.md b/docs/nodes/audio.md index c9ace927ea6..28e852787de 100644 --- a/docs/nodes/audio.md +++ b/docs/nodes/audio.md @@ -17,7 +17,6 @@ title: "Audio and voice notes" 5. On success, it replaces `Body` with an `[Audio]` block and sets `{{Transcript}}`. - **Command parsing**: When transcription succeeds, `CommandBody`/`RawBody` are set to the transcript so slash commands still work. - **Verbose logging**: In `--verbose`, we log when transcription runs and when it replaces the body. -- **Control UI dictation**: The Chat composer can send a browser-recorded microphone clip to `chat.transcribeAudio`. That Gateway RPC writes the clip to a temporary local file, runs this same audio transcription pipeline, returns draft text to the browser, and deletes the temporary file. It does not create an agent run by itself. ## Auto-detection (default) diff --git a/docs/providers/arcee.md b/docs/providers/arcee.md index b08f7a86c22..90e0523cca2 100644 --- a/docs/providers/arcee.md +++ b/docs/providers/arcee.md @@ -98,24 +98,24 @@ Arcee AI models can be accessed directly via the Arcee platform or through [Open OpenClaw currently ships this bundled Arcee catalog: -| Model ref | Name | Input | Context | Cost (in/out per 1M) | Notes | -| ------------------------------ | ---------------------- | ----- | ------- | -------------------- | ------------------------------------------ | -| `arcee/trinity-large-thinking` | Trinity Large Thinking | text | 256K | $0.25 / $0.90 | Default model; reasoning enabled; no tools | -| `arcee/trinity-large-preview` | Trinity Large Preview | text | 128K | $0.25 / $1.00 | General-purpose; 400B params, 13B active | -| `arcee/trinity-mini` | Trinity Mini 26B | text | 128K | $0.045 / $0.15 | Fast and cost-efficient; function calling | +| Model ref | Name | Input | Context | Cost (in/out per 1M) | Notes | +| ------------------------------ | ---------------------- | ----- | ------- | -------------------- | ----------------------------------------- | +| `arcee/trinity-large-thinking` | Trinity Large Thinking | text | 256K | $0.25 / $0.90 | Default model; reasoning enabled | +| `arcee/trinity-large-preview` | Trinity Large Preview | text | 128K | $0.25 / $1.00 | General-purpose; 400B params, 13B active | +| `arcee/trinity-mini` | Trinity Mini 26B | text | 128K | $0.045 / $0.15 | Fast and cost-efficient; function calling | -The onboarding preset sets `arcee/trinity-large-thinking` as the default model. It is reasoning/text-only and does not support tool use or function calling. +The onboarding preset sets `arcee/trinity-large-thinking` as the default model. ## Supported features -| Feature | Supported | -| --------------------------------------------- | ------------------------------------------- | -| Streaming | Yes | -| Tool use / function calling | Model-dependent; not Trinity Large Thinking | -| Structured output (JSON mode and JSON schema) | Yes | -| Extended thinking | Yes (Trinity Large Thinking) | +| Feature | Supported | +| --------------------------------------------- | ---------------------------- | +| Streaming | Yes | +| Tool use / function calling | Yes | +| Structured output (JSON mode and JSON schema) | Yes | +| Extended thinking | Yes (Trinity Large Thinking) | diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md index c8771e52327..a4d9c27d4a7 100644 --- a/docs/web/control-ui.md +++ b/docs/web/control-ui.md @@ -96,7 +96,6 @@ Imported themes are stored only in the current browser profile. They are not wri - Chat with the model via Gateway WS (`chat.history`, `chat.send`, `chat.abort`, `chat.inject`). - - Dictate into the Chat composer with server-side STT (`chat.transcribeAudio`). The browser records a short microphone clip and sends it to the Gateway, which runs the configured `tools.media.audio` transcription pipeline and returns draft text without exposing provider credentials to the browser. - Talk through browser realtime sessions. OpenAI uses direct WebRTC, Google Live uses a constrained one-use browser token over WebSocket, and backend-only realtime voice plugins use the Gateway relay transport. The relay keeps provider credentials on the Gateway while the browser streams microphone PCM through `talk.realtime.relay*` RPCs and sends `openclaw_agent_consult` tool calls back through `chat.send` for the larger configured OpenClaw model. - Stream tool calls + live tool output cards in Chat (agent events). @@ -150,7 +149,6 @@ Imported themes are stored only in the current browser profile. They are not wri - `chat.send` is **non-blocking**: it acks immediately with `{ runId, status: "started" }` and the response streams via `chat` events. - - `chat.transcribeAudio` is a one-shot dictation helper for Chat drafts. It accepts browser-recorded base64 audio, keeps uploads below the Gateway WebSocket frame limit, writes a temporary local file, runs media-understanding audio transcription with the active Gateway config, returns `{ text, provider, model }`, and removes the temporary file. It does not create an agent run and is separate from realtime Talk. - Chat uploads accept images plus non-video files. Images keep the native image path; other files are stored as managed media and shown in history as attachment links. - Re-sending with the same `idempotencyKey` returns `{ status: "in_flight" }` while running, and `{ status: "ok" }` after completion. - `chat.history` responses are size-bounded for UI safety. When transcript entries are too large, Gateway may truncate long text fields, omit heavy metadata blocks, and replace oversized messages with a placeholder (`[chat.history omitted: message too large]`). diff --git a/docs/web/webchat.md b/docs/web/webchat.md index 5344acd14a6..0499f607e09 100644 --- a/docs/web/webchat.md +++ b/docs/web/webchat.md @@ -22,7 +22,7 @@ Status: the macOS/iOS SwiftUI chat UI talks directly to the Gateway WebSocket. ## How it works (behavior) -- The UI connects to the Gateway WebSocket and uses `chat.history`, `chat.send`, `chat.inject`, and `chat.transcribeAudio`. +- The UI connects to the Gateway WebSocket and uses `chat.history`, `chat.send`, and `chat.inject`. - `chat.history` is bounded for stability: Gateway may truncate long text fields, omit heavy metadata, and replace oversized entries with `[chat.history omitted: message too large]`. - `chat.history` follows the active transcript branch for modern append-only session files, so abandoned rewrite branches and superseded prompt copies are not rendered in WebChat. - Control UI remembers the backing Gateway `sessionId` returned by `chat.history` and includes it on follow-up `chat.send` calls, so reconnects and page refreshes continue the same stored conversation unless the user starts or resets a session. @@ -37,7 +37,6 @@ Status: the macOS/iOS SwiftUI chat UI talks directly to the Gateway WebSocket. and assistant entries whose whole visible text is only the exact silent token `NO_REPLY` / `no_reply` are omitted. - Reasoning-flagged reply payloads (`isReasoning: true`) are excluded from WebChat assistant content, transcript replay text, and audio content blocks, so thinking-only payloads do not surface as visible assistant messages or playable audio. -- `chat.transcribeAudio` powers server-side dictation in the Control UI chat composer. The browser records microphone audio, sends it as base64 to the Gateway, and the Gateway runs the configured `tools.media.audio` pipeline. The returned transcript is inserted into the draft; no agent run is started until the user sends it. - `chat.inject` appends an assistant note directly to the transcript and broadcasts it to the UI (no agent run). - Aborted runs can keep partial assistant output visible in the UI. - Gateway persists aborted partial assistant text into transcript history when buffered output exists, and marks those entries with abort metadata. diff --git a/extensions/arcee/index.test.ts b/extensions/arcee/index.test.ts index a8ed79207f0..8379057b443 100644 --- a/extensions/arcee/index.test.ts +++ b/extensions/arcee/index.test.ts @@ -69,14 +69,6 @@ describe("arcee provider plugin", () => { "arcee/trinity-large-preview", "arcee/trinity-large-thinking", ]); - expect( - config?.models?.providers?.arcee?.models?.find( - (model) => model.id === "arcee/trinity-large-thinking", - )?.compat, - ).toMatchObject({ - supportsReasoningEffort: false, - supportsTools: false, - }); }); it("keeps direct Arcee auth env candidates separate from OpenRouter", () => { @@ -100,12 +92,6 @@ describe("arcee provider plugin", () => { "trinity-large-preview", "trinity-large-thinking", ]); - expect( - catalogProvider.models?.find((model) => model.id === "trinity-large-thinking")?.compat, - ).toMatchObject({ - supportsReasoningEffort: false, - supportsTools: false, - }); }); it("builds the OpenRouter-backed Arcee AI model catalog", async () => { @@ -126,12 +112,6 @@ describe("arcee provider plugin", () => { "arcee/trinity-large-preview", "arcee/trinity-large-thinking", ]); - expect( - catalogProvider.models?.find((model) => model.id === "arcee/trinity-large-thinking")?.compat, - ).toMatchObject({ - supportsReasoningEffort: false, - supportsTools: false, - }); }); it("normalizes Arcee OpenRouter models to vendor-prefixed runtime ids", async () => { @@ -150,10 +130,6 @@ describe("arcee provider plugin", () => { } as never), ).toMatchObject({ id: "arcee/trinity-large-thinking", - compat: { - supportsReasoningEffort: false, - supportsTools: false, - }, }); expect( @@ -200,10 +176,6 @@ describe("arcee provider plugin", () => { ).toMatchObject({ id: "arcee/trinity-large-thinking", baseUrl: "https://openrouter.ai/api/v1", - compat: { - supportsReasoningEffort: false, - supportsTools: false, - }, }); expect( @@ -217,152 +189,4 @@ describe("arcee provider plugin", () => { baseUrl: "https://openrouter.ai/api/v1", }); }); - - it("repairs stale Trinity tool compat on existing Arcee configs and runtime models", async () => { - const provider = await registerSingleProviderPlugin(arceePlugin); - - expect( - provider.normalizeConfig?.({ - provider: "arcee", - providerConfig: { - api: "openai-completions", - baseUrl: "https://openrouter.ai/v1/", - models: [ - { - id: "arcee/trinity-large-thinking", - name: "Trinity Large Thinking", - reasoning: true, - input: ["text"], - contextWindow: 262144, - maxTokens: 80000, - cost: { - input: 0.25, - output: 0.9, - cacheRead: 0.25, - cacheWrite: 0.25, - }, - compat: { - supportsReasoningEffort: false, - supportsStrictMode: true, - }, - }, - ], - }, - } as never), - ).toMatchObject({ - baseUrl: "https://openrouter.ai/api/v1", - models: [ - { - id: "arcee/trinity-large-thinking", - compat: { - supportsReasoningEffort: false, - supportsStrictMode: true, - supportsTools: false, - }, - }, - ], - }); - - expect( - provider.normalizeConfig?.({ - provider: "arcee", - providerConfig: { - api: "openai-completions", - baseUrl: "https://api.arcee.ai/api/v1", - models: [ - { - id: "trinity-large-thinking", - name: "Trinity Large Thinking", - reasoning: true, - input: ["text"], - contextWindow: 262144, - maxTokens: 80000, - cost: { - input: 0.25, - output: 0.9, - cacheRead: 0.25, - cacheWrite: 0.25, - }, - compat: { - supportsReasoningEffort: false, - }, - }, - ], - }, - } as never), - ).toMatchObject({ - baseUrl: "https://api.arcee.ai/api/v1", - models: [ - { - id: "trinity-large-thinking", - compat: { - supportsReasoningEffort: false, - supportsTools: false, - }, - }, - ], - }); - - const trinityRuntimeModel = { - name: "Trinity Large Thinking", - api: "openai-completions", - reasoning: true, - input: ["text"], - contextWindow: 262144, - maxTokens: 80000, - cost: { - input: 0.25, - output: 0.9, - cacheRead: 0.25, - cacheWrite: 0.25, - }, - compat: { - supportsReasoningEffort: false, - }, - }; - - const trinityCompat = { - supportsReasoningEffort: false, - supportsTools: false, - }; - - expect( - provider.contributeResolvedModelCompat?.({ - provider: "arcee", - modelId: "arcee/trinity-large-thinking", - model: { - ...trinityRuntimeModel, - provider: "arcee", - id: "arcee/trinity-large-thinking", - baseUrl: "https://openrouter.ai/api/v1", - }, - } as never), - ).toEqual(trinityCompat); - - expect( - provider.contributeResolvedModelCompat?.({ - provider: "arcee", - modelId: "trinity-large-thinking", - model: { - ...trinityRuntimeModel, - provider: "arcee", - id: "trinity-large-thinking", - baseUrl: "https://api.arcee.ai/api/v1", - }, - } as never), - ).toEqual(trinityCompat); - - expect( - provider.contributeResolvedModelCompat?.({ - provider: "openrouter", - modelId: "trinity-large-thinking", - model: { - ...trinityRuntimeModel, - provider: "openrouter", - id: "trinity-large-thinking", - baseUrl: "https://openrouter.ai/api/v1", - }, - } as never), - ).toBeUndefined(); - }); }); diff --git a/extensions/arcee/index.ts b/extensions/arcee/index.ts index edd4fda7aa3..e7fd0126950 100644 --- a/extensions/arcee/index.ts +++ b/extensions/arcee/index.ts @@ -17,12 +17,6 @@ import { normalizeArceeOpenRouterBaseUrl, toArceeOpenRouterModelId, } from "./provider-catalog.js"; -import { - ARCEE_TRINITY_LARGE_THINKING_COMPAT, - applyArceeTrinityLargeThinkingCompat, - normalizeArceeProviderConfig, - shouldContributeArceeTrinityLargeThinkingCompat, -} from "./provider-policy.js"; const PROVIDER_ID = "arcee"; const ARCEE_WIZARD_GROUP = { @@ -101,7 +95,7 @@ function normalizeArceeResolvedModel return undefined; } return { - ...applyArceeTrinityLargeThinkingCompat(model), + ...model, id: normalizedId, baseUrl: normalizedBaseUrl, }; @@ -126,12 +120,13 @@ export default definePluginEntry({ config, providerId: PROVIDER_ID, }), - normalizeConfig: ({ providerConfig }) => normalizeArceeProviderConfig(providerConfig), + normalizeConfig: ({ providerConfig }) => { + const normalizedBaseUrl = normalizeArceeOpenRouterBaseUrl(providerConfig.baseUrl); + return normalizedBaseUrl && normalizedBaseUrl !== providerConfig.baseUrl + ? { ...providerConfig, baseUrl: normalizedBaseUrl } + : undefined; + }, normalizeResolvedModel: ({ model }) => normalizeArceeResolvedModel(model), - contributeResolvedModelCompat: (ctx) => - shouldContributeArceeTrinityLargeThinkingCompat(ctx) - ? ARCEE_TRINITY_LARGE_THINKING_COMPAT - : undefined, normalizeTransport: ({ api, baseUrl }) => { const normalizedBaseUrl = normalizeArceeOpenRouterBaseUrl(baseUrl); return normalizedBaseUrl && normalizedBaseUrl !== baseUrl diff --git a/extensions/arcee/models.ts b/extensions/arcee/models.ts index cb8873301f2..399faed49a8 100644 --- a/extensions/arcee/models.ts +++ b/extensions/arcee/models.ts @@ -1,7 +1,6 @@ -import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-types"; -import { ARCEE_BASE_URL, ARCEE_TRINITY_LARGE_THINKING_COMPAT } from "./provider-policy.js"; +import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared"; -export { ARCEE_BASE_URL, ARCEE_TRINITY_LARGE_THINKING_COMPAT }; +export const ARCEE_BASE_URL = "https://api.arcee.ai/api/v1"; export const ARCEE_MODEL_CATALOG: ModelDefinitionConfig[] = [ { @@ -45,7 +44,9 @@ export const ARCEE_MODEL_CATALOG: ModelDefinitionConfig[] = [ cacheRead: 0.25, cacheWrite: 0.25, }, - compat: ARCEE_TRINITY_LARGE_THINKING_COMPAT, + compat: { + supportsReasoningEffort: false, + }, }, ]; diff --git a/extensions/arcee/provider-catalog.ts b/extensions/arcee/provider-catalog.ts index 844d9a68dd6..5631ad2998d 100644 --- a/extensions/arcee/provider-catalog.ts +++ b/extensions/arcee/provider-catalog.ts @@ -1,13 +1,31 @@ -import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-types"; -import { buildArceeModelDefinition, ARCEE_MODEL_CATALOG } from "./models.js"; -import { - ARCEE_BASE_URL, - normalizeArceeOpenRouterBaseUrl, - OPENROUTER_BASE_URL, - toArceeOpenRouterModelId, -} from "./provider-policy.js"; +import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared"; +import { buildArceeModelDefinition, ARCEE_BASE_URL, ARCEE_MODEL_CATALOG } from "./models.js"; -export { normalizeArceeOpenRouterBaseUrl, OPENROUTER_BASE_URL, toArceeOpenRouterModelId }; +export const OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"; +const OPENROUTER_LEGACY_BASE_URL = "https://openrouter.ai/v1"; + +function normalizeBaseUrl(baseUrl: string | undefined): string { + return (baseUrl ?? "").trim().replace(/\/+$/, ""); +} + +export function normalizeArceeOpenRouterBaseUrl(baseUrl: string | undefined): string | undefined { + const normalized = normalizeBaseUrl(baseUrl); + if (!normalized) { + return undefined; + } + if (normalized === OPENROUTER_BASE_URL || normalized === OPENROUTER_LEGACY_BASE_URL) { + return OPENROUTER_BASE_URL; + } + return undefined; +} + +export function toArceeOpenRouterModelId(modelId: string): string { + const normalized = modelId.trim(); + if (!normalized || normalized.startsWith("arcee/")) { + return normalized; + } + return `arcee/${normalized}`; +} export function buildArceeCatalogModels(): NonNullable { return ARCEE_MODEL_CATALOG.map(buildArceeModelDefinition); diff --git a/extensions/arcee/provider-policy-api.test.ts b/extensions/arcee/provider-policy-api.test.ts deleted file mode 100644 index 0d4afe464d2..00000000000 --- a/extensions/arcee/provider-policy-api.test.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { normalizeConfig } from "./provider-policy-api.js"; - -describe("arcee provider policy public artifact", () => { - it("normalizes stale OpenRouter base URLs and Trinity compat without loading the full plugin", () => { - expect( - normalizeConfig({ - provider: "arcee", - providerConfig: { - api: "openai-completions", - baseUrl: "https://openrouter.ai/v1/", - models: [ - { - id: "arcee/trinity-large-thinking", - name: "Trinity Large Thinking", - reasoning: true, - input: ["text"], - contextWindow: 262144, - maxTokens: 80000, - cost: { - input: 0.25, - output: 0.9, - cacheRead: 0.25, - cacheWrite: 0.25, - }, - compat: { - supportsReasoningEffort: false, - supportsStrictMode: true, - }, - }, - ], - }, - }), - ).toMatchObject({ - baseUrl: "https://openrouter.ai/api/v1", - models: [ - { - id: "arcee/trinity-large-thinking", - compat: { - supportsReasoningEffort: false, - supportsStrictMode: true, - supportsTools: false, - }, - }, - ], - }); - }); - - it("returns unchanged non-Trinity configs by identity", () => { - const providerConfig = { - api: "openai-completions", - baseUrl: "https://api.arcee.ai/api/v1", - models: [ - { - id: "trinity-mini", - name: "Trinity Mini 26B", - reasoning: false, - input: ["text"], - contextWindow: 131072, - maxTokens: 80000, - cost: { - input: 0.045, - output: 0.15, - cacheRead: 0.045, - cacheWrite: 0.045, - }, - }, - ], - } satisfies Parameters[0]["providerConfig"]; - - expect(normalizeConfig({ provider: "arcee", providerConfig })).toBe(providerConfig); - }); -}); diff --git a/extensions/arcee/provider-policy-api.ts b/extensions/arcee/provider-policy-api.ts deleted file mode 100644 index 61fffb9057f..00000000000 --- a/extensions/arcee/provider-policy-api.ts +++ /dev/null @@ -1,11 +0,0 @@ -import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-types"; -import { normalizeArceeProviderConfig } from "./provider-policy.js"; - -export { normalizeArceeProviderConfig }; - -export function normalizeConfig(params: { - provider?: string; - providerConfig: ModelProviderConfig; -}): ModelProviderConfig { - return normalizeArceeProviderConfig(params.providerConfig); -} diff --git a/extensions/arcee/provider-policy.ts b/extensions/arcee/provider-policy.ts deleted file mode 100644 index a02cf9336cd..00000000000 --- a/extensions/arcee/provider-policy.ts +++ /dev/null @@ -1,132 +0,0 @@ -import type { - ModelCompatConfig, - ModelProviderConfig, -} from "openclaw/plugin-sdk/provider-model-types"; - -export const ARCEE_BASE_URL = "https://api.arcee.ai/api/v1"; -export const OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"; -export const ARCEE_TRINITY_LARGE_THINKING_COMPAT = { - supportsReasoningEffort: false, - supportsTools: false, -} as const satisfies ModelCompatConfig; - -const ARCEE_PROVIDER_ID = "arcee"; -const OPENROUTER_LEGACY_BASE_URL = "https://openrouter.ai/v1"; -const ARCEE_TRINITY_LARGE_THINKING_ID = "trinity-large-thinking"; -const ARCEE_TRINITY_LARGE_THINKING_REF = `${ARCEE_PROVIDER_ID}/${ARCEE_TRINITY_LARGE_THINKING_ID}`; - -function normalizeModelId(modelId: string): string { - return modelId.trim().toLowerCase(); -} - -function normalizeBaseUrl(baseUrl: unknown): string { - return typeof baseUrl === "string" ? baseUrl.trim().replace(/\/+$/, "") : ""; -} - -export function normalizeArceeOpenRouterBaseUrl(baseUrl: string | undefined): string | undefined { - const normalized = normalizeBaseUrl(baseUrl); - if (!normalized) { - return undefined; - } - if (normalized === OPENROUTER_BASE_URL || normalized === OPENROUTER_LEGACY_BASE_URL) { - return OPENROUTER_BASE_URL; - } - return undefined; -} - -export function toArceeOpenRouterModelId(modelId: string): string { - const normalized = modelId.trim(); - if (!normalized || normalized.startsWith("arcee/")) { - return normalized; - } - return `arcee/${normalized}`; -} - -export function isArceeTrinityLargeThinkingModelId(modelId: string): boolean { - const normalized = normalizeModelId(modelId); - return ( - normalized === ARCEE_TRINITY_LARGE_THINKING_ID || - normalized === ARCEE_TRINITY_LARGE_THINKING_REF - ); -} - -export function shouldContributeArceeTrinityLargeThinkingCompat(params: { - provider?: unknown; - modelId: string; - model: { id: string; provider?: unknown; baseUrl?: unknown }; -}): boolean { - const modelId = normalizeModelId(params.modelId); - const resolvedId = normalizeModelId(params.model.id); - if ( - modelId === ARCEE_TRINITY_LARGE_THINKING_REF || - resolvedId === ARCEE_TRINITY_LARGE_THINKING_REF - ) { - return true; - } - if ( - modelId !== ARCEE_TRINITY_LARGE_THINKING_ID && - resolvedId !== ARCEE_TRINITY_LARGE_THINKING_ID - ) { - return false; - } - if (params.provider === ARCEE_PROVIDER_ID || params.model.provider === ARCEE_PROVIDER_ID) { - return true; - } - return normalizeBaseUrl(params.model.baseUrl) === normalizeBaseUrl(ARCEE_BASE_URL); -} - -export function applyArceeTrinityLargeThinkingCompat( - model: T, -): T { - if (!isArceeTrinityLargeThinkingModelId(model.id)) { - return model; - } - const compat = - model.compat && typeof model.compat === "object" - ? (model.compat as Record) - : undefined; - if ( - compat?.supportsReasoningEffort === - ARCEE_TRINITY_LARGE_THINKING_COMPAT.supportsReasoningEffort && - compat?.supportsTools === ARCEE_TRINITY_LARGE_THINKING_COMPAT.supportsTools - ) { - return model; - } - return { - ...model, - compat: { - ...compat, - ...ARCEE_TRINITY_LARGE_THINKING_COMPAT, - } as T extends { compat?: infer TCompat } ? TCompat : never, - } as T; -} - -export function normalizeArceeProviderConfig( - providerConfig: ModelProviderConfig, -): ModelProviderConfig { - let changed = false; - const normalizedBaseUrl = normalizeArceeOpenRouterBaseUrl(providerConfig.baseUrl); - const baseUrl = - normalizedBaseUrl && normalizedBaseUrl !== providerConfig.baseUrl - ? normalizedBaseUrl - : providerConfig.baseUrl; - if (baseUrl !== providerConfig.baseUrl) { - changed = true; - } - - const hasModels = Array.isArray(providerConfig.models); - const models = hasModels - ? providerConfig.models.map((model) => { - const normalizedModel = applyArceeTrinityLargeThinkingCompat(model); - if (normalizedModel === model) { - return model; - } - changed = true; - return normalizedModel; - }) - : providerConfig.models; - - return changed - ? { ...providerConfig, baseUrl, ...(hasModels ? { models } : {}) } - : providerConfig; -} diff --git a/scripts/openclaw-npm-release-check.ts b/scripts/openclaw-npm-release-check.ts index 4733141a491..b32574be439 100644 --- a/scripts/openclaw-npm-release-check.ts +++ b/scripts/openclaw-npm-release-check.ts @@ -427,14 +427,16 @@ function isNpmExecPath(value: string): boolean { return /^npm(?:-cli)?(?:\.(?:c?js|cmd|exe))?$/.test(basename(value).toLowerCase()); } -export function resolveNpmCommandInvocation(params?: { - npmExecPath?: string; - nodeExecPath?: string; - platform?: NodeJS.Platform; -}): { command: string; args: string[] } { - const npmExecPath = params === undefined ? process.env.npm_execpath : params.npmExecPath; - const nodeExecPath = params?.nodeExecPath ?? process.execPath; - const npmCommand = (params?.platform ?? process.platform) === "win32" ? "npm.cmd" : "npm"; +export function resolveNpmCommandInvocation( + params: { + npmExecPath?: string; + nodeExecPath?: string; + platform?: NodeJS.Platform; + } = {}, +): { command: string; args: string[] } { + const npmExecPath = params.npmExecPath ?? process.env.npm_execpath; + const nodeExecPath = params.nodeExecPath ?? process.execPath; + const npmCommand = (params.platform ?? process.platform) === "win32" ? "npm.cmd" : "npm"; if (typeof npmExecPath === "string" && npmExecPath.length > 0 && isNpmExecPath(npmExecPath)) { return { command: nodeExecPath, args: [npmExecPath] }; diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts index 1ef26a11f0d..0789ed97ac6 100644 --- a/src/agents/command/attempt-execution.ts +++ b/src/agents/command/attempt-execution.ts @@ -237,7 +237,7 @@ async function persistTextTurnTranscript( await lock.release(); } - emitSessionTranscriptUpdate(sessionFile); + emitSessionTranscriptUpdate({ sessionFile, sessionKey: params.sessionKey }); return sessionEntry; } diff --git a/src/agents/pi-embedded-runner/compact.hooks.test.ts b/src/agents/pi-embedded-runner/compact.hooks.test.ts index 45de0f9c399..29be4b136d5 100644 --- a/src/agents/pi-embedded-runner/compact.hooks.test.ts +++ b/src/agents/pi-embedded-runner/compact.hooks.test.ts @@ -658,7 +658,10 @@ describe("compactEmbeddedPiSessionDirect hooks", () => { }); expect(listener).toHaveBeenCalledTimes(1); - expect(listener).toHaveBeenCalledWith({ sessionFile: "/tmp/session.jsonl" }); + expect(listener).toHaveBeenCalledWith({ + sessionFile: "/tmp/session.jsonl", + sessionKey: "agent:main:session-1", + }); } finally { cleanup(); } @@ -696,7 +699,10 @@ describe("compactEmbeddedPiSessionDirect hooks", () => { expect(result.ok).toBe(true); expect(listener).toHaveBeenCalledTimes(1); - expect(listener).toHaveBeenCalledWith({ sessionFile: "/tmp/rotated-session.jsonl" }); + expect(listener).toHaveBeenCalledWith({ + sessionFile: "/tmp/rotated-session.jsonl", + sessionKey: TEST_SESSION_KEY, + }); expect(sync).toHaveBeenCalledTimes(1); expect(sync).toHaveBeenCalledWith({ reason: "post-compaction", @@ -1096,7 +1102,10 @@ describe("compactEmbeddedPiSession hooks (ownsCompaction engine)", () => { expect(result.ok).toBe(true); expect(listener).toHaveBeenCalledTimes(1); - expect(listener).toHaveBeenCalledWith({ sessionFile: TEST_SESSION_FILE }); + expect(listener).toHaveBeenCalledWith({ + sessionFile: TEST_SESSION_FILE, + sessionKey: TEST_SESSION_KEY, + }); expect(sync).toHaveBeenCalledWith({ reason: "post-compaction", sessionFiles: [TEST_SESSION_FILE], diff --git a/src/agents/pi-embedded-runner/compaction-hooks.ts b/src/agents/pi-embedded-runner/compaction-hooks.ts index 318b785d9aa..2d7e7db480d 100644 --- a/src/agents/pi-embedded-runner/compaction-hooks.ts +++ b/src/agents/pi-embedded-runner/compaction-hooks.ts @@ -88,7 +88,7 @@ export async function runPostCompactionSideEffects(params: { if (!sessionFile) { return; } - emitSessionTranscriptUpdate(sessionFile); + emitSessionTranscriptUpdate({ sessionFile, sessionKey: params.sessionKey }); await syncPostCompactionSessionMemory({ config: params.config, sessionKey: params.sessionKey, diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts index 750d4ba3774..4a9265cc19d 100644 --- a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts @@ -5,6 +5,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { AssistantMessage, ToolResultMessage, UserMessage } from "@mariozechner/pi-ai"; import { SessionManager } from "@mariozechner/pi-coding-agent"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { onSessionTranscriptUpdate } from "../../sessions/transcript-events.js"; import { makeAgentAssistantMessage } from "../test-helpers/agent-message-fixtures.js"; let truncateToolResultText: typeof import("./tool-result-truncation.js").truncateToolResultText; @@ -444,14 +445,19 @@ describe("truncateOversizedToolResultsInSession", () => { const openSpy = vi.spyOn(SessionManager, "open").mockImplementation(() => { throw new Error("SessionManager.open should not be used for persisted truncation"); }); + const listener = vi.fn(); + const cleanup = onSessionTranscriptUpdate(listener); const result = await truncateOversizedToolResultsInSession({ sessionFile, + sessionKey: "agent:main:test", contextWindowTokens: 100, }); + cleanup(); openSpy.mockRestore(); expect(result.truncated).toBe(true); expect(result.truncatedCount).toBeGreaterThan(0); + expect(listener).toHaveBeenCalledWith({ sessionFile, sessionKey: "agent:main:test" }); const afterBranch = SessionManager.open(sessionFile).getBranch(); const afterToolResults = afterBranch.filter( diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.ts b/src/agents/pi-embedded-runner/tool-result-truncation.ts index ba5fa8d219a..34dda5d79c3 100644 --- a/src/agents/pi-embedded-runner/tool-result-truncation.ts +++ b/src/agents/pi-embedded-runner/tool-result-truncation.ts @@ -659,7 +659,10 @@ function truncateOversizedToolResultsInExistingSessionManager(params: { replacements: plan.replacements, }); if (rewriteResult.changed && params.sessionFile) { - emitSessionTranscriptUpdate(params.sessionFile); + emitSessionTranscriptUpdate({ + sessionFile: params.sessionFile, + sessionKey: params.sessionKey, + }); } log.info( @@ -723,7 +726,10 @@ async function truncateOversizedToolResultsInTranscriptState(params: { state, appendedEntries: rewriteResult.appendedEntries, }); - emitSessionTranscriptUpdate(params.sessionFile); + emitSessionTranscriptUpdate({ + sessionFile: params.sessionFile, + sessionKey: params.sessionKey, + }); } log.info( diff --git a/src/agents/pi-embedded-runner/transcript-rewrite.test.ts b/src/agents/pi-embedded-runner/transcript-rewrite.test.ts index bbb76340ce5..7bd15635fd8 100644 --- a/src/agents/pi-embedded-runner/transcript-rewrite.test.ts +++ b/src/agents/pi-embedded-runner/transcript-rewrite.test.ts @@ -330,7 +330,7 @@ describe("rewriteTranscriptEntriesInSessionFile", () => { timeoutMs: 60_000, }); expect(acquireSessionWriteLockReleaseMock).toHaveBeenCalledTimes(1); - expect(listener).toHaveBeenCalledWith({ sessionFile }); + expect(listener).toHaveBeenCalledWith({ sessionFile, sessionKey: "agent:main:test" }); openSpy.mockRestore(); const rewrittenSession = SessionManager.open(sessionFile); diff --git a/src/agents/pi-embedded-runner/transcript-rewrite.ts b/src/agents/pi-embedded-runner/transcript-rewrite.ts index 3209d432dd9..9040e4efbe0 100644 --- a/src/agents/pi-embedded-runner/transcript-rewrite.ts +++ b/src/agents/pi-embedded-runner/transcript-rewrite.ts @@ -379,7 +379,10 @@ export async function rewriteTranscriptEntriesInSessionFile(params: { state, appendedEntries: result.appendedEntries, }); - emitSessionTranscriptUpdate(params.sessionFile); + emitSessionTranscriptUpdate({ + sessionFile: params.sessionFile, + sessionKey: params.sessionKey, + }); log.info( `[transcript-rewrite] rewrote ${result.rewrittenEntries} entr` + `${result.rewrittenEntries === 1 ? "y" : "ies"} ` + diff --git a/src/agents/tools/image-generate-tool.test.ts b/src/agents/tools/image-generate-tool.test.ts index c665cc9505a..e543dc209d2 100644 --- a/src/agents/tools/image-generate-tool.test.ts +++ b/src/agents/tools/image-generate-tool.test.ts @@ -8,6 +8,32 @@ let webMedia: typeof import("../../media/web-media.js"); let createImageGenerateTool: typeof import("./image-generate-tool.js").createImageGenerateTool; let resolveImageGenerationModelConfigForTool: typeof import("./image-generate-tool.js").resolveImageGenerationModelConfigForTool; +const IMAGE_GENERATION_PROVIDER_AUTH_ENV_VARS = [ + "OPENAI_API_KEY", + "OPENAI_API_KEYS", + "GEMINI_API_KEY", + "GEMINI_API_KEYS", + "GOOGLE_API_KEY", + "GOOGLE_API_KEYS", + "DEEPINFRA_API_KEY", + "FAL_KEY", + "FAL_API_KEY", + "LITELLM_API_KEY", + "MINIMAX_CODE_PLAN_KEY", + "MINIMAX_CODING_API_KEY", + "MINIMAX_API_KEY", + "MINIMAX_OAUTH_TOKEN", + "OPENROUTER_API_KEY", + "XAI_API_KEY", + "VYDRA_API_KEY", +] as const; + +function clearImageGenerationProviderAuthEnv() { + for (const key of IMAGE_GENERATION_PROVIDER_AUTH_ENV_VARS) { + vi.stubEnv(key, ""); + } +} + function hasStubbedImageProviderAuth(providerId: string): boolean { if (providerId === "openai") { return Boolean(process.env.OPENAI_API_KEY?.trim() || process.env.OPENAI_API_KEYS?.trim()); @@ -217,12 +243,7 @@ describe("createImageGenerateTool", () => { }); beforeEach(() => { - vi.stubEnv("OPENAI_API_KEY", ""); - vi.stubEnv("OPENAI_API_KEYS", ""); - vi.stubEnv("GEMINI_API_KEY", ""); - vi.stubEnv("GEMINI_API_KEYS", ""); - vi.stubEnv("GOOGLE_API_KEY", ""); - vi.stubEnv("GOOGLE_API_KEYS", ""); + clearImageGenerationProviderAuthEnv(); }); afterEach(() => { @@ -319,6 +340,29 @@ describe("createImageGenerateTool", () => { expect(createImageGenerateTool({ config: {} })).not.toBeNull(); }); + it("does not load runtime providers while resolving an explicitly configured model", () => { + const listProviders = vi + .spyOn(imageGenerationRuntime, "listRuntimeImageGenerationProviders") + .mockImplementation(() => { + throw new Error("runtime provider list should not run for explicit image model config"); + }); + + expect( + resolveImageGenerationModelConfigForTool({ + cfg: { + agents: { + defaults: { + imageGenerationModel: { + primary: "openai/gpt-image-1", + }, + }, + }, + }, + }), + ).toEqual({ primary: "openai/gpt-image-1" }); + expect(listProviders).not.toHaveBeenCalled(); + }); + it("infers the canonical OpenAI image model from provider readiness without explicit config", () => { vi.stubEnv("OPENAI_API_KEY", "openai-test"); const isConfigured = vi.fn(({ agentDir }: { agentDir?: string }) => agentDir === "/tmp/agent"); @@ -1099,6 +1143,7 @@ describe("createImageGenerateTool", () => { expect(generateImage).toHaveBeenCalledWith( expect.objectContaining({ + autoProviderFallback: false, aspectRatio: "16:9", inputImages: expect.arrayContaining([ expect.objectContaining({ buffer: Buffer.from("input-image"), mimeType: "image/png" }), diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index fc0d3372a5f..1e26bae0cd0 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -50,7 +50,11 @@ import { resolveMediaToolLocalRoots, resolveSelectedCapabilityProvider, } from "./media-tool-shared.js"; -import { type ToolModelConfig } from "./model-config.helpers.js"; +import { + coerceToolModelConfig, + hasToolModelConfig, + type ToolModelConfig, +} from "./model-config.helpers.js"; import { createSandboxBridgeReadFile, resolveSandboxedBridgeMediaPath, @@ -203,10 +207,14 @@ export function resolveImageGenerationModelConfigForTool(params: { agentDir: params.agentDir, authStore: params.authStore, modelConfig: params.cfg?.agents?.defaults?.imageGenerationModel, - providers: listRuntimeImageGenerationProviders({ config: params.cfg }), + providers: () => listRuntimeImageGenerationProviders({ config: params.cfg }), }); } +function hasExplicitImageGenerationModelConfig(cfg?: OpenClawConfig): boolean { + return hasToolModelConfig(coerceToolModelConfig(cfg?.agents?.defaults?.imageGenerationModel)); +} + function resolveAction(args: Record): "generate" | "list" { return resolveGenerateAction({ args, @@ -673,6 +681,7 @@ export function createImageGenerateTool(options?: { if (!imageGenerationModelConfig) { throw new ToolInputError("No image-generation model configured."); } + const explicitModelConfig = hasExplicitImageGenerationModelConfig(cfg); const effectiveCfg = applyImageGenerationModelConfigDefaults(cfg, imageGenerationModelConfig) ?? cfg; const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg); @@ -730,6 +739,7 @@ export function createImageGenerateTool(options?: { prompt, agentDir: options?.agentDir, modelOverride: model, + autoProviderFallback: explicitModelConfig ? false : undefined, size, aspectRatio, resolution, diff --git a/src/agents/tools/media-tool-shared.ts b/src/agents/tools/media-tool-shared.ts index d9f84ef2532..299b738a0fc 100644 --- a/src/agents/tools/media-tool-shared.ts +++ b/src/agents/tools/media-tool-shared.ts @@ -138,6 +138,8 @@ type CapabilityProvider = { isConfigured?: (ctx: { cfg?: OpenClawConfig; agentDir?: string }) => boolean; }; +type CapabilityProviderSource = CapabilityProvider[] | (() => CapabilityProvider[]); + type GenerationCapabilityProviderKey = | "imageGenerationProviders" | "videoGenerationProviders" @@ -271,12 +273,18 @@ export function resolveCapabilityModelConfigForTool(params: { agentDir?: string; authStore?: AuthProfileStore; modelConfig?: AgentModelConfig; - providers: CapabilityProvider[]; + providers: CapabilityProviderSource; }): ToolModelConfig | null { const explicit = coerceToolModelConfig(params.modelConfig); if (hasToolModelConfig(explicit)) { return explicit; } + let resolvedProviders: CapabilityProvider[] | undefined; + const getProviders = (): CapabilityProvider[] => { + resolvedProviders ??= + typeof params.providers === "function" ? params.providers() : params.providers; + return resolvedProviders; + }; return buildToolModelConfigFromCandidates({ explicit, agentDir: params.agentDir, @@ -285,11 +293,11 @@ export function resolveCapabilityModelConfigForTool(params: { cfg: params.cfg, agentDir: params.agentDir, authStore: params.authStore, - providers: params.providers, + providers: getProviders(), }), isProviderConfigured: (providerId) => isCapabilityProviderConfigured({ - providers: params.providers, + providers: getProviders(), providerId, cfg: params.cfg, agentDir: params.agentDir, diff --git a/src/agents/tools/music-generate-tool.test.ts b/src/agents/tools/music-generate-tool.test.ts index 9a449066685..346e03bd703 100644 --- a/src/agents/tools/music-generate-tool.test.ts +++ b/src/agents/tools/music-generate-tool.test.ts @@ -171,6 +171,61 @@ describe("createMusicGenerateTool", () => { expect(listProviders).not.toHaveBeenCalled(); }); + it("does not load runtime providers while executing an explicitly configured tool", async () => { + const listProviders = vi + .spyOn(musicGenerationRuntime, "listRuntimeMusicGenerationProviders") + .mockImplementation(() => { + throw new Error("runtime provider list should not run for explicit music model config"); + }); + vi.spyOn(musicGenerationRuntime, "generateMusic").mockResolvedValue({ + provider: "google", + model: "lyria-3-clip-preview", + attempts: [], + ignoredOverrides: [], + tracks: [ + { + buffer: Buffer.from("music-bytes"), + mimeType: "audio/mpeg", + fileName: "night-drive.mp3", + }, + ], + metadata: {}, + }); + vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({ + path: "/tmp/generated-night-drive.mp3", + id: "generated-night-drive.mp3", + size: 11, + contentType: "audio/mpeg", + }); + + const tool = createMusicGenerateTool({ + config: asConfig({ + agents: { + defaults: { + musicGenerationModel: { primary: "google/lyria-3-clip-preview" }, + }, + }, + }), + }); + expect(tool).not.toBeNull(); + if (!tool) { + throw new Error("expected music_generate tool"); + } + + await expect( + tool.execute("call-1", { + prompt: "night-drive synthwave", + instrumental: true, + }), + ).resolves.toBeTruthy(); + expect(listProviders).not.toHaveBeenCalled(); + expect(musicGenerationRuntime.generateMusic).toHaveBeenCalledWith( + expect.objectContaining({ + autoProviderFallback: false, + }), + ); + }); + it("generates tracks, saves them, and emits MEDIA paths without a session-backed detach", async () => { taskExecutorMocks.createRunningTaskRun.mockReturnValue({ taskId: "task-123", @@ -295,6 +350,7 @@ describe("createMusicGenerateTool", () => { expect(generateSpy).toHaveBeenCalledWith( expect.objectContaining({ + autoProviderFallback: false, timeoutMs: 10_000, }), ); @@ -398,6 +454,7 @@ describe("createMusicGenerateTool", () => { await scheduledWork?.(); expect(musicGenerationRuntime.generateMusic).toHaveBeenCalledWith( expect.objectContaining({ + autoProviderFallback: false, timeoutMs: 10_000, }), ); diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts index 4d49fbd0435..9f1ed351414 100644 --- a/src/agents/tools/music-generate-tool.ts +++ b/src/agents/tools/music-generate-tool.ts @@ -44,7 +44,11 @@ import { resolveRemoteMediaSsrfPolicy, resolveSelectedCapabilityProvider, } from "./media-tool-shared.js"; -import { type ToolModelConfig } from "./model-config.helpers.js"; +import { + coerceToolModelConfig, + hasToolModelConfig, + type ToolModelConfig, +} from "./model-config.helpers.js"; import { completeMusicGenerationTaskRun, createMusicGenerationTaskRun, @@ -141,10 +145,14 @@ function resolveMusicGenerationModelConfigForTool(params: { agentDir: params.agentDir, authStore: params.authStore, modelConfig: params.cfg?.agents?.defaults?.musicGenerationModel, - providers: listRuntimeMusicGenerationProviders({ config: params.cfg }), + providers: () => listRuntimeMusicGenerationProviders({ config: params.cfg }), }); } +function hasExplicitMusicGenerationModelConfig(cfg?: OpenClawConfig): boolean { + return hasToolModelConfig(coerceToolModelConfig(cfg?.agents?.defaults?.musicGenerationModel)); +} + function resolveSelectedMusicGenerationProvider(params: { config?: OpenClawConfig; musicGenerationModelConfig: ToolModelConfig; @@ -406,6 +414,7 @@ async function executeMusicGenerationJob(params: { filename?: string; loadedReferenceImages: LoadedReferenceImage[]; taskHandle?: MusicGenerationTaskHandle | null; + autoProviderFallback?: boolean; timeoutMs?: number; timeoutNormalization?: MusicGenerationTimeoutNormalization; }): Promise { @@ -425,6 +434,7 @@ async function executeMusicGenerationJob(params: { durationSeconds: params.durationSeconds, format: params.format, inputImages: params.loadedReferenceImages.map((entry) => entry.sourceImage), + autoProviderFallback: params.autoProviderFallback, timeoutMs: params.timeoutMs, }); if (params.taskHandle) { @@ -600,6 +610,7 @@ export function createMusicGenerateTool(options?: { if (!musicGenerationModelConfig) { throw new ToolInputError("No music-generation model configured."); } + const explicitModelConfig = hasExplicitMusicGenerationModelConfig(cfg); const effectiveCfg = applyMusicGenerationModelConfigDefaults(cfg, musicGenerationModelConfig) ?? cfg; @@ -624,11 +635,17 @@ export function createMusicGenerateTool(options?: { const timeout = normalizeMusicGenerationTimeoutMs(requestedTimeoutMs); const timeoutMs = timeout.timeoutMs; const imageInputs = normalizeReferenceImageInputs(args); - const selectedProvider = resolveSelectedMusicGenerationProvider({ - config: effectiveCfg, - musicGenerationModelConfig, - modelOverride: model, - }); + const selectedModelRef = + parseMusicGenerationModelRef(model) ?? + parseMusicGenerationModelRef(musicGenerationModelConfig.primary); + const selectedProvider = + imageInputs.length > 0 + ? resolveSelectedMusicGenerationProvider({ + config: effectiveCfg, + musicGenerationModelConfig, + modelOverride: model, + }) + : undefined; const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg); const loadedReferenceImages = await loadReferenceImages({ inputs: imageInputs, @@ -639,8 +656,7 @@ export function createMusicGenerateTool(options?: { }); validateMusicGenerationCapabilities({ provider: selectedProvider, - model: - parseMusicGenerationModelRef(model)?.model ?? model ?? selectedProvider?.defaultModel, + model: selectedModelRef?.model ?? model ?? selectedProvider?.defaultModel, inputImageCount: loadedReferenceImages.length, lyrics, instrumental, @@ -651,7 +667,7 @@ export function createMusicGenerateTool(options?: { sessionKey: options?.agentSessionKey, requesterOrigin: options?.requesterOrigin, prompt, - providerId: selectedProvider?.id, + providerId: selectedProvider?.id ?? selectedModelRef?.provider, }); const shouldDetach = Boolean(taskHandle && options?.agentSessionKey?.trim()); @@ -674,6 +690,7 @@ export function createMusicGenerateTool(options?: { filename, loadedReferenceImages, taskHandle, + autoProviderFallback: explicitModelConfig ? false : undefined, timeoutMs, timeoutNormalization: timeout.normalization, }), @@ -770,6 +787,7 @@ export function createMusicGenerateTool(options?: { filename, loadedReferenceImages, taskHandle, + autoProviderFallback: explicitModelConfig ? false : undefined, timeoutMs, timeoutNormalization: timeout.normalization, }); diff --git a/src/agents/tools/video-generate-tool.test.ts b/src/agents/tools/video-generate-tool.test.ts index 9465d73bd67..c69ba01bc1c 100644 --- a/src/agents/tools/video-generate-tool.test.ts +++ b/src/agents/tools/video-generate-tool.test.ts @@ -21,6 +21,34 @@ const taskExecutorMocks = vi.hoisted(() => ({ createRunningTaskRun: vi.fn(), })); +const VIDEO_GENERATION_PROVIDER_AUTH_ENV_VARS = [ + "OPENAI_API_KEY", + "OPENAI_API_KEYS", + "GEMINI_API_KEY", + "GEMINI_API_KEYS", + "GOOGLE_API_KEY", + "GOOGLE_API_KEYS", + "DEEPINFRA_API_KEY", + "MODELSTUDIO_API_KEY", + "DASHSCOPE_API_KEY", + "QWEN_API_KEY", + "BYTEPLUS_API_KEY", + "COMFY_API_KEY", + "COMFY_CLOUD_API_KEY", + "FAL_KEY", + "FAL_API_KEY", + "MINIMAX_CODE_PLAN_KEY", + "MINIMAX_CODING_API_KEY", + "MINIMAX_API_KEY", + "MINIMAX_OAUTH_TOKEN", + "OPENROUTER_API_KEY", + "RUNWAYML_API_SECRET", + "RUNWAY_API_KEY", + "TOGETHER_API_KEY", + "XAI_API_KEY", + "VYDRA_API_KEY", +] as const; + vi.mock("../../tasks/runtime-internal.js", () => taskRuntimeInternalMocks); vi.mock("../../tasks/detached-task-runtime.js", () => taskExecutorMocks); @@ -77,6 +105,9 @@ function mockSavedVideoResult(fileName = "out.mp4") { function resetVideoGenerateMocks() { vi.restoreAllMocks(); + for (const key of VIDEO_GENERATION_PROVIDER_AUTH_ENV_VARS) { + vi.stubEnv(key, ""); + } vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([]); taskRuntimeInternalMocks.listTasksForOwnerKey.mockReset(); taskRuntimeInternalMocks.listTasksForOwnerKey.mockReturnValue([]); @@ -134,6 +165,27 @@ describe("createVideoGenerateTool", () => { expect(listProviders).not.toHaveBeenCalled(); }); + it("does not load runtime providers while resolving an explicitly configured model", () => { + const listProviders = vi + .spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders") + .mockImplementation(() => { + throw new Error("runtime provider list should not run for explicit video model config"); + }); + + expect( + resolveVideoGenerationModelConfigForTool({ + cfg: asConfig({ + agents: { + defaults: { + videoGenerationModel: { primary: "qwen/wan2.6-t2v" }, + }, + }, + }), + }), + ).toEqual({ primary: "qwen/wan2.6-t2v" }); + expect(listProviders).not.toHaveBeenCalled(); + }); + it("orders auto-detected provider defaults by canonical aliases", () => { vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([ { @@ -854,6 +906,7 @@ describe("createVideoGenerateTool", () => { expect(generateSpy).toHaveBeenCalledWith( expect.objectContaining({ + autoProviderFallback: false, providerOptions: { seed: 42, draft: true }, }), ); diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index 5048cae5c7a..b32c774625a 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -47,7 +47,11 @@ import { resolveRemoteMediaSsrfPolicy, resolveSelectedCapabilityProvider, } from "./media-tool-shared.js"; -import { type ToolModelConfig } from "./model-config.helpers.js"; +import { + coerceToolModelConfig, + hasToolModelConfig, + type ToolModelConfig, +} from "./model-config.helpers.js"; import { createSandboxBridgeReadFile, resolveSandboxedBridgeMediaPath, @@ -233,10 +237,14 @@ export function resolveVideoGenerationModelConfigForTool(params: { agentDir: params.agentDir, authStore: params.authStore, modelConfig: params.cfg?.agents?.defaults?.videoGenerationModel, - providers: listRuntimeVideoGenerationProviders({ config: params.cfg }), + providers: () => listRuntimeVideoGenerationProviders({ config: params.cfg }), }); } +function hasExplicitVideoGenerationModelConfig(cfg?: OpenClawConfig): boolean { + return hasToolModelConfig(coerceToolModelConfig(cfg?.agents?.defaults?.videoGenerationModel)); +} + function resolveAction(args: Record): "generate" | "list" | "status" { return resolveGenerateAction({ args, @@ -586,6 +594,7 @@ async function executeVideoGenerationJob(params: { loadedReferenceAudios: LoadedReferenceAsset[]; taskHandle?: VideoGenerationTaskHandle | null; providerOptions?: Record; + autoProviderFallback?: boolean; timeoutMs?: number; }): Promise { if (params.taskHandle) { @@ -608,6 +617,7 @@ async function executeVideoGenerationJob(params: { inputImages: params.loadedReferenceImages.map((entry) => entry.sourceAsset), inputVideos: params.loadedReferenceVideos.map((entry) => entry.sourceAsset), inputAudios: params.loadedReferenceAudios.map((entry) => entry.sourceAsset), + autoProviderFallback: params.autoProviderFallback, providerOptions: params.providerOptions, timeoutMs: params.timeoutMs, }); @@ -857,6 +867,7 @@ export function createVideoGenerateTool(options?: { if (!videoGenerationModelConfig) { throw new ToolInputError("No video-generation model configured."); } + const explicitModelConfig = hasExplicitVideoGenerationModelConfig(cfg); const effectiveCfg = applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg; const remoteMediaSsrfPolicy = resolveRemoteMediaSsrfPolicy(effectiveCfg); @@ -1023,6 +1034,7 @@ export function createVideoGenerateTool(options?: { loadedReferenceAudios, taskHandle, providerOptions, + autoProviderFallback: explicitModelConfig ? false : undefined, timeoutMs, }), }); @@ -1120,6 +1132,7 @@ export function createVideoGenerateTool(options?: { loadedReferenceAudios, taskHandle, providerOptions, + autoProviderFallback: explicitModelConfig ? false : undefined, timeoutMs, }); completeVideoGenerationTaskRun({ diff --git a/src/commands/channel-setup/plugin-install.test.ts b/src/commands/channel-setup/plugin-install.test.ts index 395d6b8d94b..0103ea230ec 100644 --- a/src/commands/channel-setup/plugin-install.test.ts +++ b/src/commands/channel-setup/plugin-install.test.ts @@ -518,7 +518,7 @@ describe("ensureChannelSetupPluginInstalled", () => { options: [ expect.objectContaining({ value: "npm", - label: `Remote install from npm (${bundledChatForkNpmSpec})`, + label: `Download from npm (${bundledChatForkNpmSpec})`, }), expect.objectContaining({ value: "skip", @@ -562,7 +562,7 @@ describe("ensureChannelSetupPluginInstalled", () => { options: [ expect.objectContaining({ value: "clawhub", - label: "Remote install from ClawHub (clawhub:openclaw/clawhub-chat@2026.5.2)", + label: "Download from ClawHub (clawhub:openclaw/clawhub-chat@2026.5.2)", }), expect.objectContaining({ value: "skip", diff --git a/src/commands/onboarding-plugin-install.test.ts b/src/commands/onboarding-plugin-install.test.ts index 74d9391fad6..76bbafa2b68 100644 --- a/src/commands/onboarding-plugin-install.test.ts +++ b/src/commands/onboarding-plugin-install.test.ts @@ -72,23 +72,6 @@ vi.mock("../utils/with-timeout.js", () => ({ import { ensureOnboardingPluginInstalled } from "./onboarding-plugin-install.js"; -function createDeferred() { - let resolve!: (value: T) => void; - const promise = new Promise((next) => { - resolve = next; - }); - return { promise, resolve }; -} - -async function waitForMockCall(mock: { mock: { calls: unknown[][] } }) { - for (let i = 0; i < 20; i += 1) { - if (mock.mock.calls.length > 0) { - return; - } - await new Promise((resolve) => setTimeout(resolve, 0)); - } -} - describe("ensureOnboardingPluginInstalled", () => { beforeEach(() => { vi.clearAllMocks(); @@ -258,114 +241,6 @@ describe("ensureOnboardingPluginInstalled", () => { expect(refreshPluginRegistryAfterConfigMutation).not.toHaveBeenCalled(); }); - it("animates ClawHub install progress while the remote install is running", async () => { - const deferred = createDeferred>>(); - installPluginFromClawHub.mockImplementation(async (params) => { - params.logger?.info?.("Downloading demo-plugin from ClawHub…"); - return await deferred.promise; - }); - const stop = vi.fn(); - const update = vi.fn(); - - const install = ensureOnboardingPluginInstalled({ - cfg: {}, - entry: { - pluginId: "demo-plugin", - label: "Demo Provider", - install: { - clawhubSpec: "clawhub:demo-plugin@2026.5.2", - defaultChoice: "clawhub", - }, - }, - prompter: { - select: vi.fn(async () => "clawhub"), - progress: vi.fn(() => ({ update, stop })), - } as never, - runtime: {} as never, - }); - - await waitForMockCall(installPluginFromClawHub); - expect(installPluginFromClawHub).toHaveBeenCalled(); - - await new Promise((resolve) => setTimeout(resolve, 250)); - expect(update).toHaveBeenCalledWith("Downloading"); - expect( - update.mock.calls.some( - ([message]) => - typeof message === "string" && /^Downloading {2}\[[█░]{16}\] \d+%$/u.test(message), - ), - ).toBe(true); - - deferred.resolve({ - ok: true, - pluginId: "demo-plugin", - targetDir: "/tmp/demo-plugin", - version: "2026.5.2", - packageName: "demo-plugin", - clawhub: { - source: "clawhub", - clawhubUrl: "https://clawhub.ai", - clawhubPackage: "demo-plugin", - clawhubFamily: "code-plugin", - clawhubChannel: "official", - version: "2026.5.2", - integrity: "sha256-clawpack", - resolvedAt: "2026-05-02T00:00:00.000Z", - clawpackSha256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - clawpackSpecVersion: 1, - clawpackManifestSha256: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", - clawpackSize: 4096, - }, - }); - await install; - }); - - it("animates npm install progress while the remote install is running", async () => { - const deferred = createDeferred>>(); - installPluginFromNpmSpec.mockImplementation(async (params) => { - params.logger?.info?.("Resolving npm package…"); - return await deferred.promise; - }); - const stop = vi.fn(); - const update = vi.fn(); - - const install = ensureOnboardingPluginInstalled({ - cfg: {}, - entry: { - pluginId: "demo-plugin", - label: "Demo Plugin", - install: { - npmSpec: "@demo/plugin@1.2.3", - }, - }, - prompter: { - select: vi.fn(async () => "npm"), - progress: vi.fn(() => ({ update, stop })), - } as never, - runtime: {} as never, - }); - - await waitForMockCall(installPluginFromNpmSpec); - expect(installPluginFromNpmSpec).toHaveBeenCalled(); - - await new Promise((resolve) => setTimeout(resolve, 250)); - expect(update).toHaveBeenCalledWith("Resolving"); - expect( - update.mock.calls.some( - ([message]) => - typeof message === "string" && /^Resolving {2}\[[█░]{16}\] \d+%$/u.test(message), - ), - ).toBe(true); - - deferred.resolve({ - ok: true, - pluginId: "demo-plugin", - targetDir: "/tmp/demo-plugin", - version: "1.2.3", - }); - await install; - }); - it("returns a timed out status and notes the retry path when npm install hangs", async () => { const note = vi.fn(async () => {}); const stop = vi.fn(); @@ -435,7 +310,7 @@ describe("ensureOnboardingPluginInstalled", () => { }); expect(captured?.options).toEqual([ - { value: "npm", label: "Remote install from npm (@demo/plugin)" }, + { value: "npm", label: "Download from npm (@demo/plugin)" }, { value: "skip", label: "Skip for now" }, ]); expect(captured?.initialValue).toBe("npm"); @@ -474,11 +349,8 @@ describe("ensureOnboardingPluginInstalled", () => { }); expect(captured?.options).toEqual([ - { - value: "clawhub", - label: "Remote install from ClawHub (clawhub:demo-plugin@2026.5.2)", - }, - { value: "npm", label: "Remote install from npm (@openclaw/demo-plugin@2026.5.2)" }, + { value: "clawhub", label: "Download from ClawHub (clawhub:demo-plugin@2026.5.2)" }, + { value: "npm", label: "Download from npm (@openclaw/demo-plugin@2026.5.2)" }, { value: "skip", label: "Skip for now" }, ]); expect(captured?.initialValue).toBe("clawhub"); @@ -588,7 +460,7 @@ describe("ensureOnboardingPluginInstalled", () => { expect(captured).toBeDefined(); expect(captured?.message).toBe("Install Demo Plugin\\n plugin?"); expect(captured?.options).toEqual([ - { value: "npm", label: "Remote install from npm (@demo/plugin@1.2.3)" }, + { value: "npm", label: "Download from npm (@demo/plugin@1.2.3)" }, { value: "local", label: "Use local plugin path", @@ -802,7 +674,7 @@ describe("ensureOnboardingPluginInstalled", () => { }); expect(captured).toBeDefined(); - // "Remote install from npm (@openclaw/tlon)" must NOT appear: the bundled + // "Download from npm (@openclaw/tlon)" must NOT appear: the bundled // copy is what gets enabled, so the npm hint would only confuse // users into thinking the plugin is missing. expect(captured?.options).toEqual([ diff --git a/src/commands/onboarding-plugin-install.ts b/src/commands/onboarding-plugin-install.ts index f2430cd488b..4b28b9ea7d0 100644 --- a/src/commands/onboarding-plugin-install.ts +++ b/src/commands/onboarding-plugin-install.ts @@ -320,7 +320,7 @@ async function promptInstallChoice(params: { // `extensions/` and is discovered via `resolveBundledPluginSources`), // the bundled copy is the source of truth: it is version-locked to the // current host build and is what `defaultChoice` will pick anyway (see - // `resolveInstallDefaultChoice`). Surfacing remote install options in that + // `resolveInstallDefaultChoice`). Surfacing remote download options in that // case is misleading; those catalog specs only exist as fallback metadata for // non-bundled builds. Hide them so bundled channels like Tlon look identical // to Twitch / Slack in the menu. @@ -334,13 +334,13 @@ async function promptInstallChoice(params: { if (safeClawHubSpec) { options.push({ value: "clawhub", - label: formatRemoteInstallChoiceLabel("clawhub", safeClawHubSpec), + label: `Download from ClawHub (${safeClawHubSpec})`, }); } if (safeNpmSpec) { options.push({ value: "npm", - label: formatRemoteInstallChoiceLabel("npm", safeNpmSpec), + label: `Download from npm (${safeNpmSpec})`, }); } if (params.localPath) { @@ -420,11 +420,6 @@ function isTimeoutError(error: unknown): boolean { return error instanceof Error && error.message === "timeout"; } -function formatRemoteInstallChoiceLabel(source: "clawhub" | "npm", spec: string): string { - const sourceLabel = source === "clawhub" ? "ClawHub" : "npm"; - return `Remote install from ${sourceLabel} (${spec})`; -} - async function applyPluginEnablement(params: { cfg: OpenClawConfig; pluginId: string; diff --git a/src/config/sessions/transcript.test.ts b/src/config/sessions/transcript.test.ts index 2dbf8dffc9e..4cb4df8994f 100644 --- a/src/config/sessions/transcript.test.ts +++ b/src/config/sessions/transcript.test.ts @@ -354,7 +354,10 @@ describe("appendAssistantMessageToSessionTranscript", () => { expect(result.ok).toBe(true); if (result.ok) { - expect(emitSpy).toHaveBeenCalledWith(result.sessionFile); + expect(emitSpy).toHaveBeenCalledWith({ + sessionFile: result.sessionFile, + sessionKey, + }); } emitSpy.mockRestore(); }); diff --git a/src/config/sessions/transcript.ts b/src/config/sessions/transcript.ts index a3d2ea63481..c174d78b2be 100644 --- a/src/config/sessions/transcript.ts +++ b/src/config/sessions/transcript.ts @@ -295,7 +295,7 @@ export async function appendExactAssistantMessageToSessionTranscript(params: { emitSessionTranscriptUpdate({ sessionFile, sessionKey, message, messageId }); break; case "file-only": - emitSessionTranscriptUpdate(sessionFile); + emitSessionTranscriptUpdate({ sessionFile, sessionKey }); break; case "none": break; diff --git a/src/flows/channel-setup.status.test.ts b/src/flows/channel-setup.status.test.ts index 45fdea6ef13..38e9bc40352 100644 --- a/src/flows/channel-setup.status.test.ts +++ b/src/flows/channel-setup.status.test.ts @@ -12,10 +12,6 @@ type FormatChannelPrimerLine = typeof import("../channels/registry.js").formatCh type FormatChannelSelectionLine = typeof import("../channels/registry.js").formatChannelSelectionLine; type IsChannelConfigured = typeof import("../config/channel-configured.js").isChannelConfigured; -type ResolveBundledPluginSources = - typeof import("../plugins/bundled-sources.js").resolveBundledPluginSources; -type FindBundledPluginSourceInMap = - typeof import("../plugins/bundled-sources.js").findBundledPluginSourceInMap; type NoteChannelPrimerChannels = Parameters< typeof import("./channel-setup.status.js").noteChannelPrimer >[1]; @@ -37,21 +33,6 @@ const formatChannelSelectionLine = vi.hoisted(() => vi.fn((meta) => `${meta.label} — ${meta.blurb}`), ); const isChannelConfigured = vi.hoisted(() => vi.fn(() => false)); -const resolveBundledPluginSources = vi.hoisted(() => - vi.fn(() => new Map()), -); -const findBundledPluginSourceInMap = vi.hoisted(() => - vi.fn(({ bundled, lookup }) => { - const value = lookup.value.trim(); - if (!value) { - return undefined; - } - if (lookup.kind === "pluginId") { - return bundled.get(value); - } - return Array.from(bundled.values()).find((source) => source.npmSpec === value); - }), -); vi.mock("../channels/chat-meta.js", () => ({ listChatChannels: () => listChatChannels(), @@ -81,20 +62,20 @@ vi.mock("../config/channel-configured.js", () => ({ ) => isChannelConfigured(cfg, channelId), })); -// Avoid touching the real `extensions/` tree from unit tests. Tests opt -// into bundled-source entries explicitly when they cover bundled catalog -// rendering; the default fixture behaves as if nothing is bundled. +// Avoid touching the real `extensions/` tree from unit tests. Status +// rendering for installable catalog entries asks `bundled-sources` whether +// a plugin already lives in-tree to decide between +// "install plugin to enable" vs "bundled · enable to use". For these tests +// we want the installable-catalog branch unconditionally, so we stub the +// bundled lookup to "nothing is bundled". vi.mock("../plugins/bundled-sources.js", () => ({ - resolveBundledPluginSources: (params: Parameters[0]) => - resolveBundledPluginSources(params), - findBundledPluginSourceInMap: (params: Parameters[0]) => - findBundledPluginSourceInMap(params), + resolveBundledPluginSources: () => new Map(), + findBundledPluginSourceInMap: () => undefined, })); import { collectChannelStatus, noteChannelPrimer, - resolveCatalogChannelSelectionHint, resolveChannelSelectionNoteLines, resolveChannelSetupSelectionContributions, } from "./channel-setup.status.js"; @@ -112,17 +93,6 @@ describe("resolveChannelSetupSelectionContributions", () => { ); formatChannelSelectionLine.mockImplementation((meta) => `${meta.label} — ${meta.blurb}`); isChannelConfigured.mockReturnValue(false); - resolveBundledPluginSources.mockReturnValue(new Map()); - findBundledPluginSourceInMap.mockImplementation(({ bundled, lookup }) => { - const value = lookup.value.trim(); - if (!value) { - return undefined; - } - if (lookup.kind === "pluginId") { - return bundled.get(value); - } - return Array.from(bundled.values()).find((source) => source.npmSpec === value); - }); }); it("sorts channels alphabetically by picker label", () => { @@ -188,67 +158,6 @@ describe("resolveChannelSetupSelectionContributions", () => { ]); }); - it("describes installable catalog choices as remote npm installs", () => { - expect( - resolveCatalogChannelSelectionHint({ - install: { npmSpec: "@openclaw/googlechat" }, - }), - ).toBe("remote install from npm: @openclaw/googlechat"); - }); - - it("sanitizes remote npm install hints", () => { - expect( - resolveCatalogChannelSelectionHint({ - install: { npmSpec: "@openclaw/googlechat\u001B[31m\nbeta" }, - }), - ).toBe("remote install from npm: @openclaw/googlechat\\nbeta"); - }); - - it("suppresses remote install hints for bundled channels", () => { - expect( - resolveCatalogChannelSelectionHint( - { - install: { npmSpec: "@openclaw/googlechat" }, - }, - { bundledLocalPath: "extensions/googlechat" }, - ), - ).toBe(""); - }); - - it("renders bundled catalog statuses without remote install hints", async () => { - const entry = makeCatalogEntry("slack", "Slack", { - pluginId: "@openclaw/slack", - install: { npmSpec: "@openclaw/slack" }, - }); - listChatChannels.mockReturnValue([]); - resolveBundledPluginSources.mockReturnValue( - new Map([ - [ - "@openclaw/slack", - { - pluginId: "@openclaw/slack", - localPath: "extensions/slack", - npmSpec: "@openclaw/slack", - }, - ], - ]), - ); - resolveChannelSetupEntries.mockReturnValue( - makeChannelSetupEntries({ - installableCatalogEntries: [entry], - }), - ); - - const summary = await collectChannelStatus({ - cfg: {} as never, - accountOverrides: {}, - installedPlugins: [], - }); - - expect(summary.statusLines).toEqual(["Slack: bundled · enable to use"]); - expect(summary.statusByChannel.get("slack")?.selectionHint).toBe(""); - }); - it("combines real status and disabled hints when available", () => { const contributions = resolveChannelSetupSelectionContributions({ entries: [ diff --git a/src/flows/channel-setup.status.ts b/src/flows/channel-setup.status.ts index e043950fd13..d6a32e1850e 100644 --- a/src/flows/channel-setup.status.ts +++ b/src/flows/channel-setup.status.ts @@ -135,17 +135,17 @@ function formatSetupDisplayMeta(meta: ChannelMeta): ChannelMeta { /** * Hint shown next to an installable channel option in the selection menu when * we don't yet have a runtime-collected status. Mirrors the "configured" / - * "installed" affordance other channels get so users can see "remote install - * from npm: " before committing to install. + * "installed" affordance other channels get so users can see "download from + * " before committing to install. * * Bundled channels (the plugin lives under `extensions/` in the host * repo, e.g. Signal / Tlon / Twitch / Slack) are NOT downloaded from npm — * they ship with the host. Even when their `package.json` declares an * `npmSpec` (or the catalog falls back to the package name), surfacing - * "remote install from npm: " misleads users into believing the - * plugin is missing. For bundled channels we suppress the npm hint entirely - * so the menu shows the same neutral "plugin · install" affordance used when - * no npm source is known. + * "download from " misleads users into believing the plugin is + * missing. For bundled channels we suppress the npm hint entirely so the + * menu shows the same neutral "plugin · install" affordance used when no + * npm source is known. */ export function resolveCatalogChannelSelectionHint( entry: { install?: { npmSpec?: string } }, @@ -153,7 +153,7 @@ export function resolveCatalogChannelSelectionHint( ): string { const npmSpec = entry.install?.npmSpec?.trim(); if (npmSpec && !options?.bundledLocalPath) { - return `remote install from npm: ${formatSetupSelectionLabel(npmSpec, npmSpec)}`; + return `download from ${formatSetupSelectionLabel(npmSpec, npmSpec)}`; } return ""; } @@ -162,8 +162,8 @@ export function resolveCatalogChannelSelectionHint( * Look up the bundled-source entry for a catalog channel, regardless of * whether the catalog refers to it by `pluginId` or `npmSpec`. We use this * to detect bundled channels in the selection menu so we can suppress the - * misleading "remote install from npm: " hint for plugins that - * already ship with the host (Signal / Tlon / Twitch / Slack ...). + * misleading "download from " hint for plugins that already ship + * with the host (Signal / Tlon / Twitch / Slack ...). */ export function findBundledSourceForCatalogChannel(params: { bundled: ReadonlyMap; diff --git a/src/flows/channel-setup.test.ts b/src/flows/channel-setup.test.ts index 41f5ffafcb3..864d8452048 100644 --- a/src/flows/channel-setup.test.ts +++ b/src/flows/channel-setup.test.ts @@ -9,12 +9,6 @@ type ChannelSetupPlugin = import("../channels/plugins/setup-wizard-types.js").Ch type ResolveChannelSetupEntries = typeof import("../commands/channel-setup/discovery.js").resolveChannelSetupEntries; type CollectChannelStatus = typeof import("./channel-setup.status.js").collectChannelStatus; -type FindBundledSourceForCatalogChannel = - typeof import("./channel-setup.status.js").findBundledSourceForCatalogChannel; -type ResolveCatalogChannelSelectionHint = - typeof import("./channel-setup.status.js").resolveCatalogChannelSelectionHint; -type ResolveChannelSetupSelectionContributions = - typeof import("./channel-setup.status.js").resolveChannelSetupSelectionContributions; type EnsureChannelSetupPluginInstalled = typeof import("../commands/channel-setup/plugin-install.js").ensureChannelSetupPluginInstalled; type LoadChannelSetupPluginRegistrySnapshotForChannel = @@ -123,18 +117,6 @@ const collectChannelStatus = vi.hoisted(() => statusLines: [], })), ); -const findBundledSourceForCatalogChannel = vi.hoisted(() => - vi.fn(() => undefined), -); -const resolveCatalogChannelSelectionHint = vi.hoisted(() => - vi.fn((entry, options) => { - const npmSpec = entry.install?.npmSpec?.trim(); - return npmSpec && !options?.bundledLocalPath ? `remote install from npm: ${npmSpec}` : ""; - }), -); -const resolveChannelSetupSelectionContributions = vi.hoisted(() => - vi.fn(() => []), -); const isChannelConfigured = vi.hoisted(() => vi.fn((_cfg?: unknown, _channel?: unknown) => true)); vi.mock("../agents/agent-scope.js", () => ({ @@ -196,18 +178,12 @@ vi.mock("./channel-setup.prompts.js", () => ({ vi.mock("./channel-setup.status.js", () => ({ collectChannelStatus: (params: Parameters[0]) => collectChannelStatus(params), - findBundledSourceForCatalogChannel: (params: Parameters[0]) => - findBundledSourceForCatalogChannel(params), + findBundledSourceForCatalogChannel: vi.fn(() => undefined), noteChannelPrimer: vi.fn(), noteChannelStatus: vi.fn(), - resolveCatalogChannelSelectionHint: ( - entry: Parameters[0], - options: Parameters[1], - ) => resolveCatalogChannelSelectionHint(entry, options), + resolveCatalogChannelSelectionHint: vi.fn(() => "download from "), resolveChannelSelectionNoteLines: vi.fn(() => []), - resolveChannelSetupSelectionContributions: ( - params: Parameters[0], - ) => resolveChannelSetupSelectionContributions(params), + resolveChannelSetupSelectionContributions: vi.fn(() => []), resolveQuickstartDefault: vi.fn(() => undefined), })); @@ -243,12 +219,6 @@ describe("setupChannels workspace shadow exclusion", () => { statusByChannel: new Map(), statusLines: [], }); - findBundledSourceForCatalogChannel.mockReturnValue(undefined); - resolveCatalogChannelSelectionHint.mockImplementation((entry, options) => { - const npmSpec = entry.install?.npmSpec?.trim(); - return npmSpec && !options?.bundledLocalPath ? `remote install from npm: ${npmSpec}` : ""; - }); - resolveChannelSetupSelectionContributions.mockReturnValue([]); isChannelConfigured.mockReturnValue(true); }); @@ -368,48 +338,6 @@ describe("setupChannels workspace shadow exclusion", () => { expect(collectChannelStatus).not.toHaveBeenCalled(); }); - it("suppresses deferred picker remote install hints for bundled catalog choices", async () => { - const installableCatalogEntry = makeCatalogEntry("external-chat", "External Chat", { - pluginId: "@openclaw/external-chat", - install: { npmSpec: "@openclaw/external-chat" }, - }); - resolveChannelSetupEntries.mockReturnValue( - externalChatSetupEntries({ - installableCatalogEntries: [installableCatalogEntry], - installableCatalogById: new Map([["external-chat", installableCatalogEntry]]), - }), - ); - findBundledSourceForCatalogChannel.mockReturnValue({ - pluginId: "@openclaw/external-chat", - localPath: "extensions/external-chat", - npmSpec: "@openclaw/external-chat", - }); - const select = vi.fn(async () => "__done__"); - - await setupChannels( - {} as never, - {} as never, - { - confirm: vi.fn(async () => true), - note: vi.fn(async () => undefined), - select, - } as never, - { - deferStatusUntilSelection: true, - skipConfirm: true, - }, - ); - - expect(resolveCatalogChannelSelectionHint).toHaveBeenCalledWith(installableCatalogEntry, { - bundledLocalPath: "extensions/external-chat", - }); - expect( - resolveChannelSetupSelectionContributions.mock.calls[0]?.[0].statusByChannel.get( - "external-chat", - )?.selectionHint, - ).toBe(""); - }); - it("uses an active deferred setup plugin without enabling config on selection", async () => { const setupWizard = { channel: "custom-chat", diff --git a/src/flows/channel-setup.ts b/src/flows/channel-setup.ts index fa9d613564f..3f5996c77ad 100644 --- a/src/flows/channel-setup.ts +++ b/src/flows/channel-setup.ts @@ -320,14 +320,13 @@ export async function setupChannels( // installable catalog channels (e.g. WeCom shipped via npm). In QuickStart we // run with `deferStatusUntilSelection`, which leaves `statusByChannel` empty // until the user picks a channel — without this overlay the selection menu - // would render those options without any "remote install from npm: - // " hint. + // would render those options without any "download from " hint. // // Bundled channels (Signal / Tlon / Twitch / Slack ...) reach this code path // too whenever their plugin is not yet enabled, because they share the same - // "installable catalog" bucket. For those we must NOT show "remote install - // from npm: " — the plugin already lives under `extensions/` - // and the hint would mislead users into thinking the plugin is missing. + // "installable catalog" bucket. For those we must NOT show "download from + // " — the plugin already lives under `extensions/` and the + // hint would mislead users into thinking the plugin is missing. const buildStatusByChannelForSelection = ( catalogById: ReturnType["catalogById"], ): Map => { diff --git a/src/gateway/method-scopes.ts b/src/gateway/method-scopes.ts index 282707b7c94..af96bc0f09b 100644 --- a/src/gateway/method-scopes.ts +++ b/src/gateway/method-scopes.ts @@ -151,7 +151,6 @@ const METHOD_SCOPE_GROUPS: Record = { "tools.invoke", "chat.send", "chat.abort", - "chat.transcribeAudio", "sessions.create", "sessions.send", "sessions.steer", diff --git a/src/gateway/server-methods-list.ts b/src/gateway/server-methods-list.ts index 264a77d62a9..2d3924f294e 100644 --- a/src/gateway/server-methods-list.ts +++ b/src/gateway/server-methods-list.ts @@ -155,7 +155,6 @@ const BASE_METHODS = [ "chat.history", "chat.abort", "chat.send", - "chat.transcribeAudio", ]; export function listGatewayMethods(): string[] { diff --git a/src/gateway/server-methods.ts b/src/gateway/server-methods.ts index 9a87c51a8f5..78a613a9188 100644 --- a/src/gateway/server-methods.ts +++ b/src/gateway/server-methods.ts @@ -12,7 +12,6 @@ import { agentHandlers } from "./server-methods/agent.js"; import { agentsHandlers } from "./server-methods/agents.js"; import { artifactsHandlers } from "./server-methods/artifacts.js"; import { channelsHandlers } from "./server-methods/channels.js"; -import { chatTranscribeAudioHandlers } from "./server-methods/chat-transcribe-audio.js"; import { chatHandlers } from "./server-methods/chat.js"; import { commandsHandlers } from "./server-methods/commands.js"; import { configHandlers } from "./server-methods/config.js"; @@ -86,7 +85,6 @@ export const coreGatewayHandlers: GatewayRequestHandlers = { ...healthHandlers, ...channelsHandlers, ...chatHandlers, - ...chatTranscribeAudioHandlers, ...commandsHandlers, ...cronHandlers, ...deviceHandlers, diff --git a/src/gateway/server-methods/chat-transcribe-audio.runtime.ts b/src/gateway/server-methods/chat-transcribe-audio.runtime.ts deleted file mode 100644 index 647a8cab66d..00000000000 --- a/src/gateway/server-methods/chat-transcribe-audio.runtime.ts +++ /dev/null @@ -1 +0,0 @@ -export { transcribeAudioFile } from "../../media-understanding/runtime.js"; diff --git a/src/gateway/server-methods/chat-transcribe-audio.test.ts b/src/gateway/server-methods/chat-transcribe-audio.test.ts deleted file mode 100644 index 416eeaf6057..00000000000 --- a/src/gateway/server-methods/chat-transcribe-audio.test.ts +++ /dev/null @@ -1,123 +0,0 @@ -import fs from "node:fs/promises"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { ErrorCodes } from "../protocol/index.js"; -import { MAX_PAYLOAD_BYTES } from "../server-constants.js"; - -const mocks = vi.hoisted(() => ({ - transcribeAudioFile: vi.fn(async () => ({ - text: "hello from audio", - provider: "openai", - model: "gpt-4o-transcribe", - })), -})); - -vi.mock("../../media-understanding/runtime.js", () => ({ - transcribeAudioFile: - mocks.transcribeAudioFile as typeof import("../../media-understanding/runtime.js").transcribeAudioFile, -})); - -describe("chatTranscribeAudioHandlers", () => { - beforeEach(() => { - mocks.transcribeAudioFile.mockReset(); - mocks.transcribeAudioFile.mockResolvedValue({ - text: "hello from audio", - provider: "openai", - model: "gpt-4o-transcribe", - }); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - it("keeps the decoded audio cap below the base64 WebSocket frame limit", async () => { - const { MAX_CHAT_TRANSCRIBE_AUDIO_BYTES } = await import("./chat-transcribe-audio.js"); - const base64Bytes = Math.ceil(MAX_CHAT_TRANSCRIBE_AUDIO_BYTES / 3) * 4; - - expect(base64Bytes + 64 * 1024).toBeLessThanOrEqual(MAX_PAYLOAD_BYTES); - expect(MAX_CHAT_TRANSCRIBE_AUDIO_BYTES).toBeLessThan(20 * 1024 * 1024); - }); - - it("transcribes uploaded chat dictation audio through media understanding", async () => { - const { chatTranscribeAudioHandlers } = await import("./chat-transcribe-audio.js"); - const respond = vi.fn(); - - await chatTranscribeAudioHandlers["chat.transcribeAudio"]({ - params: { - audioDataUrl: `data:audio/webm;base64,${Buffer.from("audio").toString("base64")}`, - }, - respond, - context: { getRuntimeConfig: () => ({ tools: { media: {} } }) }, - } as never); - - expect(mocks.transcribeAudioFile).toHaveBeenCalledWith( - expect.objectContaining({ - cfg: { tools: { media: {} } }, - mime: "audio/webm", - }), - ); - const call = (mocks.transcribeAudioFile.mock.calls as unknown as Array<[{ filePath?: string }]>) - .at(0) - ?.at(0); - const filePath = call?.filePath; - expect(filePath).toMatch(/dictation\.webm$/); - await expect(fs.stat(filePath ?? "")).rejects.toMatchObject({ code: "ENOENT" }); - expect(respond).toHaveBeenCalledWith(true, { - text: "hello from audio", - provider: "openai", - model: "gpt-4o-transcribe", - }); - }); - - it("returns INVALID_REQUEST for missing audio payloads", async () => { - const { chatTranscribeAudioHandlers } = await import("./chat-transcribe-audio.js"); - const respond = vi.fn(); - - await chatTranscribeAudioHandlers["chat.transcribeAudio"]({ - params: {}, - respond, - context: { getRuntimeConfig: () => ({}) }, - } as never); - - expect(respond).toHaveBeenCalledWith( - false, - undefined, - expect.objectContaining({ - code: ErrorCodes.INVALID_REQUEST, - message: expect.stringContaining("requires audioDataUrl or audioBase64"), - }), - ); - expect(mocks.transcribeAudioFile).not.toHaveBeenCalled(); - }); - - it("returns UNAVAILABLE when no transcription provider is configured", async () => { - mocks.transcribeAudioFile.mockResolvedValue({ - text: undefined, - decision: { - capability: "audio", - outcome: "skipped", - attachments: [{ attempts: [] }], - }, - } as never); - const { chatTranscribeAudioHandlers } = await import("./chat-transcribe-audio.js"); - const respond = vi.fn(); - - await chatTranscribeAudioHandlers["chat.transcribeAudio"]({ - params: { - audioBase64: Buffer.from("audio").toString("base64"), - mimeType: "audio/ogg", - }, - respond, - context: { getRuntimeConfig: () => ({}) }, - } as never); - - expect(respond).toHaveBeenCalledWith( - false, - undefined, - expect.objectContaining({ - code: ErrorCodes.UNAVAILABLE, - message: expect.stringContaining("No audio transcription provider"), - }), - ); - }); -}); diff --git a/src/gateway/server-methods/chat-transcribe-audio.ts b/src/gateway/server-methods/chat-transcribe-audio.ts deleted file mode 100644 index 13dbb2a84e3..00000000000 --- a/src/gateway/server-methods/chat-transcribe-audio.ts +++ /dev/null @@ -1,125 +0,0 @@ -import fs from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; -import { extensionForMime, normalizeMimeType } from "../../media/mime.js"; -import { normalizeOptionalString } from "../../shared/string-coerce.js"; -import { ErrorCodes, errorShape } from "../protocol/index.js"; -import { MAX_PAYLOAD_BYTES } from "../server-constants.js"; -import { formatForLog } from "../ws-log.js"; -import type { GatewayRequestHandlers } from "./types.js"; - -type ChatTranscribeAudioRuntime = typeof import("./chat-transcribe-audio.runtime.js"); -type TranscribeAudioFileResult = Awaited< - ReturnType ->; - -let chatTranscribeAudioRuntimePromise: Promise | null = null; - -function loadChatTranscribeAudioRuntime(): Promise { - chatTranscribeAudioRuntimePromise ??= import("./chat-transcribe-audio.runtime.js"); - return chatTranscribeAudioRuntimePromise; -} - -const CHAT_TRANSCRIBE_AUDIO_WS_JSON_OVERHEAD_BYTES = 64 * 1024; -export const MAX_CHAT_TRANSCRIBE_AUDIO_BYTES = Math.floor( - ((MAX_PAYLOAD_BYTES - CHAT_TRANSCRIBE_AUDIO_WS_JSON_OVERHEAD_BYTES) * 3) / 4, -); - -function decodeAudioPayload(params: Record): { - data: Buffer; - mime?: string; -} { - const dataUrl = normalizeOptionalString(params.audioDataUrl); - const rawBase64 = normalizeOptionalString(params.audioBase64); - const explicitMime = normalizeMimeType(normalizeOptionalString(params.mimeType)); - - if (dataUrl) { - const match = /^data:([^;,]+)?(?:;[^,]*)?;base64,(.*)$/s.exec(dataUrl); - if (!match) { - throw new Error("chat.transcribeAudio requires a base64 data URL"); - } - const mime = normalizeMimeType(match[1]) ?? explicitMime; - return { data: Buffer.from(match[2] ?? "", "base64"), mime }; - } - - if (rawBase64) { - return { data: Buffer.from(rawBase64, "base64"), mime: explicitMime }; - } - - throw new Error("chat.transcribeAudio requires audioDataUrl or audioBase64"); -} - -function extensionForAudioMime(mime?: string): string { - if (mime === "audio/webm") { - return ".webm"; - } - return extensionForMime(mime) ?? ".audio"; -} - -function isMissingMediaUnderstandingProvider(result: TranscribeAudioFileResult) { - const decision = result.decision; - return ( - decision?.outcome === "skipped" && - decision.attachments.length > 0 && - decision.attachments.every((attachment) => attachment.attempts.length === 0) - ); -} - -export const chatTranscribeAudioHandlers: GatewayRequestHandlers = { - "chat.transcribeAudio": async ({ params, respond, context }) => { - let decoded: ReturnType; - try { - decoded = decodeAudioPayload(params); - } catch (err) { - respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, formatForLog(err))); - return; - } - - if (decoded.data.byteLength === 0) { - respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "Audio payload is empty")); - return; - } - if (decoded.data.byteLength > MAX_CHAT_TRANSCRIBE_AUDIO_BYTES) { - respond( - false, - undefined, - errorShape( - ErrorCodes.INVALID_REQUEST, - `Audio payload exceeds ${MAX_CHAT_TRANSCRIBE_AUDIO_BYTES} bytes`, - ), - ); - return; - } - - const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-chat-stt-")); - const filePath = path.join(tmpDir, `dictation${extensionForAudioMime(decoded.mime)}`); - try { - await fs.writeFile(filePath, decoded.data); - const { transcribeAudioFile } = await loadChatTranscribeAudioRuntime(); - const result = await transcribeAudioFile({ - filePath, - cfg: context.getRuntimeConfig(), - mime: decoded.mime, - language: normalizeOptionalString(params.language), - prompt: normalizeOptionalString(params.prompt), - }); - const text = result.text?.trim(); - if (!text) { - const message = isMissingMediaUnderstandingProvider(result) - ? "No audio transcription provider is configured or ready. Configure tools.media.audio.models." - : "No transcript returned for audio"; - respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, message)); - return; - } - respond(true, { - text, - provider: result.provider ?? null, - model: result.model ?? null, - }); - } catch (err) { - respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); - } finally { - await fs.rm(tmpDir, { recursive: true, force: true }); - } - }, -}; diff --git a/src/image-generation/runtime-types.ts b/src/image-generation/runtime-types.ts index dceb2aa80d8..24332fabf72 100644 --- a/src/image-generation/runtime-types.ts +++ b/src/image-generation/runtime-types.ts @@ -28,6 +28,7 @@ export type GenerateImageParams = { outputFormat?: ImageGenerationOutputFormat; background?: ImageGenerationBackground; inputImages?: ImageGenerationSourceImage[]; + autoProviderFallback?: boolean; /** Optional per-request provider timeout in milliseconds. */ timeoutMs?: number; providerOptions?: ImageGenerationProviderOptions; diff --git a/src/image-generation/runtime.test.ts b/src/image-generation/runtime.test.ts index 2ffeafacf5e..0a7bb5c0078 100644 --- a/src/image-generation/runtime.test.ts +++ b/src/image-generation/runtime.test.ts @@ -95,6 +95,46 @@ describe("image-generation runtime", () => { expect(result.ignoredOverrides).toEqual([]); }); + it("does not list providers when explicit config disables auto provider fallback", async () => { + const provider: ImageGenerationProvider = { + id: "image-plugin", + capabilities: { + generate: {}, + edit: { enabled: false }, + }, + async generateImage() { + return { + images: [ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "sample.png", + }, + ], + model: "img-v1", + }; + }, + }; + providers = [provider]; + + const params: GenerateImageParams = { + cfg: { + agents: { + defaults: { + imageGenerationModel: { primary: "image-plugin/img-v1" }, + }, + }, + } as OpenClawConfig, + prompt: "draw a cat", + autoProviderFallback: false, + }; + + const result = await runGenerateImage(params); + + expect(result.provider).toBe("image-plugin"); + expect(listedConfigs).toEqual([]); + }); + it("uses configured image-generation timeout when the call omits timeoutMs", async () => { let seenTimeoutMs: number | undefined; const provider: ImageGenerationProvider = { diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts index fd290167202..7cda8fab5ac 100644 --- a/src/image-generation/runtime.ts +++ b/src/image-generation/runtime.ts @@ -65,6 +65,7 @@ export async function generateImage( parseModelRef: parseImageGenerationModelRef, agentDir: params.agentDir, listProviders, + autoProviderFallback: params.autoProviderFallback, }); if (candidates.length === 0) { throw new Error(buildNoImageGenerationModelConfiguredMessage(params.cfg, deps)); diff --git a/src/music-generation/runtime-types.ts b/src/music-generation/runtime-types.ts index 8cd770d6875..5cd575fac1c 100644 --- a/src/music-generation/runtime-types.ts +++ b/src/music-generation/runtime-types.ts @@ -21,6 +21,7 @@ export type GenerateMusicParams = { durationSeconds?: number; format?: MusicGenerationOutputFormat; inputImages?: MusicGenerationSourceImage[]; + autoProviderFallback?: boolean; /** Optional per-request provider timeout in milliseconds. */ timeoutMs?: number; }; diff --git a/src/music-generation/runtime.test.ts b/src/music-generation/runtime.test.ts index 0180e2ced91..c7d50c40180 100644 --- a/src/music-generation/runtime.test.ts +++ b/src/music-generation/runtime.test.ts @@ -85,6 +85,43 @@ describe("music-generation runtime", () => { ]); }); + it("does not list providers when explicit config disables auto provider fallback", async () => { + const provider: MusicGenerationProvider = { + id: "music-plugin", + capabilities: {}, + async generateMusic() { + return { + tracks: [ + { + buffer: Buffer.from("mp3-bytes"), + mimeType: "audio/mpeg", + fileName: "sample.mp3", + }, + ], + model: "track-v1", + }; + }, + }; + providers = [provider]; + + const params: GenerateMusicParams = { + cfg: { + agents: { + defaults: { + musicGenerationModel: { primary: "music-plugin/track-v1" }, + }, + }, + } as OpenClawConfig, + prompt: "play a synth line", + autoProviderFallback: false, + }; + + const result = await runGenerateMusic(params); + + expect(result.provider).toBe("music-plugin"); + expect(listedConfigs).toEqual([]); + }); + it("auto-detects and falls through to another configured music-generation provider by default", async () => { providers = [ { diff --git a/src/music-generation/runtime.ts b/src/music-generation/runtime.ts index e597e8e8a06..d3a3cd7e686 100644 --- a/src/music-generation/runtime.ts +++ b/src/music-generation/runtime.ts @@ -47,6 +47,7 @@ export async function generateMusic( parseModelRef: parseMusicGenerationModelRef, agentDir: params.agentDir, listProviders, + autoProviderFallback: params.autoProviderFallback, }); if (candidates.length === 0) { throw new Error( diff --git a/src/plugins/install.npm-spec.test.ts b/src/plugins/install.npm-spec.test.ts index 6be8c8f46a0..cb66176fd3d 100644 --- a/src/plugins/install.npm-spec.test.ts +++ b/src/plugins/install.npm-spec.test.ts @@ -298,61 +298,6 @@ describe("installPluginFromNpmSpec", () => { }); }); - it("allows official catalog-matched npm plugins through the trusted scanner path", async () => { - const npmRoot = path.join(suiteTempRootTracker.makeTempDir(), "npm"); - const warnings: string[] = []; - mockNpmViewAndInstall({ - spec: "@openclaw/feishu@2026.5.2", - packageName: "@openclaw/feishu", - version: "2026.5.2", - pluginId: "feishu", - npmRoot, - indexJs: `const token = process.env.FEISHU_BOT_TOKEN;\nfetch("https://open.feishu.cn/open-apis/bot/v2/hook", { headers: { authorization: token } });`, - }); - - const result = await installPluginFromNpmSpec({ - spec: "@openclaw/feishu@2026.5.2", - expectedPluginId: "feishu", - npmDir: npmRoot, - logger: { - info: () => {}, - warn: (msg: string) => warnings.push(msg), - }, - }); - - expect(result.ok).toBe(true); - expect( - warnings.some((warning) => - warning.includes("allowed because it is an official OpenClaw package"), - ), - ).toBe(true); - }); - - it("keeps blocking dangerous npm installs that do not match the official catalog", async () => { - const npmRoot = path.join(suiteTempRootTracker.makeTempDir(), "npm"); - mockNpmViewAndInstall({ - spec: "@openclaw/feishu-spoof@2026.5.2", - packageName: "@openclaw/feishu-spoof", - version: "2026.5.2", - pluginId: "feishu", - npmRoot, - indexJs: `const token = process.env.FEISHU_BOT_TOKEN;\nfetch("https://open.feishu.cn/open-apis/bot/v2/hook", { headers: { authorization: token } });`, - }); - - const result = await installPluginFromNpmSpec({ - spec: "@openclaw/feishu-spoof@2026.5.2", - expectedPluginId: "feishu", - npmDir: npmRoot, - logger: { info: () => {}, warn: () => {} }, - }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.code).toBe(PLUGIN_INSTALL_ERROR_CODE.SECURITY_SCAN_BLOCKED); - expect(result.error).toContain("dangerous code patterns detected"); - } - }); - it("rejects non-registry npm specs", async () => { const result = await installPluginFromNpmSpec({ spec: "github:evil/evil" }); expect(result.ok).toBe(false); diff --git a/src/plugins/install.ts b/src/plugins/install.ts index 20fecca261f..a0e41be21b6 100644 --- a/src/plugins/install.ts +++ b/src/plugins/install.ts @@ -34,11 +34,6 @@ import { resolvePackageExtensionEntries, type PackageManifest as PluginPackageManifest, } from "./manifest.js"; -import { - listOfficialExternalPluginCatalogEntries, - resolveOfficialExternalPluginId, - resolveOfficialExternalPluginInstall, -} from "./official-external-plugin-catalog.js"; import { validatePackageExtensionEntriesForInstall } from "./package-entry-resolution.js"; import { linkOpenClawPeerDependencies } from "./plugin-peer-link.js"; @@ -115,23 +110,7 @@ type PluginInstallPolicyRequest = { }; const defaultLogger: PluginInstallLogger = {}; - -function listTrustedOfficialNpmPluginPackages(): Map { - const packages = new Map(); - for (const entry of listOfficialExternalPluginCatalogEntries()) { - if (entry.source !== "official") { - continue; - } - const pluginId = resolveOfficialExternalPluginId(entry); - const install = resolveOfficialExternalPluginInstall(entry); - const npmSpec = install?.npmSpec ? parseRegistryNpmSpec(install.npmSpec) : null; - if (!pluginId || !npmSpec) { - continue; - } - packages.set(npmSpec.name, pluginId); - } - return packages; -} +const TRUSTED_OFFICIAL_NPM_PLUGIN_PACKAGES = new Map([["@openclaw/codex", "codex"]]); function ensureOpenClawExtensions(params: { manifest: PackageManifest }): | { @@ -219,7 +198,7 @@ function isTrustedOfficialNpmPluginInstall(params: { if (!requested) { return false; } - const expectedPluginId = listTrustedOfficialNpmPluginPackages().get(requested.name); + const expectedPluginId = TRUSTED_OFFICIAL_NPM_PLUGIN_PACKAGES.get(requested.name); return ( expectedPluginId !== undefined && params.packageName === requested.name && diff --git a/src/plugins/tool-descriptor-cache.test.ts b/src/plugins/tool-descriptor-cache.test.ts index f9e5128a405..a8df4115f49 100644 --- a/src/plugins/tool-descriptor-cache.test.ts +++ b/src/plugins/tool-descriptor-cache.test.ts @@ -6,7 +6,7 @@ const hoisted = vi.hoisted(() => ({ value && typeof value === "object" && "id" in value ? String((value as { id?: unknown }).id) : "config"; - return `config:${id}`; + return `config:${id}:${JSON.stringify(value)}`; }), })); @@ -90,4 +90,50 @@ describe("plugin tool descriptor cache keys", () => { expect(hoisted.resolveRuntimeConfigCacheKey).toHaveBeenCalledTimes(2); expect(firstKey).not.toBe(secondKey); }); + + it("keeps descriptor keys stable across config bookkeeping writes", () => { + const firstConfig = { + id: "runtime", + meta: { lastTouchedAt: "2026-05-02T10:00:00.000Z" }, + plugins: { + entries: { + demo: { enabled: true }, + }, + }, + wizard: { lastRunAt: "2026-05-02T10:00:00.000Z" }, + } as never; + const secondConfig = { + id: "runtime", + meta: { lastTouchedAt: "2026-05-02T10:00:05.000Z" }, + plugins: { + entries: { + demo: { enabled: true }, + }, + }, + wizard: { lastRunAt: "2026-05-02T10:00:05.000Z" }, + } as never; + + const firstKey = buildPluginToolDescriptorCacheKey({ + pluginId: "demo", + source: "/tmp/demo.js", + contractToolNames: ["demo"], + ctx: { + config: firstConfig, + runtimeConfig: firstConfig, + }, + currentRuntimeConfig: firstConfig, + }); + const secondKey = buildPluginToolDescriptorCacheKey({ + pluginId: "demo", + source: "/tmp/demo.js", + contractToolNames: ["demo"], + ctx: { + config: secondConfig, + runtimeConfig: secondConfig, + }, + currentRuntimeConfig: secondConfig, + }); + + expect(firstKey).toBe(secondKey); + }); }); diff --git a/src/plugins/tool-descriptor-cache.ts b/src/plugins/tool-descriptor-cache.ts index a219d5e56ec..25d6e9301e8 100644 --- a/src/plugins/tool-descriptor-cache.ts +++ b/src/plugins/tool-descriptor-cache.ts @@ -53,6 +53,19 @@ function getDescriptorCacheObjectId(value: object | null | undefined): number | return next; } +function stripDescriptorVolatileConfigFields( + value: NonNullable, +): NonNullable { + if (typeof value !== "object") { + return value; + } + if (!("meta" in value) && !("wizard" in value)) { + return value; + } + const { meta: _meta, wizard: _wizard, ...stableConfig } = value as Record; + return stableConfig as NonNullable; +} + function getDescriptorConfigCacheKey( value: PluginLoadOptions["config"] | null | undefined, memo?: PluginToolDescriptorConfigCacheKeyMemo, @@ -66,7 +79,7 @@ function getDescriptorConfigCacheKey( } let resolved: string | number | null; try { - resolved = resolveRuntimeConfigCacheKey(value); + resolved = resolveRuntimeConfigCacheKey(stripDescriptorVolatileConfigFields(value)); } catch { resolved = getDescriptorCacheObjectId(value); } @@ -91,8 +104,6 @@ function buildDescriptorContextCacheKey(params: { workspaceDir: ctx.workspaceDir ?? null, agentDir: ctx.agentDir ?? null, agentId: ctx.agentId ?? null, - sessionKey: ctx.sessionKey ?? null, - sessionId: ctx.sessionId ?? null, browser: ctx.browser ?? null, messageChannel: ctx.messageChannel ?? null, agentAccountId: ctx.agentAccountId ?? null, diff --git a/src/plugins/tools.optional.test.ts b/src/plugins/tools.optional.test.ts index 92dfde59b72..f9d8cb9d33a 100644 --- a/src/plugins/tools.optional.test.ts +++ b/src/plugins/tools.optional.test.ts @@ -928,6 +928,55 @@ describe("resolvePluginTools optional tools", () => { expect(factory).toHaveBeenCalledTimes(2); }); + it("reuses cached plugin tool descriptors across session identity changes", async () => { + const factory = vi.fn((rawCtx: unknown) => { + const ctx = rawCtx as { sessionId?: string }; + return { + ...makeTool("cached_session_tool"), + async execute() { + return { content: [{ type: "text", text: ctx.sessionId ?? "missing" }] }; + }, + }; + }); + setRegistry([ + { + pluginId: "cache-session-test", + optional: false, + source: "/tmp/cache-session-test.js", + names: ["cached_session_tool"], + factory, + }, + ]); + + const first = resolvePluginTools( + createResolveToolsParams({ + context: { + ...createContext(), + sessionId: "first-session", + sessionKey: "agent:main:first-session", + }, + }), + ); + const second = resolvePluginTools( + createResolveToolsParams({ + context: { + ...createContext(), + sessionId: "second-session", + sessionKey: "agent:main:second-session", + }, + }), + ); + + expectResolvedToolNames(first, ["cached_session_tool"]); + expectResolvedToolNames(second, ["cached_session_tool"]); + expect(factory).toHaveBeenCalledTimes(1); + + await expect(second[0]?.execute("call", {}, undefined)).resolves.toEqual({ + content: [{ type: "text", text: "second-session" }], + }); + expect(factory).toHaveBeenCalledTimes(2); + }); + it("does not reuse cached plugin tool descriptors across sandbox context changes", () => { const factory = vi.fn((rawCtx: unknown) => { const ctx = rawCtx as { sandboxed?: boolean }; diff --git a/src/video-generation/runtime-types.ts b/src/video-generation/runtime-types.ts index b632d841c9e..6ebccf3f67f 100644 --- a/src/video-generation/runtime-types.ts +++ b/src/video-generation/runtime-types.ts @@ -25,6 +25,7 @@ export type GenerateVideoParams = { inputImages?: VideoGenerationSourceAsset[]; inputVideos?: VideoGenerationSourceAsset[]; inputAudios?: VideoGenerationSourceAsset[]; + autoProviderFallback?: boolean; /** Arbitrary provider-specific options forwarded as-is to provider.generateVideo. */ providerOptions?: Record; /** Optional per-request provider timeout in milliseconds. */ diff --git a/src/video-generation/runtime.test.ts b/src/video-generation/runtime.test.ts index 487ce2a1457..ee7a842567a 100644 --- a/src/video-generation/runtime.test.ts +++ b/src/video-generation/runtime.test.ts @@ -106,6 +106,43 @@ describe("video-generation runtime", () => { ]); }); + it("does not list providers when explicit config disables auto provider fallback", async () => { + const provider: VideoGenerationProvider = { + id: "video-plugin", + capabilities: {}, + async generateVideo() { + return { + videos: [ + { + buffer: Buffer.from("mp4-bytes"), + mimeType: "video/mp4", + fileName: "sample.mp4", + }, + ], + model: "vid-v1", + }; + }, + }; + providers = [provider]; + + const params: GenerateVideoParams = { + cfg: { + agents: { + defaults: { + videoGenerationModel: { primary: "video-plugin/vid-v1" }, + }, + }, + } as OpenClawConfig, + prompt: "animate a cat", + autoProviderFallback: false, + }; + + const result = await runGenerateVideo(params); + + expect(result.provider).toBe("video-plugin"); + expect(listedConfigs).toEqual([]); + }); + it("auto-detects and falls through to another configured video-generation provider by default", async () => { providers = [ { diff --git a/src/video-generation/runtime.ts b/src/video-generation/runtime.ts index 783bf5fba47..8e9948b7d83 100644 --- a/src/video-generation/runtime.ts +++ b/src/video-generation/runtime.ts @@ -116,6 +116,7 @@ export async function generateVideo( parseModelRef: parseVideoGenerationModelRef, agentDir: params.agentDir, listProviders, + autoProviderFallback: params.autoProviderFallback, }); if (candidates.length === 0) { throw new Error(buildNoVideoGenerationModelConfiguredMessage(params.cfg, deps)); diff --git a/ui/src/styles/chat/layout.css b/ui/src/styles/chat/layout.css index 2e4fc2be528..79813602a16 100644 --- a/ui/src/styles/chat/layout.css +++ b/ui/src/styles/chat/layout.css @@ -663,19 +663,10 @@ background: color-mix(in srgb, var(--danger, #ef4444) 14%, transparent); } -.agent-chat__input-btn--dictating { - color: var(--danger, #ef4444); - background: color-mix(in srgb, var(--danger, #ef4444) 14%, transparent); -} - .agent-chat__talk-status { color: var(--text); } -.agent-chat__dictation-status { - color: var(--text); -} - .agent-chat__input-divider { width: 1px; height: 16px; diff --git a/ui/src/ui/app-chat.test.ts b/ui/src/ui/app-chat.test.ts index 8b20ea2bbc2..12893fbd365 100644 --- a/ui/src/ui/app-chat.test.ts +++ b/ui/src/ui/app-chat.test.ts @@ -44,7 +44,6 @@ let handleAbortChat: typeof import("./app-chat.ts").handleAbortChat; let refreshChatAvatar: typeof import("./app-chat.ts").refreshChatAvatar; let clearPendingQueueItemsForRun: typeof import("./app-chat.ts").clearPendingQueueItemsForRun; let removeQueuedMessage: typeof import("./app-chat.ts").removeQueuedMessage; -let transcribeChatAudio: typeof import("./app-chat.ts").transcribeChatAudio; async function loadChatHelpers(): Promise { ({ @@ -55,7 +54,6 @@ async function loadChatHelpers(): Promise { refreshChatAvatar, clearPendingQueueItemsForRun, removeQueuedMessage, - transcribeChatAudio, } = await import("./app-chat.ts")); } @@ -105,73 +103,12 @@ function makeHost(overrides?: Partial): ChatHost { toolStreamById: new Map(), toolStreamOrder: [], toolStreamSyncTimer: null, - chatDictationStatus: "idle", - chatDictationDetail: null, updateComplete: Promise.resolve(), ...overrides, }; return host as ChatHost; } -describe("transcribeChatAudio", () => { - beforeAll(async () => { - await loadChatHelpers(); - }); - - it("sends recorded audio to the gateway and appends the transcript to the draft", async () => { - const request = vi.fn(async () => ({ text: "new words" })); - const host = makeHost({ - client: { request } as never, - chatMessage: "existing", - }); - - await transcribeChatAudio(host, new Blob([new Uint8Array([1, 2, 3])], { type: "audio/webm" })); - - expect(request).toHaveBeenCalledWith("chat.transcribeAudio", { - audioBase64: "AQID", - mimeType: "audio/webm", - }); - expect(host.chatMessage).toBe("existing new words"); - expect(host.chatDictationStatus).toBe("idle"); - expect(host.chatDictationDetail).toBeNull(); - }); - - it("surfaces gateway transcription errors without changing the draft", async () => { - const request = vi.fn(async () => { - throw new Error("no provider"); - }); - const host = makeHost({ - client: { request } as never, - chatMessage: "existing", - }); - - await transcribeChatAudio(host, new Blob([new Uint8Array([1])], { type: "audio/ogg" })); - - expect(host.chatMessage).toBe("existing"); - expect(host.chatDictationStatus).toBe("error"); - expect(host.chatDictationDetail).toBe("no provider"); - expect(host.lastError).toBe("no provider"); - }); - - it("rejects oversized dictation before sending it over the gateway socket", async () => { - const request = vi.fn(); - const host = makeHost({ - client: { request } as never, - chatMessage: "existing", - }); - - await transcribeChatAudio( - host, - new Blob([new Uint8Array(18 * 1024 * 1024 + 1)], { type: "audio/webm" }), - ); - - expect(request).not.toHaveBeenCalled(); - expect(host.chatMessage).toBe("existing"); - expect(host.chatDictationStatus).toBe("error"); - expect(host.chatDictationDetail).toContain("too large"); - }); -}); - function createSessionsResult(sessions: GatewaySessionRow[]): SessionsListResult { return { ts: 0, diff --git a/ui/src/ui/app-chat.ts b/ui/src/ui/app-chat.ts index d2d8717d070..1c458005687 100644 --- a/ui/src/ui/app-chat.ts +++ b/ui/src/ui/app-chat.ts @@ -17,7 +17,6 @@ import { type ChatInputHistoryKeyResult, type ChatInputHistoryState, } from "./chat/input-history.ts"; -import { bytesToBase64 } from "./chat/realtime-talk-audio.ts"; import type { ChatSideResult } from "./chat/side-result.ts"; import { executeSlashCommand } from "./chat/slash-command-executor.ts"; import { parseSlashCommand, refreshSlashCommands } from "./chat/slash-commands.ts"; @@ -69,22 +68,10 @@ export type ChatHost = ChatInputHistoryState & { refreshSessionsAfterChat: Set; pendingAbort?: { runId?: string | null; sessionKey: string } | null; chatSubmitGuards?: Map>; - chatDictationStatus?: ChatDictationStatus; - chatDictationDetail?: string | null; /** Callback for slash-command side effects that need app-level access. */ onSlashAction?: (action: string) => void | Promise; }; -export type ChatDictationStatus = "idle" | "starting" | "recording" | "transcribing" | "error"; - -type ChatTranscribeAudioResult = { - text?: unknown; - provider?: unknown; - model?: unknown; -}; - -export const CHAT_TRANSCRIBE_AUDIO_MAX_BYTES = 18 * 1024 * 1024; - export type ChatSendOptions = { confirmReset?: boolean; restoreDraft?: boolean; @@ -136,60 +123,6 @@ export function isChatStopCommand(text: string) { ); } -function appendDictationText(draft: string, transcript: string): string { - const text = transcript.trim(); - if (!text) { - return draft; - } - const current = draft.trimEnd(); - return current ? `${current} ${text}` : text; -} - -export async function transcribeChatAudio(host: ChatHost, audio: Blob): Promise { - if (!host.client || !host.connected) { - host.chatDictationStatus = "error"; - host.chatDictationDetail = "Gateway not connected"; - host.lastError = host.chatDictationDetail; - return null; - } - if (audio.size <= 0) { - host.chatDictationStatus = "error"; - host.chatDictationDetail = "No audio captured"; - host.lastError = host.chatDictationDetail; - return null; - } - if (audio.size > CHAT_TRANSCRIBE_AUDIO_MAX_BYTES) { - host.chatDictationStatus = "error"; - host.chatDictationDetail = `Audio clip is too large for WebChat dictation. Keep recordings under ${CHAT_TRANSCRIBE_AUDIO_MAX_BYTES} bytes.`; - host.lastError = host.chatDictationDetail; - return null; - } - - host.chatDictationStatus = "transcribing"; - host.chatDictationDetail = "Transcribing dictation..."; - try { - const bytes = new Uint8Array(await audio.arrayBuffer()); - const mimeType = audio.type || "audio/webm"; - const result = await host.client.request("chat.transcribeAudio", { - audioBase64: bytesToBase64(bytes), - mimeType, - }); - const transcript = typeof result.text === "string" ? result.text.trim() : ""; - if (!transcript) { - throw new Error("No transcript returned"); - } - host.chatMessage = appendDictationText(host.chatMessage, transcript); - host.chatDictationStatus = "idle"; - host.chatDictationDetail = null; - return transcript; - } catch (err) { - host.chatDictationStatus = "error"; - host.chatDictationDetail = err instanceof Error ? err.message : String(err); - host.lastError = host.chatDictationDetail; - return null; - } -} - function isChatResetCommand(text: string) { const trimmed = text.trim(); if (!trimmed) { diff --git a/ui/src/ui/app-gateway.sessions.node.test.ts b/ui/src/ui/app-gateway.sessions.node.test.ts index 9c23ae80a60..c4d2a45f1da 100644 --- a/ui/src/ui/app-gateway.sessions.node.test.ts +++ b/ui/src/ui/app-gateway.sessions.node.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it, vi } from "vitest"; const loadSessionsMock = vi.fn(); const loadChatHistoryMock = vi.fn(); +const applySessionsChangedEventMock = vi.fn(); vi.mock("./app-chat.ts", () => ({ CHAT_SESSIONS_ACTIVE_MINUTES: 10, @@ -43,7 +44,7 @@ vi.mock("./controllers/nodes.ts", () => ({ loadNodes: vi.fn(), })); vi.mock("./controllers/sessions.ts", () => ({ - applySessionsChangedEvent: vi.fn(), + applySessionsChangedEvent: applySessionsChangedEventMock, loadSessions: loadSessionsMock, subscribeSessions: vi.fn(), })); @@ -114,6 +115,7 @@ function createHost() { describe("handleGatewayEvent sessions.changed", () => { it("reloads sessions when the gateway pushes a sessions.changed event", () => { loadSessionsMock.mockReset(); + applySessionsChangedEventMock.mockReset().mockReturnValue({ applied: false }); const host = createHost(); handleGatewayEvent(host, { @@ -126,6 +128,67 @@ describe("handleGatewayEvent sessions.changed", () => { expect(loadSessionsMock).toHaveBeenCalledTimes(1); expect(loadSessionsMock).toHaveBeenCalledWith(host); }); + + it("does not reload sessions for applied message-phase session patches to existing rows", () => { + loadSessionsMock.mockReset(); + applySessionsChangedEventMock.mockReset().mockReturnValue({ applied: true, change: "updated" }); + const host = createHost(); + + handleGatewayEvent(host, { + type: "event", + event: "sessions.changed", + payload: { + sessionKey: "agent:main:main", + phase: "message", + updatedAt: 123, + totalTokens: 456, + }, + seq: 1, + }); + + expect(applySessionsChangedEventMock).toHaveBeenCalledTimes(1); + expect(loadSessionsMock).not.toHaveBeenCalled(); + }); + + it("reloads sessions when an applied message-phase event inserts a session row", () => { + loadSessionsMock.mockReset(); + applySessionsChangedEventMock + .mockReset() + .mockReturnValue({ applied: true, change: "inserted" }); + const host = createHost(); + + handleGatewayEvent(host, { + type: "event", + event: "sessions.changed", + payload: { + sessionKey: "agent:main:new", + phase: "message", + updatedAt: 123, + totalTokens: 456, + }, + seq: 1, + }); + + expect(applySessionsChangedEventMock).toHaveBeenCalledTimes(1); + expect(loadSessionsMock).toHaveBeenCalledTimes(1); + expect(loadSessionsMock).toHaveBeenCalledWith(host); + }); + + it("reloads sessions when a message-phase event cannot patch local state", () => { + loadSessionsMock.mockReset(); + applySessionsChangedEventMock.mockReset().mockReturnValue({ applied: false }); + const host = createHost(); + + handleGatewayEvent(host, { + type: "event", + event: "sessions.changed", + payload: { sessionKey: "agent:main:main", phase: "message" }, + seq: 1, + }); + + expect(loadSessionsMock).toHaveBeenCalledTimes(1); + expect(loadSessionsMock).toHaveBeenCalledWith(host); + }); }); describe("handleGatewayEvent session.message", () => { diff --git a/ui/src/ui/app-gateway.ts b/ui/src/ui/app-gateway.ts index ca7a18cfb60..eafddc934c2 100644 --- a/ui/src/ui/app-gateway.ts +++ b/ui/src/ui/app-gateway.ts @@ -103,7 +103,6 @@ type GatewayHost = { sessionKey: string; chatRunId: string | null; pendingAbort?: { runId?: string | null; sessionKey: string } | null; - cancelChatDictation?: () => void; refreshSessionsAfterChat: Set; execApprovalQueue: ExecApprovalRequest[]; execApprovalError: string | null; @@ -157,6 +156,15 @@ function isTerminalChatState( return state === "final" || state === "aborted" || state === "error"; } +function isSessionMessagePhasePayload(payload: unknown): boolean { + return ( + Boolean(payload) && + typeof payload === "object" && + !Array.isArray(payload) && + (payload as { phase?: unknown }).phase === "message" + ); +} + type ConnectGatewayOptions = { reason?: "initial" | "seq-gap"; }; @@ -484,7 +492,6 @@ export function connectGateway(host: GatewayHost, options?: ConnectGatewayOption return; } host.connected = false; - host.cancelChatDictation?.(); // Code 1012 = Service Restart (expected during config saves, don't show as error) host.lastErrorCode = resolveGatewayErrorDetailCode(error) ?? @@ -742,7 +749,14 @@ function handleGatewayEventUnsafe(host: GatewayHost, evt: GatewayEventFrame) { } if (evt.event === "sessions.changed") { - applySessionsChangedEvent(host as unknown as SessionsState, evt.payload); + const applyResult = applySessionsChangedEvent(host as unknown as SessionsState, evt.payload); + if ( + applyResult.applied && + applyResult.change === "updated" && + isSessionMessagePhasePayload(evt.payload) + ) { + return; + } void loadSessions(host as unknown as SessionsState); return; } diff --git a/ui/src/ui/app-lifecycle.node.test.ts b/ui/src/ui/app-lifecycle.node.test.ts index 2dd84558bed..23d3129d887 100644 --- a/ui/src/ui/app-lifecycle.node.test.ts +++ b/ui/src/ui/app-lifecycle.node.test.ts @@ -34,8 +34,6 @@ describe("handleDisconnected", () => { }); const removeSpy = vi.spyOn(window, "removeEventListener").mockImplementation(() => undefined); const host = createHost(); - const cancelChatDictation = vi.fn(); - Object.assign(host, { cancelChatDictation }); const disconnectSpy = ( host.topbarObserver as unknown as { disconnect: ReturnType } ).disconnect; @@ -44,7 +42,6 @@ describe("handleDisconnected", () => { expect(removeSpy).toHaveBeenCalledWith("popstate", host.popStateHandler); expect(host.connectGeneration).toBe(1); - expect(cancelChatDictation).toHaveBeenCalledTimes(1); expect(host.client).toBeNull(); expect(host.connected).toBe(false); expect(disconnectSpy).toHaveBeenCalledTimes(1); diff --git a/ui/src/ui/app-lifecycle.ts b/ui/src/ui/app-lifecycle.ts index 0bd5b0db483..784b9101e59 100644 --- a/ui/src/ui/app-lifecycle.ts +++ b/ui/src/ui/app-lifecycle.ts @@ -41,7 +41,6 @@ type LifecycleHost = { realtimeTalkStatus?: string; realtimeTalkDetail?: string | null; realtimeTalkTranscript?: string | null; - cancelChatDictation?: () => void; chatLoading: boolean; chatMessages: unknown[]; chatToolMessages: unknown[]; @@ -92,7 +91,6 @@ export function handleDisconnected(host: LifecycleHost) { host.realtimeTalkStatus = "idle"; host.realtimeTalkDetail = null; host.realtimeTalkTranscript = null; - host.cancelChatDictation?.(); host.client?.stop(); host.client = null; host.connected = false; diff --git a/ui/src/ui/app-render.ts b/ui/src/ui/app-render.ts index f8dfbd8e9d8..c73772b00e8 100644 --- a/ui/src/ui/app-render.ts +++ b/ui/src/ui/app-render.ts @@ -2342,8 +2342,6 @@ export function renderApp(state: AppViewState) { realtimeTalkStatus: state.realtimeTalkStatus, realtimeTalkDetail: state.realtimeTalkDetail, realtimeTalkTranscript: state.realtimeTalkTranscript, - chatDictationStatus: state.chatDictationStatus, - chatDictationDetail: state.chatDictationDetail, connected: state.connected, canSend: state.connected, disabledReason: chatDisabledReason, @@ -2375,7 +2373,6 @@ export function renderApp(state: AppViewState) { onSend: () => state.handleSendChat(), onCompact: () => state.handleSendChat("/compact", { restoreDraft: true }), onToggleRealtimeTalk: () => state.toggleRealtimeTalk(), - onToggleChatDictation: () => state.toggleChatDictation(), canAbort: hasAbortableSessionRun(state), onAbort: () => void state.handleAbortChat(), onQueueRemove: (id) => state.removeQueuedMessage(id), diff --git a/ui/src/ui/app-view-state.ts b/ui/src/ui/app-view-state.ts index 4405dd878d4..f221065c8a8 100644 --- a/ui/src/ui/app-view-state.ts +++ b/ui/src/ui/app-view-state.ts @@ -1,4 +1,4 @@ -import type { ChatDictationStatus, ChatSendOptions } from "./app-chat.ts"; +import type { ChatSendOptions } from "./app-chat.ts"; import type { EventLogEntry } from "./app-events.ts"; import type { CompactionStatus, FallbackStatus } from "./app-tool-stream.ts"; import type { ChatInputHistoryKeyInput, ChatInputHistoryKeyResult } from "./chat/input-history.ts"; @@ -119,8 +119,6 @@ export type AppViewState = { realtimeTalkStatus: RealtimeTalkStatus; realtimeTalkDetail: string | null; realtimeTalkTranscript: string | null; - chatDictationStatus: ChatDictationStatus; - chatDictationDetail: string | null; chatManualRefreshInFlight: boolean; chatMobileControlsOpen: boolean; nodesLoading: boolean; @@ -472,7 +470,6 @@ export type AppViewState = { resetChatInputHistoryNavigation: () => void; handleSendChat: (messageOverride?: string, opts?: ChatSendOptions) => Promise; toggleRealtimeTalk: () => Promise; - toggleChatDictation: () => Promise; steerQueuedChatMessage: (id: string) => Promise; handleAbortChat: () => Promise; removeQueuedMessage: (id: string) => void; diff --git a/ui/src/ui/app.test.ts b/ui/src/ui/app.test.ts deleted file mode 100644 index 72a57758f5d..00000000000 --- a/ui/src/ui/app.test.ts +++ /dev/null @@ -1,205 +0,0 @@ -/* @vitest-environment jsdom */ - -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; - -const { transcribeChatAudioMock } = vi.hoisted(() => ({ - transcribeChatAudioMock: vi.fn(), -})); - -vi.mock("./app-chat.ts", async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - transcribeChatAudio: transcribeChatAudioMock, - }; -}); - -class MockMediaRecorder extends EventTarget { - static instances: MockMediaRecorder[] = []; - static isTypeSupported = vi.fn((mimeType: string) => mimeType === "audio/webm"); - - readonly mimeType: string; - state: RecordingState = "inactive"; - - constructor( - readonly stream: MediaStream, - options?: MediaRecorderOptions, - ) { - super(); - this.mimeType = options?.mimeType ?? ""; - MockMediaRecorder.instances.push(this); - } - - start() { - this.state = "recording"; - } - - stop() { - this.state = "inactive"; - this.dispatchEvent(new Event("stop")); - } - - emitData(data: Blob) { - const event = new Event("dataavailable") as Event & { data: Blob }; - Object.defineProperty(event, "data", { value: data }); - this.dispatchEvent(event); - } - - emitError(message: string) { - const event = new Event("error") as Event & { error: Error; message: string }; - Object.defineProperty(event, "error", { value: new Error(message) }); - Object.defineProperty(event, "message", { value: message }); - this.dispatchEvent(event); - } -} - -type AppWithDictationInternals = { - client: unknown; - connected: boolean; - chatDictationStatus: string; - chatDictationDetail: string | null; - chatDictationChunks: Blob[]; - toggleChatDictation: () => Promise; - cancelChatDictation: () => void; -}; - -let originalMediaDevices: PropertyDescriptor | undefined; - -function createDeferred() { - let resolve!: (value: T) => void; - let reject!: (error: unknown) => void; - const promise = new Promise((resolvePromise, rejectPromise) => { - resolve = resolvePromise; - reject = rejectPromise; - }); - return { promise, resolve, reject }; -} - -function createMockStream(track = { stop: vi.fn() }) { - return { - getTracks: () => [track], - track, - } as unknown as MediaStream & { track: { stop: ReturnType } }; -} - -async function createRecordingApp() { - const { OpenClawApp } = await import("./app.ts"); - const app = new OpenClawApp(); - app.client = { request: vi.fn() } as never; - app.connected = true; - return app as unknown as AppWithDictationInternals; -} - -describe("OpenClawApp dictation recorder lifecycle", () => { - beforeEach(() => { - transcribeChatAudioMock.mockReset(); - transcribeChatAudioMock.mockResolvedValue(null); - MockMediaRecorder.instances = []; - MockMediaRecorder.isTypeSupported.mockClear(); - vi.stubGlobal("MediaRecorder", MockMediaRecorder); - originalMediaDevices = Object.getOwnPropertyDescriptor(globalThis.navigator, "mediaDevices"); - Object.defineProperty(globalThis.navigator, "mediaDevices", { - configurable: true, - value: { - getUserMedia: vi.fn(async () => createMockStream()), - }, - }); - }); - - afterEach(() => { - if (originalMediaDevices) { - Object.defineProperty(globalThis.navigator, "mediaDevices", originalMediaDevices); - } else { - Reflect.deleteProperty(globalThis.navigator, "mediaDevices"); - } - vi.unstubAllGlobals(); - }); - - it("does not submit collected audio after a recorder error and later stop", async () => { - const app = await createRecordingApp(); - await app.toggleChatDictation(); - const recorder = MockMediaRecorder.instances[0]; - - recorder.emitData(new Blob(["audio"], { type: "audio/webm" })); - recorder.emitError("microphone failed"); - recorder.emitData(new Blob(["late audio"], { type: "audio/webm" })); - recorder.stop(); - - expect(transcribeChatAudioMock).not.toHaveBeenCalled(); - expect(app.chatDictationStatus).toBe("error"); - expect(app.chatDictationDetail).toBe("microphone failed"); - expect(app.chatDictationChunks).toEqual([]); - }); - - it("releases recorded chunks after copying them for normal transcription", async () => { - const app = await createRecordingApp(); - await app.toggleChatDictation(); - const recorder = MockMediaRecorder.instances[0]; - recorder.emitData(new Blob(["audio"], { type: "audio/webm" })); - const transcription = createDeferred(); - transcribeChatAudioMock.mockReturnValueOnce(transcription.promise); - - await app.toggleChatDictation(); - - expect(app.chatDictationChunks).toEqual([]); - expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1); - expect(transcribeChatAudioMock.mock.calls[0]?.[1]).toMatchObject({ - size: 5, - type: "audio/webm", - }); - transcription.resolve(null); - await transcription.promise; - }); - - it("ignores duplicate starts while microphone permission is pending", async () => { - const app = await createRecordingApp(); - const pendingUserMedia = createDeferred(); - const getUserMedia = vi.fn(() => pendingUserMedia.promise); - Object.defineProperty(globalThis.navigator, "mediaDevices", { - configurable: true, - value: { getUserMedia }, - }); - const stream = createMockStream(); - - const firstStart = app.toggleChatDictation(); - const secondStart = app.toggleChatDictation(); - - expect(getUserMedia).toHaveBeenCalledTimes(1); - await secondStart; - expect(app.chatDictationStatus).toBe("starting"); - - pendingUserMedia.resolve(stream); - await firstStart; - - expect(MockMediaRecorder.instances).toHaveLength(1); - expect(MockMediaRecorder.instances[0].state).toBe("recording"); - expect(stream.track.stop).not.toHaveBeenCalled(); - - MockMediaRecorder.instances[0].emitData(new Blob(["audio"], { type: "audio/webm" })); - MockMediaRecorder.instances[0].stop(); - - expect(stream.track.stop).toHaveBeenCalledTimes(1); - expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1); - }); - - it("stops a microphone stream that resolves after pending dictation is canceled", async () => { - const app = await createRecordingApp(); - const pendingUserMedia = createDeferred(); - const getUserMedia = vi.fn(() => pendingUserMedia.promise); - Object.defineProperty(globalThis.navigator, "mediaDevices", { - configurable: true, - value: { getUserMedia }, - }); - const stream = createMockStream(); - - const start = app.toggleChatDictation(); - app.cancelChatDictation(); - pendingUserMedia.resolve(stream); - await start; - - expect(MockMediaRecorder.instances).toHaveLength(0); - expect(stream.track.stop).toHaveBeenCalledTimes(1); - expect(app.chatDictationStatus).toBe("idle"); - expect(transcribeChatAudioMock).not.toHaveBeenCalled(); - }); -}); diff --git a/ui/src/ui/app.ts b/ui/src/ui/app.ts index 50da8be7577..1953c6f6c90 100644 --- a/ui/src/ui/app.ts +++ b/ui/src/ui/app.ts @@ -22,10 +22,8 @@ import { removeQueuedMessage as removeQueuedMessageInternal, resetChatInputHistoryNavigation as resetChatInputHistoryNavigationInternal, steerQueuedChatMessage as steerQueuedChatMessageInternal, - transcribeChatAudio as transcribeChatAudioInternal, type ChatInputHistoryKeyInput, type ChatInputHistoryKeyResult, - type ChatDictationStatus, } from "./app-chat.ts"; import { DEFAULT_CRON_FORM, DEFAULT_LOG_LEVEL_FILTERS } from "./app-defaults.ts"; import type { EventLogEntry } from "./app-events.ts"; @@ -224,13 +222,6 @@ export class OpenClawApp extends LitElement { @state() realtimeTalkDetail: string | null = null; @state() realtimeTalkTranscript: string | null = null; private realtimeTalkSession: RealtimeTalkSession | null = null; - @state() chatDictationStatus: ChatDictationStatus = "idle"; - @state() chatDictationDetail: string | null = null; - private chatDictationRecorder: MediaRecorder | null = null; - private chatDictationStream: MediaStream | null = null; - private chatDictationChunks: Blob[] = []; - private chatDictationCancelNextStop = false; - private chatDictationStartToken = 0; @state() chatManualRefreshInFlight = false; @state() chatMobileControlsOpen = false; private chatMobileControlsTrigger: HTMLElement | null = null; @@ -953,129 +944,6 @@ export class OpenClawApp extends LitElement { } } - async toggleChatDictation() { - if (this.chatDictationRecorder && this.chatDictationStatus === "recording") { - this.chatDictationRecorder.stop(); - return; - } - if (this.chatDictationStatus === "starting" || this.chatDictationStatus === "transcribing") { - return; - } - if (!this.client || !this.connected) { - this.chatDictationStatus = "error"; - this.chatDictationDetail = "Gateway not connected"; - this.lastError = this.chatDictationDetail; - return; - } - if (!navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === "undefined") { - this.chatDictationStatus = "error"; - this.chatDictationDetail = "Browser microphone recording is unavailable"; - this.lastError = this.chatDictationDetail; - return; - } - - const startToken = ++this.chatDictationStartToken; - this.chatDictationStatus = "starting"; - this.chatDictationDetail = "Starting dictation..."; - let stream: MediaStream | null = null; - try { - stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - if (this.chatDictationStartToken !== startToken || this.chatDictationStatus !== "starting") { - this.stopMediaStream(stream); - return; - } - const mimeType = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"].find((candidate) => - MediaRecorder.isTypeSupported(candidate), - ); - const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined); - this.chatDictationStream = stream; - this.chatDictationRecorder = recorder; - this.chatDictationChunks = []; - recorder.addEventListener("dataavailable", (event) => { - if (this.chatDictationRecorder !== recorder || this.chatDictationCancelNextStop) { - return; - } - if (event.data.size > 0) { - this.chatDictationChunks.push(event.data); - } - }); - recorder.addEventListener("error", (event) => { - if (this.chatDictationRecorder !== recorder) { - return; - } - this.chatDictationRecorder = null; - this.chatDictationChunks = []; - this.chatDictationStatus = "error"; - this.chatDictationDetail = - event.message || event.error?.message || "Dictation recording failed"; - this.lastError = this.chatDictationDetail; - this.stopChatDictationStream(); - }); - recorder.addEventListener("stop", () => { - if (this.chatDictationRecorder !== recorder) { - return; - } - const chunks = this.chatDictationChunks.splice(0); - const canceledByRequest = this.chatDictationCancelNextStop; - this.chatDictationCancelNextStop = false; - this.chatDictationRecorder = null; - this.stopChatDictationStream(); - if (canceledByRequest) { - if (this.chatDictationStatus !== "error") { - this.chatDictationStatus = "idle"; - this.chatDictationDetail = null; - } - return; - } - const blob = new Blob(chunks, { - type: recorder.mimeType || chunks[0]?.type || "audio/webm", - }); - void transcribeChatAudioInternal( - this as unknown as Parameters[0], - blob, - ); - }); - this.chatDictationStatus = "recording"; - this.chatDictationDetail = "Recording dictation..."; - recorder.start(); - } catch (error) { - if (stream && this.chatDictationStream !== stream) { - this.stopMediaStream(stream); - } - if (this.chatDictationStartToken !== startToken) { - return; - } - this.chatDictationRecorder = null; - this.stopChatDictationStream(); - this.chatDictationStatus = "error"; - this.chatDictationDetail = error instanceof Error ? error.message : String(error); - this.lastError = this.chatDictationDetail; - } - } - - private stopChatDictationStream() { - this.stopMediaStream(this.chatDictationStream); - this.chatDictationStream = null; - } - - private stopMediaStream(stream: MediaStream | null) { - stream?.getTracks().forEach((track) => track.stop()); - } - - cancelChatDictation() { - this.chatDictationStartToken += 1; - if (this.chatDictationRecorder?.state === "recording") { - this.chatDictationCancelNextStop = true; - this.chatDictationRecorder.stop(); - } - this.chatDictationRecorder = null; - this.chatDictationChunks = []; - this.chatDictationCancelNextStop = false; - this.stopChatDictationStream(); - this.chatDictationStatus = "idle"; - this.chatDictationDetail = null; - } - async steerQueuedChatMessage(id: string) { await steerQueuedChatMessageInternal( this as unknown as Parameters[0], diff --git a/ui/src/ui/controllers/sessions.test.ts b/ui/src/ui/controllers/sessions.test.ts index d39aa0739f0..3bec7e41b27 100644 --- a/ui/src/ui/controllers/sessions.test.ts +++ b/ui/src/ui/controllers/sessions.test.ts @@ -420,7 +420,7 @@ describe("applySessionsChangedEvent", () => { model: "gpt-5.4", }); - expect(applied).toBe(true); + expect(applied).toEqual({ applied: true, change: "updated" }); expect(state.sessionsResult?.ts).toBe(2); expect(state.sessionsResult?.sessions[0]).toMatchObject({ key: "agent:main:main", @@ -461,4 +461,66 @@ describe("applySessionsChangedEvent", () => { expect(state.sessionsResult?.sessions[0]?.totalTokensFresh).toBe(false); expect(state.sessionsResult?.sessions[0]?.contextTokens).toBe(200_000); }); + + it("keeps updated existing rows sorted like sessions.list", () => { + const state = createState(async () => undefined, { + sessionsResult: { + ts: 1, + path: "(multiple)", + count: 2, + defaults: { modelProvider: null, model: null, contextTokens: null }, + sessions: [ + { + key: "agent:main:newer", + kind: "direct", + updatedAt: 10, + }, + { + key: "agent:main:older", + kind: "direct", + updatedAt: 1, + }, + ], + }, + }); + + const applied = applySessionsChangedEvent(state, { + sessionKey: "agent:main:older", + ts: 2, + updatedAt: 20, + }); + + expect(applied).toEqual({ applied: true, change: "updated" }); + expect(state.sessionsResult?.sessions.map((row) => row.key)).toEqual([ + "agent:main:older", + "agent:main:newer", + ]); + }); + + it("reports when websocket event payloads insert new rows", () => { + const state = createState(async () => undefined, { + sessionsResult: { + ts: 1, + path: "(multiple)", + count: 0, + defaults: { modelProvider: null, model: null, contextTokens: null }, + sessions: [], + }, + }); + + const applied = applySessionsChangedEvent(state, { + sessionKey: "agent:main:new", + ts: 2, + kind: "direct", + updatedAt: 2, + }); + + expect(applied).toEqual({ applied: true, change: "inserted" }); + expect(state.sessionsResult?.count).toBe(1); + expect(state.sessionsResult?.sessions[0]).toMatchObject({ + key: "agent:main:new", + kind: "direct", + updatedAt: 2, + }); + }); }); diff --git a/ui/src/ui/controllers/sessions.ts b/ui/src/ui/controllers/sessions.ts index fb97c60ef97..68489d0e64f 100644 --- a/ui/src/ui/controllers/sessions.ts +++ b/ui/src/ui/controllers/sessions.ts @@ -123,6 +123,10 @@ function normalizeSessionKind(value: unknown): GatewaySessionRow["kind"] | undef : undefined; } +function compareSessionRowsByUpdatedAt(a: GatewaySessionRow, b: GatewaySessionRow): number { + return (b.updatedAt ?? 0) - (a.updatedAt ?? 0); +} + function checkpointSummarySignature( row: | { @@ -230,9 +234,16 @@ async function runCompactionMutation( } } -export function applySessionsChangedEvent(state: SessionsState, payload: unknown): boolean { +export type SessionsChangedApplyResult = + | { applied: false } + | { applied: true; change: "inserted" | "updated" }; + +export function applySessionsChangedEvent( + state: SessionsState, + payload: unknown, +): SessionsChangedApplyResult { if (!isRecord(payload) || !state.sessionsResult) { - return false; + return { applied: false }; } const eventSession = isRecord(payload.session) ? payload.session : null; const source = eventSession ?? payload; @@ -242,7 +253,7 @@ export function applySessionsChangedEvent(state: SessionsState, payload: unknown (typeof payload.key === "string" && payload.key.trim()) || ""; if (!key) { - return false; + return { applied: false }; } const previousRows = state.sessionsResult.sessions; @@ -271,10 +282,11 @@ export function applySessionsChangedEvent(state: SessionsState, payload: unknown delete nextRow.totalTokens; } - const sessions = + const nextRows = existingIndex >= 0 ? previousRows.map((row, index) => (index === existingIndex ? nextRow : row)) : [nextRow, ...previousRows]; + const sessions = nextRows.toSorted(compareSessionRowsByUpdatedAt); const eventTs = typeof payload.ts === "number" && Number.isFinite(payload.ts) ? payload.ts : null; state.sessionsResult = { ...state.sessionsResult, @@ -286,7 +298,7 @@ export function applySessionsChangedEvent(state: SessionsState, payload: unknown if (previousCheckpointSignature !== checkpointSummarySignature(nextRow)) { invalidateCheckpointCacheForKey(state, key); } - return true; + return { applied: true, change: existingIndex >= 0 ? "updated" : "inserted" }; } export async function subscribeSessions(state: SessionsState) { diff --git a/ui/src/ui/views/chat.test.ts b/ui/src/ui/views/chat.test.ts index 01d15840eef..eca9f273619 100644 --- a/ui/src/ui/views/chat.test.ts +++ b/ui/src/ui/views/chat.test.ts @@ -334,8 +334,6 @@ function renderChatView(overrides: Partial[0]> = { realtimeTalkStatus: "idle", realtimeTalkDetail: null, realtimeTalkTranscript: null, - chatDictationStatus: "idle", - chatDictationDetail: null, connected: true, canSend: true, disabledReason: null, @@ -368,7 +366,6 @@ function renderChatView(overrides: Partial[0]> = { onSend: () => undefined, onCompact: () => undefined, onToggleRealtimeTalk: () => undefined, - onToggleChatDictation: () => undefined, onAbort: () => undefined, onQueueRemove: () => undefined, onQueueSteer: () => undefined, @@ -448,48 +445,12 @@ describe("chat loading skeleton", () => { }); describe("chat voice controls", () => { - it("shows server dictation and Talk without the stale browser dictation button", () => { + it("keeps Talk visible without the stale browser dictation button", () => { const container = renderChatView(); - expect(container.querySelector('[aria-label="Dictate with server STT"]')).not.toBeNull(); expect(container.querySelector('[aria-label="Start Talk"]')).not.toBeNull(); expect(container.querySelector('[aria-label="Voice input"]')).toBeNull(); }); - - it("shows dictation recording state", () => { - const container = renderChatView({ - chatDictationStatus: "recording", - chatDictationDetail: null, - }); - - expect(container.querySelector('[aria-label="Stop dictation"]')).not.toBeNull(); - expect(container.textContent).toContain("Recording dictation"); - }); - - it("disables duplicate dictation starts while microphone access is pending", () => { - const container = renderChatView({ - chatDictationStatus: "starting", - chatDictationDetail: null, - }); - - const button = container.querySelector( - '[aria-label="Dictate with server STT"]', - ); - expect(button).not.toBeNull(); - expect(button!.disabled).toBe(true); - expect(container.textContent).toContain("Starting dictation"); - }); - - it("keeps stop dictation enabled while recording after disconnect", () => { - const container = renderChatView({ - connected: false, - chatDictationStatus: "recording", - }); - - const button = container.querySelector('[aria-label="Stop dictation"]'); - expect(button).not.toBeNull(); - expect(button!.disabled).toBe(false); - }); }); describe("chat slash menu accessibility", () => { diff --git a/ui/src/ui/views/chat.ts b/ui/src/ui/views/chat.ts index 6790eab1a49..1553bc5b1b0 100644 --- a/ui/src/ui/views/chat.ts +++ b/ui/src/ui/views/chat.ts @@ -3,7 +3,6 @@ import { ifDefined } from "lit/directives/if-defined.js"; import { ref } from "lit/directives/ref.js"; import { repeat } from "lit/directives/repeat.js"; import { t } from "../../i18n/index.ts"; -import type { ChatDictationStatus } from "../app-chat.ts"; import type { CompactionStatus, FallbackStatus } from "../app-tool-stream.ts"; import { getChatAttachmentPreviewUrl, @@ -78,8 +77,6 @@ export type ChatProps = { realtimeTalkStatus?: RealtimeTalkStatus; realtimeTalkDetail?: string | null; realtimeTalkTranscript?: string | null; - chatDictationStatus?: ChatDictationStatus; - chatDictationDetail?: string | null; connected: boolean; canSend: boolean; disabledReason: string | null; @@ -113,7 +110,6 @@ export type ChatProps = { onSend: () => void; onCompact?: () => void | Promise; onToggleRealtimeTalk?: () => void; - onToggleChatDictation?: () => void; onAbort?: () => void; onQueueRemove: (id: string) => void; onQueueSteer?: (id: string) => void; @@ -1202,32 +1198,19 @@ export function renderChat(props: ChatProps) { @change=${(e: Event) => handleFileSelect(e, props)} /> - ${props.chatDictationStatus && props.chatDictationStatus !== "idle" + ${props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript ? html` -
- ${props.chatDictationDetail ?? - (props.chatDictationStatus === "starting" - ? "Starting dictation..." - : props.chatDictationStatus === "recording" - ? "Recording dictation..." - : props.chatDictationStatus === "transcribing" - ? "Transcribing dictation..." - : "Dictation unavailable")} +
+ ${props.realtimeTalkDetail ?? + props.realtimeTalkTranscript ?? + (props.realtimeTalkStatus === "thinking" + ? "Asking OpenClaw..." + : props.realtimeTalkStatus === "connecting" + ? "Connecting Talk..." + : "Talk live")}
` - : props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript - ? html` -
- ${props.realtimeTalkDetail ?? - props.realtimeTalkTranscript ?? - (props.realtimeTalkStatus === "thinking" - ? "Asking OpenClaw..." - : props.realtimeTalkStatus === "connecting" - ? "Connecting Talk..." - : "Talk live")} -
- ` - : nothing} + : nothing}