From eebdda92f0c34c62c8e4baaae10032c416ec2a9b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 12:12:20 +0100 Subject: [PATCH] fix(media): keep audio input repair in doctor --- CHANGELOG.md | 2 +- docs/.generated/config-baseline.sha256 | 4 +-- docs/gateway/config-tools.md | 2 +- docs/nodes/audio.md | 2 +- src/config/schema.base.generated.ts | 4 +-- src/config/schema.help.ts | 2 +- .../runner.cli-audio.test.ts | 31 ------------------- src/media-understanding/runner.entries.ts | 11 +------ 8 files changed, 9 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e01db72ea7a..48e589a519d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai ### Fixes -- Media-understanding/audio: expand deprecated `{input}` CLI placeholders to the local media path and migrate legacy `audio.transcription.command` configs to `{{MediaPath}}`, so custom audio transcribers no longer receive the literal placeholder. Fixes #72760. Thanks @krisfanue3-hash. +- Media-understanding/audio: migrate deprecated `{input}` placeholders in legacy `audio.transcription.command` configs to `{{MediaPath}}`, so custom audio transcribers no longer receive the literal placeholder after doctor repair. Fixes #72760. Thanks @krisfanue3-hash. - Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy. - Agents/reasoning: treat orphan closing reasoning tags with following answer text as a privacy boundary across delivery, history, streaming, and Control UI sanitizers so malformed local-model output cannot leak chain-of-thought text. Fixes #67092. Thanks @AnildoSilva. - Memory-core: run one-shot memory CLI commands through transient builtin and QMD managers so `memory index`, `memory status --index`, and `memory search` no longer start long-lived file watchers that can hit macOS `EMFILE` limits. Fixes #59101; carries forward #49851. Thanks @mbear469210-coder and @maoyuanxue. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 844429421ea..f87b0eb3fc6 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -1784e028361704e55bbffd845234f0df5657e2772a8bf1e4816483ad453c8125 config-baseline.json -12a9fb470f9a40d587b3595c96d777956c33804706d2b5bded6c42af12f4cc57 config-baseline.core.json +8a37b104c6b3a25618cbf4ecd0dd511703997fb1a10a1167226ab9918eb85455 config-baseline.json +a1839a03fc557a5439fc7b4ce2d45c7212b61e15588e15886bb22b65ff7dc32d config-baseline.core.json 07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json ed65cefbef96f034ce2b73069d9d5bacc341a43489ff9b20a34d40956b877f79 config-baseline.plugin.json diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index 9049215db7f..de0322798ff 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -241,7 +241,7 @@ Configures inbound media understanding (image/audio/video): **CLI entry** (`type: "cli"`): - `command`: executable to run - - `args`: templated args (supports `{{MediaPath}}`, `{{Prompt}}`, `{{MaxChars}}`, etc.; deprecated `{input}` is accepted as an alias for `{{MediaPath}}`) + - `args`: templated args (supports `{{MediaPath}}`, `{{Prompt}}`, `{{MaxChars}}`, etc.; `openclaw doctor --fix` migrates deprecated `{input}` placeholders to `{{MediaPath}}`) **Common fields:** diff --git a/docs/nodes/audio.md b/docs/nodes/audio.md index 69b02c70ee4..f949156d2e8 100644 --- a/docs/nodes/audio.md +++ b/docs/nodes/audio.md @@ -163,7 +163,7 @@ Note: Binary detection is best-effort across macOS/Linux/Windows; ensure the CLI - `tools.media.audio.echoTranscript` is off by default; enable it to send transcript confirmation back to the originating chat before agent processing. - `tools.media.audio.echoFormat` customizes the echo text (placeholder: `{transcript}`). - CLI stdout is capped (5MB); keep CLI output concise. -- CLI `args` should use `{{MediaPath}}` for the local audio file path. Deprecated `{input}` placeholders from older `audio.transcription.command` configs are accepted as a compatibility alias and migrate to `{{MediaPath}}`. +- CLI `args` should use `{{MediaPath}}` for the local audio file path. Run `openclaw doctor --fix` to migrate deprecated `{input}` placeholders from older `audio.transcription.command` configs. ### Proxy environment support diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 3f5a51a3576..38cb47a8c96 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -18736,7 +18736,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, title: "Audio Transcription Command", description: - 'Executable + args used to transcribe audio (first token must be a safe binary/path), for example `["whisper-cli", "--model", "small", "{{MediaPath}}"]`. Deprecated `{input}` placeholders are still accepted and migrated to `{{MediaPath}}`.', + 'Executable + args used to transcribe audio (first token must be a safe binary/path), for example `["whisper-cli", "--model", "small", "{{MediaPath}}"]`. Deprecated `{input}` placeholders are migrated to `{{MediaPath}}` by `openclaw doctor --fix`.', }, timeoutSeconds: { type: "integer", @@ -25727,7 +25727,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, "audio.transcription.command": { label: "Audio Transcription Command", - help: 'Executable + args used to transcribe audio (first token must be a safe binary/path), for example `["whisper-cli", "--model", "small", "{{MediaPath}}"]`. Deprecated `{input}` placeholders are still accepted and migrated to `{{MediaPath}}`.', + help: 'Executable + args used to transcribe audio (first token must be a safe binary/path), for example `["whisper-cli", "--model", "small", "{{MediaPath}}"]`. Deprecated `{input}` placeholders are migrated to `{{MediaPath}}` by `openclaw doctor --fix`.', tags: ["media"], }, "audio.transcription.timeoutSeconds": { diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 4e9a2315847..55955453ddb 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -511,7 +511,7 @@ export const FIELD_HELP: Record = { "audio.transcription": "Command-based transcription settings for converting audio files into text before agent handling. Keep a simple, deterministic command path here so failures are easy to diagnose in logs.", "audio.transcription.command": - 'Executable + args used to transcribe audio (first token must be a safe binary/path), for example `["whisper-cli", "--model", "small", "{{MediaPath}}"]`. Deprecated `{input}` placeholders are still accepted and migrated to `{{MediaPath}}`.', + 'Executable + args used to transcribe audio (first token must be a safe binary/path), for example `["whisper-cli", "--model", "small", "{{MediaPath}}"]`. Deprecated `{input}` placeholders are migrated to `{{MediaPath}}` by `openclaw doctor --fix`.', "audio.transcription.timeoutSeconds": "Maximum time allowed for the transcription command to finish before it is aborted. Increase this for longer recordings, and keep it tight in latency-sensitive deployments.", bindings: diff --git a/src/media-understanding/runner.cli-audio.test.ts b/src/media-understanding/runner.cli-audio.test.ts index fbc30e3dd32..6db28691336 100644 --- a/src/media-understanding/runner.cli-audio.test.ts +++ b/src/media-understanding/runner.cli-audio.test.ts @@ -64,35 +64,4 @@ describe("media-understanding CLI audio entry", () => { expect.any(Object), ); }); - - it("expands legacy {input} aliases to the media path", async () => { - let mediaPath = ""; - await withAudioFixture("openclaw-cli-audio-input-alias", async ({ ctx, cache }) => { - mediaPath = ctx.MediaPath; - await runCliEntry({ - capability: "audio", - entry: { - type: "cli", - command: "mock-transcriber", - args: ["{input}", "--input={input}"], - }, - cfg: { - tools: { - media: { - audio: {}, - }, - }, - } as OpenClawConfig, - ctx, - attachmentIndex: 0, - cache, - }); - }); - - expect(runExecMock).toHaveBeenCalledWith( - "mock-transcriber", - [mediaPath, `--input=${mediaPath}`], - expect.any(Object), - ); - }); }); diff --git a/src/media-understanding/runner.entries.ts b/src/media-understanding/runner.entries.ts index b14a306aeb2..56d300a4f2e 100644 --- a/src/media-understanding/runner.entries.ts +++ b/src/media-understanding/runner.entries.ts @@ -407,15 +407,6 @@ function resolveAudioRequestOverrides(config: MediaUnderstandingConfig | undefin }; } -function applyCliArgTemplate(part: string, ctx: MsgContext): string { - const templated = applyTemplate(part, ctx); - const mediaPath = ctx.MediaPath; - if (!mediaPath || !templated.includes("{input}")) { - return templated; - } - return templated.replace(/\{input\}/g, mediaPath); -} - async function resolveProviderExecutionAuth(params: { providerId: string; cfg: OpenClawConfig; @@ -783,7 +774,7 @@ export async function runCliEntry(params: { MaxChars: maxChars, }; const argv = [command, ...args].map((part, index) => - index === 0 ? part : applyCliArgTemplate(part, templCtx), + index === 0 ? part : applyTemplate(part, templCtx), ); try { if (shouldLogVerbose()) {