diff --git a/CHANGELOG.md b/CHANGELOG.md index 4389cb82c07..7bf2b8f254a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - Plugins/media: auto-enable provider plugins referenced by `agents.defaults.imageGenerationModel`, `videoGenerationModel`, and `musicGenerationModel` primary/fallback refs, so configured Google and MiniMax media providers do not stay disabled behind a restrictive plugin allowlist. Thanks @vincentkoc. - Memory-core/dreaming: retry managed dreaming cron registration after startup when the cron service is not reachable yet, so the scheduled Memory Dreaming Promotion sweep recovers without waiting for heartbeat traffic. Fixes #72841. Thanks @amknight. - Acpx/runtime: validate the runtime session mode at the `AcpxRuntime.ensureSession` wrapper boundary so callers that pass anything other than `persistent` or `oneshot` get a clear `ACP_INVALID_RUNTIME_OPTION` error instead of silently round-tripping through the encoded handle as a default `persistent` mode and later throwing `SessionResumeRequiredError`. Investigation context: #73071. (#73548) Thanks @amknight. +- CLI/infer: keep web-search fallback on missing provider API keys, preserve structured validation errors from the selected provider, and let per-request image describe prompts override configured media-entry prompts. (#63263) Thanks @Spolen23. ## 2026.4.27 diff --git a/extensions/openai/media-understanding-provider.ts b/extensions/openai/media-understanding-provider.ts index 3fb89248946..5de21d15b1d 100644 --- a/extensions/openai/media-understanding-provider.ts +++ b/extensions/openai/media-understanding-provider.ts @@ -35,6 +35,7 @@ export const openaiCodexMediaUnderstandingProvider: MediaUnderstandingProvider = id: "openai-codex", capabilities: ["image"], defaultModels: { image: "gpt-5.5" }, + autoPriority: { image: 20 }, describeImage: describeImageWithModel, describeImages: describeImagesWithModel, }; diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 8c8e0d11051..82253708c1e 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -90,10 +90,14 @@ export type MediaUnderstandingConfig = MediaProviderRequestConfig & { maxChars?: number; /** Default prompt. */ prompt?: string; + /** Internal request-scoped prompt override injected by CLI/runtime wrappers. */ + _requestPromptOverride?: string; /** Default timeout (seconds). */ timeoutSeconds?: number; /** Default language hint (audio). */ language?: string; + /** Internal request-scoped language override injected by CLI/runtime wrappers. */ + _requestLanguageOverride?: string; /** Attachment selection policy. */ attachments?: MediaUnderstandingAttachmentsConfig; /** Ordered model list (fallbacks in order). */ diff --git a/src/media-understanding/defaults.test.ts b/src/media-understanding/defaults.test.ts index d879e7b8ed2..a07473b9430 100644 --- a/src/media-understanding/defaults.test.ts +++ b/src/media-understanding/defaults.test.ts @@ -57,7 +57,11 @@ const mediaMetadataPlugins = vi.hoisted(() => [ defaultModels: { image: "gpt-5.4-mini", audio: "gpt-4o-transcribe" }, autoPriority: { image: 10, audio: 10 }, }, - "openai-codex": { capabilities: ["image"], defaultModels: { image: "gpt-5.5" } }, + "openai-codex": { + capabilities: ["image"], + defaultModels: { image: "gpt-5.5" }, + autoPriority: { image: 20 }, + }, opencode: { capabilities: ["image"], defaultModels: { image: "gpt-5-nano" } }, "opencode-go": { capabilities: ["image"], defaultModels: { image: "kimi-k2.6" } }, openrouter: { capabilities: ["image"], defaultModels: { image: "auto" } }, @@ -124,6 +128,7 @@ describe("resolveAutoMediaKeyProviders", () => { expect(resolveAutoMediaKeyProviders({ capability: "image" })).toEqual([ "openai", "anthropic", + "openai-codex", "google", "minimax", "minimax-portal", diff --git a/src/media-understanding/runner.entries.ts b/src/media-understanding/runner.entries.ts index 56d300a4f2e..49ec18a29be 100644 --- a/src/media-understanding/runner.entries.ts +++ b/src/media-understanding/runner.entries.ts @@ -393,7 +393,7 @@ function resolveEntryRunOptions(params: { return { maxBytes, maxChars, timeoutMs, prompt }; } -function resolveAudioRequestOverrides(config: MediaUnderstandingConfig | undefined): { +function resolveMediaRequestOverrides(config: MediaUnderstandingConfig | undefined): { prompt?: string; language?: string; } { @@ -571,6 +571,7 @@ export async function runProviderEntry(params: { maxBytes, timeoutMs, }); + const requestOverrides = resolveMediaRequestOverrides(params.config); const provider = getMediaUnderstandingProvider(providerId, params.providerRegistry); const imageInput = { buffer: media.buffer, @@ -578,7 +579,7 @@ export async function runProviderEntry(params: { mime: media.mime, model: modelId, provider: providerId, - prompt, + prompt: requestOverrides.prompt ?? prompt, timeoutMs, profile: entry.profile, preferredProfile: entry.preferredProfile, @@ -610,7 +611,7 @@ export async function runProviderEntry(params: { throw new Error(`Audio transcription provider "${providerId}" not available.`); } const transcribeAudio = provider.transcribeAudio; - const requestOverrides = resolveAudioRequestOverrides(params.config); + const requestOverrides = resolveMediaRequestOverrides(params.config); const media = await params.cache.getBuffer({ attachmentIndex: params.attachmentIndex, maxBytes, @@ -736,7 +737,7 @@ export async function runCliEntry(params: { if (!command) { throw new Error(`CLI entry missing command for ${capability}`); } - const requestOverrides = resolveAudioRequestOverrides(params.config); + const requestOverrides = resolveMediaRequestOverrides(params.config); const { maxBytes, maxChars, timeoutMs, prompt } = resolveEntryRunOptions({ capability, entry, diff --git a/src/media-understanding/runner.vision-skip.test.ts b/src/media-understanding/runner.vision-skip.test.ts index a26a669d363..7cf19e91caa 100644 --- a/src/media-understanding/runner.vision-skip.test.ts +++ b/src/media-understanding/runner.vision-skip.test.ts @@ -192,6 +192,57 @@ describe("runCapability image skip", () => { ); }); + it("lets per-request image prompts override entry prompts", async () => { + await withMediaFixture( + { + filePrefix: "openclaw-image-request-prompt", + extension: "png", + mediaType: "image/png", + fileContents: Buffer.from("image"), + }, + async ({ ctx, media, cache }) => { + let seenPrompt: string | undefined; + const cfg = {} as OpenClawConfig; + + const result = await runCapability({ + capability: "image", + cfg, + ctx, + attachments: cache, + media, + agentDir: "/tmp", + providerRegistry: new Map([ + [ + "openrouter", + { + id: "openrouter", + capabilities: ["image"], + describeImage: async (req) => { + seenPrompt = req.prompt; + return { text: "request prompt ok", model: req.model }; + }, + }, + ], + ]), + config: { + _requestPromptOverride: "Use this request prompt", + models: [ + { + provider: "openrouter", + model: "google/gemini-2.5-flash", + prompt: "entry prompt", + }, + ], + }, + activeModel: { provider: "openai", model: "gpt-4.1" }, + }); + + expect(result.decision.outcome).toBe("success"); + expect(seenPrompt).toBe("Use this request prompt"); + }, + ); + }); + it("prefers agents.defaults.imageModel over the active model for auto image resolution", async () => { const cfg = { agents: { diff --git a/src/web-search/runtime.test.ts b/src/web-search/runtime.test.ts index 0be4d68eb56..2c70d23e2a0 100644 --- a/src/web-search/runtime.test.ts +++ b/src/web-search/runtime.test.ts @@ -318,6 +318,7 @@ describe("web search runtime", () => { it("falls back to another provider when auto-selected search execution fails", async () => { resolveRuntimeWebSearchProvidersMock.mockReturnValue([ createGoogleSearchProvider({ + requiresCredential: false, createTool: () => ({ description: "google", parameters: {}, @@ -340,6 +341,63 @@ describe("web search runtime", () => { }); }); + it("falls back when an auto-selected provider returns a structured error payload", async () => { + resolveRuntimeWebSearchProvidersMock.mockReturnValue([ + createGoogleSearchProvider({ + requiresCredential: false, + createTool: () => ({ + description: "google", + parameters: {}, + execute: async () => ({ + error: "missing_google_api_key", + message: "google key missing", + }), + }), + }), + createDuckDuckGoSearchProvider(), + ]); + + await expect( + runWebSearch({ + config: {}, + args: { query: "fallback-structured-error" }, + }), + ).resolves.toEqual({ + provider: "duckduckgo", + result: { query: "fallback-structured-error", provider: "duckduckgo" }, + }); + }); + + it("does not fall back when an auto-selected provider returns a validation error payload", async () => { + resolveRuntimeWebSearchProvidersMock.mockReturnValue([ + createGoogleSearchProvider({ + requiresCredential: false, + createTool: () => ({ + description: "google", + parameters: {}, + execute: async () => ({ + error: "invalid_freshness", + message: "freshness must be day, week, month, or year.", + }), + }), + }), + createDuckDuckGoSearchProvider(), + ]); + + await expect( + runWebSearch({ + config: {}, + args: { query: "fallback-validation-error", freshness: "forever" }, + }), + ).resolves.toEqual({ + provider: "google", + result: { + error: "invalid_freshness", + message: "freshness must be day, week, month, or year.", + }, + }); + }); + it("does not prebuild fallback provider tools before attempting the selected provider", async () => { resolveRuntimeWebSearchProvidersMock.mockReturnValue([ createGoogleSearchProvider(), diff --git a/src/web-search/runtime.ts b/src/web-search/runtime.ts index 79556397e1d..ec5a219571e 100644 --- a/src/web-search/runtime.ts +++ b/src/web-search/runtime.ts @@ -8,9 +8,11 @@ import { logVerbose } from "../globals.js"; import type { PluginWebSearchProviderEntry, WebSearchProviderToolDefinition, -} from "../plugins/web-provider-types.js"; -import { resolvePluginWebSearchProviders } from "../plugins/web-search-providers.runtime.js"; -import { resolveRuntimeWebSearchProviders } from "../plugins/web-search-providers.runtime.js"; +} from "../plugins/types.js"; +import { + resolvePluginWebSearchProviders, + resolveRuntimeWebSearchProviders, +} from "../plugins/web-search-providers.runtime.js"; import { sortWebSearchProvidersForAutoDetect } from "../plugins/web-search-providers.shared.js"; import { getActiveRuntimeWebToolsMetadata } from "../secrets/runtime-web-tools-state.js"; import type { RuntimeWebSearchMetadata } from "../secrets/runtime-web-tools.types.js"; @@ -311,6 +313,14 @@ function hasExplicitWebSearchSelection(params: { return false; } +function isStructuredAvailabilityError(result: unknown): result is { error: string } { + if (!result || typeof result !== "object" || !("error" in result)) { + return false; + } + const error = (result as { error?: unknown }).error; + return typeof error === "string" && /^missing_[a-z0-9_]*api_key$/i.test(error); +} + export async function runWebSearch(params: RunWebSearchParams): Promise { const config = resolveWebSearchRuntimeConfig(params.config); const search = resolveSearchConfig(config); @@ -347,9 +357,14 @@ export async function runWebSearch(params: RunWebSearchParams): Promise