diff --git a/CHANGELOG.md b/CHANGELOG.md index fe9af6ea21b..052509381bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI-compatible: forward `prompt_cache_key` on Completions requests only for providers that opt in with `compat.supportsPromptCacheKey`, keeping default proxy payloads unchanged. Fixes #69272. - Providers/OpenAI-compatible: skip null or non-object streaming chunks from custom providers instead of failing the turn after partial output. Fixes #51112. - Providers/OpenAI-compatible: treat singular MLX-style `finish_reason: "tool_call"` as tool use instead of a provider error. Fixes #61499. +- Plugins/OpenCode: strip unsupported disabled Responses reasoning payloads for OpenCode image understanding. Fixes #70252. - Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot. - Providers/MiniMax TTS: mark MP3 output voice-compatible for Telegram voice-note delivery. Fixes #63540. - Providers/Microsoft TTS: keep allowlisted bundled speech providers discoverable even when another speech plugin has already registered, so Edge/Microsoft TTS is available alongside OpenAI. Fixes #62117 and #66850. 
diff --git a/extensions/opencode/index.test.ts b/extensions/opencode/index.test.ts index ff59311b196..7172f76023c 100644 --- a/extensions/opencode/index.test.ts +++ b/extensions/opencode/index.test.ts @@ -1,8 +1,29 @@ -import { describe, it } from "vitest"; +import { describe, expect, it } from "vitest"; +import { registerProviderPlugin } from "../../test/helpers/plugins/provider-registration.js"; import { expectPassthroughReplayPolicy } from "../../test/helpers/provider-replay-policy.ts"; import plugin from "./index.js"; describe("opencode provider plugin", () => { + it("registers image media understanding through the OpenCode plugin", async () => { + const { mediaProviders } = await registerProviderPlugin({ + plugin, + id: "opencode", + name: "OpenCode Zen Provider", + }); + + expect(mediaProviders).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + id: "opencode", + capabilities: ["image"], + defaultModels: { image: "gpt-5-nano" }, + describeImage: expect.any(Function), + describeImages: expect.any(Function), + }), + ]), + ); + }); + it("owns passthrough-gemini replay policy for Gemini-backed models", async () => { await expectPassthroughReplayPolicy({ plugin, diff --git a/extensions/opencode/index.ts b/extensions/opencode/index.ts index c7ba3e8f8d9..1e97b81e160 100644 --- a/extensions/opencode/index.ts +++ b/extensions/opencode/index.ts @@ -6,6 +6,7 @@ import { } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import { applyOpencodeZenConfig, OPENCODE_ZEN_DEFAULT_MODEL } from "./api.js"; +import { opencodeMediaUnderstandingProvider } from "./media-understanding-provider.js"; const PROVIDER_ID = "opencode"; const MINIMAX_MODERN_MODEL_MATCHERS = ["minimax-m2.7"] as const; @@ -49,5 +50,6 @@ export default definePluginEntry({ ...PASSTHROUGH_GEMINI_REPLAY_HOOKS, isModernModelRef: ({ modelId }) => isModernOpencodeModel(modelId), }); + 
api.registerMediaUnderstandingProvider(opencodeMediaUnderstandingProvider); }, }); diff --git a/extensions/opencode/media-understanding-provider.test.ts b/extensions/opencode/media-understanding-provider.test.ts new file mode 100644 index 00000000000..1fc34012dac --- /dev/null +++ b/extensions/opencode/media-understanding-provider.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from "vitest"; +import { + opencodeMediaUnderstandingProvider, + stripOpencodeDisabledResponsesReasoningPayload, +} from "./media-understanding-provider.js"; + +describe("opencode media understanding provider", () => { + it("strips disabled Responses reasoning payloads", () => { + const payload = { + reasoning: { effort: "none" }, + include: ["reasoning.encrypted_content"], + store: false, + }; + + stripOpencodeDisabledResponsesReasoningPayload(payload); + + expect(payload).toEqual({ + include: ["reasoning.encrypted_content"], + store: false, + }); + }); + + it("keeps supported Responses reasoning payloads", () => { + const payload = { + reasoning: { effort: "low" }, + store: false, + }; + + stripOpencodeDisabledResponsesReasoningPayload(payload); + + expect(payload).toEqual({ + reasoning: { effort: "low" }, + store: false, + }); + }); + + it("declares OpenCode image understanding support", () => { + expect(opencodeMediaUnderstandingProvider).toEqual( + expect.objectContaining({ + id: "opencode", + capabilities: ["image"], + defaultModels: { image: "gpt-5-nano" }, + describeImage: expect.any(Function), + describeImages: expect.any(Function), + }), + ); + }); +}); diff --git a/extensions/opencode/media-understanding-provider.ts b/extensions/opencode/media-understanding-provider.ts new file mode 100644 index 00000000000..cd389153552 --- /dev/null +++ b/extensions/opencode/media-understanding-provider.ts @@ -0,0 +1,42 @@ +import type { ProviderStreamOptions } from "@mariozechner/pi-ai"; +import { + describeImageWithModelPayloadTransform, + describeImagesWithModelPayloadTransform, + 
type MediaUnderstandingProvider, +} from "openclaw/plugin-sdk/media-understanding"; + +function isRecord(value: unknown): value is Record<string, unknown> { + return Boolean(value) && typeof value === "object" && !Array.isArray(value); +} + +export function stripOpencodeDisabledResponsesReasoningPayload(payload: unknown): void { + if (!isRecord(payload)) { + return; + } + const reasoning = payload.reasoning; + if (reasoning === "none") { + delete payload.reasoning; + return; + } + if (!isRecord(reasoning) || reasoning.effort !== "none") { + return; + } + delete payload.reasoning; +} + +const stripDisabledResponsesReasoning: ProviderStreamOptions["onPayload"] = (payload) => { + stripOpencodeDisabledResponsesReasoningPayload(payload); + return undefined; +}; + +export const opencodeMediaUnderstandingProvider: MediaUnderstandingProvider = { + id: "opencode", + capabilities: ["image"], + defaultModels: { + image: "gpt-5-nano", + }, + describeImage: (request) => + describeImageWithModelPayloadTransform(request, stripDisabledResponsesReasoning), + describeImages: (request) => + describeImagesWithModelPayloadTransform(request, stripDisabledResponsesReasoning), +}; diff --git a/src/media-understanding/image-runtime.ts index d04f850ff1b..b0c5c82c8f6 100644 --- a/src/media-understanding/image-runtime.ts +++ b/src/media-understanding/image-runtime.ts @@ -7,3 +7,9 @@ export const describeImageWithModel = bindImageRuntime((runtime) => runtime.desc export const describeImagesWithModel = bindImageRuntime( (runtime) => runtime.describeImagesWithModel, ); +export const describeImageWithModelPayloadTransform = bindImageRuntime( + (runtime) => runtime.describeImageWithModelPayloadTransform, +); +export const describeImagesWithModelPayloadTransform = bindImageRuntime( + (runtime) => runtime.describeImagesWithModelPayloadTransform, +); diff --git a/src/media-understanding/image.ts index 83073871b16..261fdb9b8a4 100644 ---
a/src/media-understanding/image.ts +++ b/src/media-understanding/image.ts @@ -95,6 +95,38 @@ function isImageModelNoTextError(err: unknown): boolean { return err instanceof Error && /^Image model returned no text\b/.test(err.message); } +function isPromiseLike(value: unknown): value is PromiseLike<unknown> { + return Boolean(value) && typeof (value as { then?: unknown }).then === "function"; +} + +function composeImageDescriptionPayloadHandlers( + first: ProviderStreamOptions["onPayload"] | undefined, + second: ProviderStreamOptions["onPayload"] | undefined, +): ProviderStreamOptions["onPayload"] | undefined { + if (!first) { + return second; + } + if (!second) { + return first; + } + return (payload, payloadModel) => { + const runSecond = (firstResult: unknown) => { + const nextPayload = firstResult === undefined ? payload : firstResult; + const secondResult = second(nextPayload, payloadModel); + const coerceResult = (resolvedSecond: unknown) => + resolvedSecond === undefined ? firstResult : resolvedSecond; + return isPromiseLike(secondResult) + ? Promise.resolve(secondResult).then(coerceResult) + : coerceResult(secondResult); + }; + const firstResult = first(payload, payloadModel); + if (isPromiseLike(firstResult)) { + return Promise.resolve(firstResult).then(runSecond); + } + return runSecond(firstResult); + }; +} + async function resolveImageRuntime(params: { cfg: ImageDescriptionRequest["cfg"]; agentDir: string; @@ -231,8 +263,9 @@ async function resolveMinimaxVlmFallbackRuntime(params: { }; } -export async function describeImagesWithModel( +async function describeImagesWithModelInternal( params: ImagesDescriptionRequest, + options: { onPayload?: ProviderStreamOptions["onPayload"] } = {}, ): Promise { const prompt = params.prompt ?? "Describe the image."; let apiKey: string; @@ -284,13 +317,15 @@ export async function describeImagesWithModel( : undefined; const maxTokens = resolveImageToolMaxTokens(model.maxTokens, params.maxTokens ??
512); - const completeImage = async (onPayload?: ProviderStreamOptions["onPayload"]) => - await complete(model, context, { + const completeImage = async (onPayload?: ProviderStreamOptions["onPayload"]) => { + const payloadHandler = composeImageDescriptionPayloadHandlers(onPayload, options.onPayload); + return await complete(model, context, { apiKey, maxTokens, signal: controller.signal, - ...(onPayload ? { onPayload } : {}), + ...(payloadHandler ? { onPayload: payloadHandler } : {}), }); + }; try { const message = await completeImage(); @@ -319,6 +354,19 @@ export async function describeImagesWithModel( } } +export async function describeImagesWithModel( + params: ImagesDescriptionRequest, +): Promise { + return await describeImagesWithModelInternal(params); +} + +export async function describeImagesWithModelPayloadTransform( + params: ImagesDescriptionRequest, + onPayload: ProviderStreamOptions["onPayload"], +): Promise { + return await describeImagesWithModelInternal(params, { onPayload }); +} + export async function describeImageWithModel( params: ImageDescriptionRequest, ): Promise { @@ -342,3 +390,31 @@ export async function describeImageWithModel( cfg: params.cfg, }); } + +export async function describeImageWithModelPayloadTransform( + params: ImageDescriptionRequest, + onPayload: ProviderStreamOptions["onPayload"], +): Promise { + return await describeImagesWithModelPayloadTransform( + { + images: [ + { + buffer: params.buffer, + fileName: params.fileName, + mime: params.mime, + }, + ], + model: params.model, + provider: params.provider, + prompt: params.prompt, + maxTokens: params.maxTokens, + timeoutMs: params.timeoutMs, + profile: params.profile, + preferredProfile: params.preferredProfile, + authStore: params.authStore, + agentDir: params.agentDir, + cfg: params.cfg, + }, + onPayload, + ); +} diff --git a/src/plugin-sdk/media-understanding.ts b/src/plugin-sdk/media-understanding.ts index 2fc4399b77d..07c7b360c37 100644 --- 
a/src/plugin-sdk/media-understanding.ts +++ b/src/plugin-sdk/media-understanding.ts @@ -15,7 +15,9 @@ export type { export { describeImageWithModel, + describeImageWithModelPayloadTransform, describeImagesWithModel, + describeImagesWithModelPayloadTransform, } from "../media-understanding/image-runtime.js"; export { buildOpenAiCompatibleVideoRequestBody,