refactor: dedupe openai-compatible video helpers

This commit is contained in:
Peter Steinberger
2026-04-06 19:20:54 +01:00
parent 283b103e75
commit a171de283f
4 changed files with 101 additions and 118 deletions

View File

@@ -1,7 +1,11 @@
import {
buildOpenAiCompatibleVideoRequestBody,
coerceOpenAiCompatibleVideoText,
describeImageWithModel,
describeImagesWithModel,
resolveMediaUnderstandingString,
type MediaUnderstandingProvider,
type OpenAiCompatibleVideoPayload,
type VideoDescriptionRequest,
type VideoDescriptionResult,
} from "openclaw/plugin-sdk/media-understanding";
@@ -15,56 +19,13 @@ export const DEFAULT_MOONSHOT_VIDEO_BASE_URL = "https://api.moonshot.ai/v1";
const DEFAULT_MOONSHOT_VIDEO_MODEL = "kimi-k2.5";
const DEFAULT_MOONSHOT_VIDEO_PROMPT = "Describe the video.";
type MoonshotVideoPayload = {
choices?: Array<{
message?: {
content?: string | Array<{ text?: string }>;
reasoning_content?: string;
};
}>;
};
function resolveModel(model?: string): string {
const trimmed = model?.trim();
return trimmed || DEFAULT_MOONSHOT_VIDEO_MODEL;
}
function resolvePrompt(prompt?: string): string {
const trimmed = prompt?.trim();
return trimmed || DEFAULT_MOONSHOT_VIDEO_PROMPT;
}
function coerceMoonshotText(payload: MoonshotVideoPayload): string | null {
const message = payload.choices?.[0]?.message;
if (!message) {
return null;
}
if (typeof message.content === "string" && message.content.trim()) {
return message.content.trim();
}
if (Array.isArray(message.content)) {
const text = message.content
.map((part) => (typeof part.text === "string" ? part.text.trim() : ""))
.filter(Boolean)
.join("\n")
.trim();
if (text) {
return text;
}
}
if (typeof message.reasoning_content === "string" && message.reasoning_content.trim()) {
return message.reasoning_content.trim();
}
return null;
}
export async function describeMoonshotVideo(
params: VideoDescriptionRequest,
): Promise<VideoDescriptionResult> {
const fetchFn = params.fetchFn ?? fetch;
const model = resolveModel(params.model);
const mime = params.mime ?? "video/mp4";
const prompt = resolvePrompt(params.prompt);
const model = resolveMediaUnderstandingString(params.model, DEFAULT_MOONSHOT_VIDEO_MODEL);
const mime = resolveMediaUnderstandingString(params.mime, "video/mp4");
const prompt = resolveMediaUnderstandingString(params.prompt, DEFAULT_MOONSHOT_VIDEO_PROMPT);
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
resolveProviderHttpRequestConfig({
baseUrl: params.baseUrl,
@@ -82,23 +43,12 @@ export async function describeMoonshotVideo(
});
const url = `${baseUrl}/chat/completions`;
const body = {
const body = buildOpenAiCompatibleVideoRequestBody({
model,
messages: [
{
role: "user",
content: [
{ type: "text", text: prompt },
{
type: "video_url",
video_url: {
url: `data:${mime};base64,${params.buffer.toString("base64")}`,
},
},
],
},
],
};
prompt,
mime,
buffer: params.buffer,
});
const { response: res, release } = await postJsonRequest({
url,
@@ -112,8 +62,8 @@ export async function describeMoonshotVideo(
try {
await assertOkOrThrowHttpError(res, "Moonshot video description failed");
const payload = (await res.json()) as MoonshotVideoPayload;
const text = coerceMoonshotText(payload);
const payload = (await res.json()) as OpenAiCompatibleVideoPayload;
const text = coerceOpenAiCompatibleVideoText(payload);
if (!text) {
throw new Error("Moonshot video description response missing content");
}

View File

@@ -1,7 +1,11 @@
import {
buildOpenAiCompatibleVideoRequestBody,
coerceOpenAiCompatibleVideoText,
describeImageWithModel,
describeImagesWithModel,
resolveMediaUnderstandingString,
type MediaUnderstandingProvider,
type OpenAiCompatibleVideoPayload,
type VideoDescriptionRequest,
type VideoDescriptionResult,
} from "openclaw/plugin-sdk/media-understanding";
@@ -15,15 +19,6 @@ import { QWEN_STANDARD_CN_BASE_URL, QWEN_STANDARD_GLOBAL_BASE_URL } from "./mode
const DEFAULT_QWEN_VIDEO_MODEL = "qwen-vl-max-latest";
const DEFAULT_QWEN_VIDEO_PROMPT = "Describe the video in detail.";
type QwenVideoPayload = {
choices?: Array<{
message?: {
content?: string | Array<{ text?: string }>;
reasoning_content?: string;
};
}>;
};
function resolveQwenStandardBaseUrl(
cfg: { models?: { providers?: Record<string, { baseUrl?: string } | undefined> } } | undefined,
providerId: string,
@@ -46,37 +41,13 @@ function resolveQwenStandardBaseUrl(
}
}
function coerceQwenText(payload: QwenVideoPayload): string | null {
const message = payload.choices?.[0]?.message;
if (!message) {
return null;
}
if (typeof message.content === "string" && message.content.trim()) {
return message.content.trim();
}
if (Array.isArray(message.content)) {
const text = message.content
.map((part) => (typeof part.text === "string" ? part.text.trim() : ""))
.filter(Boolean)
.join("\n")
.trim();
if (text) {
return text;
}
}
if (typeof message.reasoning_content === "string" && message.reasoning_content.trim()) {
return message.reasoning_content.trim();
}
return null;
}
export async function describeQwenVideo(
params: VideoDescriptionRequest,
): Promise<VideoDescriptionResult> {
const fetchFn = params.fetchFn ?? fetch;
const model = params.model?.trim() || DEFAULT_QWEN_VIDEO_MODEL;
const mime = params.mime?.trim() || "video/mp4";
const prompt = params.prompt?.trim() || DEFAULT_QWEN_VIDEO_PROMPT;
const model = resolveMediaUnderstandingString(params.model, DEFAULT_QWEN_VIDEO_MODEL);
const mime = resolveMediaUnderstandingString(params.mime, "video/mp4");
const prompt = resolveMediaUnderstandingString(params.prompt, DEFAULT_QWEN_VIDEO_PROMPT);
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
resolveProviderHttpRequestConfig({
baseUrl: params.baseUrl,
@@ -96,23 +67,12 @@ export async function describeQwenVideo(
const { response: res, release } = await postJsonRequest({
url: `${baseUrl}/chat/completions`,
headers,
body: {
body: buildOpenAiCompatibleVideoRequestBody({
model,
messages: [
{
role: "user",
content: [
{ type: "text", text: prompt },
{
type: "video_url",
video_url: {
url: `data:${mime};base64,${params.buffer.toString("base64")}`,
},
},
],
},
],
},
prompt,
mime,
buffer: params.buffer,
}),
timeoutMs: params.timeoutMs,
fetchFn,
allowPrivateNetwork,
@@ -121,8 +81,8 @@ export async function describeQwenVideo(
try {
await assertOkOrThrowHttpError(res, "Qwen video description failed");
const payload = (await res.json()) as QwenVideoPayload;
const text = coerceQwenText(payload);
const payload = (await res.json()) as OpenAiCompatibleVideoPayload;
const text = coerceOpenAiCompatibleVideoText(payload);
if (!text) {
throw new Error("Qwen video description response missing content");
}

View File

@@ -0,0 +1,67 @@
/**
 * Minimal shape of an OpenAI-compatible chat-completions response as used by
 * the video-description providers (Moonshot, Qwen). Only the fields read by
 * {@link coerceOpenAiCompatibleVideoText} are modeled: the first choice's
 * message `content` (either a plain string or an array of text parts) and the
 * optional `reasoning_content` fallback some providers return.
 */
export type OpenAiCompatibleVideoPayload = {
choices?: Array<{
message?: {
// Either the full text, or a list of parts each carrying optional text.
content?: string | Array<{ text?: string }>;
// Provider-specific fallback used when `content` is empty.
reasoning_content?: string;
};
}>;
};
/**
 * Returns `value` trimmed when it contains non-whitespace characters,
 * otherwise returns `fallback` unchanged.
 *
 * @param value - Optional caller-supplied string (e.g. model, mime, prompt).
 * @param fallback - Default used when `value` is missing, empty, or blank.
 */
export function resolveMediaUnderstandingString(
  value: string | undefined,
  fallback: string,
): string {
  if (value === undefined) {
    return fallback;
  }
  const normalized = value.trim();
  return normalized.length > 0 ? normalized : fallback;
}
/**
 * Extracts the assistant's text from an OpenAI-compatible chat-completions
 * payload. Sources are tried in priority order: plain string `content`,
 * array-of-parts `content` (non-empty trimmed parts joined with newlines),
 * then `reasoning_content`. Returns null when no non-empty text is found.
 *
 * @param payload - Parsed JSON body of a chat-completions response.
 * @returns Trimmed text, or null when the payload carries no usable text.
 */
export function coerceOpenAiCompatibleVideoText(
  payload: OpenAiCompatibleVideoPayload,
): string | null {
  const message = payload.choices?.[0]?.message;
  if (!message) {
    return null;
  }
  const { content, reasoning_content: reasoning } = message;

  if (typeof content === "string") {
    const direct = content.trim();
    if (direct) {
      return direct;
    }
  }

  if (Array.isArray(content)) {
    // Collect each part's trimmed text, dropping blanks, then join.
    const parts: string[] = [];
    for (const part of content) {
      const text = typeof part.text === "string" ? part.text.trim() : "";
      if (text) {
        parts.push(text);
      }
    }
    if (parts.length > 0) {
      return parts.join("\n");
    }
  }

  if (typeof reasoning === "string") {
    const fallback = reasoning.trim();
    if (fallback) {
      return fallback;
    }
  }
  return null;
}
/**
 * Builds the chat-completions request body shared by OpenAI-compatible
 * video providers: a single user message carrying the prompt text followed
 * by the video encoded as a base64 `data:` URL.
 *
 * @param params.model - Provider model identifier.
 * @param params.prompt - Instruction text sent alongside the video.
 * @param params.mime - MIME type embedded in the data URL (e.g. "video/mp4").
 * @param params.buffer - Raw video bytes; base64-encoded into the data URL.
 */
export function buildOpenAiCompatibleVideoRequestBody(params: {
  model: string;
  prompt: string;
  mime: string;
  buffer: Buffer;
}) {
  const { model, prompt, mime, buffer } = params;
  const dataUrl = `data:${mime};base64,${buffer.toString("base64")}`;
  const userContent = [
    { type: "text", text: prompt },
    { type: "video_url", video_url: { url: dataUrl } },
  ];
  return {
    model,
    messages: [{ role: "user", content: userContent }],
  };
}

View File

@@ -17,4 +17,10 @@ export {
describeImageWithModel,
describeImagesWithModel,
} from "../media-understanding/image-runtime.js";
export {
buildOpenAiCompatibleVideoRequestBody,
coerceOpenAiCompatibleVideoText,
resolveMediaUnderstandingString,
type OpenAiCompatibleVideoPayload,
} from "../media-understanding/openai-compatible-video.ts";
export { transcribeOpenAiCompatibleAudio } from "../media-understanding/openai-compatible-audio.js";