mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
refactor: move media defaults into plugin manifests
This commit is contained in:
@@ -148,36 +148,37 @@ Those belong in your plugin code and `package.json`.
|
||||
|
||||
## Top-level field reference
|
||||
|
||||
| Field | Required | Type | What it means |
|
||||
| ----------------------------------- | -------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `id` | Yes | `string` | Canonical plugin id. This is the id used in `plugins.entries.<id>`. |
|
||||
| `configSchema` | Yes | `object` | Inline JSON Schema for this plugin's config. |
|
||||
| `enabledByDefault` | No | `true` | Marks a bundled plugin as enabled by default. Omit it, or set any non-`true` value, to leave the plugin disabled by default. |
|
||||
| `legacyPluginIds` | No | `string[]` | Legacy ids that normalize to this canonical plugin id. |
|
||||
| `autoEnableWhenConfiguredProviders` | No | `string[]` | Provider ids that should auto-enable this plugin when auth, config, or model refs mention them. |
|
||||
| `kind` | No | `"memory"` \| `"context-engine"` | Declares an exclusive plugin kind used by `plugins.slots.*`. |
|
||||
| `channels` | No | `string[]` | Channel ids owned by this plugin. Used for discovery and config validation. |
|
||||
| `providers` | No | `string[]` | Provider ids owned by this plugin. |
|
||||
| `modelSupport` | No | `object` | Manifest-owned shorthand model-family metadata used to auto-load the plugin before runtime. |
|
||||
| `providerEndpoints` | No | `object[]` | Manifest-owned endpoint host/baseUrl metadata for provider routes that core must classify before provider runtime loads. |
|
||||
| `cliBackends` | No | `string[]` | CLI inference backend ids owned by this plugin. Used for startup auto-activation from explicit config refs. |
|
||||
| `syntheticAuthRefs` | No | `string[]` | Provider or CLI backend refs whose plugin-owned synthetic auth hook should be probed during cold model discovery before runtime loads. |
|
||||
| `nonSecretAuthMarkers` | No | `string[]` | Bundled-plugin-owned placeholder API key values that represent non-secret local, OAuth, or ambient credential state. |
|
||||
| `commandAliases` | No | `object[]` | Command names owned by this plugin that should produce plugin-aware config and CLI diagnostics before runtime loads. |
|
||||
| `providerAuthEnvVars` | No | `Record<string, string[]>` | Cheap provider-auth env metadata that OpenClaw can inspect without loading plugin code. |
|
||||
| `providerAuthAliases` | No | `Record<string, string>` | Provider ids that should reuse another provider id for auth lookup, for example a coding provider that shares the base provider API key and auth profiles. |
|
||||
| `channelEnvVars` | No | `Record<string, string[]>` | Cheap channel env metadata that OpenClaw can inspect without loading plugin code. Use this for env-driven channel setup or auth surfaces that generic startup/config helpers should see. |
|
||||
| `providerAuthChoices` | No | `object[]` | Cheap auth-choice metadata for onboarding pickers, preferred-provider resolution, and simple CLI flag wiring. |
|
||||
| `activation` | No | `object` | Cheap activation hints for provider, command, channel, route, and capability-triggered loading. Metadata only; plugin runtime still owns actual behavior. |
|
||||
| `setup` | No | `object` | Cheap setup/onboarding descriptors that discovery and setup surfaces can inspect without loading plugin runtime. |
|
||||
| `qaRunners` | No | `object[]` | Cheap QA runner descriptors used by the shared `openclaw qa` host before plugin runtime loads. |
|
||||
| `contracts` | No | `object` | Static bundled capability snapshot for speech, realtime transcription, realtime voice, media-understanding, image-generation, music-generation, video-generation, web-fetch, web search, and tool ownership. |
|
||||
| `channelConfigs` | No | `Record<string, object>` | Manifest-owned channel config metadata merged into discovery and validation surfaces before runtime loads. |
|
||||
| `skills` | No | `string[]` | Skill directories to load, relative to the plugin root. |
|
||||
| `name` | No | `string` | Human-readable plugin name. |
|
||||
| `description` | No | `string` | Short summary shown in plugin surfaces. |
|
||||
| `version` | No | `string` | Informational plugin version. |
|
||||
| `uiHints` | No | `Record<string, object>` | UI labels, placeholders, and sensitivity hints for config fields. |
|
||||
| Field | Required | Type | What it means |
|
||||
| ------------------------------------ | -------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `id` | Yes | `string` | Canonical plugin id. This is the id used in `plugins.entries.<id>`. |
|
||||
| `configSchema` | Yes | `object` | Inline JSON Schema for this plugin's config. |
|
||||
| `enabledByDefault` | No | `true` | Marks a bundled plugin as enabled by default. Omit it, or set any non-`true` value, to leave the plugin disabled by default. |
|
||||
| `legacyPluginIds` | No | `string[]` | Legacy ids that normalize to this canonical plugin id. |
|
||||
| `autoEnableWhenConfiguredProviders` | No | `string[]` | Provider ids that should auto-enable this plugin when auth, config, or model refs mention them. |
|
||||
| `kind` | No | `"memory"` \| `"context-engine"` | Declares an exclusive plugin kind used by `plugins.slots.*`. |
|
||||
| `channels` | No | `string[]` | Channel ids owned by this plugin. Used for discovery and config validation. |
|
||||
| `providers` | No | `string[]` | Provider ids owned by this plugin. |
|
||||
| `modelSupport` | No | `object` | Manifest-owned shorthand model-family metadata used to auto-load the plugin before runtime. |
|
||||
| `providerEndpoints` | No | `object[]` | Manifest-owned endpoint host/baseUrl metadata for provider routes that core must classify before provider runtime loads. |
|
||||
| `cliBackends` | No | `string[]` | CLI inference backend ids owned by this plugin. Used for startup auto-activation from explicit config refs. |
|
||||
| `syntheticAuthRefs` | No | `string[]` | Provider or CLI backend refs whose plugin-owned synthetic auth hook should be probed during cold model discovery before runtime loads. |
|
||||
| `nonSecretAuthMarkers` | No | `string[]` | Bundled-plugin-owned placeholder API key values that represent non-secret local, OAuth, or ambient credential state. |
|
||||
| `commandAliases` | No | `object[]` | Command names owned by this plugin that should produce plugin-aware config and CLI diagnostics before runtime loads. |
|
||||
| `providerAuthEnvVars` | No | `Record<string, string[]>` | Cheap provider-auth env metadata that OpenClaw can inspect without loading plugin code. |
|
||||
| `providerAuthAliases` | No | `Record<string, string>` | Provider ids that should reuse another provider id for auth lookup, for example a coding provider that shares the base provider API key and auth profiles. |
|
||||
| `channelEnvVars` | No | `Record<string, string[]>` | Cheap channel env metadata that OpenClaw can inspect without loading plugin code. Use this for env-driven channel setup or auth surfaces that generic startup/config helpers should see. |
|
||||
| `providerAuthChoices` | No | `object[]` | Cheap auth-choice metadata for onboarding pickers, preferred-provider resolution, and simple CLI flag wiring. |
|
||||
| `activation` | No | `object` | Cheap activation hints for provider, command, channel, route, and capability-triggered loading. Metadata only; plugin runtime still owns actual behavior. |
|
||||
| `setup` | No | `object` | Cheap setup/onboarding descriptors that discovery and setup surfaces can inspect without loading plugin runtime. |
|
||||
| `qaRunners` | No | `object[]` | Cheap QA runner descriptors used by the shared `openclaw qa` host before plugin runtime loads. |
|
||||
| `contracts` | No | `object` | Static bundled capability snapshot for speech, realtime transcription, realtime voice, media-understanding, image-generation, music-generation, video-generation, web-fetch, web search, and tool ownership. |
|
||||
| `mediaUnderstandingProviderMetadata` | No | `Record<string, object>` | Cheap media-understanding defaults for provider ids declared in `contracts.mediaUnderstandingProviders`. |
|
||||
| `channelConfigs` | No | `Record<string, object>` | Manifest-owned channel config metadata merged into discovery and validation surfaces before runtime loads. |
|
||||
| `skills` | No | `string[]` | Skill directories to load, relative to the plugin root. |
|
||||
| `name` | No | `string` | Human-readable plugin name. |
|
||||
| `description` | No | `string` | Short summary shown in plugin surfaces. |
|
||||
| `version` | No | `string` | Informational plugin version. |
|
||||
| `uiHints` | No | `Record<string, object>` | UI labels, placeholders, and sensitivity hints for config fields. |
|
||||
|
||||
## providerAuthChoices reference
|
||||
|
||||
@@ -408,6 +409,43 @@ Each list is optional:
|
||||
| `webSearchProviders` | `string[]` | Web-search provider ids this plugin owns. |
|
||||
| `tools` | `string[]` | Agent tool names this plugin owns for bundled contract checks. |
|
||||
|
||||
## mediaUnderstandingProviderMetadata reference
|
||||
|
||||
Use `mediaUnderstandingProviderMetadata` when a media-understanding provider has
|
||||
default models, auto-auth fallback priority, or native document support that
|
||||
generic core helpers need before runtime loads. Keys must also be declared in
|
||||
`contracts.mediaUnderstandingProviders`; metadata entries for provider ids that are not declared there are ignored.
|
||||
|
||||
```json
|
||||
{
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["example"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"example": {
|
||||
"capabilities": ["image", "audio"],
|
||||
"defaultModels": {
|
||||
"image": "example-vision-latest",
|
||||
"audio": "example-transcribe-latest"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 40
|
||||
},
|
||||
"nativeDocumentInputs": ["pdf"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Each provider entry can include:
|
||||
|
||||
| Field | Type | What it means |
|
||||
| ---------------------- | ----------------------------------- | ---------------------------------------------------------------------------- |
|
||||
| `capabilities` | `("image" \| "audio" \| "video")[]` | Media capabilities exposed by this provider. |
|
||||
| `defaultModels` | `Record<string, string>` | Capability-to-model defaults used when config does not specify a model. |
|
||||
| `autoPriority` | `Record<string, number>` | Lower numbers sort earlier for automatic credential-based provider fallback. |
|
||||
| `nativeDocumentInputs` | `"pdf"[]` | Native document inputs supported by the provider. |
|
||||
|
||||
## channelConfigs reference
|
||||
|
||||
Use `channelConfigs` when a channel plugin needs cheap config metadata before
|
||||
|
||||
@@ -40,6 +40,18 @@
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["anthropic"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"anthropic": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "claude-opus-4-7"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 20
|
||||
},
|
||||
"nativeDocumentInputs": ["pdf"]
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -7,6 +7,17 @@
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["deepgram"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"deepgram": {
|
||||
"capabilities": ["audio"],
|
||||
"defaultModels": {
|
||||
"audio": "nova-3"
|
||||
},
|
||||
"autoPriority": {
|
||||
"audio": 30
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -53,6 +53,22 @@
|
||||
"videoGenerationProviders": ["google"],
|
||||
"webSearchProviders": ["gemini"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"google": {
|
||||
"capabilities": ["image", "audio", "video"],
|
||||
"defaultModels": {
|
||||
"image": "gemini-3-flash-preview",
|
||||
"audio": "gemini-3-flash-preview",
|
||||
"video": "gemini-3-flash-preview"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 30,
|
||||
"audio": 40,
|
||||
"video": 10
|
||||
},
|
||||
"nativeDocumentInputs": ["pdf"]
|
||||
}
|
||||
},
|
||||
"configContracts": {
|
||||
"compatibilityRuntimePaths": ["tools.web.search.apiKey"]
|
||||
},
|
||||
|
||||
@@ -7,6 +7,17 @@
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["groq"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"groq": {
|
||||
"capabilities": ["audio"],
|
||||
"defaultModels": {
|
||||
"audio": "whisper-large-v3-turbo"
|
||||
},
|
||||
"autoPriority": {
|
||||
"audio": 20
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -72,6 +72,26 @@
|
||||
"configContracts": {
|
||||
"compatibilityRuntimePaths": ["tools.web.search.apiKey"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"minimax": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "MiniMax-VL-01"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 40
|
||||
}
|
||||
},
|
||||
"minimax-portal": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "MiniMax-VL-01"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 50
|
||||
}
|
||||
}
|
||||
},
|
||||
"uiHints": {
|
||||
"webSearch.apiKey": {
|
||||
"label": "MiniMax Coding Plan key",
|
||||
|
||||
@@ -24,6 +24,17 @@
|
||||
"memoryEmbeddingProviders": ["mistral"],
|
||||
"mediaUnderstandingProviders": ["mistral"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"mistral": {
|
||||
"capabilities": ["audio"],
|
||||
"defaultModels": {
|
||||
"audio": "voxtral-mini-latest"
|
||||
},
|
||||
"autoPriority": {
|
||||
"audio": 50
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -52,6 +52,18 @@
|
||||
"mediaUnderstandingProviders": ["moonshot"],
|
||||
"webSearchProviders": ["kimi"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"moonshot": {
|
||||
"capabilities": ["image", "video"],
|
||||
"defaultModels": {
|
||||
"image": "kimi-k2.6",
|
||||
"video": "kimi-k2.6"
|
||||
},
|
||||
"autoPriority": {
|
||||
"video": 20
|
||||
}
|
||||
}
|
||||
},
|
||||
"configContracts": {
|
||||
"compatibilityRuntimePaths": ["tools.web.search.apiKey"]
|
||||
},
|
||||
|
||||
@@ -44,6 +44,25 @@
|
||||
"imageGenerationProviders": ["openai"],
|
||||
"videoGenerationProviders": ["openai"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"openai": {
|
||||
"capabilities": ["image", "audio"],
|
||||
"defaultModels": {
|
||||
"image": "gpt-5.4-mini",
|
||||
"audio": "gpt-4o-transcribe"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 10,
|
||||
"audio": 10
|
||||
}
|
||||
},
|
||||
"openai-codex": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "gpt-5.4"
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -23,6 +23,14 @@
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["openrouter"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"openrouter": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "auto"
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -6,6 +6,18 @@
|
||||
"mediaUnderstandingProviders": ["qwen"],
|
||||
"videoGenerationProviders": ["qwen"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"qwen": {
|
||||
"capabilities": ["image", "video"],
|
||||
"defaultModels": {
|
||||
"image": "qwen-vl-max-latest",
|
||||
"video": "qwen-vl-max-latest"
|
||||
},
|
||||
"autoPriority": {
|
||||
"video": 15
|
||||
}
|
||||
}
|
||||
},
|
||||
"providerAuthEnvVars": {
|
||||
"qwen": ["QWEN_API_KEY", "MODELSTUDIO_API_KEY", "DASHSCOPE_API_KEY"]
|
||||
},
|
||||
|
||||
@@ -79,6 +79,17 @@
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["zai"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"zai": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "glm-4.6v"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 60
|
||||
}
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../../config/model-input.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { bundledProviderSupportsNativePdfDocument } from "../../media-understanding/bundled-defaults.js";
|
||||
import { providerSupportsNativePdfDocument } from "../../media-understanding/defaults.js";
|
||||
import { extractAssistantText } from "../pi-embedded-utils.js";
|
||||
|
||||
export type PdfModelConfig = { primary?: string; fallbacks?: string[] };
|
||||
@@ -38,7 +38,7 @@ export function resolvePdfInputs(record: Record<string, unknown>): string[] {
|
||||
* Check whether a provider supports native PDF document input.
|
||||
*/
|
||||
export function providerSupportsNativePdf(provider: string): boolean {
  // Delegate to the media-understanding defaults lookup. A leftover
  // `bundledProviderSupportsNativePdfDocument(provider)` return used to sit
  // ahead of this call, which made this lookup unreachable.
  return providerSupportsNativePdfDocument({ providerId: provider });
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import {
|
||||
bundledProviderSupportsNativePdfDocument,
|
||||
resolveBundledAutoMediaKeyProviders,
|
||||
resolveBundledDefaultMediaModel,
|
||||
} from "../../media-understanding/bundled-defaults.js";
|
||||
providerSupportsNativePdfDocument,
|
||||
resolveAutoMediaKeyProviders,
|
||||
resolveDefaultMediaModel,
|
||||
} from "../../media-understanding/defaults.js";
|
||||
import {
|
||||
coerceImageModelConfig,
|
||||
type ImageModelConfig,
|
||||
@@ -12,12 +12,12 @@ import {
|
||||
import { hasAuthForProvider, resolveDefaultModelRef } from "./model-config.helpers.js";
|
||||
import { coercePdfModelConfig } from "./pdf-tool.helpers.js";
|
||||
|
||||
function resolveBundledImageCandidateRefs(params: {
|
||||
function resolveImageCandidateRefs(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir: string;
|
||||
filter?: (providerId: string) => boolean;
|
||||
}): string[] {
|
||||
return resolveBundledAutoMediaKeyProviders("image")
|
||||
return resolveAutoMediaKeyProviders({ capability: "image", cfg: params.cfg })
|
||||
.filter((providerId) => !params.filter || params.filter(providerId))
|
||||
.filter((providerId) => hasAuthForProvider({ provider: providerId, agentDir: params.agentDir }))
|
||||
.map((providerId) => {
|
||||
@@ -26,7 +26,8 @@ function resolveBundledImageCandidateRefs(params: {
|
||||
cfg: params.cfg,
|
||||
provider: providerId,
|
||||
})?.split("/")[1] ??
|
||||
resolveBundledDefaultMediaModel({
|
||||
resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
capability: "image",
|
||||
});
|
||||
@@ -69,17 +70,21 @@ export function resolvePdfModelConfigForTool(params: {
|
||||
});
|
||||
const providerDefault =
|
||||
providerVision?.split("/")[1] ??
|
||||
resolveBundledDefaultMediaModel({
|
||||
resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId: primary.provider,
|
||||
capability: "image",
|
||||
});
|
||||
const primarySupportsNativePdf = bundledProviderSupportsNativePdfDocument(primary.provider);
|
||||
const nativePdfCandidates = resolveBundledImageCandidateRefs({
|
||||
const primarySupportsNativePdf = providerSupportsNativePdfDocument({
|
||||
cfg: params.cfg,
|
||||
providerId: primary.provider,
|
||||
});
|
||||
const nativePdfCandidates = resolveImageCandidateRefs({
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
filter: bundledProviderSupportsNativePdfDocument,
|
||||
filter: (providerId) => providerSupportsNativePdfDocument({ cfg: params.cfg, providerId }),
|
||||
});
|
||||
const genericImageCandidates = resolveBundledImageCandidateRefs({
|
||||
const genericImageCandidates = resolveImageCandidateRefs({
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
import { normalizeMediaProviderId } from "./provider-id.js";
|
||||
import type { MediaUnderstandingCapability } from "./types.js";
|
||||
|
||||
/** Static media-understanding defaults for one provider; every field is optional. */
type BundledMediaProviderDefaults = {
  /** Default model name, keyed by media capability. */
  defaultModels?: Partial<Record<MediaUnderstandingCapability, string>>;
  /** Numeric sort key per capability; lower values are listed first in auto provider ordering. */
  autoPriority?: Partial<Record<MediaUnderstandingCapability, number>>;
  /** Document formats the provider accepts natively; only "pdf" is representable. */
  nativeDocumentInputs?: "pdf"[];
};
|
||||
|
||||
/**
 * Hard-coded media-understanding defaults, keyed by provider id.
 *
 * A provider only takes part in automatic ordering for a capability when it
 * declares a numeric `autoPriority` for that capability.
 */
const BUNDLED_MEDIA_PROVIDER_DEFAULTS: Record<string, BundledMediaProviderDefaults> = {
  openai: {
    defaultModels: {
      image: "gpt-5.4-mini",
      audio: "gpt-4o-transcribe",
    },
    autoPriority: {
      image: 10,
      audio: 10,
    },
  },
  "openai-codex": {
    defaultModels: {
      image: "gpt-5.4",
    },
  },
  anthropic: {
    defaultModels: {
      image: "claude-opus-4-7",
    },
    autoPriority: {
      image: 20,
    },
    nativeDocumentInputs: ["pdf"],
  },
  google: {
    defaultModels: {
      image: "gemini-3-flash-preview",
      audio: "gemini-3-flash-preview",
      video: "gemini-3-flash-preview",
    },
    autoPriority: {
      image: 30,
      audio: 40,
      video: 10,
    },
    nativeDocumentInputs: ["pdf"],
  },
  groq: {
    defaultModels: {
      audio: "whisper-large-v3-turbo",
    },
    autoPriority: {
      audio: 20,
    },
  },
  deepgram: {
    defaultModels: {
      audio: "nova-3",
    },
    autoPriority: {
      audio: 30,
    },
  },
  mistral: {
    defaultModels: {
      audio: "voxtral-mini-latest",
    },
    autoPriority: {
      audio: 50,
    },
  },
  minimax: {
    defaultModels: {
      image: "MiniMax-VL-01",
    },
    autoPriority: {
      image: 40,
    },
  },
  "minimax-portal": {
    defaultModels: {
      image: "MiniMax-VL-01",
    },
    autoPriority: {
      image: 50,
    },
  },
  zai: {
    defaultModels: {
      image: "glm-4.6v",
    },
    autoPriority: {
      image: 60,
    },
  },
  qwen: {
    defaultModels: {
      image: "qwen-vl-max-latest",
      video: "qwen-vl-max-latest",
    },
    autoPriority: {
      video: 15,
    },
  },
  moonshot: {
    defaultModels: {
      image: "kimi-k2.6",
      video: "kimi-k2.6",
    },
    autoPriority: {
      video: 20,
    },
  },
  openrouter: {
    defaultModels: {
      image: "auto",
    },
  },
};
|
||||
|
||||
/**
 * Looks up the bundled defaults for a provider.
 *
 * @param providerId - Provider id; it is normalized before the table lookup.
 * @returns The matching defaults entry, or `null` when the table has none.
 */
export function getBundledMediaProviderDefaults(
  providerId: string,
): BundledMediaProviderDefaults | null {
  const lookupKey = normalizeMediaProviderId(providerId);
  const entry = BUNDLED_MEDIA_PROVIDER_DEFAULTS[lookupKey];
  return entry ?? null;
}
|
||||
|
||||
/**
 * Resolves the bundled default model for a provider and capability.
 *
 * @param params.providerId - Provider whose defaults are consulted.
 * @param params.capability - Media capability to look up.
 * @returns The trimmed model name, or `undefined` when no default is recorded.
 */
export function resolveBundledDefaultMediaModel(params: {
  providerId: string;
  capability: MediaUnderstandingCapability;
}): string | undefined {
  const { providerId, capability } = params;
  const model = getBundledMediaProviderDefaults(providerId)?.defaultModels?.[capability];
  return model?.trim();
}
|
||||
|
||||
/**
 * Lists bundled provider ids that declare an auto priority for a capability.
 *
 * Providers without a numeric priority for the capability are left out. The
 * result is ordered by ascending priority, with ties broken by provider id.
 *
 * @param capability - Media capability to rank providers for.
 * @returns Provider ids in auto-selection order.
 */
export function resolveBundledAutoMediaKeyProviders(
  capability: MediaUnderstandingCapability,
): string[] {
  const ranked: Array<{ providerId: string; priority: number }> = [];
  for (const [providerId, defaults] of Object.entries(BUNDLED_MEDIA_PROVIDER_DEFAULTS)) {
    const priority = defaults.autoPriority?.[capability];
    if (typeof priority === "number") {
      ranked.push({ providerId, priority });
    }
  }
  return ranked
    .toSorted((a, b) =>
      a.priority !== b.priority
        ? a.priority - b.priority
        : a.providerId.localeCompare(b.providerId),
    )
    .map(({ providerId }) => providerId);
}
|
||||
|
||||
/**
 * Reports whether the bundled defaults list "pdf" as a native document input
 * for the provider.
 *
 * @param providerId - Provider to check.
 * @returns `true` only when a defaults entry exists and includes "pdf".
 */
export function bundledProviderSupportsNativePdfDocument(providerId: string): boolean {
  const documentInputs = getBundledMediaProviderDefaults(providerId)?.nativeDocumentInputs;
  return documentInputs?.includes("pdf") ?? false;
}
|
||||
@@ -1,11 +1,7 @@
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
import {
|
||||
bundledProviderSupportsNativePdfDocument,
|
||||
resolveBundledAutoMediaKeyProviders,
|
||||
resolveBundledDefaultMediaModel,
|
||||
} from "./bundled-defaults.js";
|
||||
import { buildMediaUnderstandingRegistry, normalizeMediaProviderId } from "./provider-registry.js";
|
||||
import { buildMediaUnderstandingManifestMetadataRegistry } from "./manifest-metadata.js";
|
||||
import { normalizeMediaProviderId } from "./provider-registry.js";
|
||||
import { providerSupportsCapability } from "./provider-supports.js";
|
||||
import type { MediaUnderstandingCapability, MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
@@ -39,8 +35,30 @@ export const DEFAULT_VIDEO_MAX_BASE64_BYTES = 70 * MB;
|
||||
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;
|
||||
export const DEFAULT_MEDIA_CONCURRENCY = 2;
|
||||
|
||||
let defaultRegistryCache: Map<string, MediaUnderstandingProvider> | null = null;
|
||||
const configRegistryCache = new WeakMap<OpenClawConfig, Map<string, MediaUnderstandingProvider>>();
|
||||
|
||||
/**
 * Returns the manifest-metadata provider registry used when a caller does not
 * supply its own `providerRegistry`.
 *
 * Registries are memoized: one shared instance for the "no config" case and one
 * per config object (held in a `WeakMap` keyed by the config object).
 * A leftover unconditional `return buildMediaUnderstandingRegistry(...)` used to
 * sit at the top of this function and made the caching below unreachable.
 *
 * @param cfg - Optional config forwarded to the manifest registry builder.
 * @returns The cached or newly built provider registry.
 */
function resolveDefaultRegistry(cfg?: OpenClawConfig): Map<string, MediaUnderstandingProvider> {
  if (!cfg) {
    // Build lazily on first use, then reuse for every config-less call.
    defaultRegistryCache ??= buildMediaUnderstandingManifestMetadataRegistry();
    return defaultRegistryCache;
  }
  let registry = configRegistryCache.get(cfg);
  if (!registry) {
    registry = buildMediaUnderstandingManifestMetadataRegistry(cfg);
    configRegistryCache.set(cfg, registry);
  }
  return registry;
}
|
||||
|
||||
/**
 * Checks whether a provider covers a capability.
 *
 * An explicit `capabilities` list on the provider is authoritative. When the
 * provider is missing or has no such list, the answer comes from
 * `providerSupportsCapability`.
 */
function providerHasDeclaredCapability(
  provider: MediaUnderstandingProvider | undefined,
  capability: MediaUnderstandingCapability,
): boolean {
  const declared = provider?.capabilities;
  if (declared == null) {
    return providerSupportsCapability(provider, capability);
  }
  return declared.includes(capability);
}
|
||||
|
||||
function resolveConfiguredImageProviderModel(params: {
|
||||
@@ -68,6 +86,28 @@ function resolveConfiguredImageProviderModel(params: {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Collects normalized ids of providers in `cfg.models.providers` that configure
 * at least one model whose `input` list includes "image".
 *
 * Ids are returned in first-seen order without duplicates. Provider entries
 * whose `models` value is not an array are skipped instead of throwing, in line
 * with the other shape checks this function already applies to the config.
 *
 * @param cfg - Optional config to inspect.
 * @returns Normalized provider ids, or an empty array when nothing is configured.
 */
function resolveConfiguredImageProviderIds(cfg?: OpenClawConfig): string[] {
  const providers = cfg?.models?.providers;
  if (!providers || typeof providers !== "object") {
    return [];
  }
  // A Set keeps insertion order and replaces the repeated array scans for dedupe.
  const configured = new Set<string>();
  for (const [providerKey, providerCfg] of Object.entries(providers)) {
    const normalizedProviderId = normalizeMediaProviderId(providerKey);
    if (!normalizedProviderId || configured.has(normalizedProviderId)) {
      continue;
    }
    const models = providerCfg?.models;
    if (!Array.isArray(models)) {
      continue;
    }
    const hasImageModel = models.some(
      (model) => Array.isArray(model?.input) && model.input.includes("image"),
    );
    if (hasImageModel) {
      configured.add(normalizedProviderId);
    }
  }
  return [...configured];
}
|
||||
|
||||
export function resolveDefaultMediaModel(params: {
|
||||
providerId: string;
|
||||
capability: MediaUnderstandingCapability;
|
||||
@@ -85,13 +125,6 @@ export function resolveDefaultMediaModel(params: {
|
||||
if (configuredImageModel) {
|
||||
return configuredImageModel;
|
||||
}
|
||||
const bundledDefault = resolveBundledDefaultMediaModel({
|
||||
providerId: params.providerId,
|
||||
capability: params.capability,
|
||||
});
|
||||
if (bundledDefault) {
|
||||
return bundledDefault;
|
||||
}
|
||||
}
|
||||
const registry = params.providerRegistry ?? resolveDefaultRegistry(params.cfg);
|
||||
const provider = registry.get(normalizeMediaProviderId(params.providerId));
|
||||
@@ -103,35 +136,13 @@ export function resolveAutoMediaKeyProviders(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
providerRegistry?: Map<string, MediaUnderstandingProvider>;
|
||||
}): string[] {
|
||||
if (!params.providerRegistry) {
|
||||
const bundledProviders = resolveBundledAutoMediaKeyProviders(params.capability);
|
||||
if (params.capability !== "image") {
|
||||
return bundledProviders;
|
||||
}
|
||||
const configProviders = params.cfg?.models?.providers;
|
||||
if (!configProviders || typeof configProviders !== "object") {
|
||||
return bundledProviders;
|
||||
}
|
||||
const merged = [...bundledProviders];
|
||||
for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
|
||||
const normalizedProviderId = normalizeMediaProviderId(providerKey);
|
||||
const models = providerCfg?.models ?? [];
|
||||
const hasImageModel = models.some(
|
||||
(model) => Array.isArray(model?.input) && model.input.includes("image"),
|
||||
);
|
||||
if (hasImageModel && !merged.includes(normalizedProviderId)) {
|
||||
merged.push(normalizedProviderId);
|
||||
}
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
const registry = params.providerRegistry ?? resolveDefaultRegistry(params.cfg);
|
||||
type AutoProviderEntry = {
|
||||
provider: MediaUnderstandingProvider;
|
||||
priority: number;
|
||||
};
|
||||
return [...registry.values()]
|
||||
.filter((provider) => providerSupportsCapability(provider, params.capability))
|
||||
const prioritized = [...registry.values()]
|
||||
.filter((provider) => providerHasDeclaredCapability(provider, params.capability))
|
||||
.map((provider): AutoProviderEntry | null => {
|
||||
const priority = provider.autoPriority?.[params.capability];
|
||||
return typeof priority === "number" && Number.isFinite(priority)
|
||||
@@ -147,6 +158,10 @@ export function resolveAutoMediaKeyProviders(params: {
|
||||
})
|
||||
.map((entry) => normalizeMediaProviderId(entry.provider.id))
|
||||
.filter(Boolean);
|
||||
if (params.providerRegistry || params.capability !== "image") {
|
||||
return prioritized;
|
||||
}
|
||||
return [...new Set([...prioritized, ...resolveConfiguredImageProviderIds(params.cfg)])];
|
||||
}
|
||||
|
||||
export function providerSupportsNativePdfDocument(params: {
|
||||
@@ -154,9 +169,6 @@ export function providerSupportsNativePdfDocument(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
providerRegistry?: Map<string, MediaUnderstandingProvider>;
|
||||
}): boolean {
|
||||
if (!params.providerRegistry && bundledProviderSupportsNativePdfDocument(params.providerId)) {
|
||||
return true;
|
||||
}
|
||||
const registry = params.providerRegistry ?? resolveDefaultRegistry(params.cfg);
|
||||
const provider = registry.get(normalizeMediaProviderId(params.providerId));
|
||||
return provider?.nativeDocumentInputs?.includes("pdf") ?? false;
|
||||
|
||||
36
src/media-understanding/manifest-metadata.ts
Normal file
36
src/media-understanding/manifest-metadata.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { loadPluginManifestRegistry } from "../plugins/manifest-registry.js";
|
||||
import { normalizeMediaProviderId } from "./provider-id.js";
|
||||
import type { MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
/**
 * Builds a provider registry from plugin manifest metadata.
 *
 * For every plugin in the manifest registry, each key of
 * `mediaUnderstandingProviderMetadata` is normalized and kept only when the same
 * plugin also declares it in `contracts.mediaUnderstandingProviders`. If the
 * same normalized id appears more than once, the later entry replaces the
 * earlier one.
 *
 * @param cfg - Optional config forwarded to `loadPluginManifestRegistry`.
 * @returns Map from normalized provider id to its manifest-declared metadata.
 */
export function buildMediaUnderstandingManifestMetadataRegistry(
  cfg?: OpenClawConfig,
): Map<string, MediaUnderstandingProvider> {
  const registry = new Map<string, MediaUnderstandingProvider>();
  const { plugins } = loadPluginManifestRegistry({ config: cfg, env: process.env });

  for (const plugin of plugins) {
    const declaredIds = (plugin.contracts?.mediaUnderstandingProviders ?? []).map((providerId) =>
      normalizeMediaProviderId(providerId),
    );
    const declared = new Set(declaredIds);
    const metadataById = plugin.mediaUnderstandingProviderMetadata ?? {};

    for (const [rawId, metadata] of Object.entries(metadataById)) {
      const id = normalizeMediaProviderId(rawId);
      // Skip ids that normalize to nothing or that the plugin does not declare.
      if (!id || !declared.has(id)) {
        continue;
      }
      const { capabilities, defaultModels, autoPriority, nativeDocumentInputs } = metadata;
      registry.set(id, { id, capabilities, defaultModels, autoPriority, nativeDocumentInputs });
    }
  }

  return registry;
}
|
||||
@@ -532,6 +532,54 @@ describe("loadPluginManifestRegistry", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves media-understanding provider metadata from plugin manifests", () => {
  const pluginRoot = makeTempDir();

  // Raw manifest metadata deliberately mixes valid values with ones the
  // loader is expected to drop: an unknown capability, a non-numeric
  // priority, and a non-pdf document input.
  const rawProviderMetadata = {
    openai: {
      capabilities: ["image", "audio", "unknown"],
      defaultModels: {
        image: "gpt-5.4-mini",
        audio: "gpt-4o-transcribe",
        unknown: "ignored",
      },
      autoPriority: {
        image: 10,
        audio: 20,
        video: "ignored",
      },
      nativeDocumentInputs: ["pdf", "docx"],
    },
  };

  // Only the recognized values should survive manifest loading.
  const expectedProviderMetadata = {
    openai: {
      capabilities: ["image", "audio"],
      defaultModels: {
        image: "gpt-5.4-mini",
        audio: "gpt-4o-transcribe",
      },
      autoPriority: {
        image: 10,
        audio: 20,
      },
      nativeDocumentInputs: ["pdf"],
    },
  };

  writeManifest(pluginRoot, {
    id: "openai",
    contracts: {
      mediaUnderstandingProviders: ["openai"],
    },
    mediaUnderstandingProviderMetadata: rawProviderMetadata,
    configSchema: { type: "object" },
  });

  const registry = loadSingleCandidateRegistry({
    idHint: "openai",
    rootDir: pluginRoot,
    origin: "bundled",
  });
  const firstRecord = registry.plugins[0];

  expect(firstRecord?.mediaUnderstandingProviderMetadata).toEqual(expectedProviderMetadata);
});
|
||||
|
||||
it("preserves channel env metadata from plugin manifests", () => {
|
||||
const dir = makeTempDir();
|
||||
writeManifest(dir, {
|
||||
|
||||
@@ -33,6 +33,7 @@ import {
|
||||
type PluginManifest,
|
||||
type PluginManifestChannelConfig,
|
||||
type PluginManifestContracts,
|
||||
type PluginManifestMediaUnderstandingProviderMetadata,
|
||||
type PluginManifestModelSupport,
|
||||
type PluginManifestProviderEndpoint,
|
||||
type PluginManifestQaRunner,
|
||||
@@ -112,6 +113,10 @@ export type PluginManifestRecord = {
|
||||
configSchema?: Record<string, unknown>;
|
||||
configUiHints?: Record<string, PluginConfigUiHint>;
|
||||
contracts?: PluginManifestContracts;
|
||||
mediaUnderstandingProviderMetadata?: Record<
|
||||
string,
|
||||
PluginManifestMediaUnderstandingProviderMetadata
|
||||
>;
|
||||
configContracts?: PluginManifestConfigContracts;
|
||||
channelConfigs?: Record<string, PluginManifestChannelConfig>;
|
||||
channelCatalogMeta?: {
|
||||
@@ -359,6 +364,7 @@ function buildRecord(params: {
|
||||
configSchema: params.configSchema,
|
||||
configUiHints: params.manifest.uiHints,
|
||||
contracts: params.manifest.contracts,
|
||||
mediaUnderstandingProviderMetadata: params.manifest.mediaUnderstandingProviderMetadata,
|
||||
configContracts: params.manifest.configContracts,
|
||||
channelConfigs,
|
||||
...(params.candidate.packageManifest?.channel?.id
|
||||
|
||||
@@ -219,6 +219,11 @@ export type PluginManifest = {
|
||||
* compat wiring, and contract coverage without importing plugin runtime.
|
||||
*/
|
||||
contracts?: PluginManifestContracts;
|
||||
/** Cheap media-understanding provider defaults without importing plugin runtime. */
|
||||
mediaUnderstandingProviderMetadata?: Record<
|
||||
string,
|
||||
PluginManifestMediaUnderstandingProviderMetadata
|
||||
>;
|
||||
/** Manifest-owned config behavior consumed by generic core helpers. */
|
||||
configContracts?: PluginManifestConfigContracts;
|
||||
channelConfigs?: Record<string, PluginManifestChannelConfig>;
|
||||
@@ -238,6 +243,15 @@ export type PluginManifestContracts = {
|
||||
tools?: string[];
|
||||
};
|
||||
|
||||
/** Media kinds a media-understanding provider may declare in its plugin manifest. */
export type PluginManifestMediaUnderstandingCapability =
  | "image"
  | "audio"
  | "video";
|
||||
|
||||
/** Optional lookup with at most one value per media-understanding capability. */
type MediaUnderstandingPerCapability<TValue> = Partial<
  Record<PluginManifestMediaUnderstandingCapability, TValue>
>;

/**
 * Per-provider media-understanding defaults declared in a plugin manifest.
 * Every field is optional.
 */
export type PluginManifestMediaUnderstandingProviderMetadata = {
  /** Capabilities the provider declares. */
  capabilities?: PluginManifestMediaUnderstandingCapability[];
  /** Default model id per capability. */
  defaultModels?: MediaUnderstandingPerCapability<string>;
  /** Numeric auto-selection priority per capability. NOTE(review): ordering direction is not visible here. */
  autoPriority?: MediaUnderstandingPerCapability<number>;
  /** Document formats declared as native inputs; only "pdf" is representable. */
  nativeDocumentInputs?: Array<"pdf">;
};
|
||||
|
||||
export type PluginManifestProviderAuthChoice = {
|
||||
/** Provider id owned by this manifest entry. */
|
||||
provider: string;
|
||||
@@ -311,6 +325,92 @@ function normalizeStringRecord(value: unknown): Record<string, string> | undefin
|
||||
return Object.keys(normalized).length > 0 ? normalized : undefined;
|
||||
}
|
||||
|
||||
/** Capability names accepted when normalizing manifest media-understanding metadata. */
const MEDIA_UNDERSTANDING_CAPABILITIES = new Set<string>([
  "image",
  "audio",
  "video",
]);
|
||||
|
||||
/**
 * Normalizes a raw capability -> model-name record from a manifest.
 *
 * Keys outside MEDIA_UNDERSTANDING_CAPABILITIES are skipped, as are values
 * for which `normalizeOptionalString` yields nothing truthy.
 *
 * @param value Untrusted manifest value.
 * @returns The surviving entries, or `undefined` when `value` is not a
 *   record or nothing survives.
 */
function normalizeMediaUnderstandingCapabilityRecord(
  value: unknown,
): Partial<Record<PluginManifestMediaUnderstandingCapability, string>> | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const modelsByCapability: Partial<Record<PluginManifestMediaUnderstandingCapability, string>> =
    {};
  for (const [capability, candidate] of Object.entries(value)) {
    if (MEDIA_UNDERSTANDING_CAPABILITIES.has(capability)) {
      const modelName = normalizeOptionalString(candidate);
      if (modelName) {
        modelsByCapability[capability as PluginManifestMediaUnderstandingCapability] = modelName;
      }
    }
  }

  if (Object.keys(modelsByCapability).length === 0) {
    return undefined;
  }
  return modelsByCapability;
}
|
||||
|
||||
/**
 * Normalizes a raw capability -> priority record from a manifest.
 *
 * An entry is kept only when its key is in MEDIA_UNDERSTANDING_CAPABILITIES
 * and its value is a finite number; everything else is skipped.
 *
 * @param value Untrusted manifest value.
 * @returns The surviving entries, or `undefined` when `value` is not a
 *   record or nothing survives.
 */
function normalizeMediaUnderstandingPriorityRecord(
  value: unknown,
): Partial<Record<PluginManifestMediaUnderstandingCapability, number>> | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const priorityByCapability: Partial<Record<PluginManifestMediaUnderstandingCapability, number>> =
    {};
  for (const [capability, candidate] of Object.entries(value)) {
    if (
      MEDIA_UNDERSTANDING_CAPABILITIES.has(capability) &&
      typeof candidate === "number" &&
      Number.isFinite(candidate)
    ) {
      priorityByCapability[capability as PluginManifestMediaUnderstandingCapability] = candidate;
    }
  }

  if (Object.keys(priorityByCapability).length === 0) {
    return undefined;
  }
  return priorityByCapability;
}
|
||||
|
||||
/**
 * Normalizes a raw capability list from a manifest.
 *
 * The value is first passed through `normalizeTrimmedStringList`; entries
 * not present in MEDIA_UNDERSTANDING_CAPABILITIES are then dropped. Order
 * and duplicates of the remaining entries are left as returned by that helper.
 *
 * @param value Untrusted manifest value.
 * @returns The recognized capabilities, or `undefined` when none remain.
 */
function normalizeMediaUnderstandingCapabilities(
  value: unknown,
): PluginManifestMediaUnderstandingCapability[] | undefined {
  const recognized: PluginManifestMediaUnderstandingCapability[] = [];
  for (const entry of normalizeTrimmedStringList(value)) {
    if (MEDIA_UNDERSTANDING_CAPABILITIES.has(entry)) {
      recognized.push(entry as PluginManifestMediaUnderstandingCapability);
    }
  }
  return recognized.length === 0 ? undefined : recognized;
}
|
||||
|
||||
/**
 * Normalizes a raw list of native document inputs from a manifest.
 *
 * The value is first passed through `normalizeTrimmedStringList`; only
 * entries exactly equal to "pdf" are kept.
 *
 * @param value Untrusted manifest value.
 * @returns The kept "pdf" entries, or `undefined` when there are none.
 */
function normalizeMediaUnderstandingNativeDocumentInputs(value: unknown): Array<"pdf"> | undefined {
  const pdfInputs: Array<"pdf"> = [];
  for (const entry of normalizeTrimmedStringList(value)) {
    if (entry === "pdf") {
      pdfInputs.push(entry);
    }
  }
  return pdfInputs.length === 0 ? undefined : pdfInputs;
}
|
||||
|
||||
/**
 * Normalizes the manifest's `mediaUnderstandingProviderMetadata` section.
 *
 * Each provider entry is rebuilt from its four normalized fields
 * (`capabilities`, `defaultModels`, `autoPriority`, `nativeDocumentInputs`).
 * A field is included only when its normalizer returns a value. Providers
 * are skipped when the id normalizes to nothing, when the raw metadata is
 * not a record, or when no field survives.
 *
 * @param value Untrusted manifest value.
 * @returns Metadata keyed by normalized provider id, or `undefined` when
 *   `value` is not a record or no provider survives.
 */
function normalizeMediaUnderstandingProviderMetadata(
  value: unknown,
): Record<string, PluginManifestMediaUnderstandingProviderMetadata> | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const metadataByProvider: Record<string, PluginManifestMediaUnderstandingProviderMetadata> = {};
  for (const [rawProviderId, rawMetadata] of Object.entries(value)) {
    const providerId = normalizeOptionalString(rawProviderId);
    if (!providerId || !isRecord(rawMetadata)) {
      continue;
    }

    const capabilities = normalizeMediaUnderstandingCapabilities(rawMetadata.capabilities);
    const defaultModels = normalizeMediaUnderstandingCapabilityRecord(rawMetadata.defaultModels);
    const autoPriority = normalizeMediaUnderstandingPriorityRecord(rawMetadata.autoPriority);
    const nativeDocumentInputs = normalizeMediaUnderstandingNativeDocumentInputs(
      rawMetadata.nativeDocumentInputs,
    );

    // Assign only the fields that survived, so absent fields stay absent
    // rather than being present with an undefined value.
    const entry: PluginManifestMediaUnderstandingProviderMetadata = {};
    if (capabilities) {
      entry.capabilities = capabilities;
    }
    if (defaultModels) {
      entry.defaultModels = defaultModels;
    }
    if (autoPriority) {
      entry.autoPriority = autoPriority;
    }
    if (nativeDocumentInputs) {
      entry.nativeDocumentInputs = nativeDocumentInputs;
    }

    if (Object.keys(entry).length > 0) {
      metadataByProvider[providerId] = entry;
    }
  }

  if (Object.keys(metadataByProvider).length === 0) {
    return undefined;
  }
  return metadataByProvider;
}
|
||||
|
||||
function normalizeManifestContracts(value: unknown): PluginManifestContracts | undefined {
|
||||
if (!isRecord(value)) {
|
||||
return undefined;
|
||||
@@ -769,6 +869,9 @@ export function loadPluginManifest(
|
||||
const qaRunners = normalizeManifestQaRunners(raw.qaRunners);
|
||||
const skills = normalizeTrimmedStringList(raw.skills);
|
||||
const contracts = normalizeManifestContracts(raw.contracts);
|
||||
const mediaUnderstandingProviderMetadata = normalizeMediaUnderstandingProviderMetadata(
|
||||
raw.mediaUnderstandingProviderMetadata,
|
||||
);
|
||||
const configContracts = normalizeManifestConfigContracts(raw.configContracts);
|
||||
const channelConfigs = normalizeChannelConfigs(raw.channelConfigs);
|
||||
|
||||
@@ -810,6 +913,7 @@ export function loadPluginManifest(
|
||||
version,
|
||||
uiHints,
|
||||
contracts,
|
||||
mediaUnderstandingProviderMetadata,
|
||||
configContracts,
|
||||
channelConfigs,
|
||||
},
|
||||
|
||||
@@ -54,6 +54,13 @@ const CORE_SECRET_SURFACE_GUARDS = [
|
||||
path: "src/gateway/channel-health-policy.ts",
|
||||
forbiddenPatterns: [/\btelegram\b/],
|
||||
},
|
||||
{
|
||||
path: "src/media-understanding/defaults.ts",
|
||||
forbiddenPatterns: [
|
||||
/\b(?:openai|anthropic|google|groq|deepgram|mistral|minimax|zai|qwen|moonshot|openrouter)\b/,
|
||||
/\b(?:gpt-|claude-|gemini-|whisper-|nova-|voxtral-|MiniMax-|glm-|qwen-|kimi-)\b/,
|
||||
],
|
||||
},
|
||||
] as const;
|
||||
|
||||
describe("channel secret contract surface guardrails", () => {
|
||||
|
||||
Reference in New Issue
Block a user