Media: extract runtime planning helpers

This commit is contained in:
Gustavo Madeira Santana
2026-03-15 19:58:16 +00:00
parent 28bb7e0f4a
commit 67db9df589
6 changed files with 273 additions and 313 deletions

View File

@@ -0,0 +1,190 @@
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import type {
MediaUnderstandingConfig,
MediaUnderstandingModelConfig,
MediaUnderstandingScopeConfig,
} from "../config/types.tools.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import {
DEFAULT_MAX_BYTES,
DEFAULT_MAX_CHARS_BY_CAPABILITY,
DEFAULT_MEDIA_CONCURRENCY,
DEFAULT_PROMPT,
} from "../media-understanding/defaults.js";
import {
normalizeMediaUnderstandingChatType,
resolveMediaUnderstandingScope,
} from "../media-understanding/scope.js";
import type { MediaUnderstandingCapability } from "../media-understanding/types.js";
import { normalizeExtensionHostMediaProviderId } from "./media-runtime-registry.js";
/**
 * Converts a timeout expressed in seconds into whole milliseconds.
 *
 * @param seconds - Configured timeout; ignored unless it is a finite number.
 * @param fallbackSeconds - Timeout used when `seconds` is missing or not finite.
 * @returns The timeout in milliseconds, floored to an integer and never below 1000.
 */
export function resolveTimeoutMs(seconds: number | undefined, fallbackSeconds: number): number {
  let effectiveSeconds = fallbackSeconds;
  if (typeof seconds === "number" && Number.isFinite(seconds)) {
    effectiveSeconds = seconds;
  }
  const wholeMillis = Math.floor(effectiveSeconds * 1000);
  // Enforce a one-second floor so tiny or negative values stay usable.
  return Math.max(1000, wholeMillis);
}
/**
 * Builds the prompt text for a media-understanding request.
 *
 * @param capability - Capability the prompt is for; selects the default prompt.
 * @param prompt - Optional custom prompt; used (trimmed) when non-blank.
 * @param maxChars - Optional response length limit appended as an instruction.
 * @returns The trimmed custom prompt or the capability default. A length
 *   instruction is appended unless `maxChars` is falsy or the capability is "audio".
 */
export function resolvePrompt(
  capability: MediaUnderstandingCapability,
  prompt?: string,
  maxChars?: number,
): string {
  const custom = prompt?.trim();
  const basePrompt = custom ? custom : DEFAULT_PROMPT[capability];
  const appendLimit = Boolean(maxChars) && capability !== "audio";
  return appendLimit ? `${basePrompt} Respond in at most ${maxChars} characters.` : basePrompt;
}
/**
 * Resolves the character limit for a capability's output.
 *
 * The first non-nullish value wins, in this order: the model entry, the
 * supplied capability config, then `cfg.tools.media[capability]`. If that
 * value is not a number, the per-capability default is returned instead.
 */
export function resolveMaxChars(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: OpenClawConfig;
  config?: MediaUnderstandingConfig;
}): number | undefined {
  let limit = params.entry.maxChars;
  if (limit == null) {
    limit = params.config?.maxChars;
  }
  if (limit == null) {
    limit = params.cfg.tools?.media?.[params.capability]?.maxChars;
  }
  return typeof limit === "number" ? limit : DEFAULT_MAX_CHARS_BY_CAPABILITY[params.capability];
}
/**
 * Resolves the byte limit for a capability's media input.
 *
 * The first non-nullish value wins, in this order: the model entry, the
 * supplied capability config, then `cfg.tools.media[capability]`. If that
 * value is not a number, the per-capability default is returned instead.
 */
export function resolveMaxBytes(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: OpenClawConfig;
  config?: MediaUnderstandingConfig;
}): number {
  const { capability, entry, cfg, config } = params;
  let limit = entry.maxBytes;
  if (limit == null) {
    limit = config?.maxBytes;
  }
  if (limit == null) {
    limit = cfg.tools?.media?.[capability]?.maxBytes;
  }
  return typeof limit === "number" ? limit : DEFAULT_MAX_BYTES[capability];
}
/**
 * Looks up the per-capability media configuration.
 *
 * @returns `cfg.tools.media[capability]`, or `undefined` when any part of that
 *   path is missing.
 */
export function resolveCapabilityConfig(
  cfg: OpenClawConfig,
  capability: MediaUnderstandingCapability,
): MediaUnderstandingConfig | undefined {
  const mediaSection = cfg.tools?.media;
  return mediaSection?.[capability];
}
/**
 * Decides whether media understanding is allowed for a message context.
 *
 * Delegates to `resolveMediaUnderstandingScope`, passing the scope config, the
 * context's session key, its channel (`Surface`, else `Provider`), and its
 * chat type after `normalizeMediaUnderstandingChatType`.
 */
export function resolveScopeDecision(params: {
  scope?: MediaUnderstandingScopeConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  const { scope, ctx } = params;
  const channel = ctx.Surface ?? ctx.Provider;
  const chatType = normalizeMediaUnderstandingChatType(ctx.ChatType);
  return resolveMediaUnderstandingScope({
    scope,
    sessionKey: ctx.SessionKey,
    channel,
    chatType,
  });
}
/**
 * Infers a model entry's capabilities from the provider registry.
 *
 * CLI entries (explicit `type: "cli"`, or no type but a `command`) have no
 * registry record, so `undefined` is returned for them.
 *
 * @returns The capabilities registered for the entry's normalized provider id,
 *   or `undefined` for CLI entries, an empty provider id, or an unknown provider.
 */
function resolveEntryCapabilities(params: {
  entry: MediaUnderstandingModelConfig;
  providerRegistry: Map<string, { capabilities?: MediaUnderstandingCapability[] }>;
}): MediaUnderstandingCapability[] | undefined {
  const { entry, providerRegistry } = params;
  // An explicit type always wins; only infer from `command` when type is absent.
  const isCli = entry.type != null ? entry.type === "cli" : Boolean(entry.command);
  if (isCli) {
    return undefined;
  }
  const providerId = normalizeExtensionHostMediaProviderId(entry.provider ?? "");
  return providerId ? providerRegistry.get(providerId)?.capabilities : undefined;
}
/**
 * Collects the model entries that apply to a capability.
 *
 * Capability-specific entries (`config.models`) come first, followed by shared
 * entries (`cfg.tools.media.models`).
 * - A capability-specific entry with no declared capabilities is always kept.
 * - A shared entry with no declared capabilities takes them from the provider
 *   registry; if none can be determined it is skipped (with a verbose log).
 * - Any entry with known capabilities is kept only if they include `capability`.
 */
export function resolveModelEntries(params: {
  cfg: OpenClawConfig;
  capability: MediaUnderstandingCapability;
  config?: MediaUnderstandingConfig;
  providerRegistry: Map<string, { capabilities?: MediaUnderstandingCapability[] }>;
}): MediaUnderstandingModelConfig[] {
  const { cfg, capability, config, providerRegistry } = params;
  const selected: MediaUnderstandingModelConfig[] = [];

  for (const entry of config?.models ?? []) {
    const declared = entry.capabilities;
    if (!declared || declared.length === 0 || declared.includes(capability)) {
      selected.push(entry);
    }
  }

  for (const entry of cfg.tools?.media?.models ?? []) {
    const hasDeclared = Boolean(entry.capabilities && entry.capabilities.length > 0);
    const caps = hasDeclared
      ? entry.capabilities
      : resolveEntryCapabilities({ entry, providerRegistry });
    if (!caps || caps.length === 0) {
      if (shouldLogVerbose()) {
        logVerbose(
          `Skipping shared media model without capabilities: ${entry.provider ?? entry.command ?? "unknown"}`,
        );
      }
      continue;
    }
    if (caps.includes(capability)) {
      selected.push(entry);
    }
  }

  return selected;
}
/**
 * Resolves how many media tasks may run at once.
 *
 * Reads `cfg.tools.media.concurrency`. A finite, positive number is floored to
 * an integer and clamped to at least 1; any other value falls back to
 * `DEFAULT_MEDIA_CONCURRENCY`.
 *
 * @returns A whole-number concurrency of at least 1 when one is configured,
 *   otherwise the default.
 */
export function resolveConcurrency(cfg: OpenClawConfig): number {
  const configured = cfg.tools?.media?.concurrency;
  if (typeof configured !== "number" || !Number.isFinite(configured) || configured <= 0) {
    return DEFAULT_MEDIA_CONCURRENCY;
  }
  // A fraction in (0, 1) passes the positivity check but floors to 0, which
  // contradicts the "must be > 0" intent; clamp so the result is at least 1.
  return Math.max(1, Math.floor(configured));
}
/**
 * Resolves model entries for a capability, falling back to the active model.
 *
 * Configured entries (via `resolveModelEntries`) are returned when any exist.
 * Otherwise a single provider entry is synthesized from `activeModel`, but only
 * when all of the following hold:
 * - the capability config has `enabled === true`,
 * - the active provider normalizes to a non-empty id, and
 * - the registry lists `capability` for that provider.
 * In every other case the (empty) configured list is returned.
 */
export function resolveEntriesWithActiveFallback(params: {
  cfg: OpenClawConfig;
  capability: MediaUnderstandingCapability;
  config?: MediaUnderstandingConfig;
  providerRegistry: Map<string, { capabilities?: MediaUnderstandingCapability[] }>;
  activeModel?: { provider: string; model?: string };
}): MediaUnderstandingModelConfig[] {
  const { cfg, capability, config, providerRegistry, activeModel } = params;
  const configured = resolveModelEntries({ cfg, capability, config, providerRegistry });
  if (configured.length > 0 || config?.enabled !== true) {
    return configured;
  }

  const rawProvider = activeModel?.provider?.trim();
  const providerId = rawProvider ? normalizeExtensionHostMediaProviderId(rawProvider) : undefined;
  if (!providerId) {
    return configured;
  }

  const supported = providerRegistry.get(providerId)?.capabilities ?? [];
  if (!supported.includes(capability)) {
    return configured;
  }

  return [
    {
      type: "provider",
      provider: providerId,
      model: activeModel?.model,
    },
  ];
}

View File

@@ -0,0 +1,58 @@
import type { MediaUnderstandingModelConfig } from "../config/types.tools.js";
import type {
MediaUnderstandingDecision,
MediaUnderstandingModelDecision,
} from "../media-understanding/types.js";
import { normalizeExtensionHostMediaProviderId } from "./media-runtime-registry.js";
/**
 * Builds the decision record for a single model entry attempt.
 *
 * For CLI entries the trimmed command is the provider label, and also the
 * model label when the entry has no `model`. A missing or blank command falls
 * back to the provider label "cli" and leaves the model label undefined.
 * For provider entries the provider label is the normalized provider id, or
 * the trimmed raw id when normalization yields a nullish value.
 *
 * @param params.entry - Model config entry the decision describes.
 * @param params.entryType - Whether the entry runs as a provider or a CLI.
 * @param params.outcome - Outcome recorded on the decision.
 * @param params.reason - Optional explanation recorded on the decision.
 */
export function buildModelDecision(params: {
  entry: MediaUnderstandingModelConfig;
  entryType: "provider" | "cli";
  outcome: MediaUnderstandingModelDecision["outcome"];
  reason?: string;
}): MediaUnderstandingModelDecision {
  const { entry, entryType, outcome, reason } = params;
  if (entryType === "cli") {
    // `??` alone would keep "" for a blank command and emit an empty label,
    // so normalize blank to undefined before applying the fallbacks.
    const command = entry.command?.trim() || undefined;
    return {
      type: "cli",
      provider: command ?? "cli",
      model: entry.model ?? command,
      outcome,
      reason,
    };
  }
  const providerIdRaw = entry.provider?.trim();
  const providerId = providerIdRaw
    ? normalizeExtensionHostMediaProviderId(providerIdRaw)
    : undefined;
  return {
    type: "provider",
    provider: providerId ?? providerIdRaw,
    model: entry.model,
    outcome,
    reason,
  };
}
/**
 * Renders a one-line summary of a media-understanding decision.
 *
 * Format: `<capability>: <outcome>[ (<ok>/<total>)][ via <provider>[/<model>]][ reason=<short>]`
 * - The count appears when there is at least one attachment; `<ok>` counts
 *   attachments whose chosen outcome is "success".
 * - The "via" label comes from the first attachment that has a chosen model.
 * - The reason is the first non-blank attempt reason, cut at the first ":".
 */
export function formatDecisionSummary(decision: MediaUnderstandingDecision): string {
  const items = Array.isArray(decision.attachments) ? decision.attachments : [];
  const successCount = items.reduce(
    (count, item) => (item?.chosen?.outcome === "success" ? count + 1 : count),
    0,
  );

  const picked = items.find((item) => item?.chosen)?.chosen;
  const providerName = typeof picked?.provider === "string" ? picked.provider.trim() : undefined;
  const modelName = typeof picked?.model === "string" ? picked.model.trim() : undefined;

  // Scan attachments in order and stop at the first non-blank reason.
  const findFirstReason = (): string | undefined => {
    for (const item of items) {
      const attempts = Array.isArray(item?.attempts) ? item.attempts : [];
      for (const attempt of attempts) {
        const candidate = attempt?.reason;
        if (typeof candidate === "string" && candidate.trim().length > 0) {
          return candidate;
        }
      }
    }
    return undefined;
  };
  const firstReason = findFirstReason();
  const shortReason = firstReason ? firstReason.split(":")[0]?.trim() : undefined;

  const parts = [`${decision.capability}: ${decision.outcome}`];
  if (items.length > 0) {
    parts.push(` (${successCount}/${items.length})`);
  }
  if (providerName) {
    parts.push(` via ${modelName ? `${providerName}/${modelName}` : providerName}`);
  }
  if (shortReason) {
    parts.push(` reason=${shortReason}`);
  }
  return parts.join("");
}

View File

@@ -29,21 +29,19 @@ import {
import { MediaUnderstandingSkipError } from "../media-understanding/errors.js";
import { fileExists } from "../media-understanding/fs.js";
import { extractGeminiResponse } from "../media-understanding/output-extract.js";
import {
resolveMaxBytes,
resolveMaxChars,
resolvePrompt,
resolveTimeoutMs,
} from "../media-understanding/resolve.js";
import type {
MediaUnderstandingCapability,
MediaUnderstandingDecision,
MediaUnderstandingModelDecision,
MediaUnderstandingOutput,
MediaUnderstandingProvider,
} from "../media-understanding/types.js";
import { estimateBase64Size, resolveVideoMaxBase64Bytes } from "../media-understanding/video.js";
import { runExec } from "../process/exec.js";
import {
resolveMaxBytes,
resolveMaxChars,
resolvePrompt,
resolveTimeoutMs,
} from "./media-runtime-config.js";
/**
 * Lookup table of media-understanding providers keyed by a string.
 * NOTE(review): keys are presumably normalized provider ids — confirm against the code that builds the registry.
 */
export type ProviderRegistry = Map<string, MediaUnderstandingProvider>;
@@ -292,35 +290,6 @@ function resolveProviderQuery(params: {
return Object.keys(query).length > 0 ? query : undefined;
}
/**
 * Builds the decision record for a single model entry attempt.
 *
 * For CLI entries the trimmed command is the provider label, and also the
 * model label when the entry has no `model`. A missing or blank command falls
 * back to the provider label "cli" and leaves the model label undefined.
 * For provider entries the provider label is the normalized provider id, or
 * the trimmed raw id when normalization yields a nullish value.
 *
 * @param params.entry - Model config entry the decision describes.
 * @param params.entryType - Whether the entry runs as a provider or a CLI.
 * @param params.outcome - Outcome recorded on the decision.
 * @param params.reason - Optional explanation recorded on the decision.
 */
export function buildModelDecision(params: {
  entry: MediaUnderstandingModelConfig;
  entryType: "provider" | "cli";
  outcome: MediaUnderstandingModelDecision["outcome"];
  reason?: string;
}): MediaUnderstandingModelDecision {
  const { entry, entryType, outcome, reason } = params;
  if (entryType === "cli") {
    // `??` alone would keep "" for a blank command and emit an empty label,
    // so normalize blank to undefined before applying the fallbacks.
    const command = entry.command?.trim() || undefined;
    return {
      type: "cli",
      provider: command ?? "cli",
      model: entry.model ?? command,
      outcome,
      reason,
    };
  }
  const providerIdRaw = entry.provider?.trim();
  const providerId = providerIdRaw
    ? normalizeExtensionHostMediaProviderId(providerIdRaw)
    : undefined;
  return {
    type: "provider",
    provider: providerId ?? providerIdRaw,
    model: entry.model,
    outcome,
    reason,
  };
}
function resolveEntryRunOptions(params: {
capability: MediaUnderstandingCapability;
entry: MediaUnderstandingModelConfig;
@@ -389,29 +358,6 @@ async function resolveProviderExecutionContext(params: {
return { apiKeys, baseUrl, headers };
}
/**
 * Renders a one-line summary of a media-understanding decision.
 *
 * Format: `<capability>: <outcome>[ (<ok>/<total>)][ via <provider>[/<model>]][ reason=<short>]`
 * - The count appears when there is at least one attachment; `<ok>` counts
 *   attachments whose chosen outcome is "success".
 * - The "via" label comes from the first attachment that has a chosen model.
 * - The reason is the first non-blank attempt reason, cut at the first ":".
 */
export function formatDecisionSummary(decision: MediaUnderstandingDecision): string {
  const items = Array.isArray(decision.attachments) ? decision.attachments : [];
  const successCount = items.reduce(
    (count, item) => (item?.chosen?.outcome === "success" ? count + 1 : count),
    0,
  );

  const picked = items.find((item) => item?.chosen)?.chosen;
  const providerName = typeof picked?.provider === "string" ? picked.provider.trim() : undefined;
  const modelName = typeof picked?.model === "string" ? picked.model.trim() : undefined;

  // Scan attachments in order and stop at the first non-blank reason.
  const findFirstReason = (): string | undefined => {
    for (const item of items) {
      const attempts = Array.isArray(item?.attempts) ? item.attempts : [];
      for (const attempt of attempts) {
        const candidate = attempt?.reason;
        if (typeof candidate === "string" && candidate.trim().length > 0) {
          return candidate;
        }
      }
    }
    return undefined;
  };
  const firstReason = findFirstReason();
  const shortReason = firstReason ? firstReason.split(":")[0]?.trim() : undefined;

  const parts = [`${decision.capability}: ${decision.outcome}`];
  if (items.length > 0) {
    parts.push(` (${successCount}/${items.length})`);
  }
  if (providerName) {
    parts.push(` via ${modelName ? `${providerName}/${modelName}` : providerName}`);
  }
  if (shortReason) {
    parts.push(` reason=${shortReason}`);
  }
  return parts.join("");
}
function assertMinAudioSize(params: { size: number; attachmentIndex: number }): void {
if (params.size >= MIN_AUDIO_FILE_BYTES) {
return;

View File

@@ -12,13 +12,7 @@ import type {
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { MediaAttachmentCache, selectAttachments } from "../media-understanding/attachments.js";
import { isMediaUnderstandingSkipError } from "../media-understanding/errors.js";
import { resolveModelEntries, resolveScopeDecision } from "../media-understanding/resolve.js";
import {
buildModelDecision,
formatDecisionSummary,
runCliEntry,
runProviderEntry,
} from "../media-understanding/runner.entries.js";
import { runCliEntry, runProviderEntry } from "../media-understanding/runner.entries.js";
import type {
MediaAttachment,
MediaUnderstandingCapability,
@@ -28,6 +22,8 @@ import type {
MediaUnderstandingProvider,
} from "../media-understanding/types.js";
import { resolveAutoEntries, type ActiveMediaModel } from "./media-runtime-auto.js";
import { resolveModelEntries, resolveScopeDecision } from "./media-runtime-config.js";
import { buildModelDecision, formatDecisionSummary } from "./media-runtime-decision.js";
/**
 * Lookup table of media-understanding providers keyed by a string.
 * NOTE(review): keys are presumably normalized provider ids — confirm against the code that builds the registry.
 */
type ProviderRegistry = Map<string, MediaUnderstandingProvider>;

View File

@@ -1,187 +1,11 @@
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import type {
MediaUnderstandingConfig,
MediaUnderstandingModelConfig,
MediaUnderstandingScopeConfig,
} from "../config/types.tools.js";
import { normalizeExtensionHostMediaProviderId } from "../extension-host/media-runtime-registry.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import {
DEFAULT_MAX_BYTES,
DEFAULT_MAX_CHARS_BY_CAPABILITY,
DEFAULT_MEDIA_CONCURRENCY,
DEFAULT_PROMPT,
} from "./defaults.js";
import { normalizeMediaUnderstandingChatType, resolveMediaUnderstandingScope } from "./scope.js";
import type { MediaUnderstandingCapability } from "./types.js";
/**
 * Converts a timeout expressed in seconds into whole milliseconds.
 *
 * @param seconds - Configured timeout; ignored unless it is a finite number.
 * @param fallbackSeconds - Timeout used when `seconds` is missing or not finite.
 * @returns The timeout in milliseconds, floored to an integer and never below 1000.
 */
export function resolveTimeoutMs(seconds: number | undefined, fallbackSeconds: number): number {
  let effectiveSeconds = fallbackSeconds;
  if (typeof seconds === "number" && Number.isFinite(seconds)) {
    effectiveSeconds = seconds;
  }
  const wholeMillis = Math.floor(effectiveSeconds * 1000);
  // Enforce a one-second floor so tiny or negative values stay usable.
  return Math.max(1000, wholeMillis);
}
/**
 * Builds the prompt text for a media-understanding request.
 *
 * @param capability - Capability the prompt is for; selects the default prompt.
 * @param prompt - Optional custom prompt; used (trimmed) when non-blank.
 * @param maxChars - Optional response length limit appended as an instruction.
 * @returns The trimmed custom prompt or the capability default. A length
 *   instruction is appended unless `maxChars` is falsy or the capability is "audio".
 */
export function resolvePrompt(
  capability: MediaUnderstandingCapability,
  prompt?: string,
  maxChars?: number,
): string {
  const custom = prompt?.trim();
  const basePrompt = custom ? custom : DEFAULT_PROMPT[capability];
  const appendLimit = Boolean(maxChars) && capability !== "audio";
  return appendLimit ? `${basePrompt} Respond in at most ${maxChars} characters.` : basePrompt;
}
/**
 * Resolves the character limit for a capability's output.
 *
 * The first non-nullish value wins, in this order: the model entry, the
 * supplied capability config, then `cfg.tools.media[capability]`. If that
 * value is not a number, the per-capability default is returned instead.
 */
export function resolveMaxChars(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: OpenClawConfig;
  config?: MediaUnderstandingConfig;
}): number | undefined {
  let limit = params.entry.maxChars;
  if (limit == null) {
    limit = params.config?.maxChars;
  }
  if (limit == null) {
    limit = params.cfg.tools?.media?.[params.capability]?.maxChars;
  }
  return typeof limit === "number" ? limit : DEFAULT_MAX_CHARS_BY_CAPABILITY[params.capability];
}
/**
 * Resolves the byte limit for a capability's media input.
 *
 * The first non-nullish value wins, in this order: the model entry, the
 * supplied capability config, then `cfg.tools.media[capability]`. If that
 * value is not a number, the per-capability default is returned instead.
 */
export function resolveMaxBytes(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: OpenClawConfig;
  config?: MediaUnderstandingConfig;
}): number {
  const { capability, entry, cfg, config } = params;
  let limit = entry.maxBytes;
  if (limit == null) {
    limit = config?.maxBytes;
  }
  if (limit == null) {
    limit = cfg.tools?.media?.[capability]?.maxBytes;
  }
  return typeof limit === "number" ? limit : DEFAULT_MAX_BYTES[capability];
}
/**
 * Looks up the per-capability media configuration.
 *
 * @returns `cfg.tools.media[capability]`, or `undefined` when any part of that
 *   path is missing.
 */
export function resolveCapabilityConfig(
  cfg: OpenClawConfig,
  capability: MediaUnderstandingCapability,
): MediaUnderstandingConfig | undefined {
  const mediaSection = cfg.tools?.media;
  return mediaSection?.[capability];
}
/**
 * Decides whether media understanding is allowed for a message context.
 *
 * Delegates to `resolveMediaUnderstandingScope`, passing the scope config, the
 * context's session key, its channel (`Surface`, else `Provider`), and its
 * chat type after `normalizeMediaUnderstandingChatType`.
 */
export function resolveScopeDecision(params: {
  scope?: MediaUnderstandingScopeConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  const { scope, ctx } = params;
  const channel = ctx.Surface ?? ctx.Provider;
  const chatType = normalizeMediaUnderstandingChatType(ctx.ChatType);
  return resolveMediaUnderstandingScope({
    scope,
    sessionKey: ctx.SessionKey,
    channel,
    chatType,
  });
}
/**
 * Infers a model entry's capabilities from the provider registry.
 *
 * CLI entries (explicit `type: "cli"`, or no type but a `command`) have no
 * registry record, so `undefined` is returned for them.
 *
 * @returns The capabilities registered for the entry's normalized provider id,
 *   or `undefined` for CLI entries, an empty provider id, or an unknown provider.
 */
function resolveEntryCapabilities(params: {
  entry: MediaUnderstandingModelConfig;
  providerRegistry: Map<string, { capabilities?: MediaUnderstandingCapability[] }>;
}): MediaUnderstandingCapability[] | undefined {
  const { entry, providerRegistry } = params;
  // An explicit type always wins; only infer from `command` when type is absent.
  const isCli = entry.type != null ? entry.type === "cli" : Boolean(entry.command);
  if (isCli) {
    return undefined;
  }
  const providerId = normalizeExtensionHostMediaProviderId(entry.provider ?? "");
  return providerId ? providerRegistry.get(providerId)?.capabilities : undefined;
}
/**
 * Collects the model entries that apply to a capability.
 *
 * Capability-specific entries (`config.models`) come first, followed by shared
 * entries (`cfg.tools.media.models`).
 * - A capability-specific entry with no declared capabilities is always kept.
 * - A shared entry with no declared capabilities takes them from the provider
 *   registry; if none can be determined it is skipped (with a verbose log).
 * - Any entry with known capabilities is kept only if they include `capability`.
 */
export function resolveModelEntries(params: {
  cfg: OpenClawConfig;
  capability: MediaUnderstandingCapability;
  config?: MediaUnderstandingConfig;
  providerRegistry: Map<string, { capabilities?: MediaUnderstandingCapability[] }>;
}): MediaUnderstandingModelConfig[] {
  const { cfg, capability, config, providerRegistry } = params;
  const selected: MediaUnderstandingModelConfig[] = [];

  for (const entry of config?.models ?? []) {
    const declared = entry.capabilities;
    if (!declared || declared.length === 0 || declared.includes(capability)) {
      selected.push(entry);
    }
  }

  for (const entry of cfg.tools?.media?.models ?? []) {
    const hasDeclared = Boolean(entry.capabilities && entry.capabilities.length > 0);
    const caps = hasDeclared
      ? entry.capabilities
      : resolveEntryCapabilities({ entry, providerRegistry });
    if (!caps || caps.length === 0) {
      if (shouldLogVerbose()) {
        logVerbose(
          `Skipping shared media model without capabilities: ${entry.provider ?? entry.command ?? "unknown"}`,
        );
      }
      continue;
    }
    if (caps.includes(capability)) {
      selected.push(entry);
    }
  }

  return selected;
}
/**
 * Resolves how many media tasks may run at once.
 *
 * Reads `cfg.tools.media.concurrency`. A finite, positive number is floored to
 * an integer and clamped to at least 1; any other value falls back to
 * `DEFAULT_MEDIA_CONCURRENCY`.
 *
 * @returns A whole-number concurrency of at least 1 when one is configured,
 *   otherwise the default.
 */
export function resolveConcurrency(cfg: OpenClawConfig): number {
  const configured = cfg.tools?.media?.concurrency;
  if (typeof configured !== "number" || !Number.isFinite(configured) || configured <= 0) {
    return DEFAULT_MEDIA_CONCURRENCY;
  }
  // A fraction in (0, 1) passes the positivity check but floors to 0, which
  // contradicts the "must be > 0" intent; clamp so the result is at least 1.
  return Math.max(1, Math.floor(configured));
}
/**
 * Resolves model entries for a capability, falling back to the active model.
 *
 * Configured entries (via `resolveModelEntries`) are returned when any exist.
 * Otherwise a single provider entry is synthesized from `activeModel`, but only
 * when all of the following hold:
 * - the capability config has `enabled === true`,
 * - the active provider normalizes to a non-empty id, and
 * - the registry lists `capability` for that provider.
 * In every other case the (empty) configured list is returned.
 */
export function resolveEntriesWithActiveFallback(params: {
  cfg: OpenClawConfig;
  capability: MediaUnderstandingCapability;
  config?: MediaUnderstandingConfig;
  providerRegistry: Map<string, { capabilities?: MediaUnderstandingCapability[] }>;
  activeModel?: { provider: string; model?: string };
}): MediaUnderstandingModelConfig[] {
  const { cfg, capability, config, providerRegistry, activeModel } = params;
  const configured = resolveModelEntries({ cfg, capability, config, providerRegistry });
  if (configured.length > 0 || config?.enabled !== true) {
    return configured;
  }

  const rawProvider = activeModel?.provider?.trim();
  const providerId = rawProvider ? normalizeExtensionHostMediaProviderId(rawProvider) : undefined;
  if (!providerId) {
    return configured;
  }

  const supported = providerRegistry.get(providerId)?.capabilities ?? [];
  if (!supported.includes(capability)) {
    return configured;
  }

  return [
    {
      type: "provider",
      provider: providerId,
      model: activeModel?.model,
    },
  ];
}
export {
resolveTimeoutMs,
resolvePrompt,
resolveMaxChars,
resolveMaxBytes,
resolveCapabilityConfig,
resolveScopeDecision,
resolveModelEntries,
resolveConcurrency,
resolveEntriesWithActiveFallback,
} from "../extension-host/media-runtime-config.js";

View File

@@ -4,69 +4,15 @@ import type {
MediaUnderstandingConfig,
MediaUnderstandingModelConfig,
} from "../config/types.tools.js";
import { normalizeExtensionHostMediaProviderId } from "../extension-host/media-runtime-registry.js";
export {
buildModelDecision,
formatDecisionSummary,
} from "../extension-host/media-runtime-decision.js";
import type { MediaAttachmentCache } from "./attachments.js";
import type {
MediaUnderstandingCapability,
MediaUnderstandingDecision,
MediaUnderstandingModelDecision,
MediaUnderstandingOutput,
} from "./types.js";
import type { MediaUnderstandingCapability, MediaUnderstandingOutput } from "./types.js";
/**
 * Lookup table of media-understanding providers keyed by a string.
 * NOTE(review): keys are presumably normalized provider ids — confirm against the code that builds the registry.
 */
export type ProviderRegistry = Map<string, import("./types.js").MediaUnderstandingProvider>;
/**
 * Builds the decision record for a single model entry attempt.
 *
 * For CLI entries the trimmed command is the provider label, and also the
 * model label when the entry has no `model`. A missing or blank command falls
 * back to the provider label "cli" and leaves the model label undefined.
 * For provider entries the provider label is the normalized provider id, or
 * the trimmed raw id when normalization yields a nullish value.
 *
 * @param params.entry - Model config entry the decision describes.
 * @param params.entryType - Whether the entry runs as a provider or a CLI.
 * @param params.outcome - Outcome recorded on the decision.
 * @param params.reason - Optional explanation recorded on the decision.
 */
export function buildModelDecision(params: {
  entry: MediaUnderstandingModelConfig;
  entryType: "provider" | "cli";
  outcome: MediaUnderstandingModelDecision["outcome"];
  reason?: string;
}): MediaUnderstandingModelDecision {
  const { entry, entryType, outcome, reason } = params;
  if (entryType === "cli") {
    // `??` alone would keep "" for a blank command and emit an empty label,
    // so normalize blank to undefined before applying the fallbacks.
    const command = entry.command?.trim() || undefined;
    return {
      type: "cli",
      provider: command ?? "cli",
      model: entry.model ?? command,
      outcome,
      reason,
    };
  }
  const providerIdRaw = entry.provider?.trim();
  const providerId = providerIdRaw
    ? normalizeExtensionHostMediaProviderId(providerIdRaw)
    : undefined;
  return {
    type: "provider",
    provider: providerId ?? providerIdRaw,
    model: entry.model,
    outcome,
    reason,
  };
}
/**
 * Renders a one-line summary of a media-understanding decision.
 *
 * Format: `<capability>: <outcome>[ (<ok>/<total>)][ via <provider>[/<model>]][ reason=<short>]`
 * - The count appears when there is at least one attachment; `<ok>` counts
 *   attachments whose chosen outcome is "success".
 * - The "via" label comes from the first attachment that has a chosen model.
 * - The reason is the first non-blank attempt reason, cut at the first ":".
 */
export function formatDecisionSummary(decision: MediaUnderstandingDecision): string {
  const items = Array.isArray(decision.attachments) ? decision.attachments : [];
  const successCount = items.reduce(
    (count, item) => (item?.chosen?.outcome === "success" ? count + 1 : count),
    0,
  );

  const picked = items.find((item) => item?.chosen)?.chosen;
  const providerName = typeof picked?.provider === "string" ? picked.provider.trim() : undefined;
  const modelName = typeof picked?.model === "string" ? picked.model.trim() : undefined;

  // Scan attachments in order and stop at the first non-blank reason.
  const findFirstReason = (): string | undefined => {
    for (const item of items) {
      const attempts = Array.isArray(item?.attempts) ? item.attempts : [];
      for (const attempt of attempts) {
        const candidate = attempt?.reason;
        if (typeof candidate === "string" && candidate.trim().length > 0) {
          return candidate;
        }
      }
    }
    return undefined;
  };
  const firstReason = findFirstReason();
  const shortReason = firstReason ? firstReason.split(":")[0]?.trim() : undefined;

  const parts = [`${decision.capability}: ${decision.outcome}`];
  if (items.length > 0) {
    parts.push(` (${successCount}/${items.length})`);
  }
  if (providerName) {
    parts.push(` via ${modelName ? `${providerName}/${modelName}` : providerName}`);
  }
  if (shortReason) {
    parts.push(` reason=${shortReason}`);
  }
  return parts.join("");
}
export async function runProviderEntry(params: {
capability: MediaUnderstandingCapability;
entry: MediaUnderstandingModelConfig;