Files
openclaw/src/media-understanding/runtime-types.ts

111 lines
3.0 KiB
TypeScript

import type { AuthProfileStore } from "../agents/auth-profiles/types.js";
import type { OpenClawConfig } from "../config/types.js";
import type { ActiveMediaModel } from "./active-model.types.js";
import type {
MediaUnderstandingDecision,
MediaUnderstandingOutput,
MediaUnderstandingProvider,
StructuredExtractionInput,
} from "./types.js";
/**
 * Argument object taken by `MediaUnderstandingRuntime.runMediaUnderstandingFile`.
 *
 * `capability`, `filePath` and `cfg` must be supplied; every other field may be
 * left out.
 *
 * NOTE(review): the per-field notes below are inferred from field names and
 * types only — confirm them against the runtime implementation.
 */
export type RunMediaUnderstandingFileParams = {
  // Required fields.
  /** Selects one of the three supported media kinds. */
  capability: "image" | "audio" | "video";
  /** Presumably the path of the media file to process. */
  filePath: string;
  cfg: OpenClawConfig;

  // Optional fields.
  /** Presumably the MIME type of the file at `filePath` — TODO confirm. */
  mime?: string;
  prompt?: string;
  activeModel?: ActiveMediaModel;
  agentDir?: string;
  /** The `Ms` suffix suggests a limit in milliseconds — TODO confirm. */
  timeoutMs?: number;
};
/**
 * Result shape shared by the file-oriented runtime calls: it is what
 * `runMediaUnderstandingFile`, `describeImageFile`, `describeVideoFile` and
 * `transcribeAudioFile` on `MediaUnderstandingRuntime` resolve to.
 */
export type RunMediaUnderstandingFileResult = {
  /**
   * The key itself is mandatory, but its value may be `undefined`
   * (note: this is not the same as an optional `text?` property).
   */
  text: string | undefined;

  // Everything below may be absent.
  output?: MediaUnderstandingOutput;
  decision?: MediaUnderstandingDecision;
  /** Presumably identifies the provider that produced the result — TODO confirm. */
  provider?: string;
  /** Presumably identifies the model that produced the result — TODO confirm. */
  model?: string;
};
/**
 * Argument object taken by `MediaUnderstandingRuntime.describeImageFile`.
 *
 * Carries the same fields as `RunMediaUnderstandingFileParams` except that it
 * has no `capability` selector. Only `filePath` and `cfg` are mandatory.
 *
 * NOTE(review): per-field notes are inferred from names only — confirm against
 * the runtime implementation.
 */
export type DescribeImageFileParams = {
  // Required fields.
  /** Presumably the path of the image file to describe. */
  filePath: string;
  cfg: OpenClawConfig;

  // Optional fields.
  /** Presumably the MIME type of the file at `filePath` — TODO confirm. */
  mime?: string;
  prompt?: string;
  activeModel?: ActiveMediaModel;
  agentDir?: string;
  /** The `Ms` suffix suggests a limit in milliseconds — TODO confirm. */
  timeoutMs?: number;
};
/**
 * Argument object taken by `MediaUnderstandingRuntime.describeImageFileWithModel`.
 *
 * Unlike `DescribeImageFileParams`, the caller names `provider` and `model`
 * explicitly and `prompt` is mandatory; there is no `activeModel` field.
 *
 * NOTE(review): per-field notes are inferred from names only — confirm against
 * the runtime implementation.
 */
export type DescribeImageFileWithModelParams = {
  // Required fields.
  /** Presumably the path of the image file to describe. */
  filePath: string;
  cfg: OpenClawConfig;
  provider: string;
  model: string;
  prompt: string;

  // Optional fields.
  /** Presumably the MIME type of the file at `filePath` — TODO confirm. */
  mime?: string;
  agentDir?: string;
  /** Presumably caps the length of the generated description — TODO confirm. */
  maxTokens?: number;
  /** The `Ms` suffix suggests a limit in milliseconds — TODO confirm. */
  timeoutMs?: number;
};
/**
 * Resolved value of a provider's `describeImage` call.
 *
 * Derived from `MediaUnderstandingProvider["describeImage"]` rather than
 * redeclared: `NonNullable` strips `null`/`undefined` from the member type,
 * `ReturnType` takes what the function returns, and `Awaited` unwraps the
 * promise (if any) to the value it resolves to.
 */
type DescribeImageFileWithModelResult = Awaited<
  ReturnType<NonNullable<MediaUnderstandingProvider["describeImage"]>>
>;
/**
 * Argument object taken by `MediaUnderstandingRuntime.extractStructuredWithModel`.
 *
 * `input`, `instructions`, `cfg`, `provider` and `model` are mandatory; the
 * remaining fields may be omitted.
 *
 * NOTE(review): how `profile`, `preferredProfile` and `authStore` interact is
 * not visible from this declaration — consult the runtime implementation.
 */
export type ExtractStructuredWithModelParams = {
  // Required fields.
  /** At least one image input is required; text inputs provide supplemental context. */
  input: StructuredExtractionInput[];
  instructions: string;
  cfg: OpenClawConfig;
  provider: string;
  model: string;

  // Optional fields describing the requested output.
  schemaName?: string;
  /** Typed as `unknown`: the declaration places no constraint on the schema's shape. */
  jsonSchema?: unknown;
  jsonMode?: boolean;

  // Optional fields that, judging by their names, relate to auth/profile selection.
  profile?: string;
  preferredProfile?: string;
  authStore?: AuthProfileStore;

  // Other optional fields.
  agentDir?: string;
  /** The `Ms` suffix suggests a limit in milliseconds — TODO confirm. */
  timeoutMs?: number;
};
/**
 * Resolved value of a provider's `extractStructured` call.
 *
 * Derived from `MediaUnderstandingProvider["extractStructured"]` rather than
 * redeclared: `NonNullable` strips `null`/`undefined` from the member type,
 * `ReturnType` takes what the function returns, and `Awaited` unwraps the
 * promise (if any) to the value it resolves to.
 */
type ExtractStructuredWithModelResult = Awaited<
  ReturnType<NonNullable<MediaUnderstandingProvider["extractStructured"]>>
>;
/**
 * Argument object taken by `MediaUnderstandingRuntime.describeVideoFile`.
 *
 * Only `filePath` and `cfg` are mandatory. Note that, unlike
 * `DescribeImageFileParams`, this shape declares neither `prompt` nor
 * `timeoutMs`.
 */
export type DescribeVideoFileParams = {
  // Required fields.
  /** Presumably the path of the video file to describe. */
  filePath: string;
  cfg: OpenClawConfig;

  // Optional fields.
  /** Presumably the MIME type of the file at `filePath` — TODO confirm. */
  mime?: string;
  activeModel?: ActiveMediaModel;
  agentDir?: string;
};
/**
 * Argument object taken by `MediaUnderstandingRuntime.transcribeAudioFile`.
 *
 * Only `filePath` and `cfg` are mandatory. This shape declares no `timeoutMs`
 * field.
 *
 * NOTE(review): per-field notes are inferred from names only — confirm against
 * the runtime implementation.
 */
export type TranscribeAudioFileParams = {
  // Required fields.
  /** Presumably the path of the audio file to transcribe. */
  filePath: string;
  cfg: OpenClawConfig;

  // Optional fields.
  /** Presumably the MIME type of the file at `filePath` — TODO confirm. */
  mime?: string;
  /** Presumably a hint for the spoken language; expected format is not visible here. */
  language?: string;
  prompt?: string;
  activeModel?: ActiveMediaModel;
  agentDir?: string;
};
/**
 * Contract for the media-understanding runtime: six asynchronous operations,
 * each taking a single argument object and returning a promise.
 *
 * Members are declared as function-typed properties (not method signatures).
 */
export type MediaUnderstandingRuntime = {
  /** General entry point; the media kind is chosen via `params.capability`. */
  runMediaUnderstandingFile: (
    params: RunMediaUnderstandingFileParams,
  ) => Promise<RunMediaUnderstandingFileResult>;

  /** Image operation that resolves to the shared file-result shape. */
  describeImageFile: (
    params: DescribeImageFileParams,
  ) => Promise<RunMediaUnderstandingFileResult>;

  /**
   * Image operation with an explicitly named provider and model; resolves to
   * whatever the provider's `describeImage` resolves to.
   */
  describeImageFileWithModel: (
    params: DescribeImageFileWithModelParams,
  ) => Promise<DescribeImageFileWithModelResult>;

  /**
   * Structured extraction with an explicitly named provider and model;
   * resolves to whatever the provider's `extractStructured` resolves to.
   */
  extractStructuredWithModel: (
    params: ExtractStructuredWithModelParams,
  ) => Promise<ExtractStructuredWithModelResult>;

  /** Video operation that resolves to the shared file-result shape. */
  describeVideoFile: (
    params: DescribeVideoFileParams,
  ) => Promise<RunMediaUnderstandingFileResult>;

  /** Audio operation that resolves to the shared file-result shape. */
  transcribeAudioFile: (
    params: TranscribeAudioFileParams,
  ) => Promise<RunMediaUnderstandingFileResult>;
};