mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-17 20:21:13 +00:00
fix(cycles): split media understanding runtime contracts
This commit is contained in:
4
src/media-understanding/active-model.types.ts
Normal file
4
src/media-understanding/active-model.types.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
/**
 * Provider/model pair used as the type of the optional `activeModel`
 * parameter in the media-understanding parameter types.
 *
 * `provider` is required; `model` may be omitted.
 * NOTE(review): presumably an omitted `model` lets the provider pick its
 * default — confirm against the runner.
 */
export type ActiveMediaModel = {
|
||||
provider: string;
|
||||
model?: string;
|
||||
};
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
normalizeOptionalLowercaseString,
|
||||
} from "../shared/string-coerce.js";
|
||||
import type { ActiveMediaModel } from "./active-model.types.js";
|
||||
import { resolveAttachmentKind } from "./attachments.js";
|
||||
import { runWithConcurrency } from "./concurrency.js";
|
||||
import { DEFAULT_ECHO_TRANSCRIPT_FORMAT, sendTranscriptEcho } from "./echo-transcript.js";
|
||||
@@ -24,7 +25,6 @@ import {
|
||||
} from "./format.js";
|
||||
import { resolveConcurrency } from "./resolve.js";
|
||||
import {
|
||||
type ActiveMediaModel,
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import type { ActiveMediaModel } from "./active-model.types.js";
|
||||
import { isAudioAttachment } from "./attachments.js";
|
||||
import { runAudioTranscription } from "./audio-transcription-runner.js";
|
||||
import {
|
||||
type ActiveMediaModel,
|
||||
normalizeMediaAttachments,
|
||||
resolveMediaAttachmentLocalRoots,
|
||||
} from "./runner.js";
|
||||
import { normalizeMediaAttachments, resolveMediaAttachmentLocalRoots } from "./runner.js";
|
||||
import type { MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import type { ActiveMediaModel } from "./active-model.types.js";
|
||||
import {
|
||||
type ActiveMediaModel,
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
|
||||
@@ -26,6 +26,7 @@ import { getDefaultMediaLocalRoots } from "../media/local-roots.js";
|
||||
import { runExec } from "../process/exec.js";
|
||||
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
import type { ActiveMediaModel } from "./active-model.types.js";
|
||||
import { MediaAttachmentCache, selectAttachments } from "./attachments.js";
|
||||
import { resolveAutoMediaKeyProviders, resolveDefaultMediaModel } from "./defaults.js";
|
||||
import { isMediaUnderstandingSkipError } from "./errors.js";
|
||||
@@ -52,11 +53,7 @@ import type {
|
||||
MediaUnderstandingProvider,
|
||||
} from "./types.js";
|
||||
export { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
|
||||
|
||||
export type ActiveMediaModel = {
|
||||
provider: string;
|
||||
model?: string;
|
||||
};
|
||||
export type { ActiveMediaModel } from "./active-model.types.js";
|
||||
|
||||
type ProviderRegistry = Map<string, MediaUnderstandingProvider>;
|
||||
|
||||
|
||||
73
src/media-understanding/runtime-types.ts
Normal file
73
src/media-understanding/runtime-types.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import type { ActiveMediaModel } from "./active-model.types.js";
|
||||
import type { MediaUnderstandingOutput, MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
/**
 * Parameters of `runMediaUnderstandingFile` (see `MediaUnderstandingRuntime`).
 *
 * The capability-specific parameter types for image and video share every
 * field except `capability`, which their wrapper functions fill in.
 */
export type RunMediaUnderstandingFileParams = {
|
||||
// Which kind of media understanding to run for the file.
capability: "image" | "audio" | "video";
|
||||
// Path of the media file to process.
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
// NOTE(review): purpose of agentDir is not visible in this chunk — confirm.
agentDir?: string;
|
||||
// Presumably the MIME type of the file at filePath — TODO confirm.
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
};
|
||||
|
||||
/**
 * Result shape shared by `runMediaUnderstandingFile`, `describeImageFile`
 * and `describeVideoFile` in `MediaUnderstandingRuntime`.
 *
 * `text` is always present as a key but may be `undefined`; the remaining
 * fields are optional.
 */
export type RunMediaUnderstandingFileResult = {
|
||||
text: string | undefined;
|
||||
// Presumably the provider/model that produced the result — TODO confirm.
provider?: string;
|
||||
model?: string;
|
||||
output?: MediaUnderstandingOutput;
|
||||
};
|
||||
|
||||
/**
 * Parameters of `describeImageFile`.
 *
 * Same fields as `RunMediaUnderstandingFileParams` minus `capability`.
 */
export type DescribeImageFileParams = {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
};
|
||||
|
||||
/**
 * Parameters of `describeImageFileWithModel`.
 *
 * Unlike `DescribeImageFileParams`, the provider, model and prompt are
 * required here instead of an optional `activeModel`.
 */
export type DescribeImageFileWithModelParams = {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
prompt: string;
|
||||
// NOTE(review): presumably an output-token cap passed to the provider — confirm.
maxTokens?: number;
|
||||
// Timeout; the name indicates milliseconds.
timeoutMs?: number;
|
||||
};
|
||||
|
||||
/**
 * Resolved return type of `MediaUnderstandingProvider["describeImage"]`.
 *
 * `NonNullable` removes `null`/`undefined` from the member type before
 * `ReturnType` is applied, and `Awaited` unwraps the promise it returns.
 */
export type DescribeImageFileWithModelResult = Awaited<
|
||||
ReturnType<NonNullable<MediaUnderstandingProvider["describeImage"]>>
|
||||
>;
|
||||
|
||||
/**
 * Parameters of `describeVideoFile`.
 *
 * Same fields as `RunMediaUnderstandingFileParams` minus `capability`.
 */
export type DescribeVideoFileParams = {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
};
|
||||
|
||||
/**
 * Parameters of `transcribeAudioFile`.
 *
 * Adds the optional `language` and `prompt` fields to the fields shared by
 * the other file-based parameter types.
 */
export type TranscribeAudioFileParams = {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
// NOTE(review): presumably a language hint for transcription — confirm format.
language?: string;
|
||||
// NOTE(review): presumably a prompt forwarded to the transcription model — confirm.
prompt?: string;
|
||||
};
|
||||
|
||||
/**
 * Function-typed contract for the media-understanding runtime entry points.
 *
 * Each member is an async function taking a single params object. This
 * declaration only references types; it carries no implementation.
 */
export type MediaUnderstandingRuntime = {
|
||||
runMediaUnderstandingFile: (
|
||||
params: RunMediaUnderstandingFileParams,
|
||||
) => Promise<RunMediaUnderstandingFileResult>;
|
||||
describeImageFile: (params: DescribeImageFileParams) => Promise<RunMediaUnderstandingFileResult>;
|
||||
// Returns the provider's own describeImage result type rather than RunMediaUnderstandingFileResult.
describeImageFileWithModel: (
|
||||
params: DescribeImageFileWithModelParams,
|
||||
) => Promise<DescribeImageFileWithModelResult>;
|
||||
describeVideoFile: (params: DescribeVideoFileParams) => Promise<RunMediaUnderstandingFileResult>;
|
||||
// Resolves to an object carrying only `text`, which may be undefined.
transcribeAudioFile: (params: TranscribeAudioFileParams) => Promise<{ text: string | undefined }>;
|
||||
};
|
||||
@@ -1,14 +1,28 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { normalizeMediaProviderId } from "./provider-registry.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
type ActiveMediaModel,
|
||||
} from "./runner.js";
|
||||
import type {
|
||||
DescribeImageFileParams,
|
||||
DescribeImageFileWithModelParams,
|
||||
DescribeVideoFileParams,
|
||||
RunMediaUnderstandingFileParams,
|
||||
RunMediaUnderstandingFileResult,
|
||||
TranscribeAudioFileParams,
|
||||
} from "./runtime-types.js";
|
||||
export type {
|
||||
DescribeImageFileParams,
|
||||
DescribeImageFileWithModelParams,
|
||||
DescribeVideoFileParams,
|
||||
RunMediaUnderstandingFileParams,
|
||||
RunMediaUnderstandingFileResult,
|
||||
TranscribeAudioFileParams,
|
||||
} from "./runtime-types.js";
|
||||
|
||||
type MediaUnderstandingCapability = "image" | "audio" | "video";
|
||||
type MediaUnderstandingOutput = Awaited<ReturnType<typeof runCapability>>["outputs"][number];
|
||||
@@ -19,22 +33,6 @@ const KIND_BY_CAPABILITY: Record<MediaUnderstandingCapability, MediaUnderstandin
|
||||
video: "video.description",
|
||||
};
|
||||
|
||||
export type RunMediaUnderstandingFileParams = {
|
||||
capability: MediaUnderstandingCapability;
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
};
|
||||
|
||||
export type RunMediaUnderstandingFileResult = {
|
||||
text: string | undefined;
|
||||
provider?: string;
|
||||
model?: string;
|
||||
output?: MediaUnderstandingOutput;
|
||||
};
|
||||
|
||||
function buildFileContext(params: { filePath: string; mime?: string }) {
|
||||
return {
|
||||
MediaPath: params.filePath,
|
||||
@@ -92,27 +90,13 @@ export async function runMediaUnderstandingFile(
|
||||
}
|
||||
}
|
||||
|
||||
export async function describeImageFile(params: {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
}): Promise<RunMediaUnderstandingFileResult> {
|
||||
/**
 * Runs media understanding on a file with the capability fixed to "image".
 *
 * @param params - Passed through unchanged to `runMediaUnderstandingFile`,
 *   with `capability: "image"` added.
 * @returns The result of `runMediaUnderstandingFile`.
 */
export async function describeImageFile(
|
||||
params: DescribeImageFileParams,
|
||||
): Promise<RunMediaUnderstandingFileResult> {
|
||||
return await runMediaUnderstandingFile({ ...params, capability: "image" });
|
||||
}
|
||||
|
||||
export async function describeImageFileWithModel(params: {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
prompt: string;
|
||||
maxTokens?: number;
|
||||
timeoutMs?: number;
|
||||
}) {
|
||||
export async function describeImageFileWithModel(params: DescribeImageFileWithModelParams) {
|
||||
const timeoutMs = params.timeoutMs ?? 30_000;
|
||||
const providerRegistry = buildProviderRegistry(undefined, params.cfg);
|
||||
const provider = providerRegistry.get(normalizeMediaProviderId(params.provider));
|
||||
@@ -134,25 +118,15 @@ export async function describeImageFileWithModel(params: {
|
||||
});
|
||||
}
|
||||
|
||||
export async function describeVideoFile(params: {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
}): Promise<RunMediaUnderstandingFileResult> {
|
||||
/**
 * Runs media understanding on a file with the capability fixed to "video".
 *
 * @param params - Passed through unchanged to `runMediaUnderstandingFile`,
 *   with `capability: "video"` added.
 * @returns The result of `runMediaUnderstandingFile`.
 */
export async function describeVideoFile(
|
||||
params: DescribeVideoFileParams,
|
||||
): Promise<RunMediaUnderstandingFileResult> {
|
||||
return await runMediaUnderstandingFile({ ...params, capability: "video" });
|
||||
}
|
||||
|
||||
export async function transcribeAudioFile(params: {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
mime?: string;
|
||||
activeModel?: ActiveMediaModel;
|
||||
language?: string;
|
||||
prompt?: string;
|
||||
}): Promise<{ text: string | undefined }> {
|
||||
export async function transcribeAudioFile(
|
||||
params: TranscribeAudioFileParams,
|
||||
): Promise<{ text: string | undefined }> {
|
||||
const cfg =
|
||||
params.language || params.prompt
|
||||
? {
|
||||
|
||||
@@ -4,6 +4,7 @@ import type {
|
||||
} from "../../agents/pi-embedded-runtime.types.js";
|
||||
import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
|
||||
import type { LogLevel } from "../../logging/levels.js";
|
||||
import type { MediaUnderstandingRuntime } from "../../media-understanding/runtime-types.js";
|
||||
|
||||
export type { HeartbeatRunResult };
|
||||
|
||||
@@ -91,11 +92,11 @@ export type PluginRuntimeCore = {
|
||||
listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
|
||||
};
|
||||
mediaUnderstanding: {
|
||||
runFile: typeof import("../../media-understanding/runtime.js").runMediaUnderstandingFile;
|
||||
describeImageFile: typeof import("../../media-understanding/runtime.js").describeImageFile;
|
||||
describeImageFileWithModel: typeof import("../../media-understanding/runtime.js").describeImageFileWithModel;
|
||||
describeVideoFile: typeof import("../../media-understanding/runtime.js").describeVideoFile;
|
||||
transcribeAudioFile: typeof import("../../media-understanding/runtime.js").transcribeAudioFile;
|
||||
runFile: MediaUnderstandingRuntime["runMediaUnderstandingFile"];
|
||||
describeImageFile: MediaUnderstandingRuntime["describeImageFile"];
|
||||
describeImageFileWithModel: MediaUnderstandingRuntime["describeImageFileWithModel"];
|
||||
describeVideoFile: MediaUnderstandingRuntime["describeVideoFile"];
|
||||
transcribeAudioFile: MediaUnderstandingRuntime["transcribeAudioFile"];
|
||||
};
|
||||
imageGeneration: {
|
||||
generate: (
|
||||
|
||||
Reference in New Issue
Block a user