feat(providers): add DeepInfra provider plugin (#73038)

* feat(providers): add DeepInfra provider plugin

* feat(deepinfra): add media provider surfaces

* fix(deepinfra): satisfy provider boundary checks

* docs: add gitcrawl maintainer skill

* test: include deepinfra in live media sweeps

* fix: remove stale tts contract import
This commit is contained in:
Peter Steinberger
2026-04-28 01:12:54 +01:00
committed by GitHub
parent 1fde7dbc0e
commit 0294aebe6f
54 changed files with 2830 additions and 179 deletions

View File

@@ -0,0 +1,8 @@
// Barrel file for the DeepInfra provider plugin: re-exports every public
// surface (catalog builders, onboarding config, and the image / media /
// embedding / speech / video providers) from a single entry point.
export { buildDeepInfraProvider, buildStaticDeepInfraProvider } from "./provider-catalog.js";
export { applyDeepInfraConfig } from "./onboard.js";
export { DEEPINFRA_DEFAULT_MODEL_REF } from "./provider-models.js";
export { buildDeepInfraImageGenerationProvider } from "./image-generation-provider.js";
export { deepinfraMediaUnderstandingProvider } from "./media-understanding-provider.js";
export { deepinfraMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
export { buildDeepInfraSpeechProvider } from "./speech-provider.js";
export { buildDeepInfraVideoGenerationProvider } from "./video-generation-provider.js";

View File

@@ -0,0 +1,33 @@
import {
createRemoteEmbeddingProvider,
resolveRemoteEmbeddingClient,
type MemoryEmbeddingProviderCreateOptions,
type MemoryEmbeddingProviderCreateResult,
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
import {
DEEPINFRA_BASE_URL,
DEFAULT_DEEPINFRA_EMBEDDING_MODEL,
normalizeDeepInfraModelRef,
} from "./media-models.js";
export { DEFAULT_DEEPINFRA_EMBEDDING_MODEL };
/**
 * Creates the DeepInfra-backed memory embedding provider.
 *
 * Resolves a remote embedding client for the "deepinfra" provider (normalizing
 * any "deepinfra/"-prefixed model ref, defaulting to the configured embedding
 * model) and wraps it in the shared remote embedding provider. The resolved
 * client is returned alongside the provider so callers can read the model that
 * was actually selected (e.g. for cache keys).
 */
export async function createDeepInfraEmbeddingProvider(
  options: MemoryEmbeddingProviderCreateOptions,
): Promise<MemoryEmbeddingProviderCreateResult & { client: { model: string } }> {
  // Single place for the model-ref normalization used both for the initial
  // options and as the client's normalizeModel hook.
  const toDeepInfraModel = (model: string | undefined) =>
    normalizeDeepInfraModelRef(model, DEFAULT_DEEPINFRA_EMBEDDING_MODEL);
  const client = await resolveRemoteEmbeddingClient({
    provider: "deepinfra",
    defaultBaseUrl: DEEPINFRA_BASE_URL,
    normalizeModel: toDeepInfraModel,
    options: {
      ...options,
      model: toDeepInfraModel(options.model),
    },
  });
  return {
    provider: createRemoteEmbeddingProvider({
      id: "deepinfra",
      client,
      errorPrefix: "DeepInfra embeddings API error",
    }),
    client,
  };
}

View File

@@ -0,0 +1,148 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildDeepInfraImageGenerationProvider } from "./image-generation-provider.js";
// Hoisted mock fns so the vi.mock factories below can reference them — vitest
// hoists vi.mock calls above imports, so the mocks must be created via
// vi.hoisted to exist at factory-evaluation time.
const {
assertOkOrThrowHttpErrorMock,
postJsonRequestMock,
postMultipartRequestMock,
resolveApiKeyForProviderMock,
resolveProviderHttpRequestConfigMock,
} = vi.hoisted(() => ({
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
postJsonRequestMock: vi.fn(),
postMultipartRequestMock: vi.fn(),
resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "deepinfra-key" })),
// Mirrors the real resolver's output shape: echoes the configured baseUrl
// (or the default) and carries the default headers through unchanged.
resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://api.deepinfra.com/v1/openai",
allowPrivateNetwork: false,
headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
dispatcherPolicy: undefined,
})),
}));
// Replace the SDK auth + HTTP modules with the hoisted mocks.
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
resolveApiKeyForProvider: resolveApiKeyForProviderMock,
}));
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
postJsonRequest: postJsonRequestMock,
postMultipartRequest: postMultipartRequestMock,
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
// Pass-through sanitizer: requests are returned unchanged in tests.
sanitizeConfiguredModelProviderRequest: vi.fn((request) => request),
}));
describe("deepinfra image generation provider", () => {
// Reset call history between tests; mockReset additionally drops any queued
// resolved values on the request mocks.
afterEach(() => {
assertOkOrThrowHttpErrorMock.mockClear();
postJsonRequestMock.mockReset();
postMultipartRequestMock.mockReset();
resolveApiKeyForProviderMock.mockClear();
resolveProviderHttpRequestConfigMock.mockClear();
});
it("declares generation and single-reference edit support", () => {
const provider = buildDeepInfraImageGenerationProvider();
expect(provider.id).toBe("deepinfra");
expect(provider.defaultModel).toBe("black-forest-labs/FLUX-1-schnell");
expect(provider.capabilities.generate.maxCount).toBe(4);
expect(provider.capabilities.edit.enabled).toBe(true);
expect(provider.capabilities.edit.maxInputImages).toBe(1);
});
it("sends OpenAI-compatible image generation requests and sniffs JPEG output", async () => {
const release = vi.fn(async () => {});
// FF D8 FF prefix marks the payload as JPEG for the provider's mime sniffer.
const jpegBytes = Buffer.from([0xff, 0xd8, 0xff, 0x00]);
postJsonRequestMock.mockResolvedValue({
response: {
json: async () => ({
data: [{ b64_json: jpegBytes.toString("base64"), revised_prompt: "red square" }],
}),
},
release,
});
const provider = buildDeepInfraImageGenerationProvider();
const result = await provider.generateImage({
provider: "deepinfra",
model: "deepinfra/black-forest-labs/FLUX-1-schnell",
prompt: "red square",
count: 2,
size: "512x512",
timeoutMs: 12_345,
cfg: {
models: {
providers: {
deepinfra: {
baseUrl: "https://api.deepinfra.com/v1/openai/",
},
},
},
} as never,
});
// The trailing slash in the configured baseUrl must be normalized away
// before the request config is resolved.
expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
expect.objectContaining({
provider: "deepinfra",
capability: "image",
baseUrl: "https://api.deepinfra.com/v1/openai",
}),
);
// The "deepinfra/" model prefix is stripped for the wire request.
expect(postJsonRequestMock).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://api.deepinfra.com/v1/openai/images/generations",
timeoutMs: 12_345,
body: {
model: "black-forest-labs/FLUX-1-schnell",
prompt: "red square",
n: 2,
size: "512x512",
response_format: "b64_json",
},
}),
);
expect(result.images[0]?.mimeType).toBe("image/jpeg");
expect(result.images[0]?.fileName).toBe("image-1.jpg");
expect(result.images[0]?.revisedPrompt).toBe("red square");
// The pooled request handle must be released even on success.
expect(release).toHaveBeenCalledOnce();
});
it("sends image edits as multipart OpenAI-compatible requests", async () => {
postMultipartRequestMock.mockResolvedValue({
response: {
json: async () => ({
data: [
{
// 8-byte PNG signature so the sniffer reports image/png.
b64_json: Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]).toString(
"base64",
),
},
],
}),
},
release: vi.fn(async () => {}),
});
const provider = buildDeepInfraImageGenerationProvider();
const result = await provider.generateImage({
provider: "deepinfra",
model: "black-forest-labs/FLUX-1-schnell",
prompt: "make it neon",
inputImages: [{ buffer: Buffer.from("source"), mimeType: "image/png" }],
cfg: {} as never,
});
expect(postMultipartRequestMock).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://api.deepinfra.com/v1/openai/images/edits",
}),
);
// Inspect the multipart form the provider assembled.
const form = postMultipartRequestMock.mock.calls[0]?.[0].body as FormData;
expect(form.get("model")).toBe("black-forest-labs/FLUX-1-schnell");
expect(form.get("prompt")).toBe("make it neon");
expect(form.get("response_format")).toBe("b64_json");
expect(form.get("image")).toBeInstanceOf(File);
expect(result.images[0]?.mimeType).toBe("image/png");
});
});

View File

@@ -0,0 +1,250 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
import type {
GeneratedImageAsset,
ImageGenerationProvider,
ImageGenerationSourceImage,
} from "openclaw/plugin-sdk/image-generation";
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
assertOkOrThrowHttpError,
postJsonRequest,
postMultipartRequest,
resolveProviderHttpRequestConfig,
sanitizeConfiguredModelProviderRequest,
} from "openclaw/plugin-sdk/provider-http";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import {
DEEPINFRA_BASE_URL,
DEEPINFRA_IMAGE_MODELS,
DEFAULT_DEEPINFRA_IMAGE_MODEL,
DEFAULT_DEEPINFRA_IMAGE_SIZE,
normalizeDeepInfraBaseUrl,
normalizeDeepInfraModelRef,
} from "./media-models.js";
// Image sizes offered for DeepInfra's OpenAI-compatible images endpoint.
const DEEPINFRA_IMAGE_SIZES = ["512x512", "1024x1024", "1024x1792", "1792x1024"] as const;
// Edits accept exactly one reference image (enforced in generateImage).
const MAX_DEEPINFRA_INPUT_IMAGES = 1;
// Per-provider config shape as stored under models.providers.deepinfra.
type DeepInfraProviderConfig = NonNullable<
NonNullable<OpenClawConfig["models"]>["providers"]
>[string];
// Subset of the OpenAI-compatible image response this provider reads.
type DeepInfraImageApiResponse = {
data?: Array<{
b64_json?: string;
revised_prompt?: string;
url?: string;
}>;
};
// Reads the deepinfra provider block from config, if present.
function resolveDeepInfraProviderConfig(
cfg: OpenClawConfig | undefined,
): DeepInfraProviderConfig | undefined {
return cfg?.models?.providers?.deepinfra;
}
/**
 * Sniffs the mime type of a generated image from its magic bytes.
 *
 * Recognizes JPEG (FF D8 FF), PNG (full 8-byte signature), and WEBP
 * (RIFF container with "WEBP" form type). Unknown signatures fall back to
 * JPEG, matching the fallback the original implementation used.
 *
 * Fix: the original PNG branch required buffer.length >= 8 but only compared
 * the first 4 signature bytes; this version verifies the complete 8-byte PNG
 * signature (89 50 4E 47 0D 0A 1A 0A) so corrupt payloads that merely start
 * with "\x89PNG" are not misreported as PNG.
 */
function detectImageMimeType(buffer: Buffer): {
  mimeType: string;
  extension: "jpg" | "png" | "webp";
} {
  // JPEG: FF D8 FF
  if (buffer.length >= 3 && buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) {
    return { mimeType: "image/jpeg", extension: "jpg" };
  }
  // PNG: the complete 8-byte file signature.
  const pngSignature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  if (
    buffer.length >= pngSignature.length &&
    pngSignature.every((byte, index) => buffer[index] === byte)
  ) {
    return { mimeType: "image/png", extension: "png" };
  }
  // WEBP: RIFF chunk header followed by the "WEBP" form type at offset 8.
  if (
    buffer.length >= 12 &&
    buffer.toString("ascii", 0, 4) === "RIFF" &&
    buffer.toString("ascii", 8, 12) === "WEBP"
  ) {
    return { mimeType: "image/webp", extension: "webp" };
  }
  // Unknown signature: assume JPEG, the endpoint's common output format.
  return { mimeType: "image/jpeg", extension: "jpg" };
}
/**
 * Chooses the multipart upload file name for a reference image: the image's
 * own fileName when present, otherwise "image-<n>.<ext>" with the extension
 * derived from its mime type (defaulting to PNG).
 */
function imageToUploadName(image: ImageGenerationSourceImage, index: number): string {
  const explicitName = normalizeOptionalString(image.fileName);
  if (explicitName) {
    return explicitName;
  }
  const mime = normalizeOptionalString(image.mimeType) ?? "image/png";
  let extension = "png";
  if (mime === "image/jpeg" || mime === "image/jpg") {
    extension = "jpg";
  } else if (mime === "image/webp") {
    extension = "webp";
  }
  return `image-${index + 1}.${extension}`;
}
/**
 * Converts a single API response entry into a GeneratedImageAsset.
 * Returns null when the entry carries no base64 payload; the mime type and
 * file extension are sniffed from the decoded bytes.
 */
function imageToAsset(
  entry: NonNullable<DeepInfraImageApiResponse["data"]>[number],
  index: number,
): GeneratedImageAsset | null {
  const encoded = normalizeOptionalString(entry.b64_json);
  if (!encoded) {
    return null;
  }
  const buffer = Buffer.from(encoded, "base64");
  const { mimeType, extension } = detectImageMimeType(buffer);
  const asset: GeneratedImageAsset = {
    buffer,
    mimeType,
    fileName: `image-${index + 1}.${extension}`,
  };
  // Attach the revised prompt only when the API actually returned one.
  const revisedPrompt = normalizeOptionalString(entry.revised_prompt);
  if (revisedPrompt) {
    asset.revisedPrompt = revisedPrompt;
  }
  return asset;
}
/** Extracts every decodable image asset from a DeepInfra image API payload. */
function parseImageResponse(payload: DeepInfraImageApiResponse): GeneratedImageAsset[] {
  const assets: GeneratedImageAsset[] = [];
  const entries = payload.data ?? [];
  for (let index = 0; index < entries.length; index += 1) {
    const asset = imageToAsset(entries[index], index);
    if (asset !== null) {
      assets.push(asset);
    }
  }
  return assets;
}
/**
 * Builds the DeepInfra image generation provider.
 *
 * Speaks the OpenAI-compatible images API: a JSON POST to images/generations
 * for text-to-image, or a multipart POST to images/edits when a reference
 * image is supplied. Output is requested as base64 and the mime type is
 * sniffed from the decoded bytes.
 */
export function buildDeepInfraImageGenerationProvider(): ImageGenerationProvider {
return {
id: "deepinfra",
label: "DeepInfra",
defaultModel: DEFAULT_DEEPINFRA_IMAGE_MODEL,
models: [...DEEPINFRA_IMAGE_MODELS],
// Configured iff an API key exists for this provider/agent dir.
isConfigured: ({ agentDir }) =>
isProviderApiKeyConfigured({
provider: "deepinfra",
agentDir,
}),
capabilities: {
generate: {
maxCount: 4,
supportsSize: true,
supportsAspectRatio: false,
supportsResolution: false,
},
edit: {
enabled: true,
maxCount: 1,
maxInputImages: MAX_DEEPINFRA_INPUT_IMAGES,
supportsSize: true,
supportsAspectRatio: false,
supportsResolution: false,
},
geometry: {
sizes: [...DEEPINFRA_IMAGE_SIZES],
},
},
async generateImage(req) {
// Presence of input images switches the request to the edit endpoint.
const inputImages = req.inputImages ?? [];
const isEdit = inputImages.length > 0;
if (inputImages.length > MAX_DEEPINFRA_INPUT_IMAGES) {
throw new Error("DeepInfra image editing supports one reference image.");
}
const auth = await resolveApiKeyForProvider({
provider: "deepinfra",
cfg: req.cfg,
agentDir: req.agentDir,
store: req.authStore,
});
if (!auth.apiKey) {
throw new Error("DeepInfra API key missing");
}
// Resolve base URL, headers, and network policy from provider config;
// a configured baseUrl has trailing slashes stripped first.
const providerConfig = resolveDeepInfraProviderConfig(req.cfg);
const resolvedBaseUrl = normalizeDeepInfraBaseUrl(
providerConfig?.baseUrl,
DEEPINFRA_BASE_URL,
);
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
resolveProviderHttpRequestConfig({
baseUrl: resolvedBaseUrl,
defaultBaseUrl: DEEPINFRA_BASE_URL,
allowPrivateNetwork: false,
request: sanitizeConfiguredModelProviderRequest(providerConfig?.request),
defaultHeaders: {
Authorization: `Bearer ${auth.apiKey}`,
},
provider: "deepinfra",
capability: "image",
transport: "http",
});
// Strip a "deepinfra/" prefix so the wire model id is the native one.
const model = normalizeDeepInfraModelRef(req.model, DEFAULT_DEEPINFRA_IMAGE_MODEL);
// Edits always produce a single output image.
const count = isEdit ? 1 : (req.count ?? 1);
const size = normalizeOptionalString(req.size) ?? DEFAULT_DEEPINFRA_IMAGE_SIZE;
const endpoint = isEdit ? "images/edits" : "images/generations";
const request = isEdit
? (() => {
// Edit path: multipart form with the reference image attached.
const form = new FormData();
form.set("model", model);
form.set("prompt", req.prompt);
form.set("n", String(count));
form.set("size", size);
form.set("response_format", "b64_json");
const image = inputImages[0];
if (!image) {
throw new Error("DeepInfra image edit missing reference image.");
}
const mimeType = normalizeOptionalString(image.mimeType) ?? "image/png";
form.append(
"image",
new Blob([new Uint8Array(image.buffer)], { type: mimeType }),
imageToUploadName(image, 0),
);
// Drop any preset Content-Type so fetch can set the multipart boundary.
const multipartHeaders = new Headers(headers);
multipartHeaders.delete("Content-Type");
return postMultipartRequest({
url: `${baseUrl}/${endpoint}`,
headers: multipartHeaders,
body: form,
timeoutMs: req.timeoutMs,
fetchFn: fetch,
allowPrivateNetwork,
dispatcherPolicy,
});
})()
: postJsonRequest({
url: `${baseUrl}/${endpoint}`,
headers: new Headers({
...Object.fromEntries(headers.entries()),
"Content-Type": "application/json",
}),
body: {
model,
prompt: req.prompt,
n: count,
size,
response_format: "b64_json",
},
timeoutMs: req.timeoutMs,
fetchFn: fetch,
allowPrivateNetwork,
dispatcherPolicy,
});
const { response, release } = await request;
try {
await assertOkOrThrowHttpError(
response,
isEdit ? "DeepInfra image edit failed" : "DeepInfra image generation failed",
);
const images = parseImageResponse((await response.json()) as DeepInfraImageApiResponse);
if (images.length === 0) {
throw new Error("DeepInfra image response did not include generated image data");
}
return { images, model };
} finally {
// Always release the pooled request handle, even on error.
await release();
}
},
};
}

View File

@@ -0,0 +1,113 @@
import { describe, expect, it } from "vitest";
import {
createCapturedPluginRegistration,
registerSingleProviderPlugin,
} from "openclaw/plugin-sdk/testing";
import deepinfraPlugin from "./index.js";
describe("deepinfra augmentModelCatalog", () => {
it("returns empty when no configured catalog entries", async () => {
const provider = await registerSingleProviderPlugin(deepinfraPlugin);
const entries = await provider.augmentModelCatalog?.({} as never);
expect(entries).toEqual([]);
});
it("returns configured catalog entries from config", async () => {
const provider = await registerSingleProviderPlugin(deepinfraPlugin);
const entries = await provider.augmentModelCatalog?.({
config: {
models: {
providers: {
deepinfra: {
models: [
{
id: "zai-org/GLM-5.1",
name: "GLM-5.1",
input: ["text"],
reasoning: true,
contextWindow: 202752,
},
],
},
},
},
},
} as never);
// Entries are re-emitted with the provider id attached.
expect(entries).toEqual([
{
provider: "deepinfra",
id: "zai-org/GLM-5.1",
name: "GLM-5.1",
input: ["text"],
reasoning: true,
contextWindow: 202752,
},
]);
});
});
describe("deepinfra capability registration", () => {
// Every media surface must register exactly one "deepinfra" provider.
it("registers all DeepInfra-backed OpenClaw provider surfaces", () => {
const captured = createCapturedPluginRegistration();
deepinfraPlugin.register(captured.api);
expect(captured.providers.map((provider) => provider.id)).toEqual(["deepinfra"]);
expect(captured.imageGenerationProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
expect(captured.mediaUnderstandingProviders.map((provider) => provider.id)).toEqual([
"deepinfra",
]);
expect(captured.memoryEmbeddingProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
expect(captured.speechProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
expect(captured.videoGenerationProviders.map((provider) => provider.id)).toEqual(["deepinfra"]);
});
});
describe("deepinfra isCacheTtlEligible", () => {
it("returns true for anthropic/* proxied models", async () => {
const provider = await registerSingleProviderPlugin(deepinfraPlugin);
expect(
provider.isCacheTtlEligible?.({
provider: "deepinfra",
modelId: "anthropic/claude-4-sonnet",
}),
).toBe(true);
});
// Locked to case-insensitive to stay consistent with the shared proxy cache
// wrapper, which lowercases the modelId before the "anthropic/" prefix check.
it("returns true regardless of modelId case", async () => {
const provider = await registerSingleProviderPlugin(deepinfraPlugin);
expect(
provider.isCacheTtlEligible?.({
provider: "deepinfra",
modelId: "Anthropic/Claude-4-Sonnet",
}),
).toBe(true);
expect(
provider.isCacheTtlEligible?.({
provider: "deepinfra",
modelId: "ANTHROPIC/claude-4-sonnet",
}),
).toBe(true);
});
it("returns false for non-anthropic models", async () => {
const provider = await registerSingleProviderPlugin(deepinfraPlugin);
expect(
provider.isCacheTtlEligible?.({
provider: "deepinfra",
modelId: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
}),
).toBe(false);
expect(
provider.isCacheTtlEligible?.({
provider: "deepinfra",
modelId: "zai-org/GLM-5.1",
}),
).toBe(false);
});
});

View File

@@ -0,0 +1,84 @@
import { readConfiguredProviderCatalogEntries } from "openclaw/plugin-sdk/provider-catalog-shared";
import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
import { PASSTHROUGH_GEMINI_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared";
import {
createOpenRouterSystemCacheWrapper,
createOpenRouterWrapper,
isProxyReasoningUnsupported,
} from "openclaw/plugin-sdk/provider-stream";
import { buildDeepInfraImageGenerationProvider } from "./image-generation-provider.js";
import { deepinfraMediaUnderstandingProvider } from "./media-understanding-provider.js";
import { deepinfraMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
import { applyDeepInfraConfig } from "./onboard.js";
import { buildDeepInfraProvider, buildStaticDeepInfraProvider } from "./provider-catalog.js";
import { DEEPINFRA_DEFAULT_MODEL_REF } from "./provider-models.js";
import { buildDeepInfraSpeechProvider } from "./speech-provider.js";
import { buildDeepInfraVideoGenerationProvider } from "./video-generation-provider.js";
const PROVIDER_ID = "deepinfra";
/**
 * Plugin entry for the DeepInfra provider: declares API-key onboarding,
 * catalog wiring, stream wrappers, and registers the media surfaces.
 */
export default defineSingleProviderPluginEntry({
id: PROVIDER_ID,
name: "DeepInfra Provider",
description: "Bundled DeepInfra provider plugin",
provider: {
label: "DeepInfra",
docsPath: "/providers/deepinfra",
auth: [
{
methodId: "api-key",
label: "DeepInfra API key",
hint: "Unified API for open source models",
optionKey: "deepinfraApiKey",
flagName: "--deepinfra-api-key",
envVar: "DEEPINFRA_API_KEY",
promptMessage: "Enter DeepInfra API key",
noteTitle: "DeepInfra",
noteMessage: [
"DeepInfra provides an OpenAI-compatible API for open source and frontier models.",
"Get your API key at: https://deepinfra.com/dash/api_keys",
].join("\n"),
defaultModel: DEEPINFRA_DEFAULT_MODEL_REF,
applyConfig: (cfg) => applyDeepInfraConfig(cfg),
wizard: {
choiceId: "deepinfra-api-key",
choiceLabel: "DeepInfra API key",
choiceHint: "Unified API for open source models",
groupId: PROVIDER_ID,
groupLabel: "DeepInfra",
groupHint: "Unified API for open source models",
},
},
],
catalog: {
buildProvider: buildDeepInfraProvider,
buildStaticProvider: buildStaticDeepInfraProvider,
},
// Surfaces user-configured catalog entries from models.providers.deepinfra.
augmentModelCatalog: ({ config }) =>
readConfiguredProviderCatalogEntries({
config,
providerId: PROVIDER_ID,
}),
normalizeConfig: ({ providerConfig }) => providerConfig,
// Only the canonical OpenAI-compatible endpoint keeps this transport;
// any other baseUrl falls through (returns undefined).
normalizeTransport: ({ api, baseUrl }) =>
baseUrl === "https://api.deepinfra.com/v1/openai" ? { api, baseUrl } : undefined,
...PASSTHROUGH_GEMINI_REPLAY_HOOKS,
// Drop the thinking level for models flagged as lacking proxy reasoning
// support, then apply the OpenRouter-style stream + system-cache wrappers.
wrapStreamFn: (ctx) => {
const thinkingLevel = isProxyReasoningUnsupported(ctx.modelId)
? undefined
: ctx.thinkingLevel;
return createOpenRouterSystemCacheWrapper(
createOpenRouterWrapper(ctx.streamFn, thinkingLevel),
);
},
isModernModelRef: () => true,
// Cache TTL only applies to proxied Anthropic models (case-insensitive).
isCacheTtlEligible: (ctx) => ctx.modelId.toLowerCase().startsWith("anthropic/"),
},
register(api) {
api.registerImageGenerationProvider(buildDeepInfraImageGenerationProvider());
api.registerMediaUnderstandingProvider(deepinfraMediaUnderstandingProvider);
api.registerMemoryEmbeddingProvider(deepinfraMemoryEmbeddingProviderAdapter);
api.registerSpeechProvider(buildDeepInfraSpeechProvider());
api.registerVideoGenerationProvider(buildDeepInfraVideoGenerationProvider());
},
});

View File

@@ -0,0 +1,50 @@
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import { DEEPINFRA_BASE_URL } from "./provider-models.js";
export { DEEPINFRA_BASE_URL };
// Native (non-OpenAI-compatible) DeepInfra inference endpoint.
export const DEEPINFRA_NATIVE_BASE_URL = "https://api.deepinfra.com/v1/inference";
export const DEFAULT_DEEPINFRA_IMAGE_MODEL = "black-forest-labs/FLUX-1-schnell";
export const DEFAULT_DEEPINFRA_IMAGE_SIZE = "1024x1024";
// Curated image models surfaced to pickers; the first entry is the default.
export const DEEPINFRA_IMAGE_MODELS = [
DEFAULT_DEEPINFRA_IMAGE_MODEL,
"run-diffusion/Juggernaut-Lightning-Flux",
"black-forest-labs/FLUX-1-dev",
"Qwen/Qwen-Image-Max",
"stabilityai/sdxl-turbo",
] as const;
// Defaults for the non-image media surfaces (embeddings, transcription,
// image understanding, TTS).
export const DEFAULT_DEEPINFRA_EMBEDDING_MODEL = "BAAI/bge-m3";
export const DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL = "openai/whisper-large-v3-turbo";
export const DEFAULT_DEEPINFRA_IMAGE_UNDERSTANDING_MODEL = "moonshotai/Kimi-K2.5";
export const DEFAULT_DEEPINFRA_TTS_MODEL = "hexgrad/Kokoro-82M";
export const DEFAULT_DEEPINFRA_TTS_VOICE = "af_alloy";
export const DEEPINFRA_TTS_MODELS = [
DEFAULT_DEEPINFRA_TTS_MODEL,
"ResembleAI/chatterbox-turbo",
"sesame/csm-1b",
"Qwen/Qwen3-TTS",
] as const;
export const DEFAULT_DEEPINFRA_VIDEO_MODEL = "Pixverse/Pixverse-T2V";
export const DEEPINFRA_VIDEO_MODELS = [
DEFAULT_DEEPINFRA_VIDEO_MODEL,
"Pixverse/Pixverse-T2V-HD",
"Wan-AI/Wan2.1-T2V-1.3B",
"google/veo-3.0-fast",
] as const;
// Aspect ratios and clip durations (seconds) offered for video generation.
export const DEEPINFRA_VIDEO_ASPECT_RATIOS = ["16:9", "4:3", "1:1", "3:4", "9:16"] as const;
export const DEEPINFRA_VIDEO_DURATIONS = [5, 8] as const;
/**
 * Normalizes a model reference to DeepInfra's native id: trims/defaults the
 * input via the fallback, then strips a leading "deepinfra/" provider prefix
 * if present.
 */
export function normalizeDeepInfraModelRef(model: string | undefined, fallback: string): string {
  const prefix = "deepinfra/";
  const ref = normalizeOptionalString(model) ?? fallback;
  if (!ref.startsWith(prefix)) {
    return ref;
  }
  return ref.slice(prefix.length);
}
/**
 * Normalizes a configured base URL: falls back to the default endpoint when
 * the value is empty/non-string, and strips any trailing slashes.
 */
export function normalizeDeepInfraBaseUrl(value: unknown, fallback = DEEPINFRA_BASE_URL): string {
  const raw = normalizeOptionalString(value) ?? fallback;
  let end = raw.length;
  while (end > 0 && raw.charAt(end - 1) === "/") {
    end -= 1;
  }
  return raw.slice(0, end);
}

View File

@@ -0,0 +1,52 @@
import { describe, expect, it, vi } from "vitest";
import {
deepinfraMediaUnderstandingProvider,
transcribeDeepInfraAudio,
} from "./media-understanding-provider.js";
// Hoisted so the vi.mock factory below can reference the mock fn.
const { transcribeOpenAiCompatibleAudioMock } = vi.hoisted(() => ({
transcribeOpenAiCompatibleAudioMock: vi.fn(async () => ({ text: "hello", model: "whisper" })),
}));
// Partial mock: keep the real module surface, stub only audio transcription.
vi.mock("openclaw/plugin-sdk/media-understanding", async () => {
const actual = await vi.importActual<typeof import("openclaw/plugin-sdk/media-understanding")>(
"openclaw/plugin-sdk/media-understanding",
);
return {
...actual,
transcribeOpenAiCompatibleAudio: transcribeOpenAiCompatibleAudioMock,
};
});
describe("deepinfra media understanding provider", () => {
// Shape check on the exported provider object.
it("declares image and audio defaults", () => {
expect(deepinfraMediaUnderstandingProvider).toMatchObject({
id: "deepinfra",
capabilities: ["image", "audio"],
defaultModels: {
image: "moonshotai/Kimi-K2.5",
audio: "openai/whisper-large-v3-turbo",
},
});
expect(deepinfraMediaUnderstandingProvider.describeImage).toBeTypeOf("function");
expect(deepinfraMediaUnderstandingProvider.describeImages).toBeTypeOf("function");
});
it("routes audio transcription through the OpenAI-compatible DeepInfra endpoint", async () => {
const result = await transcribeDeepInfraAudio({
buffer: Buffer.from("audio"),
fileName: "clip.mp3",
apiKey: "deepinfra-key",
timeoutMs: 30_000,
});
expect(result.text).toBe("hello");
// Provider id, base URL, and default model must be filled in for the SDK call.
expect(transcribeOpenAiCompatibleAudioMock).toHaveBeenCalledWith(
expect.objectContaining({
provider: "deepinfra",
defaultBaseUrl: "https://api.deepinfra.com/v1/openai",
defaultModel: "openai/whisper-large-v3-turbo",
}),
);
});
});

View File

@@ -0,0 +1,37 @@
import {
describeImageWithModel,
describeImagesWithModel,
transcribeOpenAiCompatibleAudio,
type AudioTranscriptionRequest,
type MediaUnderstandingProvider,
} from "openclaw/plugin-sdk/media-understanding";
import {
DEEPINFRA_BASE_URL,
DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL,
DEFAULT_DEEPINFRA_IMAGE_UNDERSTANDING_MODEL,
} from "./media-models.js";
/**
 * Transcribes audio via DeepInfra's OpenAI-compatible transcription endpoint,
 * filling in the provider id, base URL, and default Whisper model before
 * delegating to the shared SDK helper.
 */
export async function transcribeDeepInfraAudio(params: AudioTranscriptionRequest) {
  const request = {
    ...params,
    provider: "deepinfra" as const,
    defaultBaseUrl: DEEPINFRA_BASE_URL,
    defaultModel: DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL,
  };
  return await transcribeOpenAiCompatibleAudio(request);
}
/**
 * Media understanding surface for DeepInfra: image description via the shared
 * model-backed helpers and audio transcription via the OpenAI-compatible
 * endpoint above.
 */
export const deepinfraMediaUnderstandingProvider: MediaUnderstandingProvider = {
id: "deepinfra",
capabilities: ["image", "audio"],
defaultModels: {
image: DEFAULT_DEEPINFRA_IMAGE_UNDERSTANDING_MODEL,
audio: DEFAULT_DEEPINFRA_AUDIO_TRANSCRIPTION_MODEL,
},
// Mid-tier auto-selection priority for both capabilities.
autoPriority: {
image: 45,
audio: 45,
},
transcribeAudio: transcribeDeepInfraAudio,
describeImage: describeImageWithModel,
describeImages: describeImagesWithModel,
};

View File

@@ -0,0 +1,18 @@
import { describe, expect, it } from "vitest";
import { deepinfraMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
describe("deepinfra memory embedding adapter", () => {
// Shape-only check: the adapter advertises a remote, auth-backed embedding
// provider whose credentials come from the "deepinfra" auth provider.
it("declares a remote auth-backed embedding provider", () => {
expect(deepinfraMemoryEmbeddingProviderAdapter).toMatchObject({
id: "deepinfra",
defaultModel: "BAAI/bge-m3",
transport: "remote",
authProviderId: "deepinfra",
autoSelectPriority: 55,
allowExplicitWhenConfiguredAuto: true,
});
expect(deepinfraMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection).toBeTypeOf(
"function",
);
});
});

View File

@@ -0,0 +1,35 @@
import {
isMissingEmbeddingApiKeyError,
type MemoryEmbeddingProviderAdapter,
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
import {
createDeepInfraEmbeddingProvider,
DEFAULT_DEEPINFRA_EMBEDDING_MODEL,
} from "./embedding-provider.js";
/**
 * Memory embedding adapter wiring DeepInfra's remote embedding API into the
 * host engine's provider auto-selection machinery.
 */
export const deepinfraMemoryEmbeddingProviderAdapter: MemoryEmbeddingProviderAdapter = {
id: "deepinfra",
defaultModel: DEFAULT_DEEPINFRA_EMBEDDING_MODEL,
transport: "remote",
authProviderId: "deepinfra",
autoSelectPriority: 55,
allowExplicitWhenConfiguredAuto: true,
// Keep trying other providers when the only failure is a missing API key.
shouldContinueAutoSelection: isMissingEmbeddingApiKeyError,
create: async (options) => {
const { provider, client } = await createDeepInfraEmbeddingProvider({
...options,
provider: "deepinfra",
fallback: "none",
});
return {
provider,
runtime: {
id: "deepinfra",
// Cache key includes the resolved model so model changes invalidate.
cacheKeyData: {
provider: "deepinfra",
model: client.model,
},
},
};
},
};

View File

@@ -0,0 +1,165 @@
import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime";
import {
type OpenClawConfig,
resolveAgentModelPrimaryValue,
} from "openclaw/plugin-sdk/provider-onboard";
import { captureEnv } from "openclaw/plugin-sdk/testing";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
applyDeepInfraProviderConfig,
applyDeepInfraConfig,
DEEPINFRA_BASE_URL,
DEEPINFRA_DEFAULT_MODEL_REF,
} from "./onboard.js";
import { DEEPINFRA_DEFAULT_MODEL_ID } from "./provider-models.js";
const { resolveEnvApiKey } = providerAuth;
// Shared empty config fixture; applyDeepInfra* functions return new objects.
const emptyCfg: OpenClawConfig = {};
describe("DeepInfra provider config", () => {
describe("constants", () => {
it("DEEPINFRA_BASE_URL points to deepinfra openai endpoint", () => {
expect(DEEPINFRA_BASE_URL).toBe("https://api.deepinfra.com/v1/openai");
});
it("DEEPINFRA_DEFAULT_MODEL_REF includes provider prefix", () => {
expect(DEEPINFRA_DEFAULT_MODEL_REF).toBe("deepinfra/deepseek-ai/DeepSeek-V3.2");
});
it("DEEPINFRA_DEFAULT_MODEL_ID is deepseek-ai/DeepSeek-V3.2", () => {
expect(DEEPINFRA_DEFAULT_MODEL_ID).toBe("deepseek-ai/DeepSeek-V3.2");
});
});
describe("applyDeepInfraProviderConfig", () => {
it("does not set provider models (discovery populates them at runtime)", () => {
const result = applyDeepInfraProviderConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
expect(result.models?.providers?.deepinfra).toBeUndefined();
});
it("sets DeepInfra alias on the provided model ref", () => {
const result = applyDeepInfraProviderConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF];
expect(agentModel).toBeDefined();
expect(agentModel?.alias).toBe("DeepInfra");
});
it("attaches the alias to a non-default model ref when provided", () => {
const fallbackRef = "deepinfra/other/awesome-model";
const result = applyDeepInfraProviderConfig(emptyCfg, fallbackRef);
expect(result.agents?.defaults?.models?.[fallbackRef]?.alias).toBe("DeepInfra");
expect(result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF]).toBeUndefined();
});
it("preserves existing alias if already set", () => {
const cfg: OpenClawConfig = {
agents: {
defaults: {
models: {
[DEEPINFRA_DEFAULT_MODEL_REF]: { alias: "My Custom Alias" },
},
},
},
};
const result = applyDeepInfraProviderConfig(cfg, DEEPINFRA_DEFAULT_MODEL_REF);
const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF];
expect(agentModel?.alias).toBe("My Custom Alias");
});
it("does not change the default model selection", () => {
const cfg: OpenClawConfig = {
agents: {
defaults: {
model: { primary: "openai/gpt-5" },
},
},
};
const result = applyDeepInfraProviderConfig(cfg, DEEPINFRA_DEFAULT_MODEL_REF);
expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe("openai/gpt-5");
});
});
describe("applyDeepInfraConfig", () => {
// Unlike applyDeepInfraProviderConfig, this variant also promotes the ref
// to the primary default model.
it("sets the provided model ref as the primary default", () => {
const result = applyDeepInfraConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe(
DEEPINFRA_DEFAULT_MODEL_REF,
);
});
it("sets the DeepInfra alias on the provided ref", () => {
const result = applyDeepInfraConfig(emptyCfg, DEEPINFRA_DEFAULT_MODEL_REF);
const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF];
expect(agentModel?.alias).toBe("DeepInfra");
});
it("honors a fallback ref when discovery picked a non-default model", () => {
const fallbackRef = "deepinfra/other/awesome-model";
const result = applyDeepInfraConfig(emptyCfg, fallbackRef);
expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe(fallbackRef);
expect(result.agents?.defaults?.models?.[fallbackRef]?.alias).toBe("DeepInfra");
});
});
describe("env var resolution", () => {
afterEach(() => {
vi.restoreAllMocks();
});
// captureEnv snapshots/restores process.env so tests stay isolated.
it("resolves DEEPINFRA_API_KEY from env", () => {
const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]);
process.env.DEEPINFRA_API_KEY = "test-deepinfra-key";
try {
const result = resolveEnvApiKey("deepinfra");
expect(result).not.toBeNull();
expect(result?.apiKey).toBe("test-deepinfra-key");
expect(result?.source).toContain("DEEPINFRA_API_KEY");
} finally {
envSnapshot.restore();
}
});
it("returns null when DEEPINFRA_API_KEY is not set", () => {
const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]);
delete process.env.DEEPINFRA_API_KEY;
try {
const result = resolveEnvApiKey("deepinfra");
expect(result).toBeNull();
} finally {
envSnapshot.restore();
}
});
it("resolves the deepinfra api key via resolveApiKeyForProvider", async () => {
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]);
process.env.DEEPINFRA_API_KEY = "deepinfra-provider-test-key";
// Spy replaces the real resolver; this asserts the call contract only.
const spy = vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
apiKey: "deepinfra-provider-test-key",
source: "env: DEEPINFRA_API_KEY",
mode: "api-key",
});
try {
const auth = await providerAuth.resolveApiKeyForProvider({
provider: "deepinfra",
agentDir,
});
expect(spy).toHaveBeenCalledWith(expect.objectContaining({ provider: "deepinfra" }));
expect(auth.apiKey).toBe("deepinfra-provider-test-key");
expect(auth.mode).toBe("api-key");
expect(auth.source).toContain("DEEPINFRA_API_KEY");
} finally {
envSnapshot.restore();
}
});
});
});

View File

@@ -0,0 +1,36 @@
import {
applyAgentDefaultModelPrimary,
type OpenClawConfig,
} from "openclaw/plugin-sdk/provider-onboard";
import { DEEPINFRA_BASE_URL, DEEPINFRA_DEFAULT_MODEL_REF } from "./provider-models.js";
export { DEEPINFRA_BASE_URL, DEEPINFRA_DEFAULT_MODEL_REF };
/**
 * Ensures the given model ref exists under agents.defaults.models with a
 * "DeepInfra" alias, preserving any alias the user already configured.
 * Returns a new config object; the input config is not mutated.
 */
export function applyDeepInfraProviderConfig(
  cfg: OpenClawConfig,
  modelRef: string = DEEPINFRA_DEFAULT_MODEL_REF,
): OpenClawConfig {
  const existingEntry = cfg.agents?.defaults?.models?.[modelRef];
  const models = {
    ...cfg.agents?.defaults?.models,
    [modelRef]: {
      ...existingEntry,
      // Only set the alias when the user has not chosen one already.
      alias: existingEntry?.alias ?? "DeepInfra",
    },
  };
  return {
    ...cfg,
    agents: {
      ...cfg.agents,
      defaults: {
        ...cfg.agents?.defaults,
        models,
      },
    },
  };
}
/**
 * Register `modelRef` (with its DeepInfra alias) and then apply it as the
 * agents' default primary model via `applyAgentDefaultModelPrimary`.
 */
export function applyDeepInfraConfig(
  cfg: OpenClawConfig,
  modelRef: string = DEEPINFRA_DEFAULT_MODEL_REF,
): OpenClawConfig {
  const withModelEntry = applyDeepInfraProviderConfig(cfg, modelRef);
  return applyAgentDefaultModelPrimary(withModelEntry, modelRef);
}

View File

@@ -0,0 +1,191 @@
{
"id": "deepinfra",
"enabledByDefault": true,
"providers": ["deepinfra"],
"providerEndpoints": [
{
"endpointClass": "deepinfra-native",
"hosts": ["api.deepinfra.com"]
}
],
"providerRequest": {
"providers": {
"deepinfra": {
"family": "deepinfra"
}
}
},
"modelCatalog": {
"providers": {
"deepinfra": {
"baseUrl": "https://api.deepinfra.com/v1/openai",
"api": "openai-completions",
"models": [
{
"id": "deepseek-ai/DeepSeek-V3.2",
"name": "DeepSeek V3.2",
"reasoning": false,
"input": ["text"],
"contextWindow": 163840,
"maxTokens": 163840,
"cost": {
"input": 0.26,
"output": 0.38,
"cacheRead": 0.13,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
},
{
"id": "zai-org/GLM-5.1",
"name": "GLM-5.1",
"reasoning": true,
"input": ["text"],
"contextWindow": 202752,
"maxTokens": 202752,
"cost": {
"input": 1.05,
"output": 3.5,
"cacheRead": 0.205000005,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
},
{
"id": "stepfun-ai/Step-3.5-Flash",
"name": "Step 3.5 Flash",
"reasoning": false,
"input": ["text"],
"contextWindow": 262144,
"maxTokens": 262144,
"cost": {
"input": 0.1,
"output": 0.3,
"cacheRead": 0.02,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
},
{
"id": "MiniMaxAI/MiniMax-M2.5",
"name": "MiniMax M2.5",
"reasoning": true,
"input": ["text"],
"contextWindow": 196608,
"maxTokens": 196608,
"cost": {
"input": 0.15,
"output": 1.15,
"cacheRead": 0.03,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
},
{
"id": "moonshotai/Kimi-K2.5",
"name": "Kimi K2.5",
"reasoning": true,
"input": ["text", "image"],
"contextWindow": 262144,
"maxTokens": 262144,
"cost": {
"input": 0.45,
"output": 2.25,
"cacheRead": 0.070000002,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
},
{
"id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B",
"name": "NVIDIA Nemotron 3 Super 120B A12B",
"reasoning": true,
"input": ["text"],
"contextWindow": 262144,
"maxTokens": 262144,
"cost": {
"input": 0.1,
"output": 0.5,
"cacheRead": 0,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
},
{
"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"name": "Llama 3.3 70B Instruct Turbo",
"reasoning": false,
"input": ["text"],
"contextWindow": 131072,
"maxTokens": 131072,
"cost": {
"input": 0.1,
"output": 0.32,
"cacheRead": 0,
"cacheWrite": 0
},
"compat": {
"supportsUsageInStreaming": true
}
}
]
}
}
},
"providerAuthEnvVars": {
"deepinfra": ["DEEPINFRA_API_KEY"]
},
"providerAuthChoices": [
{
"provider": "deepinfra",
"method": "api-key",
"choiceId": "deepinfra-api-key",
"choiceLabel": "DeepInfra API key",
"choiceHint": "Unified API for open source models",
"groupId": "deepinfra",
"groupLabel": "DeepInfra",
"groupHint": "Unified API for open source models",
"optionKey": "deepinfraApiKey",
"cliFlag": "--deepinfra-api-key",
"cliOption": "--deepinfra-api-key <key>",
"cliDescription": "DeepInfra API key"
}
],
"contracts": {
"mediaUnderstandingProviders": ["deepinfra"],
"memoryEmbeddingProviders": ["deepinfra"],
"imageGenerationProviders": ["deepinfra"],
"speechProviders": ["deepinfra"],
"videoGenerationProviders": ["deepinfra"]
},
"mediaUnderstandingProviderMetadata": {
"deepinfra": {
"capabilities": ["image", "audio"],
"defaultModels": {
"image": "moonshotai/Kimi-K2.5",
"audio": "openai/whisper-large-v3-turbo"
},
"autoPriority": {
"image": 45,
"audio": 45
}
}
},
"configSchema": {
"type": "object",
"additionalProperties": false,
"properties": {}
}
}

View File

@@ -0,0 +1,15 @@
{
"name": "@openclaw/deepinfra-provider",
"version": "2026.4.25",
"private": true,
"description": "OpenClaw DeepInfra provider plugin",
"type": "module",
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*"
},
"openclaw": {
"extensions": [
"./index.ts"
]
}
}

View File

@@ -0,0 +1,24 @@
import { type ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
import {
DEEPINFRA_BASE_URL,
DEEPINFRA_MODEL_CATALOG,
buildDeepInfraModelDefinition,
discoverDeepInfraModels,
} from "./provider-models.js";
/**
 * Build the provider config from the bundled static catalog — no network.
 */
export function buildStaticDeepInfraProvider(): ModelProviderConfig {
  const models = DEEPINFRA_MODEL_CATALOG.map((entry) => buildDeepInfraModelDefinition(entry));
  return {
    baseUrl: DEEPINFRA_BASE_URL,
    api: "openai-completions",
    models,
  };
}
/**
 * Build the provider config from live model discovery (falls back to the
 * static catalog inside `discoverDeepInfraModels` on failure).
 */
export async function buildDeepInfraProvider(): Promise<ModelProviderConfig> {
  const discovered = await discoverDeepInfraModels();
  return {
    baseUrl: DEEPINFRA_BASE_URL,
    api: "openai-completions",
    models: discovered,
  };
}

View File

@@ -0,0 +1,169 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import {
DEEPINFRA_DEFAULT_MODEL_REF,
DEEPINFRA_MODELS_URL,
discoverDeepInfraModels,
resetDeepInfraModelCacheForTest,
} from "./provider-models.js";
beforeEach(() => {
resetDeepInfraModelCacheForTest();
});
// Fixture: one well-formed DeepInfra /models row; callers may override any
// top-level field (e.g. id or metadata).
function makeModelEntry(overrides: Record<string, unknown> = {}) {
  const base = {
    id: "openai/gpt-oss-120b",
    object: "model",
    owned_by: "deepinfra",
    metadata: {
      context_length: 131072,
      max_tokens: 65536,
      pricing: {
        input_tokens: 3,
        output_tokens: 15,
        cache_read_tokens: 0.3,
      },
      tags: ["vision", "reasoning_effort", "prompt_cache", "reasoning"],
    },
  };
  return Object.assign(base, overrides);
}
// Runs assertions with NODE_ENV/VITEST cleared (so the code under test takes
// the live-fetch path) and the global fetch stubbed; restores both afterwards.
async function withFetchPathTest(
  mockFetch: ReturnType<typeof vi.fn>,
  runAssertions: () => Promise<void>,
) {
  const envKeys = ["NODE_ENV", "VITEST"] as const;
  const saved: Record<string, string | undefined> = {};
  for (const key of envKeys) {
    saved[key] = process.env[key];
    delete process.env[key];
  }
  vi.stubGlobal("fetch", mockFetch);
  try {
    await runAssertions();
  } finally {
    for (const key of envKeys) {
      const previous = saved[key];
      if (previous === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = previous;
      }
    }
    vi.unstubAllGlobals();
  }
}
describe("discoverDeepInfraModels", () => {
it("returns static catalog in test environment", async () => {
const models = await discoverDeepInfraModels();
expect(DEEPINFRA_DEFAULT_MODEL_REF).toBe("deepinfra/deepseek-ai/DeepSeek-V3.2");
expect(models.some((m) => m.id === "deepseek-ai/DeepSeek-V3.2")).toBe(true);
expect(models.every((m) => m.compat?.supportsUsageInStreaming)).toBe(true);
});
it("fetches DeepInfra's curated LLM catalog and parses model metadata", async () => {
const mockFetch = vi.fn().mockResolvedValue({
ok: true,
json: () => Promise.resolve({ data: [makeModelEntry()] }),
});
await withFetchPathTest(mockFetch, async () => {
const models = await discoverDeepInfraModels();
expect(mockFetch).toHaveBeenCalledWith(
DEEPINFRA_MODELS_URL,
expect.objectContaining({
headers: { Accept: "application/json" },
}),
);
expect(models).toEqual([
expect.objectContaining({
id: "openai/gpt-oss-120b",
name: "openai/gpt-oss-120b",
reasoning: true,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 65536,
cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 0 },
compat: expect.objectContaining({ supportsUsageInStreaming: true }),
}),
]);
});
});
it("skips non-LLM rows without metadata and deduplicates ids", async () => {
const mockFetch = vi.fn().mockResolvedValue({
ok: true,
json: () =>
Promise.resolve({
data: [
{ id: "BAAI/bge-m3", object: "model", metadata: null },
makeModelEntry(),
makeModelEntry(),
],
}),
});
await withFetchPathTest(mockFetch, async () => {
const models = await discoverDeepInfraModels();
expect(models.map((m) => m.id)).toEqual(["openai/gpt-oss-120b"]);
});
});
it("uses fallback defaults for sparse metadata", async () => {
const mockFetch = vi.fn().mockResolvedValue({
ok: true,
json: () =>
Promise.resolve({
data: [
makeModelEntry({
id: "some/model",
metadata: { tags: [], pricing: {} },
}),
],
}),
});
await withFetchPathTest(mockFetch, async () => {
const [model] = await discoverDeepInfraModels();
expect(model).toMatchObject({
id: "some/model",
reasoning: false,
input: ["text"],
contextWindow: 128000,
maxTokens: 8192,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
});
});
});
it("falls back to the static catalog on network errors", async () => {
const mockFetch = vi.fn().mockRejectedValue(new Error("network error"));
await withFetchPathTest(mockFetch, async () => {
const models = await discoverDeepInfraModels();
expect(models.some((m) => m.id === "deepseek-ai/DeepSeek-V3.2")).toBe(true);
});
});
it("caches successful discovery responses only", async () => {
const mockFetch = vi
.fn()
.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ data: [makeModelEntry({ id: "first/model" })] }),
})
.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ data: [makeModelEntry({ id: "second/model" })] }),
});
await withFetchPathTest(mockFetch, async () => {
expect((await discoverDeepInfraModels()).map((m) => m.id)).toEqual(["first/model"]);
expect((await discoverDeepInfraModels()).map((m) => m.id)).toEqual(["first/model"]);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
});
});

View File

@@ -0,0 +1,213 @@
import { fetchWithTimeout } from "openclaw/plugin-sdk/provider-http";
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
// Subsystem-scoped logger for model-discovery diagnostics.
const log = createSubsystemLogger("deepinfra-models");
// OpenAI-compatible endpoint root. Note the `/v1` segment sits mid-path.
export const DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai";
// Curated model-listing endpoint consumed by discoverDeepInfraModels().
export const DEEPINFRA_MODELS_URL = `${DEEPINFRA_BASE_URL}/models?sort_by=openclaw&filter=with_meta`;
export const DEEPINFRA_DEFAULT_MODEL_ID = "deepseek-ai/DeepSeek-V3.2";
export const DEEPINFRA_DEFAULT_MODEL_REF = `deepinfra/${DEEPINFRA_DEFAULT_MODEL_ID}`;
// Fallbacks used when a discovered model's metadata omits these limits.
export const DEEPINFRA_DEFAULT_CONTEXT_WINDOW = 128000;
export const DEEPINFRA_DEFAULT_MAX_TOKENS = 8192;
// Static catalog: served in test runs and as the fallback whenever live
// discovery fails. Cost figures are presumably USD per 1M tokens —
// TODO confirm against DeepInfra's pricing page.
export const DEEPINFRA_MODEL_CATALOG: ModelDefinitionConfig[] = [
  {
    id: "deepseek-ai/DeepSeek-V3.2",
    name: "DeepSeek V3.2",
    reasoning: false,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 163840,
    cost: { input: 0.26, output: 0.38, cacheRead: 0.13, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-5.1",
    name: "GLM-5.1",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 202752,
    cost: { input: 1.05, output: 3.5, cacheRead: 0.205000005, cacheWrite: 0 },
  },
  {
    id: "stepfun-ai/Step-3.5-Flash",
    name: "Step 3.5 Flash",
    reasoning: false,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.1, output: 0.3, cacheRead: 0.02, cacheWrite: 0 },
  },
  {
    id: "MiniMaxAI/MiniMax-M2.5",
    name: "MiniMax M2.5",
    reasoning: true,
    input: ["text"],
    contextWindow: 196608,
    maxTokens: 196608,
    cost: { input: 0.15, output: 1.15, cacheRead: 0.03, cacheWrite: 0 },
  },
  {
    // Only vision-capable entry in the static catalog.
    id: "moonshotai/Kimi-K2.5",
    name: "Kimi K2.5",
    reasoning: true,
    input: ["text", "image"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.45, output: 2.25, cacheRead: 0.070000002, cacheWrite: 0 },
  },
  {
    id: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B",
    name: "NVIDIA Nemotron 3 Super 120B A12B",
    reasoning: true,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.1, output: 0.5, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
    name: "Llama 3.3 70B Instruct Turbo",
    reasoning: false,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.1, output: 0.32, cacheRead: 0, cacheWrite: 0 },
  },
];
// Live-discovery tuning: per-request timeout and in-memory cache TTL.
const DISCOVERY_TIMEOUT_MS = 5000;
const DISCOVERY_CACHE_TTL_MS = 5 * 60 * 1000;
// Module-level cache of the last successful, non-empty discovery result.
let cachedModels: ModelDefinitionConfig[] | null = null;
let cachedAt = 0;
/** Test hook: clears the discovery cache so each test starts cold. */
export function resetDeepInfraModelCacheForTest(): void {
  cachedModels = null;
  cachedAt = 0;
}
// The interfaces below mirror the subset of DeepInfra's /models payload that
// this module reads; all fields are optional because the API is external.
interface DeepInfraModelPricing {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_tokens?: number;
}
interface DeepInfraModelMetadata {
  context_length?: number;
  max_tokens?: number;
  pricing?: DeepInfraModelPricing;
  tags?: string[];
}
interface DeepInfraModelEntry {
  id: string;
  // null metadata marks non-LLM rows (e.g. embedding models) to be skipped.
  metadata: DeepInfraModelMetadata | null;
}
interface DeepInfraModelsResponse {
  data?: DeepInfraModelEntry[];
}
// A "vision" tag marks the model as image-capable; everything accepts text.
function parseModality(metadata: DeepInfraModelMetadata): Array<"text" | "image"> {
  const hasVision = metadata.tags?.includes("vision") ?? false;
  return hasVision ? ["text", "image"] : ["text"];
}
// Either tag variant advertised by DeepInfra marks a reasoning model.
function parseReasoning(metadata: DeepInfraModelMetadata): boolean {
  const tags = metadata.tags ?? [];
  return tags.includes("reasoning") || tags.includes("reasoning_effort");
}
/**
 * Stamp a model definition with the DeepInfra compat defaults: usage data in
 * streaming responses is assumed supported unless the model says otherwise.
 */
export function buildDeepInfraModelDefinition(model: ModelDefinitionConfig): ModelDefinitionConfig {
  const compat = {
    ...model.compat,
    supportsUsageInStreaming: model.compat?.supportsUsageInStreaming ?? true,
  };
  return { ...model, compat };
}
// Convert one discovered /models row into a ModelDefinitionConfig, applying
// the module-wide fallbacks for missing limits and zero-cost defaults.
// Throws when the row has no metadata (caller logs and skips it).
function toModelDefinition(entry: DeepInfraModelEntry): ModelDefinitionConfig {
  const { metadata } = entry;
  if (!metadata) {
    throw new Error("missing metadata");
  }
  const pricing = metadata.pricing;
  return buildDeepInfraModelDefinition({
    id: entry.id,
    name: entry.id,
    reasoning: parseReasoning(metadata),
    input: parseModality(metadata),
    contextWindow: metadata.context_length ?? DEEPINFRA_DEFAULT_CONTEXT_WINDOW,
    maxTokens: metadata.max_tokens ?? DEEPINFRA_DEFAULT_MAX_TOKENS,
    cost: {
      input: pricing?.input_tokens ?? 0,
      output: pricing?.output_tokens ?? 0,
      cacheRead: pricing?.cache_read_tokens ?? 0,
      cacheWrite: 0,
    },
  });
}
// Fresh copy of the static catalog with compat defaults applied.
function staticCatalog(): ModelDefinitionConfig[] {
  return DEEPINFRA_MODEL_CATALOG.map((entry) => buildDeepInfraModelDefinition(entry));
}
/**
 * Discover the current DeepInfra LLM catalog.
 *
 * Test runs (NODE_ENV=test or VITEST set) always get the static catalog and
 * never touch the network. Otherwise results are served from the module cache
 * for DISCOVERY_CACHE_TTL_MS. Only successful, non-empty fetches are cached,
 * so transient failures are retried on the next call; every failure mode
 * falls back to the static catalog instead of throwing.
 */
export async function discoverDeepInfraModels(): Promise<ModelDefinitionConfig[]> {
  if (process.env.NODE_ENV === "test" || process.env.VITEST) {
    return staticCatalog();
  }
  if (cachedModels && Date.now() - cachedAt < DISCOVERY_CACHE_TTL_MS) {
    // Return a copy so callers cannot mutate the cached array.
    return [...cachedModels];
  }
  try {
    const response = await fetchWithTimeout(
      DEEPINFRA_MODELS_URL,
      {
        headers: { Accept: "application/json" },
      },
      DISCOVERY_TIMEOUT_MS,
    );
    if (!response.ok) {
      log.warn(`Failed to discover models: HTTP ${response.status}, using static catalog`);
      return staticCatalog();
    }
    const body = (await response.json()) as DeepInfraModelsResponse;
    if (!Array.isArray(body.data) || body.data.length === 0) {
      log.warn("No models found from DeepInfra API, using static catalog");
      return staticCatalog();
    }
    // Deduplicate by trimmed id; rows without metadata are non-LLM artifacts.
    const seen = new Set<string>();
    const models: ModelDefinitionConfig[] = [];
    for (const entry of body.data) {
      const id = typeof entry?.id === "string" ? entry.id.trim() : "";
      if (!id || seen.has(id) || !entry.metadata) {
        continue;
      }
      try {
        models.push(toModelDefinition({ ...entry, id }));
        seen.add(id);
      } catch (error) {
        // One malformed row must not poison the whole discovery run.
        log.warn(`Skipping malformed model entry "${id}": ${String(error)}`);
      }
    }
    if (models.length === 0) {
      return staticCatalog();
    }
    // Cache only after a fully successful parse (see test "caches successful
    // discovery responses only").
    cachedModels = models;
    cachedAt = Date.now();
    return [...models];
  } catch (error) {
    log.warn(`Discovery failed: ${String(error)}, using static catalog`);
    return staticCatalog();
  }
}

View File

@@ -0,0 +1,41 @@
import type {
ModelDefinitionConfig,
ModelProviderConfig,
} from "openclaw/plugin-sdk/provider-model-types";
import { describe, expect, it } from "vitest";
import { normalizeConfig } from "./provider-policy-api.js";
// Fixture: minimal zero-cost text model definition for normalizer tests.
function createModel(id: string): ModelDefinitionConfig {
  const freeOfCharge = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
  return {
    id,
    name: id,
    reasoning: false,
    input: ["text"],
    cost: freeOfCharge,
    contextWindow: 128_000,
    maxTokens: 8_192,
  };
}
// Guards the passthrough normalizer: DeepInfra keeps `/v1` mid-path in its
// baseUrl, so it must not be rewritten like other openai-completions hosts.
describe("deepinfra provider policy public artifact", () => {
  it("preserves the DeepInfra mid-path /v1 baseUrl without appending another /v1", () => {
    const providerConfig: ModelProviderConfig = {
      baseUrl: "https://api.deepinfra.com/v1/openai",
      api: "openai-completions",
      models: [createModel("zai-org/GLM-5")],
    };
    const normalized = normalizeConfig({ provider: "deepinfra", providerConfig });
    expect(normalized.baseUrl).toBe("https://api.deepinfra.com/v1/openai");
    // The doubled suffix is exactly what the generic normalizer would emit.
    expect(normalized.baseUrl).not.toMatch(/\/v1\/openai\/v1$/);
  });
  it("returns the providerConfig unchanged (referentially equal)", () => {
    const providerConfig = {
      baseUrl: "https://api.deepinfra.com/v1/openai",
      models: [],
    };
    expect(normalizeConfig({ provider: "deepinfra", providerConfig })).toBe(providerConfig);
  });
});

View File

@@ -0,0 +1,21 @@
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-types";
/**
 * Identity normalization for the DeepInfra provider config.
 *
 * DeepInfra's OpenAI-compatible endpoint is
 * `https://api.deepinfra.com/v1/openai` — the `/v1` segment sits in the
 * middle of the path rather than at the end. The generic openai-completions
 * normalizer strips a trailing `/v1` and re-appends one, which is idempotent
 * for providers like OpenRouter (`.../api/v1`) but would double here to
 * `.../v1/openai/v1` and break inference with a 404.
 *
 * Shipping this bundled policy surface short-circuits the fallback normalizer
 * chain (see `src/plugins/provider-runtime.ts:normalizeProviderConfigWithPlugin`)
 * and keeps the DeepInfra-declared baseUrl untouched.
 */
export function normalizeConfig(params: {
  provider: string;
  providerConfig: ModelProviderConfig;
}): ModelProviderConfig {
  const { providerConfig } = params;
  return providerConfig;
}

View File

@@ -0,0 +1,3 @@
import { describeProviderContracts } from "openclaw/plugin-sdk/provider-test-contracts";
describeProviderContracts("deepinfra");

View File

@@ -0,0 +1,128 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildDeepInfraSpeechProvider } from "./speech-provider.js";
const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } =
vi.hoisted(() => ({
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
postJsonRequestMock: vi.fn(),
resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://api.deepinfra.com/v1/openai",
allowPrivateNetwork: false,
headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
dispatcherPolicy: undefined,
})),
}));
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
postJsonRequest: postJsonRequestMock,
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
}));
describe("deepinfra speech provider", () => {
afterEach(() => {
assertOkOrThrowHttpErrorMock.mockClear();
postJsonRequestMock.mockReset();
resolveProviderHttpRequestConfigMock.mockClear();
vi.unstubAllEnvs();
});
it("normalizes provider-owned speech config", () => {
const provider = buildDeepInfraSpeechProvider();
const resolved = provider.resolveConfig?.({
cfg: {} as never,
timeoutMs: 30_000,
rawConfig: {
providers: {
deepinfra: {
apiKey: "sk-test",
baseUrl: "https://api.deepinfra.com/v1/openai/",
modelId: "deepinfra/hexgrad/Kokoro-82M",
voiceId: "af_alloy",
speed: 1.1,
responseFormat: " MP3 ",
},
},
},
});
expect(resolved).toEqual({
apiKey: "sk-test",
baseUrl: "https://api.deepinfra.com/v1/openai",
model: "hexgrad/Kokoro-82M",
voice: "af_alloy",
speed: 1.1,
responseFormat: "mp3",
extraBody: undefined,
});
});
it("synthesizes OpenAI-compatible speech through DeepInfra", async () => {
const release = vi.fn(async () => {});
postJsonRequestMock.mockResolvedValue({
response: new Response(new Uint8Array([1, 2, 3]), { status: 200 }),
release,
});
const provider = buildDeepInfraSpeechProvider();
const result = await provider.synthesize({
text: "hello",
cfg: {
models: {
providers: {
deepinfra: {
apiKey: "sk-deepinfra",
baseUrl: "https://api.deepinfra.com/v1/openai/",
},
},
},
} as never,
providerConfig: {
model: "hexgrad/Kokoro-82M",
voice: "af_alloy",
speed: 1.2,
},
target: "voice-note",
timeoutMs: 12_345,
});
expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
expect.objectContaining({
provider: "deepinfra",
capability: "audio",
baseUrl: "https://api.deepinfra.com/v1/openai",
}),
);
expect(postJsonRequestMock).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://api.deepinfra.com/v1/openai/audio/speech",
timeoutMs: 12_345,
body: {
model: "hexgrad/Kokoro-82M",
input: "hello",
voice: "af_alloy",
response_format: "mp3",
speed: 1.2,
},
}),
);
expect(result.audioBuffer).toEqual(Buffer.from([1, 2, 3]));
expect(result.outputFormat).toBe("mp3");
expect(result.fileExtension).toBe(".mp3");
expect(result.voiceCompatible).toBe(true);
expect(release).toHaveBeenCalledOnce();
});
it("uses DEEPINFRA_API_KEY when provider config omits apiKey", () => {
vi.stubEnv("DEEPINFRA_API_KEY", "sk-env");
const provider = buildDeepInfraSpeechProvider();
expect(
provider.isConfigured({
cfg: {} as never,
providerConfig: {},
timeoutMs: 30_000,
}),
).toBe(true);
});
});

View File

@@ -0,0 +1,295 @@
import {
assertOkOrThrowHttpError,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
import {
asFiniteNumber,
asObject,
trimToUndefined,
type SpeechDirectiveTokenParseContext,
type SpeechProviderConfig,
type SpeechProviderOverrides,
type SpeechProviderPlugin,
} from "openclaw/plugin-sdk/speech";
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
import {
DEEPINFRA_BASE_URL,
DEEPINFRA_TTS_MODELS,
DEFAULT_DEEPINFRA_TTS_MODEL,
DEFAULT_DEEPINFRA_TTS_VOICE,
normalizeDeepInfraBaseUrl,
normalizeDeepInfraModelRef,
} from "./media-models.js";
// Response formats accepted by the OpenAI-compatible /audio/speech endpoint.
const DEEPINFRA_TTS_RESPONSE_FORMATS = ["mp3", "opus", "flac", "wav", "pcm"] as const;
type DeepInfraTtsResponseFormat = (typeof DEEPINFRA_TTS_RESPONSE_FORMATS)[number];
// Fully resolved provider config after merging raw config with defaults.
type DeepInfraTtsProviderConfig = {
  apiKey?: string;
  baseUrl?: string;
  model: string;
  voice: string;
  speed?: number;
  responseFormat?: DeepInfraTtsResponseFormat;
  // Forwarded verbatim as `extra_body` in the synthesize request.
  extraBody?: Record<string, unknown>;
};
// Per-request overrides (from message directives or talk params).
type DeepInfraTtsProviderOverrides = {
  model?: string;
  voice?: string;
  speed?: number;
};
// Lowercase/trim the value and validate it against the supported formats.
// Returns undefined for empty input; throws on an unrecognized format.
function normalizeDeepInfraTtsResponseFormat(
  value: unknown,
): DeepInfraTtsResponseFormat | undefined {
  const normalized = normalizeOptionalLowercaseString(value);
  if (!normalized) {
    return undefined;
  }
  const match = DEEPINFRA_TTS_RESPONSE_FORMATS.find((format) => format === normalized);
  if (match) {
    return match;
  }
  throw new Error(`Invalid DeepInfra speech responseFormat: ${normalized}`);
}
// Accepts both config shapes: `providers.deepinfra` (preferred) and a
// top-level `deepinfra` key.
function resolveDeepInfraProviderConfigRecord(
  rawConfig: Record<string, unknown>,
): Record<string, unknown> | undefined {
  const nested = asObject(asObject(rawConfig.providers)?.deepinfra);
  return nested ?? asObject(rawConfig.deepinfra);
}
// Normalize the raw TTS config record into a DeepInfraTtsProviderConfig,
// accepting model/modelId and voice/voiceId aliases and applying defaults.
function normalizeDeepInfraTtsProviderConfig(
  rawConfig: Record<string, unknown>,
): DeepInfraTtsProviderConfig {
  const raw = resolveDeepInfraProviderConfigRecord(rawConfig);
  const apiKey = normalizeResolvedSecretInputString({
    value: raw?.apiKey,
    path: "messages.tts.providers.deepinfra.apiKey",
  });
  const baseUrl =
    trimToUndefined(raw?.baseUrl) == null ? undefined : normalizeDeepInfraBaseUrl(raw?.baseUrl);
  const model = normalizeDeepInfraModelRef(
    trimToUndefined(raw?.model ?? raw?.modelId),
    DEFAULT_DEEPINFRA_TTS_MODEL,
  );
  const voice = trimToUndefined(raw?.voice ?? raw?.voiceId) ?? DEFAULT_DEEPINFRA_TTS_VOICE;
  return {
    apiKey,
    baseUrl,
    model,
    voice,
    speed: asFiniteNumber(raw?.speed),
    responseFormat: normalizeDeepInfraTtsResponseFormat(raw?.responseFormat),
    extraBody: asObject(raw?.extraBody),
  };
}
// Merge a SpeechProviderConfig over the provider defaults (obtained by
// normalizing an empty raw config). Field-by-field: an explicit value wins,
// otherwise the default applies.
function readDeepInfraTtsProviderConfig(config: SpeechProviderConfig): DeepInfraTtsProviderConfig {
  const defaults = normalizeDeepInfraTtsProviderConfig({});
  const apiKey = trimToUndefined(config.apiKey) ?? defaults.apiKey;
  const baseUrl =
    trimToUndefined(config.baseUrl) == null
      ? defaults.baseUrl
      : normalizeDeepInfraBaseUrl(config.baseUrl);
  const model = normalizeDeepInfraModelRef(
    trimToUndefined(config.model ?? config.modelId),
    defaults.model,
  );
  return {
    apiKey,
    baseUrl,
    model,
    voice: trimToUndefined(config.voice ?? config.voiceId) ?? defaults.voice,
    speed: asFiniteNumber(config.speed) ?? defaults.speed,
    responseFormat:
      normalizeDeepInfraTtsResponseFormat(config.responseFormat) ?? defaults.responseFormat,
    extraBody: asObject(config.extraBody) ?? defaults.extraBody,
  };
}
// Extract the model/voice/speed overrides, honoring the *Id aliases.
// Absent overrides yield an empty record.
function readDeepInfraTtsOverrides(
  overrides: SpeechProviderOverrides | undefined,
): DeepInfraTtsProviderOverrides {
  if (overrides == null) {
    return {};
  }
  const model = trimToUndefined(overrides.model ?? overrides.modelId);
  const voice = trimToUndefined(overrides.voice ?? overrides.voiceId);
  return {
    model,
    voice,
    speed: asFiniteNumber(overrides.speed),
  };
}
// API key precedence: provider TTS config → models.providers.deepinfra →
// DEEPINFRA_API_KEY environment variable.
function resolveDeepInfraTtsApiKey(params: {
  cfg?: { models?: { providers?: { deepinfra?: { apiKey?: unknown } } } };
  providerConfig: DeepInfraTtsProviderConfig;
}): string | undefined {
  const direct = params.providerConfig.apiKey;
  if (direct != null) {
    return direct;
  }
  const fromModelsConfig = normalizeResolvedSecretInputString({
    value: params.cfg?.models?.providers?.deepinfra?.apiKey,
    path: "models.providers.deepinfra.apiKey",
  });
  return fromModelsConfig ?? trimToUndefined(process.env.DEEPINFRA_API_KEY);
}
// Base URL precedence: provider TTS config → models.providers.deepinfra →
// the DeepInfra default; always normalized before use.
function resolveDeepInfraTtsBaseUrl(params: {
  cfg?: { models?: { providers?: { deepinfra?: { baseUrl?: unknown } } } };
  providerConfig: DeepInfraTtsProviderConfig;
}): string {
  const configured =
    params.providerConfig.baseUrl ??
    trimToUndefined(params.cfg?.models?.providers?.deepinfra?.baseUrl);
  return normalizeDeepInfraBaseUrl(configured ?? DEEPINFRA_BASE_URL);
}
// Map a response format to its dotted file extension.
function responseFormatToFileExtension(
  format: DeepInfraTtsResponseFormat,
): ".mp3" | ".opus" | ".flac" | ".wav" | ".pcm" {
  const extensions = {
    mp3: ".mp3",
    opus: ".opus",
    flac: ".flac",
    wav: ".wav",
    pcm: ".pcm",
  } as const;
  return extensions[format];
}
// Interpret a speech directive token. Voice/model keys (and their DeepInfra-
// prefixed aliases) are always consumed (`handled: true`); an override is
// only produced when the active policy allows it. Unknown keys pass through.
function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
  handled: boolean;
  overrides?: SpeechProviderOverrides;
} {
  const voiceKeys = ["voice", "voice_id", "voiceid", "deepinfra_voice", "deepinfravoice"];
  const modelKeys = ["model", "model_id", "modelid", "deepinfra_model", "deepinframodel"];
  if (voiceKeys.includes(ctx.key)) {
    return ctx.policy.allowVoice
      ? { handled: true, overrides: { voice: ctx.value } }
      : { handled: true };
  }
  if (modelKeys.includes(ctx.key)) {
    return ctx.policy.allowModelId
      ? { handled: true, overrides: { model: ctx.value } }
      : { handled: true };
  }
  return { handled: false };
}
/**
 * Build the DeepInfra speech (TTS) provider plugin.
 *
 * Synthesis posts to the OpenAI-compatible `/audio/speech` endpoint. Config
 * precedence throughout: per-request overrides → provider config → global
 * models config → environment (DEEPINFRA_API_KEY).
 */
export function buildDeepInfraSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "deepinfra",
    label: "DeepInfra",
    autoSelectOrder: 45,
    models: [...DEEPINFRA_TTS_MODELS],
    voices: [DEFAULT_DEEPINFRA_TTS_VOICE],
    resolveConfig: ({ rawConfig }) => normalizeDeepInfraTtsProviderConfig(rawConfig),
    parseDirectiveToken,
    // Layer talk-mode settings over the base TTS config. Each conditional
    // spread only overrides a field when the talk config supplies a usable
    // value, so unset talk fields never clobber the base.
    resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
      const base = normalizeDeepInfraTtsProviderConfig(baseTtsConfig);
      const responseFormat = normalizeDeepInfraTtsResponseFormat(talkProviderConfig.responseFormat);
      return {
        ...base,
        ...(talkProviderConfig.apiKey === undefined
          ? {}
          : {
              apiKey: normalizeResolvedSecretInputString({
                value: talkProviderConfig.apiKey,
                path: "talk.providers.deepinfra.apiKey",
              }),
            }),
        ...(trimToUndefined(talkProviderConfig.baseUrl) == null
          ? {}
          : { baseUrl: normalizeDeepInfraBaseUrl(talkProviderConfig.baseUrl) }),
        ...(trimToUndefined(talkProviderConfig.modelId) == null
          ? {}
          : {
              model: normalizeDeepInfraModelRef(
                trimToUndefined(talkProviderConfig.modelId),
                DEFAULT_DEEPINFRA_TTS_MODEL,
              ),
            }),
        ...(trimToUndefined(talkProviderConfig.voiceId) == null
          ? {}
          : { voice: trimToUndefined(talkProviderConfig.voiceId) }),
        ...(asFiniteNumber(talkProviderConfig.speed) == null
          ? {}
          : { speed: asFiniteNumber(talkProviderConfig.speed) }),
        ...(responseFormat == null ? {} : { responseFormat }),
      };
    },
    // Per-call overrides; *Id aliases win over their plain counterparts.
    resolveTalkOverrides: ({ params }) => ({
      ...(trimToUndefined(params.voiceId ?? params.voice) == null
        ? {}
        : { voice: trimToUndefined(params.voiceId ?? params.voice) }),
      ...(trimToUndefined(params.modelId ?? params.model) == null
        ? {}
        : { model: trimToUndefined(params.modelId ?? params.model) }),
      ...(asFiniteNumber(params.speed) == null ? {} : { speed: asFiniteNumber(params.speed) }),
    }),
    // Only the single default voice is advertised.
    listVoices: async () => [
      { id: DEFAULT_DEEPINFRA_TTS_VOICE, name: DEFAULT_DEEPINFRA_TTS_VOICE },
    ],
    // Configured iff an API key is resolvable from any source.
    isConfigured: ({ cfg, providerConfig }) => {
      const config = readDeepInfraTtsProviderConfig(providerConfig);
      return Boolean(resolveDeepInfraTtsApiKey({ cfg, providerConfig: config }));
    },
    synthesize: async (req) => {
      const config = readDeepInfraTtsProviderConfig(req.providerConfig);
      const overrides = readDeepInfraTtsOverrides(req.providerOverrides);
      const apiKey = resolveDeepInfraTtsApiKey({ cfg: req.cfg, providerConfig: config });
      if (!apiKey) {
        throw new Error("DeepInfra API key missing");
      }
      const baseUrl = resolveDeepInfraTtsBaseUrl({ cfg: req.cfg, providerConfig: config });
      const responseFormat = config.responseFormat ?? "mp3";
      const speed = overrides.speed ?? config.speed;
      const { allowPrivateNetwork, headers, dispatcherPolicy } = resolveProviderHttpRequestConfig({
        baseUrl,
        defaultBaseUrl: DEEPINFRA_BASE_URL,
        allowPrivateNetwork: false,
        defaultHeaders: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        provider: "deepinfra",
        capability: "audio",
        transport: "http",
      });
      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/audio/speech`,
        headers,
        body: {
          model: normalizeDeepInfraModelRef(
            overrides.model ?? config.model,
            DEFAULT_DEEPINFRA_TTS_MODEL,
          ),
          input: req.text,
          voice: overrides.voice ?? config.voice,
          response_format: responseFormat,
          // Optional fields are omitted (not sent as null) when unset.
          ...(speed == null ? {} : { speed }),
          ...(config.extraBody == null ? {} : { extra_body: config.extraBody }),
        },
        timeoutMs: req.timeoutMs,
        fetchFn: fetch,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "DeepInfra TTS API error");
        return {
          audioBuffer: Buffer.from(await response.arrayBuffer()),
          outputFormat: responseFormat,
          fileExtension: responseFormatToFileExtension(responseFormat),
          // Voice notes need mp3/opus; other formats are delivered as files.
          voiceCompatible: responseFormat === "mp3" || responseFormat === "opus",
        };
      } finally {
        // release() must run even on HTTP errors to free the connection.
        await release();
      }
    },
  };
}

View File

@@ -0,0 +1,16 @@
{
"extends": "../tsconfig.package-boundary.base.json",
"compilerOptions": {
"rootDir": "."
},
"include": ["./*.ts", "./src/**/*.ts"],
"exclude": [
"./**/*.test.ts",
"./dist/**",
"./node_modules/**",
"./src/test-support/**",
"./src/**/*test-helpers.ts",
"./src/**/*test-harness.ts",
"./src/**/*test-support.ts"
]
}

View File

@@ -0,0 +1,86 @@
import { beforeAll, describe, expect, it, vi } from "vitest";
import { expectExplicitVideoGenerationCapabilities } from "../../test/helpers/media-generation/provider-capability-assertions.js";
import {
getProviderHttpMocks,
installProviderHttpMockCleanup,
} from "../../test/helpers/media-generation/provider-http-mocks.js";
const { postJsonRequestMock, resolveProviderHttpRequestConfigMock } = getProviderHttpMocks();
let buildDeepInfraVideoGenerationProvider: typeof import("./video-generation-provider.js").buildDeepInfraVideoGenerationProvider;
beforeAll(async () => {
({ buildDeepInfraVideoGenerationProvider } = await import("./video-generation-provider.js"));
});
installProviderHttpMockCleanup();
describe("deepinfra video generation provider", () => {
it("declares explicit mode capabilities", () => {
expectExplicitVideoGenerationCapabilities(buildDeepInfraVideoGenerationProvider());
});
it("creates native text-to-video requests and returns the hosted output URL", async () => {
const release = vi.fn(async () => {});
postJsonRequestMock.mockResolvedValue({
response: {
json: async () => ({
video_url: "/generated/video.mp4",
request_id: "req_123",
seed: 42,
inference_status: { status: "succeeded" },
}),
},
release,
});
const provider = buildDeepInfraVideoGenerationProvider();
const result = await provider.generateVideo({
provider: "deepinfra",
model: "deepinfra/Pixverse/Pixverse-T2V",
prompt: "A bicycle weaving through a rainy neon street",
cfg: {},
aspectRatio: "16:9",
durationSeconds: 8,
providerOptions: {
seed: 42,
negative_prompt: "blur",
style: "anime",
},
});
expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
expect.objectContaining({
provider: "deepinfra",
capability: "video",
baseUrl: "https://api.deepinfra.com/v1/inference",
}),
);
expect(postJsonRequestMock).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://api.deepinfra.com/v1/inference/Pixverse/Pixverse-T2V",
body: {
prompt: "A bicycle weaving through a rainy neon street",
aspect_ratio: "16:9",
duration: 8,
seed: 42,
negative_prompt: "blur",
style: "anime",
},
}),
);
expect(result.videos).toEqual([
{
url: "https://api.deepinfra.com/generated/video.mp4",
mimeType: "video/mp4",
fileName: "video-1.mp4",
},
]);
expect(result.metadata).toEqual({
requestId: "req_123",
seed: 42,
status: "succeeded",
});
expect(release).toHaveBeenCalledOnce();
});
});

View File

@@ -0,0 +1,251 @@
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
assertOkOrThrowHttpError,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import type {
GeneratedVideoAsset,
VideoGenerationProvider,
VideoGenerationRequest,
} from "openclaw/plugin-sdk/video-generation";
import {
DEEPINFRA_NATIVE_BASE_URL,
DEEPINFRA_VIDEO_ASPECT_RATIOS,
DEEPINFRA_VIDEO_DURATIONS,
DEEPINFRA_VIDEO_MODELS,
DEFAULT_DEEPINFRA_VIDEO_MODEL,
normalizeDeepInfraBaseUrl,
normalizeDeepInfraModelRef,
} from "./media-models.js";
// Shape of the `inference_status` object in a DeepInfra inference response.
type DeepInfraVideoStatus = {
  status?: string;
  runtime_ms?: number;
};
// Relevant subset of DeepInfra's native video-inference response payload;
// every field is optional because the payload is parsed from untyped JSON.
type DeepInfraVideoResponse = {
  video_url?: string;
  seed?: number;
  request_id?: string;
  inference_status?: DeepInfraVideoStatus;
};
/**
 * Percent-encodes each segment of a model ref (e.g. "org/model") individually
 * so the "/" separators survive when the ref is appended to a URL path.
 */
function encodeDeepInfraModelPath(model: string): string {
  const segments = model.split("/");
  const encodedSegments: string[] = [];
  for (const segment of segments) {
    encodedSegments.push(encodeURIComponent(segment));
  }
  return encodedSegments.join("/");
}
/**
 * Picks the native inference base URL for a request.
 *
 * Precedence: an explicit `nativeBaseUrl` in the deepinfra provider config,
 * then a configured `baseUrl` that already points at the native
 * `/v1/inference` endpoint, then the built-in default.
 */
function resolveDeepInfraNativeBaseUrl(req: VideoGenerationRequest): string {
  const deepinfraConfig = req.cfg?.models?.providers?.deepinfra as
    | (Record<string, unknown> & { baseUrl?: unknown })
    | undefined;
  const explicitNativeUrl = normalizeOptionalString(deepinfraConfig?.nativeBaseUrl);
  if (explicitNativeUrl) {
    return normalizeDeepInfraBaseUrl(explicitNativeUrl, DEEPINFRA_NATIVE_BASE_URL);
  }
  const generalBaseUrl = normalizeOptionalString(deepinfraConfig?.baseUrl);
  if (generalBaseUrl?.includes("/v1/inference")) {
    return normalizeDeepInfraBaseUrl(generalBaseUrl, DEEPINFRA_NATIVE_BASE_URL);
  }
  return DEEPINFRA_NATIVE_BASE_URL;
}
/**
 * Resolves a host-relative video path against the DeepInfra origin.
 * Absolute http(s) URLs and data: URLs pass through unchanged.
 */
function normalizeDeepInfraVideoUrl(url: string): string {
  const passthroughPrefixes = ["http://", "https://", "data:"];
  if (passthroughPrefixes.some((prefix) => url.startsWith(prefix))) {
    return url;
  }
  return new URL(url, "https://api.deepinfra.com").href;
}
/**
 * Decodes a base64 `data:` URL into an in-memory video asset.
 * Returns undefined when the URL is not a base64 data URL so callers can
 * fall back to treating it as a hosted URL.
 */
function parseVideoDataUrl(url: string): GeneratedVideoAsset | undefined {
  const parsed = /^data:([^;,]+);base64,(.+)$/u.exec(url);
  if (parsed === null) {
    return undefined;
  }
  const mimeType = parsed[1] ?? "video/mp4";
  const base64Payload = parsed[2] ?? "";
  // Only webm gets its own extension; every other video mime falls back to mp4.
  const extension = mimeType.includes("webm") ? "webm" : "mp4";
  return {
    buffer: Buffer.from(base64Payload, "base64"),
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
/** Returns the value only when it is a finite number; anything else yields undefined. */
function coerceProviderNumber(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }
  return undefined;
}
/**
 * Coerces an arbitrary provider option to an optional string via the shared
 * normalizer (exact semantics live in text-runtime's normalizeOptionalString).
 */
function coerceProviderString(value: unknown): string | undefined {
  const normalized = normalizeOptionalString(value);
  return normalized;
}
/**
 * Snaps a requested duration to a supported DeepInfra value (5s or 8s),
 * using 6.5s as the midpoint cutoff. Non-numeric or non-finite input yields
 * undefined so the field is omitted from the request body.
 */
function resolveDurationSeconds(value: number | undefined): number | undefined {
  const isUsableNumber = typeof value === "number" && Number.isFinite(value);
  if (!isUsableNumber) {
    return undefined;
  }
  return value > 6.5 ? 8 : 5;
}
/**
 * Assembles the JSON body for a DeepInfra text-to-video inference call.
 *
 * Only fields the caller actually supplied end up in the body; both
 * snake_case and camelCase provider-option aliases are honored, with the
 * snake_case spelling taking precedence.
 */
function buildDeepInfraVideoBody(
  req: VideoGenerationRequest,
  model: string,
): Record<string, unknown> {
  const providerOptions = req.providerOptions ?? {};
  const body: Record<string, unknown> = { prompt: req.prompt };
  const aspectRatio = normalizeOptionalString(req.aspectRatio);
  if (aspectRatio) {
    body.aspect_ratio = aspectRatio;
  }
  const snappedDuration = resolveDurationSeconds(req.durationSeconds);
  if (snappedDuration) {
    body.duration = snappedDuration;
  }
  const seed = coerceProviderNumber(providerOptions.seed);
  if (seed != null) {
    body.seed = seed;
  }
  const negativePrompt =
    coerceProviderString(providerOptions.negative_prompt) ??
    coerceProviderString(providerOptions.negativePrompt);
  if (negativePrompt) {
    body.negative_prompt = negativePrompt;
  }
  const style = coerceProviderString(providerOptions.style);
  if (style) {
    body.style = style;
  }
  // guidance_scale is only forwarded for Wan-AI models — presumably the only
  // DeepInfra video family that accepts it; confirm against DeepInfra docs.
  const guidanceScale =
    coerceProviderNumber(providerOptions.guidance_scale) ??
    coerceProviderNumber(providerOptions.guidanceScale);
  if (guidanceScale != null && model.startsWith("Wan-AI/")) {
    body.guidance_scale = guidanceScale;
  }
  return body;
}
/**
 * Converts a DeepInfra response payload into a video asset.
 *
 * data: URLs are decoded into an in-memory buffer; anything else is returned
 * as an absolute hosted URL with mp4 defaults. Throws when the payload has no
 * usable video_url.
 */
function extractDeepInfraVideoAsset(payload: DeepInfraVideoResponse): GeneratedVideoAsset {
  const rawUrl = normalizeOptionalString(payload.video_url);
  if (!rawUrl) {
    throw new Error("DeepInfra video response missing video_url");
  }
  const absoluteUrl = normalizeDeepInfraVideoUrl(rawUrl);
  const inlineAsset = parseVideoDataUrl(absoluteUrl);
  return (
    inlineAsset ?? {
      url: absoluteUrl,
      mimeType: "video/mp4",
      fileName: "video-1.mp4",
    }
  );
}
/**
 * Maps a terminal failure status ("failed"/"error", case-insensitive) to an
 * error message; any other status — including a missing one — yields undefined.
 */
function failureMessage(payload: DeepInfraVideoResponse): string | undefined {
  const status = normalizeOptionalString(payload.inference_status?.status)?.toLowerCase();
  switch (status) {
    case "failed":
    case "error":
      return "DeepInfra video generation failed";
    default:
      return undefined;
  }
}
/**
 * Builds the DeepInfra video generation provider.
 *
 * Text-to-video only: image and video reference inputs are rejected up front.
 * Requests are POSTed to DeepInfra's native inference endpoint
 * (`<baseUrl>/<encoded model path>`) and the hosted output URL (or decoded
 * inline data URL) from the response is returned as a single video asset.
 */
export function buildDeepInfraVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "deepinfra",
    label: "DeepInfra",
    defaultModel: DEFAULT_DEEPINFRA_VIDEO_MODEL,
    models: [...DEEPINFRA_VIDEO_MODELS],
    // Considered configured once a DeepInfra API key is present for the agent.
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "deepinfra",
        agentDir,
      }),
    capabilities: {
      generate: {
        maxVideos: 1,
        maxDurationSeconds: 8,
        supportedDurationSeconds: [...DEEPINFRA_VIDEO_DURATIONS],
        supportsAspectRatio: true,
        aspectRatios: [...DEEPINFRA_VIDEO_ASPECT_RATIOS],
        // Advertised pass-through options; buildDeepInfraVideoBody accepts
        // both the snake_case and camelCase spellings.
        providerOptions: {
          seed: "number",
          negative_prompt: "string",
          negativePrompt: "string",
          style: "string",
          guidance_scale: "number",
          guidanceScale: "number",
        },
      },
      // Reference-based modes are explicitly disabled; the guards in
      // generateVideo below enforce this at runtime too.
      imageToVideo: {
        enabled: false,
      },
      videoToVideo: {
        enabled: false,
      },
    },
    async generateVideo(req) {
      // Fail fast on unsupported reference inputs rather than silently
      // dropping them from the request.
      if ((req.inputImages?.length ?? 0) > 0) {
        throw new Error("DeepInfra video generation currently supports text-to-video only.");
      }
      if ((req.inputVideos?.length ?? 0) > 0) {
        throw new Error("DeepInfra video generation does not support video reference inputs.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "deepinfra",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("DeepInfra API key missing");
      }
      // Strips any provider prefix and falls back to the default video model.
      const model = normalizeDeepInfraModelRef(req.model, DEFAULT_DEEPINFRA_VIDEO_MODEL);
      const resolvedBaseUrl = resolveDeepInfraNativeBaseUrl(req);
      // Central HTTP policy resolution: final base URL, auth headers, and
      // private-network/dispatcher rules for the "video" capability.
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolvedBaseUrl,
          defaultBaseUrl: DEEPINFRA_NATIVE_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "deepinfra",
          capability: "video",
          transport: "http",
        });
      // Model refs contain "/" (org/model); segments are encoded individually
      // so the path separators survive in the request URL.
      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/${encodeDeepInfraModelPath(model)}`,
        headers,
        body: buildDeepInfraVideoBody(req, model),
        timeoutMs: req.timeoutMs,
        fetchFn: fetch,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "DeepInfra video generation failed");
        const payload = (await response.json()) as DeepInfraVideoResponse;
        // A 200 response can still carry a failed inference_status in its body.
        const failed = failureMessage(payload);
        if (failed) {
          throw new Error(failed);
        }
        const video = extractDeepInfraVideoAsset(payload);
        return {
          videos: [video],
          model,
          metadata: {
            requestId: normalizeOptionalString(payload.request_id),
            seed: payload.seed,
            status: payload.inference_status?.status,
          },
        };
      } finally {
        // Always return the pooled connection, even when validation or
        // parsing above throws.
        await release();
      }
    },
  };
}

View File

@@ -40,6 +40,7 @@ import {
import { describe, expect, it } from "vitest";
import alibabaPlugin from "./alibaba/index.js";
import byteplusPlugin from "./byteplus/index.js";
import deepinfraPlugin from "./deepinfra/index.js";
import falPlugin from "./fal/index.js";
import googlePlugin from "./google/index.js";
import minimaxPlugin from "./minimax/index.js";
@@ -80,11 +81,10 @@ type LiveProviderCase = {
providerId: string;
};
type BufferedGeneratedVideo = Required<Pick<GeneratedVideoAsset, "buffer" | "mimeType">> &
Pick<GeneratedVideoAsset, "fileName">;
type LiveGeneratedVideo = GeneratedVideoAsset;
type LiveVideoAttemptStatus =
| { status: "success"; video: BufferedGeneratedVideo }
| { status: "success"; video: LiveGeneratedVideo }
| { status: "skip" }
| { status: "failure" };
@@ -101,6 +101,12 @@ const CASES: LiveProviderCase[] = [
pluginName: "BytePlus Provider",
providerId: "byteplus",
},
{
plugin: deepinfraPlugin,
pluginId: "deepinfra",
pluginName: "DeepInfra Provider",
providerId: "deepinfra",
},
{ plugin: falPlugin, pluginId: "fal", pluginName: "fal Provider", providerId: "fal" },
{ plugin: googlePlugin, pluginId: "google", pluginName: "Google Provider", providerId: "google" },
{
@@ -184,17 +190,18 @@ function maybeLoadShellEnvForVideoProviders(providerIds: string[]): void {
maybeLoadShellEnvForGenerationProviders(providerIds);
}
function expectBufferedVideo(
video: { buffer?: Buffer; mimeType: string; fileName?: string } | undefined,
): BufferedGeneratedVideo {
function expectGeneratedVideo(video: GeneratedVideoAsset | undefined): LiveGeneratedVideo {
expect(video).toBeDefined();
expect(video?.mimeType.startsWith("video/")).toBe(true);
if (!video?.buffer) {
throw new Error("expected generated video buffer");
if (video?.buffer) {
expect(video.buffer.byteLength).toBeGreaterThan(1024);
return video;
}
const { buffer, mimeType, fileName } = video;
expect(buffer.byteLength).toBeGreaterThan(1024);
return { buffer, mimeType, fileName };
if (!video?.url) {
throw new Error("expected generated video buffer or url");
}
expect(video.url).toMatch(/^https?:\/\//u);
return video;
}
function buildLiveCapabilityOverrides(params: {
@@ -262,7 +269,7 @@ async function runLiveVideoAttempt(params: {
try {
const result = await params.provider.generateVideo(params.request);
expect(result.videos.length).toBeGreaterThan(0);
const video = expectBufferedVideo(result.videos[0]);
const video = expectGeneratedVideo(result.videos[0]);
params.attempted.push(
`${params.providerId}:${params.mode}:${params.providerModel} (${params.authLabel})`,
);
@@ -392,7 +399,7 @@ async function runLiveVideoProviderCase(testCase: LiveProviderCase): Promise<voi
});
const liveSize = testCase.providerId === "openai" ? "1280x720" : undefined;
const logPrefix = `[live:video-generation] provider=${testCase.providerId} model=${providerModel}`;
let generatedVideo: BufferedGeneratedVideo | null = null;
let generatedVideo: LiveGeneratedVideo | null = null;
const generateAttempt = await runLiveVideoAttempt({
authLabel,
@@ -503,7 +510,7 @@ async function runLiveVideoProviderCase(testCase: LiveProviderCase): Promise<voi
return;
}
if (!generatedVideo?.buffer) {
skipped.push(`${testCase.providerId}:videoToVideo missing generated seed video`);
skipped.push(`${testCase.providerId}:videoToVideo missing buffer-backed generated seed video`);
expectLiveVideoCasePassed(summaryParams);
return;
}