mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 13:40:44 +00:00
fix(config): accept video and audio model inputs
Preserve configured audio/video model input modalities through provider catalog normalization.

Fixes #20721.
Thanks @alvinttang.
This commit is contained in:
@@ -74,6 +74,8 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Channels/status: keep read-only channel lists on manifest and package metadata by default, loading setup runtime only for explicit fallback callers. Thanks @shakkernerd.
|
- Channels/status: keep read-only channel lists on manifest and package metadata by default, loading setup runtime only for explicit fallback callers. Thanks @shakkernerd.
|
||||||
- Plugins/onboarding: defer onboarding install-record index writes until the guarded config commit so setup failures cannot leave the plugin index ahead of `openclaw.json`. Thanks @shakkernerd.
|
- Plugins/onboarding: defer onboarding install-record index writes until the guarded config commit so setup failures cannot leave the plugin index ahead of `openclaw.json`. Thanks @shakkernerd.
|
||||||
- Plugins/registry: resolve web provider ownership from the installed plugin index instead of broad manifest scans on secret, tool, and pricing paths. Thanks @shakkernerd.
|
- Plugins/registry: resolve web provider ownership from the installed plugin index instead of broad manifest scans on secret, tool, and pricing paths. Thanks @shakkernerd.
|
||||||
|
- Config/providers: accept `video` and `audio` in configured model `input` values and
|
||||||
|
preserve them in provider catalog entries. Fixes #20721. Thanks @alvinttang.
|
||||||
- TTS: strip model-emitted TTS directives from streamed block text before channel
|
- TTS: strip model-emitted TTS directives from streamed block text before channel
|
||||||
delivery, including directives split across adjacent blocks, while preserving
|
delivery, including directives split across adjacent blocks, while preserving
|
||||||
the accumulated raw reply for final-mode synthesis. Fixes #38937.
|
the accumulated raw reply for final-mode synthesis. Fixes #38937.
|
||||||
|
|||||||
@@ -269,7 +269,7 @@ export type LmstudioModelBase = {
|
|||||||
trainedForToolUse: boolean;
|
trainedForToolUse: boolean;
|
||||||
loaded: boolean;
|
loaded: boolean;
|
||||||
reasoning: boolean;
|
reasoning: boolean;
|
||||||
input: ModelDefinitionConfig["input"];
|
input: Array<"text" | "image">;
|
||||||
cost: ModelDefinitionConfig["cost"];
|
cost: ModelDefinitionConfig["cost"];
|
||||||
contextWindow: number;
|
contextWindow: number;
|
||||||
contextTokens: number;
|
contextTokens: number;
|
||||||
|
|||||||
@@ -822,6 +822,9 @@ export async function prepareLmstudioDynamicModels(
|
|||||||
provider: PROVIDER_ID,
|
provider: PROVIDER_ID,
|
||||||
api: ctx.providerConfig?.api ?? `openai-completions`,
|
api: ctx.providerConfig?.api ?? `openai-completions`,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
|
input: model.input.filter(
|
||||||
|
(entry): entry is "text" | "image" => entry === "text" || entry === "image",
|
||||||
|
),
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
export type ModelInputType = "text" | "image" | "document";
|
export type ModelInputType = "text" | "image" | "audio" | "video" | "document";
|
||||||
|
|
||||||
export type ModelCatalogEntry = {
|
export type ModelCatalogEntry = {
|
||||||
id: string;
|
id: string;
|
||||||
|
|||||||
@@ -2908,6 +2908,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
|||||||
type: "string",
|
type: "string",
|
||||||
const: "image",
|
const: "image",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
type: "string",
|
||||||
|
const: "video",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: "string",
|
||||||
|
const: "audio",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ export type ModelDefinitionConfig = {
|
|||||||
api?: ModelApi;
|
api?: ModelApi;
|
||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
reasoning: boolean;
|
reasoning: boolean;
|
||||||
input: Array<"text" | "image">;
|
input: Array<"text" | "image" | "video" | "audio">;
|
||||||
cost: {
|
cost: {
|
||||||
input: number;
|
input: number;
|
||||||
output: number;
|
output: number;
|
||||||
|
|||||||
@@ -312,7 +312,11 @@ export const ModelDefinitionSchema = z
|
|||||||
api: ModelApiSchema.optional(),
|
api: ModelApiSchema.optional(),
|
||||||
baseUrl: z.string().min(1).optional(),
|
baseUrl: z.string().min(1).optional(),
|
||||||
reasoning: z.boolean().optional(),
|
reasoning: z.boolean().optional(),
|
||||||
input: z.array(z.union([z.literal("text"), z.literal("image")])).optional(),
|
input: z
|
||||||
|
.array(
|
||||||
|
z.union([z.literal("text"), z.literal("image"), z.literal("video"), z.literal("audio")]),
|
||||||
|
)
|
||||||
|
.optional(),
|
||||||
cost: z
|
cost: z
|
||||||
.object({
|
.object({
|
||||||
input: z.number().optional(),
|
input: z.number().optional(),
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ export type LmstudioModelBase = {
|
|||||||
trainedForToolUse: boolean;
|
trainedForToolUse: boolean;
|
||||||
loaded: boolean;
|
loaded: boolean;
|
||||||
reasoning: boolean;
|
reasoning: boolean;
|
||||||
input: ModelDefinitionConfig["input"];
|
input: Array<"text" | "image">;
|
||||||
cost: ModelDefinitionConfig["cost"];
|
cost: ModelDefinitionConfig["cost"];
|
||||||
contextWindow: number;
|
contextWindow: number;
|
||||||
contextTokens: number;
|
contextTokens: number;
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import {
|
import {
|
||||||
applyProviderNativeStreamingUsageCompat,
|
applyProviderNativeStreamingUsageCompat,
|
||||||
|
readConfiguredProviderCatalogEntries,
|
||||||
supportsNativeStreamingUsageCompat,
|
supportsNativeStreamingUsageCompat,
|
||||||
} from "./provider-catalog-shared.js";
|
} from "./provider-catalog-shared.js";
|
||||||
import type { ModelDefinitionConfig } from "./provider-model-shared.js";
|
import type { ModelDefinitionConfig } from "./provider-model-shared.js";
|
||||||
@@ -54,3 +55,43 @@ describe("provider-catalog-shared native streaming usage compat", () => {
|
|||||||
expect(provider.models?.[1]?.compat?.supportsUsageInStreaming).toBe(false);
|
expect(provider.models?.[1]?.compat?.supportsUsageInStreaming).toBe(false);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("provider-catalog-shared configured catalog entries", () => {
|
||||||
|
it("preserves configured audio and video input modalities", () => {
|
||||||
|
expect(
|
||||||
|
readConfiguredProviderCatalogEntries({
|
||||||
|
providerId: "kilocode",
|
||||||
|
config: {
|
||||||
|
models: {
|
||||||
|
providers: {
|
||||||
|
kilocode: {
|
||||||
|
baseUrl: "https://api.kilo.ai/api/gateway/",
|
||||||
|
api: "openai-completions",
|
||||||
|
models: [
|
||||||
|
{
|
||||||
|
id: "google/gemini-3-pro-preview",
|
||||||
|
name: "Gemini 3 Pro Preview",
|
||||||
|
input: ["text", "image", "video", "audio"],
|
||||||
|
reasoning: true,
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
contextWindow: 1048576,
|
||||||
|
maxTokens: 65536,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
).toEqual([
|
||||||
|
{
|
||||||
|
provider: "kilocode",
|
||||||
|
id: "google/gemini-3-pro-preview",
|
||||||
|
name: "Gemini 3 Pro Preview",
|
||||||
|
input: ["text", "image", "video", "audio"],
|
||||||
|
reasoning: true,
|
||||||
|
contextWindow: 1048576,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ export type ConfiguredProviderCatalogEntry = {
|
|||||||
provider: string;
|
provider: string;
|
||||||
contextWindow?: number;
|
contextWindow?: number;
|
||||||
reasoning?: boolean;
|
reasoning?: boolean;
|
||||||
input?: Array<"text" | "image" | "document">;
|
input?: Array<"text" | "image" | "audio" | "video" | "document">;
|
||||||
};
|
};
|
||||||
|
|
||||||
function normalizeConfiguredCatalogModelInput(
|
function normalizeConfiguredCatalogModelInput(
|
||||||
@@ -33,8 +33,12 @@ function normalizeConfiguredCatalogModelInput(
|
|||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
const normalized = input.filter(
|
const normalized = input.filter(
|
||||||
(item): item is "text" | "image" | "document" =>
|
(item): item is "text" | "image" | "audio" | "video" | "document" =>
|
||||||
item === "text" || item === "image" || item === "document",
|
item === "text" ||
|
||||||
|
item === "image" ||
|
||||||
|
item === "audio" ||
|
||||||
|
item === "video" ||
|
||||||
|
item === "document",
|
||||||
);
|
);
|
||||||
return normalized.length > 0 ? normalized : undefined;
|
return normalized.length > 0 ? normalized : undefined;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user