mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-01 18:30:22 +00:00
refactor(media): move provider defaults into media metadata
This commit is contained in:
@@ -126,11 +126,12 @@ function resolveImageGenerationModelCandidates(
|
||||
providerDefaults.set(providerId, `${providerId}/${modelId}`);
|
||||
}
|
||||
|
||||
const primaryProvider = resolveDefaultModelRef(cfg).provider;
|
||||
const orderedProviders = [
|
||||
resolveDefaultModelRef(cfg).provider,
|
||||
"openai",
|
||||
"google",
|
||||
...providerDefaults.keys(),
|
||||
primaryProvider,
|
||||
...[...providerDefaults.keys()]
|
||||
.filter((providerId) => providerId !== primaryProvider)
|
||||
.toSorted(),
|
||||
];
|
||||
const orderedRefs: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
@@ -274,7 +274,7 @@ function createMinimaxImageConfig(): OpenClawConfig {
|
||||
function createDefaultImageFallbackExpectation(primary: string) {
|
||||
return {
|
||||
primary,
|
||||
fallbacks: ["openai/gpt-5-mini", "anthropic/claude-opus-4-5"],
|
||||
fallbacks: ["openai/gpt-5.4-mini", "anthropic/claude-opus-4-6"],
|
||||
};
|
||||
}
|
||||
|
||||
@@ -618,12 +618,12 @@ describe("image tool implicit imageModel config", () => {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "minimax/MiniMax-M2.7" },
|
||||
imageModel: { primary: "openai/gpt-5-mini" },
|
||||
imageModel: { primary: "openai/gpt-5.4-mini" },
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual({
|
||||
primary: "openai/gpt-5-mini",
|
||||
primary: "openai/gpt-5.4-mini",
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -638,7 +638,7 @@ describe("image tool implicit imageModel config", () => {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "acme/vision-1" },
|
||||
imageModel: { primary: "openai/gpt-5-mini" },
|
||||
imageModel: { primary: "openai/gpt-5.4-mini" },
|
||||
},
|
||||
},
|
||||
models: {
|
||||
@@ -652,7 +652,7 @@ describe("image tool implicit imageModel config", () => {
|
||||
};
|
||||
// Tool should still be available for explicit image analysis requests
|
||||
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual({
|
||||
primary: "openai/gpt-5-mini",
|
||||
primary: "openai/gpt-5.4-mini",
|
||||
});
|
||||
const tool = createImageTool({ config: cfg, agentDir, modelHasVision: true });
|
||||
expect(tool).not.toBeNull();
|
||||
@@ -1229,7 +1229,7 @@ describe("image tool response validation", () => {
|
||||
role: "assistant",
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
model: "gpt-5-mini",
|
||||
model: "gpt-5.4-mini",
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
usage: makeZeroUsageSnapshot(),
|
||||
@@ -1278,7 +1278,7 @@ describe("image tool response validation", () => {
|
||||
expect(() =>
|
||||
__testing.coerceImageAssistantText({
|
||||
provider: "openai",
|
||||
model: "gpt-5-mini",
|
||||
model: "gpt-5.4-mini",
|
||||
message,
|
||||
}),
|
||||
).toThrow(expectedError);
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import { resolve, isAbsolute } from "node:path";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
resolveAutoMediaKeyProviders,
|
||||
resolveDefaultMediaModel,
|
||||
} from "../../media-understanding/defaults.js";
|
||||
import { getMediaUnderstandingProvider } from "../../media-understanding/provider-registry.js";
|
||||
import { buildProviderRegistry } from "../../media-understanding/runner.js";
|
||||
import { loadWebMedia } from "../../media/web-media.js";
|
||||
@@ -40,8 +44,6 @@ import {
|
||||
} from "./tool-runtime.helpers.js";
|
||||
|
||||
const DEFAULT_PROMPT = "Describe the image.";
|
||||
const ANTHROPIC_IMAGE_PRIMARY = "anthropic/claude-opus-4-6";
|
||||
const ANTHROPIC_IMAGE_FALLBACK = "anthropic/claude-opus-4-5";
|
||||
const DEFAULT_MAX_IMAGES = 20;
|
||||
|
||||
const imageToolProviderDeps = {
|
||||
@@ -103,28 +105,39 @@ export function resolveImageModelConfigForTool(params: {
|
||||
provider: primary.provider,
|
||||
});
|
||||
const primaryCandidates = (() => {
|
||||
if (isMinimaxVlmProvider(primary.provider)) {
|
||||
return [`${primary.provider}/MiniMax-VL-01`];
|
||||
}
|
||||
if (providerVisionFromConfig) {
|
||||
return [providerVisionFromConfig];
|
||||
}
|
||||
if (primary.provider === "zai") {
|
||||
return ["zai/glm-4.6v"];
|
||||
const providerDefault = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId: primary.provider,
|
||||
capability: "image",
|
||||
});
|
||||
if (providerDefault) {
|
||||
return [`${primary.provider}/${providerDefault}`];
|
||||
}
|
||||
if (primary.provider === "openai") {
|
||||
return ["openai/gpt-5-mini"];
|
||||
}
|
||||
if (primary.provider === "anthropic") {
|
||||
return [ANTHROPIC_IMAGE_PRIMARY];
|
||||
if (isMinimaxVlmProvider(primary.provider)) {
|
||||
return [`${primary.provider}/MiniMax-VL-01`];
|
||||
}
|
||||
return [];
|
||||
})();
|
||||
|
||||
const autoCandidates = resolveAutoMediaKeyProviders({
|
||||
cfg: params.cfg,
|
||||
capability: "image",
|
||||
}).map((providerId) => {
|
||||
const modelId = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
capability: "image",
|
||||
});
|
||||
return modelId ? `${providerId}/${modelId}` : null;
|
||||
});
|
||||
|
||||
return buildToolModelConfigFromCandidates({
|
||||
explicit,
|
||||
agentDir: params.agentDir,
|
||||
candidates: [...primaryCandidates, "openai/gpt-5-mini", ANTHROPIC_IMAGE_FALLBACK],
|
||||
candidates: [...primaryCandidates, ...autoCandidates],
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -4,22 +4,16 @@ import {
|
||||
resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../../config/model-input.js";
|
||||
import { providerSupportsNativePdfDocument } from "../../media-understanding/defaults.js";
|
||||
import { extractAssistantText } from "../pi-embedded-utils.js";
|
||||
|
||||
export type PdfModelConfig = { primary?: string; fallbacks?: string[] };
|
||||
|
||||
/**
|
||||
* Providers known to support native PDF document input.
|
||||
* When the model's provider is in this set, the tool sends raw PDF bytes
|
||||
* via provider-specific API calls instead of extracting text/images first.
|
||||
*/
|
||||
export const NATIVE_PDF_PROVIDERS = new Set(["anthropic", "google"]);
|
||||
|
||||
/**
|
||||
* Check whether a provider supports native PDF document input.
|
||||
*/
|
||||
export function providerSupportsNativePdf(provider: string): boolean {
|
||||
return NATIVE_PDF_PROVIDERS.has(provider.toLowerCase().trim());
|
||||
return providerSupportsNativePdfDocument({ providerId: provider });
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -46,7 +46,7 @@ async function withTempAgentDir<T>(run: (agentDir: string) => Promise<T>): Promi
|
||||
}
|
||||
|
||||
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-6";
|
||||
const OPENAI_PDF_MODEL = "openai/gpt-5-mini";
|
||||
const OPENAI_PDF_MODEL = "openai/gpt-5.4-mini";
|
||||
const TEST_PDF_INPUT = { base64: "dGVzdA==", filename: "doc.pdf" } as const;
|
||||
const FAKE_PDF_MEDIA = {
|
||||
kind: "document",
|
||||
@@ -295,12 +295,12 @@ describe("resolvePdfModelConfigForTool", () => {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "openai/gpt-5.4" },
|
||||
imageModel: { primary: "openai/gpt-5-mini" },
|
||||
imageModel: { primary: "openai/gpt-5.4-mini" },
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(resolvePdfModelConfigForTool({ cfg, agentDir })).toEqual({
|
||||
primary: "openai/gpt-5-mini",
|
||||
primary: "openai/gpt-5.4-mini",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import { type Context, complete } from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
providerSupportsNativePdfDocument,
|
||||
resolveAutoMediaKeyProviders,
|
||||
resolveDefaultMediaModel,
|
||||
} from "../../media-understanding/defaults.js";
|
||||
import { extractPdfContent, type PdfExtractedContent } from "../../media/pdf-extract.js";
|
||||
import { loadWebMediaRaw } from "../../media/web-media.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
@@ -43,8 +48,6 @@ const DEFAULT_PROMPT = "Analyze this PDF document.";
|
||||
const DEFAULT_MAX_PDFS = 10;
|
||||
const DEFAULT_MAX_BYTES_MB = 10;
|
||||
const DEFAULT_MAX_PAGES = 20;
|
||||
const ANTHROPIC_PDF_PRIMARY = "anthropic/claude-opus-4-6";
|
||||
const ANTHROPIC_PDF_FALLBACK = "anthropic/claude-opus-4-5";
|
||||
|
||||
const PDF_MIN_TEXT_CHARS = 200;
|
||||
const PDF_MAX_PIXELS = 4_000_000;
|
||||
@@ -75,9 +78,7 @@ export function resolvePdfModelConfigForTool(params: {
|
||||
|
||||
// Auto-detect from available providers
|
||||
const primary = resolveDefaultModelRef(params.cfg);
|
||||
const anthropicOk = hasAuthForProvider({ provider: "anthropic", agentDir: params.agentDir });
|
||||
const googleOk = hasAuthForProvider({ provider: "google", agentDir: params.agentDir });
|
||||
const openaiOk = hasAuthForProvider({ provider: "openai", agentDir: params.agentDir });
|
||||
|
||||
const fallbacks: string[] = [];
|
||||
const addFallback = (ref: string) => {
|
||||
@@ -95,30 +96,54 @@ export function resolvePdfModelConfigForTool(params: {
|
||||
cfg: params.cfg,
|
||||
provider: primary.provider,
|
||||
});
|
||||
const providerDefault = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId: primary.provider,
|
||||
capability: "image",
|
||||
});
|
||||
const nativePdfCandidates = resolveAutoMediaKeyProviders({
|
||||
cfg: params.cfg,
|
||||
capability: "image",
|
||||
})
|
||||
.filter((providerId) => providerSupportsNativePdfDocument({ cfg: params.cfg, providerId }))
|
||||
.filter((providerId) => hasAuthForProvider({ provider: providerId, agentDir: params.agentDir }))
|
||||
.map((providerId) => {
|
||||
const modelId = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
capability: "image",
|
||||
});
|
||||
return modelId ? `${providerId}/${modelId}` : null;
|
||||
})
|
||||
.filter((value): value is string => Boolean(value));
|
||||
const genericImageCandidates = resolveAutoMediaKeyProviders({
|
||||
cfg: params.cfg,
|
||||
capability: "image",
|
||||
})
|
||||
.filter((providerId) => hasAuthForProvider({ provider: providerId, agentDir: params.agentDir }))
|
||||
.map((providerId) => {
|
||||
const modelId = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
capability: "image",
|
||||
});
|
||||
return modelId ? `${providerId}/${modelId}` : null;
|
||||
})
|
||||
.filter((value): value is string => Boolean(value));
|
||||
|
||||
if (primary.provider === "anthropic" && anthropicOk) {
|
||||
preferred = ANTHROPIC_PDF_PRIMARY;
|
||||
} else if (primary.provider === "google" && googleOk && providerVision) {
|
||||
if (primary.provider === "google" && googleOk && providerVision) {
|
||||
preferred = providerVision;
|
||||
} else if (providerOk && providerVision) {
|
||||
preferred = providerVision;
|
||||
} else if (anthropicOk) {
|
||||
preferred = ANTHROPIC_PDF_PRIMARY;
|
||||
} else if (googleOk) {
|
||||
preferred = "google/gemini-2.5-pro";
|
||||
} else if (openaiOk) {
|
||||
preferred = "openai/gpt-5-mini";
|
||||
} else if (providerOk && (providerVision || providerDefault)) {
|
||||
preferred = providerVision ?? `${primary.provider}/${providerDefault}`;
|
||||
} else {
|
||||
preferred = nativePdfCandidates[0] ?? genericImageCandidates[0] ?? null;
|
||||
}
|
||||
|
||||
if (preferred?.trim()) {
|
||||
if (anthropicOk && preferred !== ANTHROPIC_PDF_PRIMARY) {
|
||||
addFallback(ANTHROPIC_PDF_PRIMARY);
|
||||
}
|
||||
if (anthropicOk) {
|
||||
addFallback(ANTHROPIC_PDF_FALLBACK);
|
||||
}
|
||||
if (openaiOk) {
|
||||
addFallback("openai/gpt-5-mini");
|
||||
for (const candidate of [...nativePdfCandidates, ...genericImageCandidates]) {
|
||||
if (candidate !== preferred) {
|
||||
addFallback(candidate);
|
||||
}
|
||||
}
|
||||
const pruned = fallbacks.filter((ref) => ref !== preferred);
|
||||
return { primary: preferred, ...(pruned.length > 0 ? { fallbacks: pruned } : {}) };
|
||||
|
||||
Reference in New Issue
Block a user