mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 09:41:11 +00:00
perf(pdf): remove media/runtime lookup overhead
This commit is contained in:
@@ -27,6 +27,7 @@ export {
|
||||
export {
|
||||
clearRuntimeAuthProfileStoreSnapshots,
|
||||
ensureAuthProfileStore,
|
||||
hasAnyAuthProfileStoreSource,
|
||||
loadAuthProfileStoreForSecretsRuntime,
|
||||
loadAuthProfileStoreForRuntime,
|
||||
replaceRuntimeAuthProfileStoreSnapshots,
|
||||
|
||||
@@ -4,7 +4,11 @@ import {
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../../config/model-input.js";
|
||||
import type { AgentModelConfig } from "../../config/types.agents-shared.js";
|
||||
import { ensureAuthProfileStore, listProfilesForProvider } from "../auth-profiles.js";
|
||||
import {
|
||||
ensureAuthProfileStore,
|
||||
hasAnyAuthProfileStoreSource,
|
||||
listProfilesForProvider,
|
||||
} from "../auth-profiles.js";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||||
import { resolveEnvApiKey } from "../model-auth.js";
|
||||
import { resolveConfiguredModelRef } from "../model-selection.js";
|
||||
@@ -37,6 +41,9 @@ export function hasAuthForProvider(params: { provider: string; agentDir?: string
|
||||
if (!agentDir) {
|
||||
return false;
|
||||
}
|
||||
if (!hasAnyAuthProfileStoreSource(agentDir)) {
|
||||
return false;
|
||||
}
|
||||
const store = ensureAuthProfileStore(agentDir, {
|
||||
allowKeychainPrompt: false,
|
||||
});
|
||||
|
||||
130
src/agents/tools/pdf-tool.helpers.test.ts
Normal file
130
src/agents/tools/pdf-tool.helpers.test.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
coercePdfAssistantText,
|
||||
coercePdfModelConfig,
|
||||
parsePageRange,
|
||||
providerSupportsNativePdf,
|
||||
resolvePdfToolMaxTokens,
|
||||
} from "./pdf-tool.helpers.js";
|
||||
|
||||
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-6";
|
||||
|
||||
// Exercises parsePageRange: accepted specs, rejected specs, and tolerance of empty parts.
describe("parsePageRange", () => {
  // Specs that parse successfully, with the page list each one must produce.
  const accepted: Array<{ title: string; spec: string; maxPages: number; pages: number[] }> = [
    { title: "parses a single page number", spec: "3", maxPages: 20, pages: [3] },
    { title: "parses a page range", spec: "1-5", maxPages: 20, pages: [1, 2, 3, 4, 5] },
    {
      title: "parses comma-separated pages and ranges",
      spec: "1,3,5-7",
      maxPages: 20,
      pages: [1, 3, 5, 6, 7],
    },
    { title: "clamps to maxPages", spec: "1-100", maxPages: 5, pages: [1, 2, 3, 4, 5] },
    { title: "deduplicates and sorts", spec: "5,3,1,3,5", maxPages: 20, pages: [1, 3, 5] },
  ];

  for (const { title, spec, maxPages, pages } of accepted) {
    it(title, () => {
      expect(parsePageRange(spec, maxPages)).toEqual(pages);
    });
  }

  // Specs that must be rejected, with the expected error message fragment.
  const rejected: Array<{ title: string; spec: string; message: string }> = [
    { title: "throws on invalid page number", spec: "abc", message: "Invalid page number" },
    { title: "throws on invalid range (start > end)", spec: "5-3", message: "Invalid page range" },
    { title: "throws on zero page number", spec: "0", message: "Invalid page number" },
    { title: "throws on negative page number", spec: "-1", message: "Invalid page number" },
  ];

  for (const { title, spec, message } of rejected) {
    it(title, () => {
      const attempt = () => parsePageRange(spec, 20);
      expect(attempt).toThrow(message);
    });
  }

  it("handles empty parts gracefully", () => {
    const pages = parsePageRange("1,,3", 20);
    expect(pages).toEqual([1, 3]);
  });
});
|
||||
|
||||
// Checks which provider ids are reported as accepting native PDF document input.
describe("providerSupportsNativePdf", () => {
  // Provider id paired with the expected support flag.
  const expectations: Array<[provider: string, supported: boolean]> = [
    ["anthropic", true],
    ["google", true],
    ["openai", false],
    ["minimax", false],
  ];

  for (const [provider, supported] of expectations) {
    it(`returns ${supported} for ${provider}`, () => {
      expect(providerSupportsNativePdf(provider)).toBe(supported);
    });
  }

  it("is case-insensitive", () => {
    for (const provider of ["Anthropic", "GOOGLE"]) {
      expect(providerSupportsNativePdf(provider)).toBe(true);
    }
  });
});
|
||||
|
||||
// Unit tests for the small pure helpers exported from pdf-tool.helpers.
describe("pdf-tool.helpers", () => {
  it("resolvePdfToolMaxTokens respects model limit", () => {
    const modelLimit = 4096;
    // Requested value paired with the value expected after applying the limit.
    const cases: Array<[requested: number | undefined, expected: number]> = [
      [2048, 2048],
      [8192, 4096],
      [undefined, 4096],
    ];
    for (const [requested, expected] of cases) {
      expect(resolvePdfToolMaxTokens(requested, modelLimit)).toBe(expected);
    }
  });

  it("coercePdfModelConfig reads primary and fallbacks", () => {
    const pdfModel = {
      primary: ANTHROPIC_PDF_MODEL,
      fallbacks: ["google/gemini-2.5-pro"],
    };
    const cfg = { agents: { defaults: { pdfModel } } } as OpenClawConfig;

    const resolved = coercePdfModelConfig(cfg);

    expect(resolved).toEqual({
      primary: ANTHROPIC_PDF_MODEL,
      fallbacks: ["google/gemini-2.5-pro"],
    });
  });

  it("coercePdfAssistantText returns trimmed text", () => {
    // The text block carries surrounding whitespace that the helper must strip.
    const message = {
      role: "assistant",
      stopReason: "stop",
      content: [{ type: "text", text: " summary " }],
    } as never;

    const text = coercePdfAssistantText({
      provider: "anthropic",
      model: "claude-opus-4-6",
      message,
    });

    expect(text).toBe("summary");
  });

  it("coercePdfAssistantText throws clear error for failed model output", () => {
    // An error stop reason with no content must surface provider/model and the error message.
    const message = {
      role: "assistant",
      stopReason: "error",
      errorMessage: "bad request",
      content: [],
    } as never;

    const attempt = () =>
      coercePdfAssistantText({
        provider: "google",
        model: "gemini-2.5-pro",
        message,
      });

    expect(attempt).toThrow("PDF model failed (google/gemini-2.5-pro): bad request");
  });
});
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../../config/model-input.js";
|
||||
import { providerSupportsNativePdfDocument } from "../../media-understanding/defaults.js";
|
||||
import { bundledProviderSupportsNativePdfDocument } from "../../media-understanding/bundled-defaults.js";
|
||||
import { extractAssistantText } from "../pi-embedded-utils.js";
|
||||
|
||||
export type PdfModelConfig = { primary?: string; fallbacks?: string[] };
|
||||
@@ -13,7 +13,7 @@ export type PdfModelConfig = { primary?: string; fallbacks?: string[] };
|
||||
* Check whether a provider supports native PDF document input.
|
||||
*/
|
||||
export function providerSupportsNativePdf(provider: string): boolean {
|
||||
return providerSupportsNativePdfDocument({ providerId: provider });
|
||||
return bundledProviderSupportsNativePdfDocument(provider);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
103
src/agents/tools/pdf-tool.model-config.test.ts
Normal file
103
src/agents/tools/pdf-tool.model-config.test.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";
|
||||
|
||||
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-6";
|
||||
|
||||
/**
 * Create a fresh temporary directory (prefix "openclaw-pdf-" under the OS temp dir),
 * hand it to `run`, and remove it recursively afterwards whether `run` resolves or throws.
 *
 * @param run callback receiving the temporary agent directory path
 * @returns whatever `run` resolves to
 */
async function withTempAgentDir<T>(run: (agentDir: string) => Promise<T>): Promise<T> {
  const prefix = path.join(os.tmpdir(), "openclaw-pdf-");
  const agentDir = await fs.mkdtemp(prefix);
  const removeAgentDir = () => fs.rm(agentDir, { recursive: true, force: true });
  try {
    const outcome = await run(agentDir);
    return outcome;
  } finally {
    // Always clean up, even when the callback rejects.
    await removeAgentDir();
  }
}
|
||||
|
||||
/**
 * Stub every provider credential environment variable used by these tests to an empty
 * string so that auth detection starts from a known "no credentials" state.
 */
function resetAuthEnv() {
  const authEnvNames = [
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_OAUTH_TOKEN",
    "GEMINI_API_KEY",
    "GOOGLE_API_KEY",
    "MINIMAX_API_KEY",
    "ZAI_API_KEY",
    "Z_AI_API_KEY",
    "COPILOT_GITHUB_TOKEN",
    "GH_TOKEN",
    "GITHUB_TOKEN",
  ];
  for (const envName of authEnvNames) {
    vi.stubEnv(envName, "");
  }
}
|
||||
|
||||
/**
 * Build a minimal config whose only setting is the default agent model's primary ref.
 *
 * @param primary model reference to place at `agents.defaults.model.primary`
 */
function withDefaultModel(primary: string): OpenClawConfig {
  const model = { primary };
  const defaults = { model };
  return { agents: { defaults } } as OpenClawConfig;
}
|
||||
|
||||
// Covers the resolution order of resolvePdfModelConfigForTool: explicit pdfModel,
// explicit imageModel, then auto-selection driven by which provider credentials exist.
describe("resolvePdfModelConfigForTool", () => {
  // Start every test with all credential env vars blanked; undo the stubs afterwards.
  beforeEach(() => {
    resetAuthEnv();
  });

  afterEach(() => {
    vi.unstubAllEnvs();
  });

  it("returns null without any auth", () =>
    withTempAgentDir(async (agentDir) => {
      const cfg = withDefaultModel("openai/gpt-5.4");
      const resolved = resolvePdfModelConfigForTool({ cfg, agentDir });
      expect(resolved).toBeNull();
    }));

  it("prefers explicit pdfModel config", () =>
    withTempAgentDir(async (agentDir) => {
      const defaults = {
        model: { primary: "openai/gpt-5.4" },
        pdfModel: { primary: ANTHROPIC_PDF_MODEL },
      };
      const cfg = { agents: { defaults } } as OpenClawConfig;
      const resolved = resolvePdfModelConfigForTool({ cfg, agentDir });
      expect(resolved).toEqual({
        primary: ANTHROPIC_PDF_MODEL,
      });
    }));

  it("falls back to imageModel config when no pdfModel set", () =>
    withTempAgentDir(async (agentDir) => {
      const defaults = {
        model: { primary: "openai/gpt-5.4" },
        imageModel: { primary: "openai/gpt-5.4-mini" },
      };
      const cfg = { agents: { defaults } } as OpenClawConfig;
      const resolved = resolvePdfModelConfigForTool({ cfg, agentDir });
      expect(resolved).toEqual({
        primary: "openai/gpt-5.4-mini",
      });
    }));

  it("prefers anthropic when available for native PDF support", () =>
    withTempAgentDir(async (agentDir) => {
      // Both providers have credentials; the default model's provider is openai.
      vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
      vi.stubEnv("OPENAI_API_KEY", "openai-test");
      const cfg = withDefaultModel("openai/gpt-5.4");
      const resolved = resolvePdfModelConfigForTool({ cfg, agentDir });
      expect(resolved?.primary).toBe(ANTHROPIC_PDF_MODEL);
    }));

  it("uses anthropic primary when provider is anthropic", () =>
    withTempAgentDir(async (agentDir) => {
      vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
      const cfg = withDefaultModel(ANTHROPIC_PDF_MODEL);
      const resolved = resolvePdfModelConfigForTool({ cfg, agentDir });
      expect(resolved?.primary).toBe(ANTHROPIC_PDF_MODEL);
    }));
});
|
||||
125
src/agents/tools/pdf-tool.model-config.ts
Normal file
125
src/agents/tools/pdf-tool.model-config.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
bundledProviderSupportsNativePdfDocument,
|
||||
resolveBundledAutoMediaKeyProviders,
|
||||
resolveBundledDefaultMediaModel,
|
||||
} from "../../media-understanding/bundled-defaults.js";
|
||||
import {
|
||||
coerceImageModelConfig,
|
||||
type ImageModelConfig,
|
||||
resolveProviderVisionModelFromConfig,
|
||||
} from "./image-tool.helpers.js";
|
||||
import { hasAuthForProvider, resolveDefaultModelRef } from "./model-config.helpers.js";
|
||||
import { coercePdfModelConfig } from "./pdf-tool.helpers.js";
|
||||
|
||||
/**
 * Resolve the model configuration (primary plus optional fallbacks) for the PDF tool.
 *
 * Resolution order:
 *   1. If `coercePdfModelConfig` yields a primary or any fallbacks, that config is returned as-is.
 *   2. Otherwise, if `coerceImageModelConfig` yields a primary or any fallbacks, that is returned.
 *   3. Otherwise a model is auto-selected:
 *      - the default model's provider, when it has auth, supports native PDF documents and
 *        has a vision model (from config) or a bundled default image model;
 *      - else the first authenticated bundled provider that supports native PDF documents;
 *      - else the first authenticated image-capable candidate (bundled or configured).
 *      All remaining candidates become fallbacks, native-PDF candidates first.
 *
 * Auth lookups are memoised per call so each provider is checked at most once.
 *
 * @param params.cfg optional application config
 * @param params.agentDir agent directory passed to the auth lookup
 * @returns the resolved config, or `null` when no candidate could be selected
 */
export function resolvePdfModelConfigForTool(params: {
  cfg?: OpenClawConfig;
  agentDir: string;
}): ImageModelConfig | null {
  const explicitPdf = coercePdfModelConfig(params.cfg);
  if (explicitPdf.primary?.trim() || (explicitPdf.fallbacks?.length ?? 0) > 0) {
    return explicitPdf;
  }

  const explicitImage = coerceImageModelConfig(params.cfg);
  if (explicitImage.primary?.trim() || (explicitImage.fallbacks?.length ?? 0) > 0) {
    return explicitImage;
  }

  // Memoise auth checks: the same provider can show up as the primary provider, as a
  // bundled candidate, and as a configured provider.
  const authByProvider = new Map<string, boolean>();
  const hasAuth = (provider: string): boolean => {
    const known = authByProvider.get(provider);
    if (known !== undefined) {
      return known;
    }
    const ok = hasAuthForProvider({ provider, agentDir: params.agentDir });
    authByProvider.set(provider, ok);
    return ok;
  };

  // Image model id for a provider: second "/"-separated segment of the configured vision
  // model ref when present, otherwise the bundled default image model.
  const resolveImageModelId = (providerId: string) =>
    resolveProviderVisionModelFromConfig({
      cfg: params.cfg,
      provider: providerId,
    })?.split("/")[1] ??
    resolveBundledDefaultMediaModel({
      providerId,
      capability: "image",
    });

  const primary = resolveDefaultModelRef(params.cfg);
  const providerOk = hasAuth(primary.provider);
  const providerVision = resolveProviderVisionModelFromConfig({
    cfg: params.cfg,
    provider: primary.provider,
  });
  const providerDefault =
    providerVision?.split("/")[1] ??
    resolveBundledDefaultMediaModel({
      providerId: primary.provider,
      capability: "image",
    });
  const primarySupportsNativePdf = bundledProviderSupportsNativePdfDocument(primary.provider);

  // Single pass over the bundled image-capable providers that have auth; the native-PDF
  // list is the subset of these whose provider supports native PDF documents.
  const nativePdfCandidates: string[] = [];
  const genericImageCandidates: string[] = [];
  for (const providerId of resolveBundledAutoMediaKeyProviders("image")) {
    if (!hasAuth(providerId)) {
      continue;
    }
    const modelId = resolveImageModelId(providerId);
    if (!modelId) {
      continue;
    }
    const ref = `${providerId}/${modelId}`;
    genericImageCandidates.push(ref);
    if (bundledProviderSupportsNativePdfDocument(providerId)) {
      nativePdfCandidates.push(ref);
    }
  }

  // Add the first image-capable model of each authenticated provider declared in config.
  if (params.cfg?.models?.providers && typeof params.cfg.models.providers === "object") {
    for (const [providerKey, providerCfg] of Object.entries(params.cfg.models.providers)) {
      const providerId = providerKey.trim();
      if (!providerId || !hasAuth(providerId)) {
        continue;
      }
      const models = providerCfg?.models ?? [];
      const modelId = models
        .find(
          (model) =>
            Boolean(model?.id?.trim()) &&
            Array.isArray(model?.input) &&
            model.input.includes("image"),
        )
        ?.id?.trim();
      if (!modelId) {
        continue;
      }
      const ref = `${providerId}/${modelId}`;
      if (!genericImageCandidates.includes(ref)) {
        genericImageCandidates.push(ref);
      }
    }
  }

  // A dedicated "google" branch is unnecessary: when the primary provider is google with
  // auth, native PDF support and a vision model, this condition holds and selects the
  // same vision model.
  let preferred: string | null;
  if (providerOk && primarySupportsNativePdf && (providerVision || providerDefault)) {
    preferred = providerVision ?? `${primary.provider}/${providerDefault}`;
  } else {
    preferred = nativePdfCandidates[0] ?? genericImageCandidates[0] ?? null;
  }

  if (!preferred?.trim()) {
    return null;
  }

  // Fallbacks: every other candidate, trimmed and de-duplicated, native-PDF ones first.
  const fallbacks: string[] = [];
  for (const candidate of [...nativePdfCandidates, ...genericImageCandidates]) {
    if (candidate === preferred) {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed && !fallbacks.includes(trimmed)) {
      fallbacks.push(trimmed);
    }
  }
  const pruned = fallbacks.filter((ref) => ref !== preferred);
  return { primary: preferred, ...(pruned.length > 0 ? { fallbacks: pruned } : {}) };
}
|
||||
@@ -1,7 +1,7 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import * as pdfExtractModule from "../../media/pdf-extract.js";
|
||||
import * as webMedia from "../../media/web-media.js";
|
||||
@@ -10,13 +10,6 @@ import { modelSupportsDocument } from "../model-catalog.js";
|
||||
import * as modelsConfig from "../models-config.js";
|
||||
import * as modelDiscovery from "../pi-model-discovery.js";
|
||||
import * as pdfNativeProviders from "./pdf-native-providers.js";
|
||||
import {
|
||||
coercePdfAssistantText,
|
||||
coercePdfModelConfig,
|
||||
parsePageRange,
|
||||
providerSupportsNativePdf,
|
||||
resolvePdfToolMaxTokens,
|
||||
} from "./pdf-tool.helpers.js";
|
||||
|
||||
const completeMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
@@ -30,11 +23,13 @@ vi.mock("@mariozechner/pi-ai", async () => {
|
||||
|
||||
type PdfToolModule = typeof import("./pdf-tool.js");
|
||||
let createPdfTool: PdfToolModule["createPdfTool"];
|
||||
let resolvePdfModelConfigForTool: PdfToolModule["resolvePdfModelConfigForTool"];
|
||||
|
||||
beforeAll(async () => {
|
||||
({ createPdfTool, resolvePdfModelConfigForTool } = await import("./pdf-tool.js"));
|
||||
});
|
||||
/**
 * Lazily import "./pdf-tool.js" the first time it is needed and cache its `createPdfTool`
 * export in the module-level `createPdfTool` binding; later calls reuse the cached value.
 */
async function loadCreatePdfTool() {
  if (createPdfTool) {
    return createPdfTool;
  }
  const pdfToolModule = await import("./pdf-tool.js");
  createPdfTool = pdfToolModule.createPdfTool;
  return createPdfTool;
}
|
||||
|
||||
async function withTempAgentDir<T>(run: (agentDir: string) => Promise<T>): Promise<T> {
|
||||
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-pdf-"));
|
||||
@@ -55,7 +50,7 @@ const FAKE_PDF_MEDIA = {
|
||||
fileName: "doc.pdf",
|
||||
} as const;
|
||||
|
||||
function requirePdfTool(tool: ReturnType<typeof createPdfTool>) {
|
||||
function requirePdfTool(tool: Awaited<ReturnType<typeof loadCreatePdfTool>> extends (...args: any[]) => infer R ? R : never) {
|
||||
expect(tool).not.toBeNull();
|
||||
if (!tool) {
|
||||
throw new Error("expected pdf tool");
|
||||
@@ -71,7 +66,7 @@ async function withAnthropicPdfTool(
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
|
||||
const cfg = withDefaultModel(ANTHROPIC_PDF_MODEL);
|
||||
const tool = requirePdfTool(createPdfTool({ config: cfg, agentDir }));
|
||||
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
|
||||
await run(tool, agentDir);
|
||||
});
|
||||
}
|
||||
@@ -87,7 +82,7 @@ function makeAnthropicAnalyzeParams(
|
||||
}> = {},
|
||||
) {
|
||||
return {
|
||||
apiKey: "test-key", // pragma: allowlist secret
|
||||
apiKey: "test-key",
|
||||
modelId: "claude-opus-4-6",
|
||||
prompt: "test",
|
||||
pdfs: [TEST_PDF_INPUT],
|
||||
@@ -105,7 +100,7 @@ function makeGeminiAnalyzeParams(
|
||||
}> = {},
|
||||
) {
|
||||
return {
|
||||
apiKey: "test-key", // pragma: allowlist secret
|
||||
apiKey: "test-key",
|
||||
modelId: "gemini-2.5-pro",
|
||||
prompt: "test",
|
||||
pdfs: [TEST_PDF_INPUT],
|
||||
@@ -168,169 +163,12 @@ async function stubPdfToolInfra(
|
||||
wrote: false,
|
||||
});
|
||||
|
||||
vi.spyOn(modelAuth, "getApiKeyForModel").mockResolvedValue({ apiKey: "test-key" } as never); // pragma: allowlist secret
|
||||
vi.spyOn(modelAuth, "getApiKeyForModel").mockResolvedValue({ apiKey: "test-key" } as never);
|
||||
vi.spyOn(modelAuth, "requireApiKey").mockReturnValue("test-key");
|
||||
|
||||
return { loadSpy };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// parsePageRange tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("parsePageRange", () => {
|
||||
it("parses a single page number", () => {
|
||||
expect(parsePageRange("3", 20)).toEqual([3]);
|
||||
});
|
||||
|
||||
it("parses a page range", () => {
|
||||
expect(parsePageRange("1-5", 20)).toEqual([1, 2, 3, 4, 5]);
|
||||
});
|
||||
|
||||
it("parses comma-separated pages and ranges", () => {
|
||||
expect(parsePageRange("1,3,5-7", 20)).toEqual([1, 3, 5, 6, 7]);
|
||||
});
|
||||
|
||||
it("clamps to maxPages", () => {
|
||||
expect(parsePageRange("1-100", 5)).toEqual([1, 2, 3, 4, 5]);
|
||||
});
|
||||
|
||||
it("deduplicates and sorts", () => {
|
||||
expect(parsePageRange("5,3,1,3,5", 20)).toEqual([1, 3, 5]);
|
||||
});
|
||||
|
||||
it("throws on invalid page number", () => {
|
||||
expect(() => parsePageRange("abc", 20)).toThrow("Invalid page number");
|
||||
});
|
||||
|
||||
it("throws on invalid range (start > end)", () => {
|
||||
expect(() => parsePageRange("5-3", 20)).toThrow("Invalid page range");
|
||||
});
|
||||
|
||||
it("throws on zero page number", () => {
|
||||
expect(() => parsePageRange("0", 20)).toThrow("Invalid page number");
|
||||
});
|
||||
|
||||
it("throws on negative page number", () => {
|
||||
expect(() => parsePageRange("-1", 20)).toThrow("Invalid page number");
|
||||
});
|
||||
|
||||
it("handles empty parts gracefully", () => {
|
||||
expect(parsePageRange("1,,3", 20)).toEqual([1, 3]);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// providerSupportsNativePdf tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("providerSupportsNativePdf", () => {
|
||||
it("returns true for anthropic", () => {
|
||||
expect(providerSupportsNativePdf("anthropic")).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true for google", () => {
|
||||
expect(providerSupportsNativePdf("google")).toBe(true);
|
||||
});
|
||||
|
||||
it("returns false for openai", () => {
|
||||
expect(providerSupportsNativePdf("openai")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false for minimax", () => {
|
||||
expect(providerSupportsNativePdf("minimax")).toBe(false);
|
||||
});
|
||||
|
||||
it("is case-insensitive", () => {
|
||||
expect(providerSupportsNativePdf("Anthropic")).toBe(true);
|
||||
expect(providerSupportsNativePdf("GOOGLE")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PDF model config resolution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("resolvePdfModelConfigForTool", () => {
|
||||
const priorFetch = global.fetch;
|
||||
|
||||
beforeEach(() => {
|
||||
resetAuthEnv();
|
||||
completeMock.mockReset();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllEnvs();
|
||||
global.fetch = priorFetch;
|
||||
});
|
||||
|
||||
it("returns null without any auth", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: { defaults: { model: { primary: "openai/gpt-5.4" } } },
|
||||
};
|
||||
expect(resolvePdfModelConfigForTool({ cfg, agentDir })).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it("prefers explicit pdfModel config", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "openai/gpt-5.4" },
|
||||
pdfModel: { primary: "anthropic/claude-opus-4-6" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
expect(resolvePdfModelConfigForTool({ cfg, agentDir })).toEqual({
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to imageModel config when no pdfModel set", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "openai/gpt-5.4" },
|
||||
imageModel: { primary: "openai/gpt-5.4-mini" },
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(resolvePdfModelConfigForTool({ cfg, agentDir })).toEqual({
|
||||
primary: "openai/gpt-5.4-mini",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("prefers anthropic when available for native PDF support", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
|
||||
vi.stubEnv("OPENAI_API_KEY", "openai-test");
|
||||
const cfg = withDefaultModel("openai/gpt-5.4");
|
||||
const config = resolvePdfModelConfigForTool({ cfg, agentDir });
|
||||
expect(config).not.toBeNull();
|
||||
// Should prefer anthropic for native PDF
|
||||
expect(config?.primary).toBe(ANTHROPIC_PDF_MODEL);
|
||||
});
|
||||
});
|
||||
|
||||
it("uses anthropic primary when provider is anthropic", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
|
||||
const cfg = withDefaultModel(ANTHROPIC_PDF_MODEL);
|
||||
const config = resolvePdfModelConfigForTool({ cfg, agentDir });
|
||||
expect(config?.primary).toBe(ANTHROPIC_PDF_MODEL);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// createPdfTool
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("createPdfTool", () => {
|
||||
const priorFetch = global.fetch;
|
||||
|
||||
@@ -345,22 +183,14 @@ describe("createPdfTool", () => {
|
||||
global.fetch = priorFetch;
|
||||
});
|
||||
|
||||
it("returns null without agentDir and no explicit config", () => {
|
||||
expect(createPdfTool()).toBeNull();
|
||||
it("returns null without agentDir and no explicit config", async () => {
|
||||
expect((await loadCreatePdfTool())()).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null without any auth configured", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: { defaults: { model: { primary: "openai/gpt-5.4" } } },
|
||||
};
|
||||
expect(createPdfTool({ config: cfg, agentDir })).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it("throws when agentDir missing but explicit config present", () => {
|
||||
it("throws when agentDir missing but explicit config present", async () => {
|
||||
const cfg = withPdfModel(ANTHROPIC_PDF_MODEL);
|
||||
expect(() => createPdfTool({ config: cfg })).toThrow("requires agentDir");
|
||||
const createTool = await loadCreatePdfTool();
|
||||
expect(() => createTool({ config: cfg })).toThrow("requires agentDir");
|
||||
});
|
||||
|
||||
it("creates tool when auth is available", async () => {
|
||||
@@ -395,7 +225,7 @@ describe("createPdfTool", () => {
|
||||
try {
|
||||
const cfg = withDefaultModel(ANTHROPIC_PDF_MODEL);
|
||||
const tool = requirePdfTool(
|
||||
createPdfTool({
|
||||
(await loadCreatePdfTool())({
|
||||
config: cfg,
|
||||
agentDir,
|
||||
workspaceDir,
|
||||
@@ -432,7 +262,7 @@ describe("createPdfTool", () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const { loadSpy } = await stubPdfToolInfra(agentDir, { modelFound: false });
|
||||
const cfg = withPdfModel(ANTHROPIC_PDF_MODEL);
|
||||
const tool = requirePdfTool(createPdfTool({ config: cfg, agentDir }));
|
||||
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
|
||||
|
||||
await expect(
|
||||
tool.execute("t1", {
|
||||
@@ -449,13 +279,10 @@ describe("createPdfTool", () => {
|
||||
it("uses native PDF path without eager extraction", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
await stubPdfToolInfra(agentDir, { provider: "anthropic", input: ["text", "document"] });
|
||||
|
||||
vi.spyOn(pdfNativeProviders, "anthropicAnalyzePdf").mockResolvedValue("native summary");
|
||||
|
||||
const extractSpy = vi.spyOn(pdfExtractModule, "extractPdfContent");
|
||||
|
||||
const cfg = withPdfModel(ANTHROPIC_PDF_MODEL);
|
||||
const tool = requirePdfTool(createPdfTool({ config: cfg, agentDir }));
|
||||
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
|
||||
|
||||
const result = await tool.execute("t1", {
|
||||
prompt: "summarize",
|
||||
@@ -474,7 +301,7 @@ describe("createPdfTool", () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
await stubPdfToolInfra(agentDir, { provider: "anthropic", input: ["text", "document"] });
|
||||
const cfg = withPdfModel(ANTHROPIC_PDF_MODEL);
|
||||
const tool = requirePdfTool(createPdfTool({ config: cfg, agentDir }));
|
||||
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
|
||||
|
||||
await expect(
|
||||
tool.execute("t1", {
|
||||
@@ -489,12 +316,10 @@ describe("createPdfTool", () => {
|
||||
it("uses extraction fallback for non-native models", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
await stubPdfToolInfra(agentDir, { provider: "openai", input: ["text"] });
|
||||
|
||||
const extractSpy = vi.spyOn(pdfExtractModule, "extractPdfContent").mockResolvedValue({
|
||||
text: "Extracted content",
|
||||
images: [],
|
||||
});
|
||||
|
||||
completeMock.mockResolvedValue({
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
@@ -502,8 +327,7 @@ describe("createPdfTool", () => {
|
||||
} as never);
|
||||
|
||||
const cfg = withPdfModel(OPENAI_PDF_MODEL);
|
||||
|
||||
const tool = requirePdfTool(createPdfTool({ config: cfg, agentDir }));
|
||||
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
|
||||
|
||||
const result = await tool.execute("t1", {
|
||||
prompt: "summarize",
|
||||
@@ -534,12 +358,9 @@ describe("createPdfTool", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Native provider detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("native PDF provider API calls", () => {
|
||||
const priorFetch = global.fetch;
|
||||
|
||||
const mockFetchResponse = (response: unknown) => {
|
||||
const fetchMock = vi.fn().mockResolvedValue(response);
|
||||
global.fetch = Object.assign(fetchMock, { preconnect: vi.fn() }) as typeof global.fetch;
|
||||
@@ -558,13 +379,13 @@ describe("native PDF provider API calls", () => {
|
||||
}),
|
||||
});
|
||||
|
||||
const result = await pdfNativeProviders.anthropicAnalyzePdf({
|
||||
...makeAnthropicAnalyzeParams({
|
||||
const result = await pdfNativeProviders.anthropicAnalyzePdf(
|
||||
makeAnthropicAnalyzeParams({
|
||||
modelId: "claude-opus-4-6",
|
||||
prompt: "Summarize this document",
|
||||
maxTokens: 4096,
|
||||
}),
|
||||
});
|
||||
);
|
||||
|
||||
expect(result).toBe("Analysis of PDF");
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
@@ -608,20 +429,16 @@ describe("native PDF provider API calls", () => {
|
||||
const fetchMock = mockFetchResponse({
|
||||
ok: true,
|
||||
json: async () => ({
|
||||
candidates: [
|
||||
{
|
||||
content: { parts: [{ text: "Gemini PDF analysis" }] },
|
||||
},
|
||||
],
|
||||
candidates: [{ content: { parts: [{ text: "Gemini PDF analysis" }] } }],
|
||||
}),
|
||||
});
|
||||
|
||||
const result = await pdfNativeProviders.geminiAnalyzePdf({
|
||||
...makeGeminiAnalyzeParams({
|
||||
const result = await pdfNativeProviders.geminiAnalyzePdf(
|
||||
makeGeminiAnalyzeParams({
|
||||
modelId: "gemini-2.5-pro",
|
||||
prompt: "Summarize this",
|
||||
}),
|
||||
});
|
||||
);
|
||||
|
||||
expect(result).toBe("Gemini PDF analysis");
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
@@ -666,8 +483,8 @@ describe("native PDF provider API calls", () => {
|
||||
}),
|
||||
});
|
||||
|
||||
await pdfNativeProviders.anthropicAnalyzePdf({
|
||||
...makeAnthropicAnalyzeParams({
|
||||
await pdfNativeProviders.anthropicAnalyzePdf(
|
||||
makeAnthropicAnalyzeParams({
|
||||
modelId: "claude-opus-4-6",
|
||||
prompt: "Compare these documents",
|
||||
pdfs: [
|
||||
@@ -675,10 +492,9 @@ describe("native PDF provider API calls", () => {
|
||||
{ base64: "cGRmMg==", filename: "doc2.pdf" },
|
||||
],
|
||||
}),
|
||||
});
|
||||
);
|
||||
|
||||
const body = JSON.parse(fetchMock.mock.calls[0][1].body);
|
||||
// 2 document blocks + 1 text block
|
||||
expect(body.messages[0].content).toHaveLength(3);
|
||||
expect(body.messages[0].content[0].type).toBe("document");
|
||||
expect(body.messages[0].content[1].type).toBe("document");
|
||||
@@ -693,9 +509,9 @@ describe("native PDF provider API calls", () => {
|
||||
}),
|
||||
});
|
||||
|
||||
await pdfNativeProviders.anthropicAnalyzePdf({
|
||||
...makeAnthropicAnalyzeParams({ baseUrl: "https://custom.example.com" }),
|
||||
});
|
||||
await pdfNativeProviders.anthropicAnalyzePdf(
|
||||
makeAnthropicAnalyzeParams({ baseUrl: "https://custom.example.com" }),
|
||||
);
|
||||
|
||||
expect(fetchMock.mock.calls[0][0]).toContain("https://custom.example.com/v1/messages");
|
||||
});
|
||||
@@ -751,67 +567,6 @@ describe("native PDF provider API calls", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PDF tool helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("pdf-tool.helpers", () => {
|
||||
it("resolvePdfToolMaxTokens respects model limit", () => {
|
||||
expect(resolvePdfToolMaxTokens(2048, 4096)).toBe(2048);
|
||||
expect(resolvePdfToolMaxTokens(8192, 4096)).toBe(4096);
|
||||
expect(resolvePdfToolMaxTokens(undefined, 4096)).toBe(4096);
|
||||
});
|
||||
|
||||
it("coercePdfModelConfig reads primary and fallbacks", () => {
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
pdfModel: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: ["google/gemini-2.5-pro"],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(coercePdfModelConfig(cfg)).toEqual({
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: ["google/gemini-2.5-pro"],
|
||||
});
|
||||
});
|
||||
|
||||
it("coercePdfAssistantText returns trimmed text", () => {
|
||||
const text = coercePdfAssistantText({
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
message: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
content: [{ type: "text", text: " summary " }],
|
||||
} as never,
|
||||
});
|
||||
expect(text).toBe("summary");
|
||||
});
|
||||
|
||||
it("coercePdfAssistantText throws clear error for failed model output", () => {
|
||||
expect(() =>
|
||||
coercePdfAssistantText({
|
||||
provider: "google",
|
||||
model: "gemini-2.5-pro",
|
||||
message: {
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
errorMessage: "bad request",
|
||||
content: [],
|
||||
} as never,
|
||||
}),
|
||||
).toThrow("PDF model failed (google/gemini-2.5-pro): bad request");
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Model catalog document support
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("model catalog document support", () => {
|
||||
it("modelSupportsDocument returns true when input includes document", () => {
|
||||
expect(
|
||||
|
||||
@@ -1,19 +1,11 @@
|
||||
import { type Context, complete } from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
providerSupportsNativePdfDocument,
|
||||
resolveAutoMediaKeyProviders,
|
||||
resolveDefaultMediaModel,
|
||||
} from "../../media-understanding/defaults.js";
|
||||
import { extractPdfContent, type PdfExtractedContent } from "../../media/pdf-extract.js";
|
||||
import { loadWebMediaRaw } from "../../media/web-media.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import {
|
||||
coerceImageModelConfig,
|
||||
type ImageModelConfig,
|
||||
resolveProviderVisionModelFromConfig,
|
||||
} from "./image-tool.helpers.js";
|
||||
import { type ImageModelConfig } from "./image-tool.helpers.js";
|
||||
import { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";
|
||||
import {
|
||||
applyImageModelConfigDefaults,
|
||||
buildTextToolResult,
|
||||
@@ -22,7 +14,6 @@ import {
|
||||
resolveModelRuntimeApiKey,
|
||||
resolvePromptAndModelOverride,
|
||||
} from "./media-tool-shared.js";
|
||||
import { hasAuthForProvider, resolveDefaultModelRef } from "./model-config.helpers.js";
|
||||
import { anthropicAnalyzePdf, geminiAnalyzePdf } from "./pdf-native-providers.js";
|
||||
import {
|
||||
coercePdfAssistantText,
|
||||
@@ -56,105 +47,7 @@ const PDF_MAX_PIXELS = 4_000_000;
|
||||
// Model resolution (mirrors image tool pattern)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Resolve the effective PDF model config.
|
||||
* Falls back to the image model config, then to provider-specific defaults.
|
||||
*/
|
||||
export function resolvePdfModelConfigForTool(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir: string;
|
||||
}): ImageModelConfig | null {
|
||||
// Check for explicit PDF model config first
|
||||
const explicitPdf = coercePdfModelConfig(params.cfg);
|
||||
if (explicitPdf.primary?.trim() || (explicitPdf.fallbacks?.length ?? 0) > 0) {
|
||||
return explicitPdf;
|
||||
}
|
||||
|
||||
// Fall back to the image model config
|
||||
const explicitImage = coerceImageModelConfig(params.cfg);
|
||||
if (explicitImage.primary?.trim() || (explicitImage.fallbacks?.length ?? 0) > 0) {
|
||||
return explicitImage;
|
||||
}
|
||||
|
||||
// Auto-detect from available providers
|
||||
const primary = resolveDefaultModelRef(params.cfg);
|
||||
const googleOk = hasAuthForProvider({ provider: "google", agentDir: params.agentDir });
|
||||
|
||||
const fallbacks: string[] = [];
|
||||
const addFallback = (ref: string) => {
|
||||
const trimmed = ref.trim();
|
||||
if (trimmed && !fallbacks.includes(trimmed)) {
|
||||
fallbacks.push(trimmed);
|
||||
}
|
||||
};
|
||||
|
||||
// Prefer providers with native PDF support
|
||||
let preferred: string | null = null;
|
||||
|
||||
const providerOk = hasAuthForProvider({ provider: primary.provider, agentDir: params.agentDir });
|
||||
const providerVision = resolveProviderVisionModelFromConfig({
|
||||
cfg: params.cfg,
|
||||
provider: primary.provider,
|
||||
});
|
||||
const providerDefault = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId: primary.provider,
|
||||
capability: "image",
|
||||
});
|
||||
const primarySupportsNativePdf = providerSupportsNativePdfDocument({
|
||||
cfg: params.cfg,
|
||||
providerId: primary.provider,
|
||||
});
|
||||
const nativePdfCandidates = resolveAutoMediaKeyProviders({
|
||||
cfg: params.cfg,
|
||||
capability: "image",
|
||||
})
|
||||
.filter((providerId) => providerSupportsNativePdfDocument({ cfg: params.cfg, providerId }))
|
||||
.filter((providerId) => hasAuthForProvider({ provider: providerId, agentDir: params.agentDir }))
|
||||
.map((providerId) => {
|
||||
const modelId = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
capability: "image",
|
||||
});
|
||||
return modelId ? `${providerId}/${modelId}` : null;
|
||||
})
|
||||
.filter((value): value is string => Boolean(value));
|
||||
const genericImageCandidates = resolveAutoMediaKeyProviders({
|
||||
cfg: params.cfg,
|
||||
capability: "image",
|
||||
})
|
||||
.filter((providerId) => hasAuthForProvider({ provider: providerId, agentDir: params.agentDir }))
|
||||
.map((providerId) => {
|
||||
const modelId = resolveDefaultMediaModel({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
capability: "image",
|
||||
});
|
||||
return modelId ? `${providerId}/${modelId}` : null;
|
||||
})
|
||||
.filter((value): value is string => Boolean(value));
|
||||
|
||||
if (primary.provider === "google" && googleOk && providerVision && primarySupportsNativePdf) {
|
||||
preferred = providerVision;
|
||||
} else if (providerOk && primarySupportsNativePdf && (providerVision || providerDefault)) {
|
||||
preferred = providerVision ?? `${primary.provider}/${providerDefault}`;
|
||||
} else {
|
||||
preferred = nativePdfCandidates[0] ?? genericImageCandidates[0] ?? null;
|
||||
}
|
||||
|
||||
if (preferred?.trim()) {
|
||||
for (const candidate of [...nativePdfCandidates, ...genericImageCandidates]) {
|
||||
if (candidate !== preferred) {
|
||||
addFallback(candidate);
|
||||
}
|
||||
}
|
||||
const pruned = fallbacks.filter((ref) => ref !== preferred);
|
||||
return { primary: preferred, ...(pruned.length > 0 ? { fallbacks: pruned } : {}) };
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
export { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Build context for extraction fallback path
|
||||
|
||||
98
src/media-understanding/bundled-defaults.ts
Normal file
98
src/media-understanding/bundled-defaults.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import type { MediaUnderstandingCapability } from "./types.js";
|
||||
import { normalizeMediaProviderId } from "./provider-id.js";
|
||||
|
||||
/**
 * Static media-understanding defaults for a single bundled provider.
 *
 * Every field is optional; a provider only lists the capabilities it has
 * bundled defaults for.
 */
type BundledMediaProviderDefaults = {
  /** Default model id per capability. */
  defaultModels?: Partial<Record<MediaUnderstandingCapability, string>>;
  /** Per-capability numeric priority used when auto-selecting providers. */
  autoPriority?: Partial<Record<MediaUnderstandingCapability, number>>;
  /** Document formats the provider accepts natively (currently only "pdf"). */
  nativeDocumentInputs?: "pdf"[];
};
|
||||
|
||||
/**
 * Bundled media-understanding defaults, keyed by provider id.
 *
 * - `defaultModels`: model id used per capability when nothing else is configured.
 * - `autoPriority`: numeric rank per capability; providers with no rank for a
 *   capability are left out of the bundled auto-selection list for it.
 * - `nativeDocumentInputs`: document formats the provider can ingest directly.
 */
const BUNDLED_MEDIA_PROVIDER_DEFAULTS: Record<string, BundledMediaProviderDefaults> = {
  openai: {
    defaultModels: {
      image: "gpt-5.4-mini",
      audio: "gpt-4o-transcribe",
    },
    autoPriority: {
      image: 10,
      audio: 10,
    },
  },
  "openai-codex": {
    defaultModels: {
      image: "gpt-5.4",
    },
  },
  anthropic: {
    defaultModels: {
      image: "claude-opus-4-6",
    },
    autoPriority: {
      image: 20,
    },
    nativeDocumentInputs: ["pdf"],
  },
  google: {
    defaultModels: {
      image: "gemini-3-flash-preview",
      audio: "gemini-3-flash-preview",
      video: "gemini-3-flash-preview",
    },
    autoPriority: {
      image: 30,
      audio: 40,
      video: 10,
    },
    nativeDocumentInputs: ["pdf"],
  },
  groq: {
    defaultModels: {
      audio: "whisper-large-v3-turbo",
    },
    autoPriority: {
      audio: 20,
    },
  },
  deepgram: {
    defaultModels: {
      audio: "nova-3",
    },
    autoPriority: {
      audio: 30,
    },
  },
  mistral: {
    defaultModels: {
      audio: "voxtral-mini-latest",
    },
    autoPriority: {
      audio: 50,
    },
  },
  minimax: {
    defaultModels: {
      image: "MiniMax-VL-01",
    },
    autoPriority: {
      image: 40,
    },
  },
  "minimax-portal": {
    defaultModels: {
      image: "MiniMax-VL-01",
    },
    autoPriority: {
      image: 50,
    },
  },
  zai: {
    defaultModels: {
      image: "glm-4.6v",
    },
    autoPriority: {
      image: 60,
    },
  },
  qwen: {
    defaultModels: {
      image: "qwen-vl-max-latest",
      video: "qwen-vl-max-latest",
    },
    autoPriority: {
      video: 15,
    },
  },
  moonshot: {
    defaultModels: {
      image: "kimi-k2.5",
      video: "kimi-k2.5",
    },
    autoPriority: {
      video: 20,
    },
  },
  openrouter: {
    defaultModels: {
      image: "auto",
    },
  },
};
|
||||
|
||||
/**
 * Look up the bundled defaults for a provider.
 *
 * The id is passed through `normalizeMediaProviderId` before the lookup.
 *
 * @param providerId - Provider id as written by the caller or config.
 * @returns The bundled defaults entry, or `null` when the provider has none.
 */
export function getBundledMediaProviderDefaults(
  providerId: string,
): BundledMediaProviderDefaults | null {
  const key = normalizeMediaProviderId(providerId);
  // The table is a plain object literal, so a bare index lookup would also
  // resolve inherited members ("constructor", "toString", "__proto__", ...).
  // Only own entries are real provider defaults.
  if (!Object.prototype.hasOwnProperty.call(BUNDLED_MEDIA_PROVIDER_DEFAULTS, key)) {
    return null;
  }
  return BUNDLED_MEDIA_PROVIDER_DEFAULTS[key] ?? null;
}
|
||||
|
||||
/**
 * Resolve the bundled default model id for a provider/capability pair.
 *
 * @param params.providerId - Provider to look up.
 * @param params.capability - Media capability the model is needed for.
 * @returns The trimmed model id, or `undefined` when the provider or the
 *   capability has no bundled default.
 */
export function resolveBundledDefaultMediaModel(params: {
  providerId: string;
  capability: MediaUnderstandingCapability;
}): string | undefined {
  const { providerId, capability } = params;
  const bundledModels = getBundledMediaProviderDefaults(providerId)?.defaultModels;
  return bundledModels?.[capability]?.trim();
}
|
||||
|
||||
/**
 * List the bundled providers that take part in auto-selection for a capability.
 *
 * Providers without a numeric `autoPriority` for the capability are left out.
 * The rest are ordered by ascending priority, with ties broken by
 * `localeCompare` on the provider id.
 *
 * @param capability - Media capability to rank providers for.
 * @returns Provider ids in selection order.
 */
export function resolveBundledAutoMediaKeyProviders(
  capability: MediaUnderstandingCapability,
): string[] {
  const ranked: Array<{ providerId: string; priority: number }> = [];
  for (const [providerId, defaults] of Object.entries(BUNDLED_MEDIA_PROVIDER_DEFAULTS)) {
    const priority = defaults.autoPriority?.[capability];
    if (typeof priority === "number") {
      ranked.push({ providerId, priority });
    }
  }

  // `ranked` is a fresh local array, so sorting it in place is safe.
  ranked.sort((left, right) =>
    left.priority === right.priority
      ? left.providerId.localeCompare(right.providerId)
      : left.priority - right.priority,
  );

  return ranked.map((entry) => entry.providerId);
}
|
||||
|
||||
/**
 * Report whether the bundled defaults mark a provider as accepting PDF
 * documents natively.
 *
 * @param providerId - Provider to check.
 * @returns `true` only when the provider has bundled defaults whose
 *   `nativeDocumentInputs` include `"pdf"`.
 */
export function bundledProviderSupportsNativePdfDocument(providerId: string): boolean {
  const nativeInputs = getBundledMediaProviderDefaults(providerId)?.nativeDocumentInputs;
  return nativeInputs ? nativeInputs.includes("pdf") : false;
}
|
||||
@@ -1,4 +1,9 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
bundledProviderSupportsNativePdfDocument,
|
||||
resolveBundledAutoMediaKeyProviders,
|
||||
resolveBundledDefaultMediaModel,
|
||||
} from "./bundled-defaults.js";
|
||||
import { buildMediaUnderstandingRegistry, normalizeMediaProviderId } from "./provider-registry.js";
|
||||
import type { MediaUnderstandingCapability, MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
@@ -52,12 +57,53 @@ function resolveDefaultRegistry(cfg?: OpenClawConfig) {
|
||||
return buildMediaUnderstandingRegistry(undefined, cfg ?? ({} as OpenClawConfig));
|
||||
}
|
||||
|
||||
/**
 * Find an image-capable model declared for a provider under
 * `cfg.models.providers`.
 *
 * Config keys are compared after `normalizeMediaProviderId`. Only the first
 * key that matches the provider is consulted: if that entry declares no model
 * with a non-blank id and `"image"` in its `input` list, the result is
 * `undefined` and later keys are not examined.
 *
 * @param params.cfg - Optional config to read provider model lists from.
 * @param params.providerId - Provider whose configured models are searched.
 * @returns The trimmed id of the first matching model, or `undefined`.
 */
function resolveConfiguredImageProviderModel(params: {
  cfg?: OpenClawConfig;
  providerId: string;
}): string | undefined {
  const configuredProviders = params.cfg?.models?.providers;
  if (!configuredProviders || typeof configuredProviders !== "object") {
    return undefined;
  }

  const wantedId = normalizeMediaProviderId(params.providerId);
  for (const [configKey, providerCfg] of Object.entries(configuredProviders)) {
    if (normalizeMediaProviderId(configKey) === wantedId) {
      for (const model of providerCfg?.models ?? []) {
        const modelId = model?.id?.trim();
        if (modelId && Array.isArray(model?.input) && model.input.includes("image")) {
          return modelId;
        }
      }
      // First matching provider entry decides the outcome, even when empty.
      return undefined;
    }
  }
  return undefined;
}
|
||||
|
||||
export function resolveDefaultMediaModel(params: {
|
||||
providerId: string;
|
||||
capability: MediaUnderstandingCapability;
|
||||
cfg?: OpenClawConfig;
|
||||
providerRegistry?: Map<string, MediaUnderstandingProvider>;
|
||||
}): string | undefined {
|
||||
if (!params.providerRegistry) {
|
||||
const configuredImageModel =
|
||||
params.capability === "image"
|
||||
? resolveConfiguredImageProviderModel({
|
||||
cfg: params.cfg,
|
||||
providerId: params.providerId,
|
||||
})
|
||||
: undefined;
|
||||
if (configuredImageModel) {
|
||||
return configuredImageModel;
|
||||
}
|
||||
const bundledDefault = resolveBundledDefaultMediaModel({
|
||||
providerId: params.providerId,
|
||||
capability: params.capability,
|
||||
});
|
||||
if (bundledDefault) {
|
||||
return bundledDefault;
|
||||
}
|
||||
}
|
||||
const registry = params.providerRegistry ?? resolveDefaultRegistry(params.cfg);
|
||||
const provider = registry.get(normalizeMediaProviderId(params.providerId));
|
||||
return provider?.defaultModels?.[params.capability]?.trim() || undefined;
|
||||
@@ -68,6 +114,28 @@ export function resolveAutoMediaKeyProviders(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
providerRegistry?: Map<string, MediaUnderstandingProvider>;
|
||||
}): string[] {
|
||||
if (!params.providerRegistry) {
|
||||
const bundledProviders = resolveBundledAutoMediaKeyProviders(params.capability);
|
||||
if (params.capability !== "image") {
|
||||
return bundledProviders;
|
||||
}
|
||||
const configProviders = params.cfg?.models?.providers;
|
||||
if (!configProviders || typeof configProviders !== "object") {
|
||||
return bundledProviders;
|
||||
}
|
||||
const merged = [...bundledProviders];
|
||||
for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
|
||||
const normalizedProviderId = normalizeMediaProviderId(providerKey);
|
||||
const models = providerCfg?.models ?? [];
|
||||
const hasImageModel = models.some(
|
||||
(model) => Array.isArray(model?.input) && model.input.includes("image"),
|
||||
);
|
||||
if (hasImageModel && !merged.includes(normalizedProviderId)) {
|
||||
merged.push(normalizedProviderId);
|
||||
}
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
const registry = params.providerRegistry ?? resolveDefaultRegistry(params.cfg);
|
||||
type AutoProviderEntry = {
|
||||
provider: MediaUnderstandingProvider;
|
||||
@@ -97,6 +165,9 @@ export function providerSupportsNativePdfDocument(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
providerRegistry?: Map<string, MediaUnderstandingProvider>;
|
||||
}): boolean {
|
||||
if (!params.providerRegistry && bundledProviderSupportsNativePdfDocument(params.providerId)) {
|
||||
return true;
|
||||
}
|
||||
const registry = params.providerRegistry ?? resolveDefaultRegistry(params.cfg);
|
||||
const provider = registry.get(normalizeMediaProviderId(params.providerId));
|
||||
return provider?.nativeDocumentInputs?.includes("pdf") ?? false;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { normalizeProviderId } from "../agents/model-selection.js";
|
||||
import { normalizeProviderId } from "../agents/provider-id.js";
|
||||
|
||||
export function normalizeMediaProviderId(id: string): string {
|
||||
const normalized = normalizeProviderId(id);
|
||||
|
||||
Reference in New Issue
Block a user