fix(pdf-tool): defer PDF model resolution (#76728)

Defers automatic PDF model/auth resolution until the PDF tool is executed, avoiding agent-turn tool prep stalls while preserving explicit PDF model registration.

Fixes #76644.
Thanks @hclsys.
This commit is contained in:
hcl
2026-05-03 23:55:14 +08:00
committed by GitHub
parent 6becfcb275
commit 02b1075a57
5 changed files with 137 additions and 10 deletions

View File

@@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai
- Active Memory: apply `setupGraceTimeoutMs` to the embedded recall runner as well as the outer prompt-build watchdog, so very-cold first recalls keep the configured setup grace end-to-end. (#74480) Thanks @volcano303.
- Channels/Feishu: cap how long the per-chat sequential queue blocks subsequent same-key tasks behind a single in-flight task (5 min default), so a single hung dispatch no longer leaves later same-chat messages in `queued` state until gateway restart; the stuck task continues running but is evicted from the blocking chain and a warning is logged. Fixes #70133. (#76687) Thanks @martingarramon and @bek91.
- Active Memory: skip scoped Telegram forum-topic conversation ids (containing `:`) when resolving the embedded recall run channel, falling back to `messageProvider` instead, so Active Memory no longer throws a bundled-plugin dirName validation error in forum-topic sessions. Fixes #76704.
- Agents/tools: defer automatic PDF model/auth resolution until the PDF tool is used, keeping agent-turn tool prep from probing auth profiles on messages without PDFs while preserving explicit PDF model registration. Fixes #76644. Thanks @hclsys.
- CLI/config: keep JSON dry-run patches validating touched channel configuration against bundled channel schemas even when the patch only contains SecretRef objects.
- Plugins/tools: keep disabled bundled tool plugins out of explicit runtime allowlist ownership and fall back from loaded-but-empty channel registries to tool-bearing plugin registries, so Active Memory can use bundled `memory-core` search/get tools even when `memory-lancedb` is disabled. Fixes #76603. Thanks @jwong-art.
- Plugins/install: run `npm install` from the managed npm-root manifest so installing one `@openclaw/*` plugin preserves already installed sibling plugins instead of pruning them. Fixes #76571. (#76602) Thanks @byungskers and @crpol.

View File

@@ -13,6 +13,7 @@ import type { PluginMetadataSnapshot } from "../plugins/plugin-metadata-snapshot
import { clearSecretsRuntimeSnapshot } from "../secrets/runtime.js";
import type { AuthProfileStore } from "./auth-profiles/types.js";
import { __testing, createOpenClawTools } from "./openclaw-tools.js";
import * as pdfModelConfigModule from "./tools/pdf-tool.model-config.js";
function createAuthStore(providers: string[] = []): AuthProfileStore {
return {
@@ -285,6 +286,21 @@ describe("optional media tool factory planning", () => {
});
});
it("defers PDF model resolution from the tool-prep hot path", () => {
const config: OpenClawConfig = {};
installSnapshot(config, []);
const resolveSpy = vi.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool");
const tools = createOpenClawTools({
config,
agentDir: "/tmp/openclaw-agent-main",
authProfileStore: createAuthStore(["anthropic"]),
});
expect(tools.map((tool) => tool.name)).toContain("pdf");
expect(resolveSpy).not.toHaveBeenCalled();
});
it("keeps enabled external manifest capability providers on the factory path", () => {
const config: OpenClawConfig = {};
installSnapshot(config, [

View File

@@ -414,6 +414,7 @@ export function createOpenClawTools(
workspaceDir,
sandbox,
fsPolicy: options?.fsPolicy,
deferAutoModelResolution: true,
})
: null;
options?.recordToolPrepStage?.("openclaw-tools:pdf-tool");

View File

@@ -5,10 +5,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import * as pdfExtractModule from "../../media/pdf-extract.js";
import * as webMedia from "../../media/web-media.js";
import type { AuthProfileStore } from "../auth-profiles/types.js";
import * as modelAuth from "../model-auth.js";
import * as modelsConfig from "../models-config.js";
import * as modelDiscovery from "../pi-model-discovery.js";
import * as pdfNativeProviders from "./pdf-native-providers.js";
import * as pdfModelConfigModule from "./pdf-tool.model-config.js";
import { resetPdfToolAuthEnv, withTempPdfAgentDir } from "./pdf-tool.test-support.js";
const completeMock = vi.hoisted(() => vi.fn());
@@ -71,6 +73,12 @@ function withPdfModel(primary: string): OpenClawConfig {
} as OpenClawConfig;
}
function withDefaultModel(primary: string): OpenClawConfig {
return {
agents: { defaults: { model: { primary } } },
} as OpenClawConfig;
}
async function stubPdfToolInfra(
agentDir: string,
params?: {
@@ -159,6 +167,77 @@ describe("createPdfTool", () => {
});
});
it("defers automatic model config resolution during registration (#76644)", async () => {
const resolveSpy = vi.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool");
const cfg = withDefaultModel("openai/gpt-5.4");
const authProfileStore = {
version: 1,
profiles: {
"anthropic:default": {
type: "api_key",
provider: "anthropic",
key: "test-key",
},
},
} satisfies AuthProfileStore;
const createTool = await loadCreatePdfTool();
await withTempPdfAgentDir(async (agentDir) => {
expect(
createTool({
config: cfg,
agentDir,
authProfileStore,
deferAutoModelResolution: true,
})?.name,
).toBe("pdf");
expect(resolveSpy).not.toHaveBeenCalled();
});
resolveSpy.mockRestore();
});
it("keeps explicit model config resolution eager even when automatic resolution is deferred", async () => {
const resolveSpy = vi.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool");
const createTool = await loadCreatePdfTool();
await withTempPdfAgentDir(async (agentDir) => {
expect(
createTool({
config: withPdfModel(ANTHROPIC_PDF_MODEL),
agentDir,
deferAutoModelResolution: true,
})?.name,
).toBe("pdf");
expect(resolveSpy).toHaveBeenCalledTimes(1);
});
resolveSpy.mockRestore();
});
it("resolves deferred model config on execution before loading PDFs", async () => {
const resolveSpy = vi
.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool")
.mockReturnValue(null);
const loadSpy = vi.spyOn(webMedia, "loadWebMediaRaw");
const createTool = await loadCreatePdfTool();
const cfg = withDefaultModel("openai/gpt-5.4");
await withTempPdfAgentDir(async (agentDir) => {
const tool = requirePdfTool(
createTool({
config: cfg,
agentDir,
deferAutoModelResolution: true,
}),
);
await expect(
tool.execute("t1", {
prompt: "summarize",
pdf: "/tmp/doc.pdf",
}),
).rejects.toThrow("No PDF model configured.");
});
expect(resolveSpy).toHaveBeenCalledTimes(1);
expect(loadSpy).not.toHaveBeenCalled();
resolveSpy.mockRestore();
});
it("rejects when no pdf input provided", async () => {
await withConfiguredPdfTool(async (tool) => {
await expect(tool.execute("t1", { prompt: "test" })).rejects.toThrow("pdf required");

View File

@@ -13,7 +13,8 @@ import {
} from "../../shared/string-coerce.js";
import { resolveUserPath } from "../../utils.js";
import type { AuthProfileStore } from "../auth-profiles/types.js";
import { type ImageModelConfig } from "./image-tool.helpers.js";
import { ToolInputError } from "./common.js";
import { coerceImageModelConfig, type ImageModelConfig } from "./image-tool.helpers.js";
import {
applyImageModelConfigDefaults,
buildTextToolResult,
@@ -23,6 +24,7 @@ import {
resolvePromptAndModelOverride,
resolveRemoteMediaSsrfPolicy,
} from "./media-tool-shared.js";
import { hasToolModelConfig } from "./model-config.helpers.js";
import { anthropicAnalyzePdf, geminiAnalyzePdf } from "./pdf-native-providers.js";
import {
coercePdfAssistantText,
@@ -77,6 +79,13 @@ export const PdfToolSchema = Type.Object({
export { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";
function hasExplicitPdfToolModelConfig(config?: OpenClawConfig): boolean {
return (
hasToolModelConfig(coercePdfModelConfig(config)) ||
hasToolModelConfig(coerceImageModelConfig(config))
);
}
// ---------------------------------------------------------------------------
// Build context for extraction fallback path
// ---------------------------------------------------------------------------
@@ -249,23 +258,32 @@ export function createPdfTool(options?: {
workspaceDir?: string;
sandbox?: PdfSandboxConfig;
fsPolicy?: ToolFsPolicy;
/**
* Avoid resolving auto PDF-provider/model candidates while registering the
* tool. The concrete PDF model is still resolved before execution.
*/
deferAutoModelResolution?: boolean;
}): AnyAgentTool | null {
const agentDir = options?.agentDir?.trim();
const hasExplicitModelConfig = hasExplicitPdfToolModelConfig(options?.config);
if (!agentDir) {
const explicit = coercePdfModelConfig(options?.config);
if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
if (hasExplicitModelConfig) {
throw new Error("createPdfTool requires agentDir when enabled");
}
return null;
}
const pdfModelConfig = resolvePdfModelConfigForTool({
cfg: options?.config,
agentDir,
workspaceDir: options?.workspaceDir,
authStore: options?.authProfileStore,
});
if (!pdfModelConfig) {
const shouldDeferAutoModelResolution =
options?.deferAutoModelResolution === true && !hasExplicitModelConfig;
const registrationPdfModelConfig = shouldDeferAutoModelResolution
? null
: resolvePdfModelConfigForTool({
cfg: options?.config,
agentDir,
workspaceDir: options?.workspaceDir,
authStore: options?.authProfileStore,
});
if (!registrationPdfModelConfig && !shouldDeferAutoModelResolution) {
return null;
}
@@ -325,6 +343,18 @@ export function createPdfTool(options?: {
// Parse page range
const pagesRaw = normalizeOptionalString(record.pages);
const pdfModelConfig =
registrationPdfModelConfig ??
resolvePdfModelConfigForTool({
cfg: options?.config,
agentDir,
workspaceDir: options?.workspaceDir,
authStore: options?.authProfileStore,
});
if (!pdfModelConfig) {
throw new ToolInputError("No PDF model configured.");
}
const sandboxConfig: SandboxedBridgeMediaPathConfig | null =
options?.sandbox && options.sandbox.root.trim()
? {