fix(pdf-tool): defer PDF model resolution (#76728)

Defers automatic PDF model/auth resolution until the PDF tool is executed, avoiding agent-turn tool prep stalls while preserving explicit PDF model registration. Fixes #76644. Thanks @hclsys.
2026-05-06 07:30:43 +00:00 · 2026-05-03 23:55:14 +08:00
parent 6becfcb275
commit 02b1075a57
5 changed files with 137 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai
 - Active Memory: apply `setupGraceTimeoutMs` to the embedded recall runner as well as the outer prompt-build watchdog, so very-cold first recalls keep the configured setup grace end-to-end. (#74480) Thanks @volcano303.
 - Channels/Feishu: cap how long the per-chat sequential queue blocks subsequent same-key tasks behind a single in-flight task (5 min default), so a single hung dispatch no longer leaves later same-chat messages in `queued` state until gateway restart; the stuck task continues running but is evicted from the blocking chain and a warning is logged. Fixes #70133. (#76687) Thanks @martingarramon and @bek91.
 - Active Memory: skip scoped Telegram forum-topic conversation ids (containing `:`) when resolving the embedded recall run channel, falling back to `messageProvider` instead, so Active Memory no longer throws a bundled-plugin dirName validation error in forum-topic sessions. Fixes #76704.
+- Agents/tools: defer automatic PDF model/auth resolution until the PDF tool is used, keeping agent-turn tool prep from probing auth profiles on messages without PDFs while preserving explicit PDF model registration. Fixes #76644. Thanks @hclsys.
 - CLI/config: keep JSON dry-run patches validating touched channel configuration against bundled channel schemas even when the patch only contains SecretRef objects.
 - Plugins/tools: keep disabled bundled tool plugins out of explicit runtime allowlist ownership and fall back from loaded-but-empty channel registries to tool-bearing plugin registries, so Active Memory can use bundled `memory-core` search/get tools even when `memory-lancedb` is disabled. Fixes #76603. Thanks @jwong-art.
 - Plugins/install: run `npm install` from the managed npm-root manifest so installing one `@openclaw/*` plugin preserves already installed sibling plugins instead of pruning them. Fixes #76571. (#76602) Thanks @byungskers and @crpol.
--- a/src/agents/openclaw-tools.media-factory-plan.test.ts
+++ b/src/agents/openclaw-tools.media-factory-plan.test.ts
@@ -13,6 +13,7 @@ import type { PluginMetadataSnapshot } from "../plugins/plugin-metadata-snapshot
 import { clearSecretsRuntimeSnapshot } from "../secrets/runtime.js";
 import type { AuthProfileStore } from "./auth-profiles/types.js";
 import { __testing, createOpenClawTools } from "./openclaw-tools.js";
+import * as pdfModelConfigModule from "./tools/pdf-tool.model-config.js";

 function createAuthStore(providers: string[] = []): AuthProfileStore {
  return {
@@ -285,6 +286,21 @@ describe("optional media tool factory planning", () => {
    });
  });

+  it("defers PDF model resolution from the tool-prep hot path", () => {
+    const config: OpenClawConfig = {};
+    installSnapshot(config, []);
+    const resolveSpy = vi.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool");
+
+    const tools = createOpenClawTools({
+      config,
+      agentDir: "/tmp/openclaw-agent-main",
+      authProfileStore: createAuthStore(["anthropic"]),
+    });
+
+    expect(tools.map((tool) => tool.name)).toContain("pdf");
+    expect(resolveSpy).not.toHaveBeenCalled();
+  });
+
  it("keeps enabled external manifest capability providers on the factory path", () => {
    const config: OpenClawConfig = {};
    installSnapshot(config, [
--- a/src/agents/openclaw-tools.ts
+++ b/src/agents/openclaw-tools.ts
@@ -414,6 +414,7 @@ export function createOpenClawTools(
          workspaceDir,
          sandbox,
          fsPolicy: options?.fsPolicy,
+          deferAutoModelResolution: true,
        })
      : null;
  options?.recordToolPrepStage?.("openclaw-tools:pdf-tool");
--- a/src/agents/tools/pdf-tool.test.ts
+++ b/src/agents/tools/pdf-tool.test.ts
@@ -5,10 +5,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../../config/config.js";
 import * as pdfExtractModule from "../../media/pdf-extract.js";
 import * as webMedia from "../../media/web-media.js";
+import type { AuthProfileStore } from "../auth-profiles/types.js";
 import * as modelAuth from "../model-auth.js";
 import * as modelsConfig from "../models-config.js";
 import * as modelDiscovery from "../pi-model-discovery.js";
 import * as pdfNativeProviders from "./pdf-native-providers.js";
+import * as pdfModelConfigModule from "./pdf-tool.model-config.js";
 import { resetPdfToolAuthEnv, withTempPdfAgentDir } from "./pdf-tool.test-support.js";

 const completeMock = vi.hoisted(() => vi.fn());
@@ -71,6 +73,12 @@ function withPdfModel(primary: string): OpenClawConfig {
  } as OpenClawConfig;
 }

+function withDefaultModel(primary: string): OpenClawConfig {
+  return {
+    agents: { defaults: { model: { primary } } },
+  } as OpenClawConfig;
+}
+
 async function stubPdfToolInfra(
  agentDir: string,
  params?: {
@@ -159,6 +167,77 @@ describe("createPdfTool", () => {
    });
  });

+  it("defers automatic model config resolution during registration (#76644)", async () => {
+    const resolveSpy = vi.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool");
+    const cfg = withDefaultModel("openai/gpt-5.4");
+    const authProfileStore = {
+      version: 1,
+      profiles: {
+        "anthropic:default": {
+          type: "api_key",
+          provider: "anthropic",
+          key: "test-key",
+        },
+      },
+    } satisfies AuthProfileStore;
+    const createTool = await loadCreatePdfTool();
+    await withTempPdfAgentDir(async (agentDir) => {
+      expect(
+        createTool({
+          config: cfg,
+          agentDir,
+          authProfileStore,
+          deferAutoModelResolution: true,
+        })?.name,
+      ).toBe("pdf");
+      expect(resolveSpy).not.toHaveBeenCalled();
+    });
+    resolveSpy.mockRestore();
+  });
+
+  it("keeps explicit model config resolution eager even when automatic resolution is deferred", async () => {
+    const resolveSpy = vi.spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool");
+    const createTool = await loadCreatePdfTool();
+    await withTempPdfAgentDir(async (agentDir) => {
+      expect(
+        createTool({
+          config: withPdfModel(ANTHROPIC_PDF_MODEL),
+          agentDir,
+          deferAutoModelResolution: true,
+        })?.name,
+      ).toBe("pdf");
+      expect(resolveSpy).toHaveBeenCalledTimes(1);
+    });
+    resolveSpy.mockRestore();
+  });
+
+  it("resolves deferred model config on execution before loading PDFs", async () => {
+    const resolveSpy = vi
+      .spyOn(pdfModelConfigModule, "resolvePdfModelConfigForTool")
+      .mockReturnValue(null);
+    const loadSpy = vi.spyOn(webMedia, "loadWebMediaRaw");
+    const createTool = await loadCreatePdfTool();
+    const cfg = withDefaultModel("openai/gpt-5.4");
+    await withTempPdfAgentDir(async (agentDir) => {
+      const tool = requirePdfTool(
+        createTool({
+          config: cfg,
+          agentDir,
+          deferAutoModelResolution: true,
+        }),
+      );
+      await expect(
+        tool.execute("t1", {
+          prompt: "summarize",
+          pdf: "/tmp/doc.pdf",
+        }),
+      ).rejects.toThrow("No PDF model configured.");
+    });
+    expect(resolveSpy).toHaveBeenCalledTimes(1);
+    expect(loadSpy).not.toHaveBeenCalled();
+    resolveSpy.mockRestore();
+  });
+
  it("rejects when no pdf input provided", async () => {
    await withConfiguredPdfTool(async (tool) => {
      await expect(tool.execute("t1", { prompt: "test" })).rejects.toThrow("pdf required");
--- a/src/agents/tools/pdf-tool.ts
+++ b/src/agents/tools/pdf-tool.ts
@@ -13,7 +13,8 @@ import {
 } from "../../shared/string-coerce.js";
 import { resolveUserPath } from "../../utils.js";
 import type { AuthProfileStore } from "../auth-profiles/types.js";
-import { type ImageModelConfig } from "./image-tool.helpers.js";
+import { ToolInputError } from "./common.js";
+import { coerceImageModelConfig, type ImageModelConfig } from "./image-tool.helpers.js";
 import {
  applyImageModelConfigDefaults,
  buildTextToolResult,
@@ -23,6 +24,7 @@ import {
  resolvePromptAndModelOverride,
  resolveRemoteMediaSsrfPolicy,
 } from "./media-tool-shared.js";
+import { hasToolModelConfig } from "./model-config.helpers.js";
 import { anthropicAnalyzePdf, geminiAnalyzePdf } from "./pdf-native-providers.js";
 import {
  coercePdfAssistantText,
@@ -77,6 +79,13 @@ export const PdfToolSchema = Type.Object({

 export { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";

+function hasExplicitPdfToolModelConfig(config?: OpenClawConfig): boolean {
+  return (
+    hasToolModelConfig(coercePdfModelConfig(config)) ||
+    hasToolModelConfig(coerceImageModelConfig(config))
+  );
+}
+
 // ---------------------------------------------------------------------------
 // Build context for extraction fallback path
 // ---------------------------------------------------------------------------
@@ -249,23 +258,32 @@ export function createPdfTool(options?: {
  workspaceDir?: string;
  sandbox?: PdfSandboxConfig;
  fsPolicy?: ToolFsPolicy;
+  /**
+   * Avoid resolving auto PDF-provider/model candidates while registering the
+   * tool. The concrete PDF model is still resolved before execution.
+   */
+  deferAutoModelResolution?: boolean;
 }): AnyAgentTool | null {
  const agentDir = options?.agentDir?.trim();
+  const hasExplicitModelConfig = hasExplicitPdfToolModelConfig(options?.config);
  if (!agentDir) {
-    const explicit = coercePdfModelConfig(options?.config);
-    if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
+    if (hasExplicitModelConfig) {
      throw new Error("createPdfTool requires agentDir when enabled");
    }
    return null;
  }

-  const pdfModelConfig = resolvePdfModelConfigForTool({
-    cfg: options?.config,
-    agentDir,
-    workspaceDir: options?.workspaceDir,
-    authStore: options?.authProfileStore,
-  });
-  if (!pdfModelConfig) {
+  const shouldDeferAutoModelResolution =
+    options?.deferAutoModelResolution === true && !hasExplicitModelConfig;
+  const registrationPdfModelConfig = shouldDeferAutoModelResolution
+    ? null
+    : resolvePdfModelConfigForTool({
+        cfg: options?.config,
+        agentDir,
+        workspaceDir: options?.workspaceDir,
+        authStore: options?.authProfileStore,
+      });
+  if (!registrationPdfModelConfig && !shouldDeferAutoModelResolution) {
    return null;
  }

@@ -325,6 +343,18 @@ export function createPdfTool(options?: {
      // Parse page range
      const pagesRaw = normalizeOptionalString(record.pages);

+      const pdfModelConfig =
+        registrationPdfModelConfig ??
+        resolvePdfModelConfigForTool({
+          cfg: options?.config,
+          agentDir,
+          workspaceDir: options?.workspaceDir,
+          authStore: options?.authProfileStore,
+        });
+      if (!pdfModelConfig) {
+        throw new ToolInputError("No PDF model configured.");
+      }
+
      const sandboxConfig: SandboxedBridgeMediaPathConfig | null =
        options?.sandbox && options.sandbox.root.trim()
          ? {