diff --git a/src/agents/tools/web-fetch-utils.ts b/src/agents/tools/web-fetch-utils.ts index 056a4890ca5..16808fa605c 100644 --- a/src/agents/tools/web-fetch-utils.ts +++ b/src/agents/tools/web-fetch-utils.ts @@ -1,5 +1,12 @@ import { sanitizeHtml, stripInvisibleUnicode } from "./web-fetch-visibility.js"; +/** + * Lightweight HTML/text extraction utilities for the web_fetch tool. + * + * This intentionally handles common markup without a heavy renderer so provider + * responses stay bounded and deterministic. + */ +/** Output mode requested by web_fetch extraction. */ export type ExtractMode = "markdown" | "text"; function decodeEntities(value: string): string { @@ -18,6 +25,7 @@ function stripTags(value: string): string { return decodeEntities(value.replace(/<[^>]+>/g, "")); } +/** Collapses display whitespace while preserving paragraph breaks. */ export function normalizeWhitespace(value: string): string { return value .replace(/\r/g, "") @@ -27,6 +35,7 @@ export function normalizeWhitespace(value: string): string { .trim(); } +/** Converts sanitized HTML into coarse markdown plus an optional title. */ export function htmlToMarkdown(html: string): { text: string; title?: string } { const titleMatch = html.match(/]*>([\s\S]*?)<\/title>/i); const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined; @@ -39,6 +48,7 @@ export function htmlToMarkdown(html: string): { text: string; title?: string } { if (!label) { return href; } + // Preserve link targets in markdown mode so fetched pages remain source-auditable. return `[${label}](${href})`; }); text = text.replace(/]*>([\s\S]*?)<\/h\1>/gi, (_, level, body) => { @@ -58,6 +68,7 @@ export function htmlToMarkdown(html: string): { text: string; title?: string } { return { text, title }; } +/** Removes markdown decoration for plain text extraction. */ export function markdownToText(markdown: string): string { let text = markdown; text = text.replace(/!\[[^\]]*]\([^)]+\)/g, ""); @@ -72,6 +83,7 @@ export function markdownToText(markdown: string): string { return normalizeWhitespace(text); } +/** Truncates text by characters and reports whether truncation occurred. */ export function truncateText( value: string, maxChars: number, @@ -82,6 +94,7 @@ export function truncateText( return { text: value.slice(0, maxChars), truncated: true }; } +/** Sanitizes HTML and extracts either markdown or plain text content. */ export async function extractBasicHtmlContent(params: { html: string; extractMode: ExtractMode; diff --git a/src/agents/tools/web-guarded-fetch.ts b/src/agents/tools/web-guarded-fetch.ts index d6cb1e7f059..e172a93ffc8 100644 --- a/src/agents/tools/web-guarded-fetch.ts +++ b/src/agents/tools/web-guarded-fetch.ts @@ -12,6 +12,12 @@ import { } from "../../infra/net/ssrf.js"; import { readPositiveIntegerParam } from "./common.js"; +/** + * Guarded fetch wrappers for web tools. + * + * These helpers apply SSRF policy, timeout normalization, and optional trusted + * env proxy mode before tool-specific response handling runs. + */ const WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY: SsrFPolicy = { dangerouslyAllowPrivateNetwork: true, allowRfc2544BenchmarkRange: true, @@ -45,6 +51,7 @@ function resolveTimeoutMs(params: { return undefined; } +/** Runs a guarded fetch with strict or trusted-env-proxy web tool policy. */ export async function fetchWithWebToolsNetworkGuard( params: WebToolGuardedFetchOptions, ): Promise { @@ -72,6 +79,7 @@ async function withWebToolsNetworkGuard( } } +/** Runs a fetch for trusted endpoints, allowing env proxy with pinned-host policy. */ export async function withTrustedWebToolsEndpoint( params: WebToolEndpointFetchOptions, run: (result: { response: Response; finalUrl: string }) => Promise, @@ -87,6 +95,7 @@ export async function withTrustedWebToolsEndpoint( ); } +/** Runs a fetch for configured self-hosted endpoints with private-network access allowed. */ export async function withSelfHostedWebToolsEndpoint( params: WebToolEndpointFetchOptions, run: (result: { response: Response; finalUrl: string }) => Promise, @@ -101,6 +110,7 @@ export async function withSelfHostedWebToolsEndpoint( ); } +/** Runs a fetch under strict SSRF protection without env proxy trust. */ export async function withStrictWebToolsEndpoint( params: WebToolEndpointFetchOptions, run: (result: { response: Response; finalUrl: string }) => Promise, diff --git a/src/agents/tools/web-tools.ts b/src/agents/tools/web-tools.ts index baf24c7839d..08e5596736f 100644 --- a/src/agents/tools/web-tools.ts +++ b/src/agents/tools/web-tools.ts @@ -1,2 +1,8 @@ +/** + * Barrel for web_fetch and web_search tool factories. + * + * Higher-level tool assembly imports this narrow module so tests can mock both + * web tools together without loading provider-specific implementations. + */ export { createWebFetchTool } from "./web-fetch.js"; export { createWebSearchTool } from "./web-search.js";