diff --git a/src/agents/tools/web-fetch-utils.ts b/src/agents/tools/web-fetch-utils.ts
index 056a4890ca5..16808fa605c 100644
--- a/src/agents/tools/web-fetch-utils.ts
+++ b/src/agents/tools/web-fetch-utils.ts
@@ -1,5 +1,12 @@
import { sanitizeHtml, stripInvisibleUnicode } from "./web-fetch-visibility.js";
+/**
+ * Lightweight HTML/text extraction utilities for the web_fetch tool.
+ *
+ * This intentionally handles common markup without a heavy renderer so provider
+ * responses stay bounded and deterministic.
+ */
+/** Output mode requested by web_fetch extraction: "markdown" or plain "text". */
export type ExtractMode = "markdown" | "text";
function decodeEntities(value: string): string {
@@ -18,6 +25,7 @@ function stripTags(value: string): string {
return decodeEntities(value.replace(/<[^>]+>/g, ""));
}
+/** Collapses display whitespace while preserving paragraph breaks. */
export function normalizeWhitespace(value: string): string {
return value
.replace(/\r/g, "")
@@ -27,6 +35,7 @@ export function normalizeWhitespace(value: string): string {
.trim();
}
+/** Converts sanitized HTML into coarse markdown plus an optional title. */
export function htmlToMarkdown(html: string): { text: string; title?: string } {
const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined;
@@ -39,6 +48,7 @@ export function htmlToMarkdown(html: string): { text: string; title?: string } {
if (!label) {
return href;
}
+ // Preserve link targets in markdown mode so fetched pages remain source-auditable.
return `[${label}](${href})`;
});
text = text.replace(/<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi, (_, level, body) => {
@@ -58,6 +68,7 @@ export function htmlToMarkdown(html: string): { text: string; title?: string } {
return { text, title };
}
+/** Removes markdown decoration for plain text extraction. */
export function markdownToText(markdown: string): string {
let text = markdown;
text = text.replace(/!\[[^\]]*]\([^)]+\)/g, "");
@@ -72,6 +83,7 @@ export function markdownToText(markdown: string): string {
return normalizeWhitespace(text);
}
+/** Truncates text to at most maxChars UTF-16 code units and reports whether truncation occurred. */
export function truncateText(
value: string,
maxChars: number,
@@ -82,6 +94,7 @@ export function truncateText(
return { text: value.slice(0, maxChars), truncated: true };
}
+/** Sanitizes HTML and extracts either markdown or plain text content. */
export async function extractBasicHtmlContent(params: {
html: string;
extractMode: ExtractMode;
diff --git a/src/agents/tools/web-guarded-fetch.ts b/src/agents/tools/web-guarded-fetch.ts
index d6cb1e7f059..e172a93ffc8 100644
--- a/src/agents/tools/web-guarded-fetch.ts
+++ b/src/agents/tools/web-guarded-fetch.ts
@@ -12,6 +12,12 @@ import {
} from "../../infra/net/ssrf.js";
import { readPositiveIntegerParam } from "./common.js";
+/**
+ * Guarded fetch wrappers for web tools.
+ *
+ * These helpers apply SSRF policy, timeout normalization, and optional trusted
+ * env proxy mode before tool-specific response handling runs.
+ */
const WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
dangerouslyAllowPrivateNetwork: true,
allowRfc2544BenchmarkRange: true,
@@ -45,6 +51,7 @@ function resolveTimeoutMs(params: {
return undefined;
}
+/** Runs a guarded fetch with strict or trusted-env-proxy web tool policy. */
export async function fetchWithWebToolsNetworkGuard(
params: WebToolGuardedFetchOptions,
): Promise {
@@ -72,6 +79,7 @@ async function withWebToolsNetworkGuard(
}
}
+/** Runs a fetch for trusted endpoints, allowing env proxy with pinned-host policy. */
export async function withTrustedWebToolsEndpoint(
params: WebToolEndpointFetchOptions,
run: (result: { response: Response; finalUrl: string }) => Promise,
@@ -87,6 +95,7 @@ export async function withTrustedWebToolsEndpoint(
);
}
+/** Runs a fetch for configured self-hosted endpoints with private-network access allowed. */
export async function withSelfHostedWebToolsEndpoint(
params: WebToolEndpointFetchOptions,
run: (result: { response: Response; finalUrl: string }) => Promise,
@@ -101,6 +110,7 @@ export async function withSelfHostedWebToolsEndpoint(
);
}
+/** Runs a fetch under strict SSRF protection without env proxy trust. */
export async function withStrictWebToolsEndpoint(
params: WebToolEndpointFetchOptions,
run: (result: { response: Response; finalUrl: string }) => Promise,
diff --git a/src/agents/tools/web-tools.ts b/src/agents/tools/web-tools.ts
index baf24c7839d..08e5596736f 100644
--- a/src/agents/tools/web-tools.ts
+++ b/src/agents/tools/web-tools.ts
@@ -1,2 +1,8 @@
+/**
+ * Barrel for web_fetch and web_search tool factories.
+ *
+ * Higher-level tool assembly imports this narrow module so tests can mock both
+ * web tools together without loading provider-specific implementations.
+ */
export { createWebFetchTool } from "./web-fetch.js";
export { createWebSearchTool } from "./web-search.js";