From 4bfa7d17a3b16c7d7d437f3f7d4f12ebdaf046e3 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Sun, 26 Apr 2026 11:23:08 +0530 Subject: [PATCH] refactor(agents): dedupe thinking tag scanner --- src/agents/pi-embedded-subscribe.ts | 10 ++++---- src/agents/pi-embedded-utils.ts | 37 +++++++++++++++++++---------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts index 24b4bdb4718..5ab03b66f5b 100644 --- a/src/agents/pi-embedded-subscribe.ts +++ b/src/agents/pi-embedded-subscribe.ts @@ -33,11 +33,13 @@ import type { import { isPromiseLike } from "./pi-embedded-subscribe.promise.js"; import { filterToolResultMediaUrls } from "./pi-embedded-subscribe.tools.js"; import type { SubscribeEmbeddedPiSessionParams } from "./pi-embedded-subscribe.types.js"; -import { formatReasoningMessage, stripDowngradedToolCallText } from "./pi-embedded-utils.js"; +import { + formatReasoningMessage, + stripDowngradedToolCallText, + THINKING_TAG_SCAN_RE, +} from "./pi-embedded-utils.js"; import { hasNonzeroUsage, normalizeUsage, type UsageLike } from "./usage.js"; -const THINKING_TAG_SCAN_RE = - /<\s*(\/?)\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi; const FINAL_TAG_SCAN_RE = /<\s*(\/?)\s*final\s*>/gi; const log = createSubsystemLogger("agent/embedded"); @@ -516,7 +518,6 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar const inlineStateStart = state.inlineCode ?? createInlineCodeState(); const codeSpans = buildCodeSpanIndex(text, inlineStateStart); - // 1. Handle blocks (stateful, strip content inside) let processed = ""; THINKING_TAG_SCAN_RE.lastIndex = 0; let lastIndex = 0; @@ -538,7 +539,6 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar } state.thinking = inThinking; - // 2. Handle blocks (stateful, strip content OUTSIDE) // If enforcement is disabled, we still strip the tags themselves to prevent // hallucinations (e.g. Minimax copying the style) from leaking, but we // do not enforce buffering/extraction logic. diff --git a/src/agents/pi-embedded-utils.ts b/src/agents/pi-embedded-utils.ts index 67b4b6adb47..5b27f0f10b6 100644 --- a/src/agents/pi-embedded-utils.ts +++ b/src/agents/pi-embedded-utils.ts @@ -181,6 +181,25 @@ type ThinkTaggedSplitBlock = | { type: "thinking"; thinking: string } | { type: "text"; text: string }; +const THINKING_TAG_NAME_PATTERN = String.raw`(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)`; +const THINKING_TAG_OPEN_RE = new RegExp(String.raw`<\s*${THINKING_TAG_NAME_PATTERN}\s*>`, "i"); +const THINKING_TAG_CLOSE_RE = new RegExp( + String.raw`<\s*\/\s*${THINKING_TAG_NAME_PATTERN}\s*>`, + "i", +); +const THINKING_TAG_OPEN_GLOBAL_RE = new RegExp( + String.raw`<\s*${THINKING_TAG_NAME_PATTERN}\s*>`, + "gi", +); +const THINKING_TAG_CLOSE_GLOBAL_RE = new RegExp( + String.raw`<\s*\/\s*${THINKING_TAG_NAME_PATTERN}\s*>`, + "gi", +); +export const THINKING_TAG_SCAN_RE = new RegExp( + String.raw`<\s*(\/?)\s*${THINKING_TAG_NAME_PATTERN}\s*>`, + "gi", +); + export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | null { const trimmedStart = text.trimStart(); // Avoid false positives: only treat it as structured thinking when it begins @@ -189,16 +208,13 @@ export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | if (!trimmedStart.startsWith("<")) { return null; } - const openRe = /<\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/i; - const closeRe = /<\s*\/\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/i; - if (!openRe.test(trimmedStart)) { + if (!THINKING_TAG_OPEN_RE.test(trimmedStart)) { return null; } - if (!closeRe.test(text)) { + if (!THINKING_TAG_CLOSE_RE.test(text)) { return null; } - const scanRe = /<\s*(\/?)\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi; let inThinking = false; let cursor = 0; let thinkingStart = 0; @@ -218,7 +234,7 @@ export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | blocks.push({ type: "thinking", thinking: cleaned }); }; - for (const match of text.matchAll(scanRe)) { + for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) { const index = match.index ?? 0; const isClose = match[1]?.includes("/") ?? false; @@ -299,11 +315,10 @@ export function extractThinkingFromTaggedText(text: string): string { if (!text) { return ""; } - const scanRe = /<\s*(\/?)\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi; let result = ""; let lastIndex = 0; let inThinking = false; - for (const match of text.matchAll(scanRe)) { + for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) { const idx = match.index ?? 0; if (inThinking) { result += text.slice(lastIndex, idx); @@ -324,13 +339,11 @@ export function extractThinkingFromTaggedStream(text: string): string { return closed; } - const openRe = /<\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi; - const closeRe = /<\s*\/\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi; - const openMatches = [...text.matchAll(openRe)]; + const openMatches = [...text.matchAll(THINKING_TAG_OPEN_GLOBAL_RE)]; if (openMatches.length === 0) { return ""; } - const closeMatches = [...text.matchAll(closeRe)]; + const closeMatches = [...text.matchAll(THINKING_TAG_CLOSE_GLOBAL_RE)]; const lastOpen = openMatches[openMatches.length - 1]; const lastClose = closeMatches[closeMatches.length - 1]; if (lastClose && (lastClose.index ?? -1) > (lastOpen.index ?? -1)) {