refactor(agents): dedupe thinking tag scanner

This commit is contained in:
Ayaan Zaidi
2026-04-26 11:23:08 +05:30
parent d7da3d470e
commit 4bfa7d17a3
2 changed files with 30 additions and 17 deletions

View File

@@ -33,11 +33,13 @@ import type {
import { isPromiseLike } from "./pi-embedded-subscribe.promise.js";
import { filterToolResultMediaUrls } from "./pi-embedded-subscribe.tools.js";
import type { SubscribeEmbeddedPiSessionParams } from "./pi-embedded-subscribe.types.js";
import { formatReasoningMessage, stripDowngradedToolCallText } from "./pi-embedded-utils.js";
import {
formatReasoningMessage,
stripDowngradedToolCallText,
THINKING_TAG_SCAN_RE,
} from "./pi-embedded-utils.js";
import { hasNonzeroUsage, normalizeUsage, type UsageLike } from "./usage.js";
const THINKING_TAG_SCAN_RE =
/<\s*(\/?)\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi;
// Matches an opening or closing `<final>` tag, case-insensitively and with
// optional whitespace inside the angle brackets. Capture group 1 is "/" for a
// closing tag and "" for an opening tag.
// The `g` flag makes `lastIndex` persist between `exec`/`test` calls on this
// shared instance. NOTE(review): confirm every call site resets it before scanning.
const FINAL_TAG_SCAN_RE = /<\s*(\/?)\s*final\s*>/gi;
// Module-level logger created with the "agent/embedded" subsystem label.
const log = createSubsystemLogger("agent/embedded");
@@ -516,7 +518,6 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
const inlineStateStart = state.inlineCode ?? createInlineCodeState();
const codeSpans = buildCodeSpanIndex(text, inlineStateStart);
// 1. Handle <think> blocks (stateful, strip content inside)
let processed = "";
THINKING_TAG_SCAN_RE.lastIndex = 0;
let lastIndex = 0;
@@ -538,7 +539,6 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
}
state.thinking = inThinking;
// 2. Handle <final> blocks (stateful, strip content OUTSIDE)
// If enforcement is disabled, we still strip the tags themselves to prevent
// hallucinations (e.g. Minimax copying the style) from leaking, but we
// do not enforce buffering/extraction logic.

View File

@@ -181,6 +181,25 @@ type ThinkTaggedSplitBlock =
| { type: "thinking"; thinking: string }
| { type: "text"; text: string };
/**
 * Tag-name alternation shared by every thinking-tag matcher below. Accepts
 * `think`, `thinking` and `thought` (each optionally prefixed with `antml:`),
 * or the standalone name `antthinking`.
 */
const THINKING_TAG_NAME_PATTERN = "(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)";

/** Matches one opening thinking tag. Non-global, so `.test()` keeps no state. */
const THINKING_TAG_OPEN_RE = new RegExp(String.raw`<\s*${THINKING_TAG_NAME_PATTERN}\s*>`, "i");

/** Matches one closing thinking tag. Non-global, so `.test()` keeps no state. */
const THINKING_TAG_CLOSE_RE = new RegExp(String.raw`<\s*\/\s*${THINKING_TAG_NAME_PATTERN}\s*>`, "i");

/** Global twin of `THINKING_TAG_OPEN_RE`, for enumerating every opening tag. */
const THINKING_TAG_OPEN_GLOBAL_RE = new RegExp(THINKING_TAG_OPEN_RE.source, "gi");

/** Global twin of `THINKING_TAG_CLOSE_RE`, for enumerating every closing tag. */
const THINKING_TAG_CLOSE_GLOBAL_RE = new RegExp(THINKING_TAG_CLOSE_RE.source, "gi");

/**
 * Matches both opening and closing thinking tags in one pass. Capture group 1
 * is "/" for a closing tag and "" for an opening tag.
 *
 * This is a shared, exported instance with the `g` flag, so `lastIndex` is
 * stateful. `String.prototype.matchAll` iterates a clone, but the clone starts
 * from this instance's current `lastIndex`; code that drives it with
 * `exec`/`test` should reset `lastIndex` to 0 before scanning.
 */
export const THINKING_TAG_SCAN_RE = new RegExp(
String.raw`<\s*(\/?)\s*${THINKING_TAG_NAME_PATTERN}\s*>`,
"gi",
);
export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | null {
const trimmedStart = text.trimStart();
// Avoid false positives: only treat it as structured thinking when it begins
@@ -189,16 +208,13 @@ export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] |
if (!trimmedStart.startsWith("<")) {
return null;
}
const openRe = /<\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/i;
const closeRe = /<\s*\/\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/i;
if (!openRe.test(trimmedStart)) {
if (!THINKING_TAG_OPEN_RE.test(trimmedStart)) {
return null;
}
if (!closeRe.test(text)) {
if (!THINKING_TAG_CLOSE_RE.test(text)) {
return null;
}
const scanRe = /<\s*(\/?)\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi;
let inThinking = false;
let cursor = 0;
let thinkingStart = 0;
@@ -218,7 +234,7 @@ export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] |
blocks.push({ type: "thinking", thinking: cleaned });
};
for (const match of text.matchAll(scanRe)) {
for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) {
const index = match.index ?? 0;
const isClose = match[1]?.includes("/") ?? false;
@@ -299,11 +315,10 @@ export function extractThinkingFromTaggedText(text: string): string {
if (!text) {
return "";
}
const scanRe = /<\s*(\/?)\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi;
let result = "";
let lastIndex = 0;
let inThinking = false;
for (const match of text.matchAll(scanRe)) {
for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) {
const idx = match.index ?? 0;
if (inThinking) {
result += text.slice(lastIndex, idx);
@@ -324,13 +339,11 @@ export function extractThinkingFromTaggedStream(text: string): string {
return closed;
}
const openRe = /<\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi;
const closeRe = /<\s*\/\s*(?:(?:antml:)?(?:think(?:ing)?|thought)|antthinking)\s*>/gi;
const openMatches = [...text.matchAll(openRe)];
const openMatches = [...text.matchAll(THINKING_TAG_OPEN_GLOBAL_RE)];
if (openMatches.length === 0) {
return "";
}
const closeMatches = [...text.matchAll(closeRe)];
const closeMatches = [...text.matchAll(THINKING_TAG_CLOSE_GLOBAL_RE)];
const lastOpen = openMatches[openMatches.length - 1];
const lastClose = closeMatches[closeMatches.length - 1];
if (lastClose && (lastClose.index ?? -1) > (lastOpen.index ?? -1)) {