refactor: share balanced json extraction

This commit is contained in:
Peter Steinberger
2026-04-20 14:40:21 +01:00
parent 655e0be3d7
commit f006678f3c
4 changed files with 95 additions and 138 deletions

View File

@@ -1,4 +1,5 @@
import type { CliBackendConfig } from "../config/types.js";
import { extractBalancedJsonFragments } from "../shared/balanced-json.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { isRecord } from "../utils.js";
@@ -39,48 +40,7 @@ function usesClaudeStreamJsonDialect(params: {
}
/**
 * Collects the text of every outermost `{ ... }` span found in `raw`.
 *
 * The spans are located by brace counting only; nothing here parses or
 * validates them as JSON.
 *
 * @param raw - Text that may contain zero or more brace-delimited spans.
 * @returns The matched substrings, in the order they appear in `raw`.
 */
function extractJsonObjectCandidates(raw: string): string[] {
const candidates: string[] = [];
// Current `{` nesting level; 0 means we are outside every object.
let depth = 0;
// Index of the outermost `{` that is currently open, or -1 when none is open.
let start = -1;
// String/escape state is tracked across the whole input, including text
// that sits outside any object.
let inString = false;
let escaped = false;
for (let index = 0; index < raw.length; index += 1) {
// The `?? ""` fallback only satisfies the type checker; `index` is always in range.
const char = raw[index] ?? "";
if (escaped) {
// The previous character was a backslash inside a string: skip this one.
escaped = false;
continue;
}
if (char === "\\") {
// A backslash only starts an escape inside a string; elsewhere it is dropped.
if (inString) {
escaped = true;
}
continue;
}
if (char === '"') {
inString = !inString;
continue;
}
if (inString) {
// Braces inside string literals must not affect the depth count.
continue;
}
if (char === "{") {
if (depth === 0) {
start = index;
}
depth += 1;
continue;
}
// A `}` seen at depth 0 is a stray closer and is ignored.
if (char === "}" && depth > 0) {
depth -= 1;
if (depth === 0 && start >= 0) {
// The outermost object just closed: record it and reset for the next one.
candidates.push(raw.slice(start, index + 1));
start = -1;
}
}
}
return candidates;
// NOTE(review): the statement below follows an unconditional return, so it is
// unreachable as written. This listing looks like a diff rendered without +/-
// markers, where the scanner above is the removed implementation and this
// line is its replacement — confirm against the actual file.
return extractBalancedJsonFragments(raw, { openers: ["{"] }).map((fragment) => fragment.json);
}
function parseJsonRecordCandidates(raw: string): Record<string, unknown>[] {

View File

@@ -4,6 +4,7 @@ import {
createHtmlEntityToolCallArgumentDecodingWrapper,
decodeHtmlEntitiesInObject,
} from "../../../plugin-sdk/provider-stream-shared.js";
import { extractBalancedJsonPrefix } from "../../../shared/balanced-json.js";
import { normalizeProviderId } from "../../model-selection.js";
import { log } from "../logger.js";
import { wrapStreamObjectEvents } from "./stream-wrapper.js";
@@ -12,60 +13,6 @@ function isToolCallBlockType(type: unknown): boolean {
return type === "toolCall" || type === "toolUse" || type === "functionCall";
}
/** A bracket-balanced slice of some larger text and the index where it begins. */
type BalancedJsonPrefix = {
  json: string;
  startIndex: number;
};

/**
 * Finds the first `{` or `[` in `raw` and returns the text from there up to the
 * closer that brings the bracket nesting back to zero.
 *
 * Objects and arrays share a single nesting counter, so closers are not checked
 * against the kind of opener they follow. Brackets inside double-quoted strings
 * are ignored, and backslash escapes inside strings are honoured.
 *
 * @param raw - Text that may carry leading noise before a JSON value.
 * @returns The balanced slice and its start index, or `null` when `raw` has no
 *   opener or the nesting never returns to zero.
 */
function extractBalancedJsonPrefix(raw: string): BalancedJsonPrefix | null {
  const firstOpener = raw.search(/[{[]/);
  if (firstOpener < 0) {
    return null;
  }

  let nesting = 0;
  let mode: "plain" | "string" | "escape" = "plain";

  for (let cursor = firstOpener; cursor < raw.length; cursor += 1) {
    const unit = raw.charAt(cursor);

    if (mode === "escape") {
      // The character after a backslash is consumed verbatim.
      mode = "string";
      continue;
    }

    if (mode === "string") {
      if (unit === "\\") {
        mode = "escape";
      } else if (unit === '"') {
        mode = "plain";
      }
      continue;
    }

    if (unit === '"') {
      mode = "string";
      continue;
    }

    const opens = unit === "{" || unit === "[";
    if (opens) {
      nesting += 1;
      continue;
    }

    const closes = unit === "}" || unit === "]";
    if (!closes) {
      continue;
    }

    nesting -= 1;
    if (nesting === 0) {
      return { json: raw.slice(firstOpener, cursor + 1), startIndex: firstOpener };
    }
  }

  return null;
}
// Size limits, in characters per the constant names, presumably applied by the
// tool-call repair logic in this file.
// NOTE(review): the code that reads these is outside this view — confirm how
// each limit is enforced before changing a value.
const MAX_TOOLCALL_REPAIR_BUFFER_CHARS = 64_000;
const MAX_TOOLCALL_REPAIR_LEADING_CHARS = 96;
const MAX_TOOLCALL_REPAIR_TRAILING_CHARS = 3;

View File

@@ -1,5 +1,6 @@
import { formatErrorMessage } from "../../infra/errors.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { extractBalancedJsonPrefix } from "../../shared/balanced-json.js";
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
// Module-level logger created for the "memory" subsystem.
const log = createSubsystemLogger("memory");
@@ -34,7 +35,7 @@ export function parseQmdQueryJson(stdout: string, stderr: string): QmdQueryResul
if (parsed !== null) {
return parsed;
}
const noisyPayload = extractFirstJsonArray(trimmedStdout);
const noisyPayload = extractBalancedJsonPrefix(trimmedStdout, { openers: ["["] })?.json;
if (!noisyPayload) {
throw new Error("qmd query JSON response was not an array");
}
@@ -113,44 +114,3 @@ function parseQmdQueryResultArray(raw: string): QmdQueryResult[] | null {
/**
 * Accepts `value` as a line number only when it is a finite number greater
 * than zero.
 *
 * @param value - Untrusted value to check.
 * @returns `value` unchanged when it qualifies, otherwise `undefined`.
 */
function parseQmdLineNumber(value: unknown): number | undefined {
  if (typeof value !== "number") {
    return undefined;
  }
  if (!Number.isFinite(value)) {
    return undefined;
  }
  if (value > 0) {
    return value;
  }
  return undefined;
}
/**
 * Returns the text of the first square-bracket-balanced span in `raw`.
 *
 * Scanning begins at the first `[`. Only `[` and `]` affect the nesting count;
 * brackets inside double-quoted strings are skipped, and backslash escapes
 * inside strings are honoured. The result is not validated as JSON.
 *
 * @param raw - Text that may contain an array surrounded by other output.
 * @returns The balanced span, or `null` when there is no `[` or it never closes.
 */
function extractFirstJsonArray(raw: string): string | null {
  const begin = raw.indexOf("[");
  if (begin === -1) {
    return null;
  }

  let openBrackets = 0;
  let quoted = false;
  let skipNext = false;

  for (let pos = begin; pos < raw.length; pos += 1) {
    const unit = raw.charAt(pos);

    // `skipNext` is only ever set while inside a string, so checking it first
    // is the same as checking it under the `quoted` branch.
    if (skipNext) {
      skipNext = false;
      continue;
    }

    if (quoted) {
      if (unit === "\\") {
        skipNext = true;
      } else if (unit === '"') {
        quoted = false;
      }
      continue;
    }

    switch (unit) {
      case '"':
        quoted = true;
        break;
      case "[":
        openBrackets += 1;
        break;
      case "]":
        openBrackets -= 1;
        if (openBrackets === 0) {
          return raw.slice(begin, pos + 1);
        }
        break;
      default:
        break;
    }
  }

  return null;
}

View File

@@ -0,0 +1,90 @@
/** A character that opens a JSON container: `{` for objects, `[` for arrays. */
export type JsonOpeningDelimiter = "{" | "[";
/**
 * A delimiter-balanced slice of a larger string together with its position.
 * The slice is matched by bracket balancing only; it is not parsed as JSON.
 */
export type BalancedJsonFragment = {
/** The extracted text, from the opening delimiter through its matching closer. */
json: string;
/** Index of the opening delimiter in the scanned string. */
startIndex: number;
/** Index of the matching closing delimiter in the scanned string (inclusive). */
endIndex: number;
};
/** Maps each opening delimiter to the closing delimiter that balances it. */
const CLOSING_DELIMITER: Record<JsonOpeningDelimiter, "}" | "]"> = {
"{": "}",
"[": "]",
};
/**
 * Type guard: reports whether `char` is `{` or `[` and is also listed in
 * `openers`.
 *
 * @param char - A single character, or `undefined` for an out-of-range read.
 * @param openers - The opening delimiters the caller wants recognised.
 * @returns `true` only for an opener that appears in `openers`.
 */
function isJsonOpeningDelimiter(
  char: string | undefined,
  openers: readonly JsonOpeningDelimiter[],
): char is JsonOpeningDelimiter {
  if (char !== "{" && char !== "[") {
    return false;
  }
  return openers.includes(char);
}
/**
 * Locates the first recognised opening delimiter in `raw` and returns the span
 * that ends where that delimiter is balanced again.
 *
 * Only delimiters listed in `opts.openers` are tracked; each tracked opener
 * must be closed by its own matching closer, and any other closer is ignored.
 * Delimiters inside double-quoted strings are skipped, with backslash escapes
 * honoured. The span is not validated as JSON.
 *
 * @param raw - Text that may carry noise before and after a JSON value.
 * @param opts - `openers` restricts which delimiters are recognised; both `{`
 *   and `[` are recognised when it is omitted.
 * @returns The balanced span with its inclusive start and end indexes, or
 *   `null` when no opener is found or the span never closes.
 */
export function extractBalancedJsonPrefix(
  raw: string,
  opts: { openers?: readonly JsonOpeningDelimiter[] } = {},
): BalancedJsonFragment | null {
  const allowed: readonly JsonOpeningDelimiter[] = opts.openers ?? ["{", "["];

  // Advance to the first character that counts as an opener.
  let begin = 0;
  for (; begin < raw.length; begin += 1) {
    if (isJsonOpeningDelimiter(raw[begin], allowed)) {
      break;
    }
  }
  if (begin >= raw.length) {
    return null;
  }

  const pending: JsonOpeningDelimiter[] = [];
  let quoted = false;
  let skipNext = false;

  for (let pos = begin; pos < raw.length; pos += 1) {
    const unit = raw[pos];
    if (unit === undefined) {
      break;
    }

    // `skipNext` is only set while inside a string, so testing it first is
    // equivalent to testing it under the `quoted` branch.
    if (skipNext) {
      skipNext = false;
      continue;
    }
    if (quoted) {
      if (unit === "\\") {
        skipNext = true;
      } else if (unit === '"') {
        quoted = false;
      }
      continue;
    }
    if (unit === '"') {
      quoted = true;
      continue;
    }

    if (isJsonOpeningDelimiter(unit, allowed)) {
      pending.push(unit);
      continue;
    }

    // Only the closer that matches the innermost open delimiter counts.
    const innermost = pending[pending.length - 1];
    if (innermost === undefined || unit !== CLOSING_DELIMITER[innermost]) {
      continue;
    }
    pending.pop();
    if (pending.length === 0) {
      return { json: raw.slice(begin, pos + 1), startIndex: begin, endIndex: pos };
    }
  }

  return null;
}
/**
 * Gathers consecutive balanced fragments from `raw`, scanning left to right.
 *
 * Each pass calls `extractBalancedJsonPrefix` on the text after the previous
 * fragment, so the fragments never overlap. Scanning stops at the first point
 * where no further balanced fragment is found.
 *
 * @param raw - Text that may contain several JSON values mixed with other output.
 * @param opts - Forwarded unchanged to `extractBalancedJsonPrefix`.
 * @returns The fragments in order of appearance, with `startIndex` and
 *   `endIndex` expressed relative to `raw`.
 */
export function extractBalancedJsonFragments(
  raw: string,
  opts: { openers?: readonly JsonOpeningDelimiter[] } = {},
): BalancedJsonFragment[] {
  const found: BalancedJsonFragment[] = [];

  for (let cursor = 0; cursor < raw.length; ) {
    const next = extractBalancedJsonPrefix(raw.slice(cursor), opts);
    if (!next) {
      return found;
    }

    // `next` is indexed relative to the slice; shift it back into `raw` coordinates.
    const { json, startIndex, endIndex } = next;
    found.push({ json, startIndex: cursor + startIndex, endIndex: cursor + endIndex });
    cursor += endIndex + 1;
  }

  return found;
}