Files
openclaw/src/agents/session-transcript-repair.ts
Peter Steinberger bb46b79d3c refactor: internalize OpenClaw agent runtime (#85341)
* refactor: extract agent core package

Introduce packages/agent-core as the OpenClaw-owned home for reusable agent loop, harness, session, prompt, and runtime dependency contracts.

* refactor: extract shared llm runtime

Move provider model registries, stream wrappers, OAuth helpers, and LLM utilities into src/llm with plugin-sdk barrels instead of depending on the old embedded runtime layout.

* refactor: remove pi runtime internals

Rename remaining Pi-shaped agent surfaces to OpenClaw agent runtime names, delete obsolete Pi docs and package graph checks, and add the third-party notice for incorporated code.

* refactor: tighten agent session runtime

Make agent-core/runtime dependencies explicit, consolidate compaction and session transcript helpers, and move model/session helpers behind OpenClaw-owned contracts.

* refactor: remove static model and pi auth paths

Drop static model catalogs and Pi auth bridges, move model/provider facts to manifest-owned runtime contracts, and harden internal embedded-agent utilities.

* refactor: remove legacy provider compat paths

* docs: remove agent parity notes

* fix: skip provider wildcard metadata parsing

* refactor: share session extension sdk loading

* refactor: inline acpx proxy error formatter

* refactor: fold edit recovery into edit tool

* fix: accept extension batch separator

* test: align startup provider plugin expectations

* fix: restore provider-scoped release discovery

* test: align static asset packaging expectations

* fix: run static provider catalogs during scoped discovery

* fix: add provider entry catalogs for scoped live discovery

* fix: load lightweight provider catalog entries

* fix: refresh provider-scoped plugin metadata

* fix: keep provider catalog entries on release live path

* fix: keep static manifest models in release live checks

* fix: harden release model discovery

* fix: reduce OpenAI live cache probe reasoning

* fix: disable OpenAI cache probe reasoning

* ci: extend OpenAI gateway live timeout

* fix: extend live gateway model budget

* fix: stabilize release validation regressions

* fix: honor provider aliases in model rows

* fix: stabilize release validation lanes

* fix: stabilize release memory qa

* ci: stabilize release validation lanes

* ci: prefer ipv4 for live docker node calls

* fix: restore shared tool-call stream wrapper

* ci: remove legacy pi test shard alias

* fix: clean up embedded agent test drift

* fix: stabilize runtime alias status

* fix: clean up embedded agent ci drift

* fix: restore release ci invariants

* fix: clean up post-rebase runtime drift

* fix: restore release ci checks

* fix: restore release ci after rebase

* fix: remove stale pi runtime path

* test: align compaction runtime expectations

* test: update plugin prerelease expectations

* fix: handle claude live tool approvals

* fix: stabilize release validation gates

* fix: finish agent runtime import

* test: finish post-rebase agent runtime mocks

* fix: keep codex compaction native

* fix: stabilize codex app-server hook tests

* test: isolate codex diagnostic active run

* test: remove codex diagnostic completion race

# Conflicts:
#	extensions/codex/src/app-server/run-attempt.test.ts

* ci: fix full release manifest performance run id

* refactor: narrow llm plugin sdk boundary

* chore: drop generated google boundary stamps

* fix: repair rebase fallout

* fix: clean up rebased runtime references

* fix: decode codex jwt payloads as base64url

* fix: preserve shipped pi runtime alias

* fix: add scoped sdk virtual modules

* fix: decode llm codex oauth jwt as base64url

* fix: avoid stale vertex adc negative cache

* fix: harden tool arg decoding and codeql path

* fix: keep vertex adc negative checks live

* refactor: consolidate codex jwt and edit helpers

* fix: await codex oauth node runtime imports

* fix: preserve sdk tool and notice contracts

* fix: preserve shipped compat config boundaries

* fix: align codex oauth callback host

* fix: terminate agent-core loop streams on failure

* fix: keep codex oauth callback alive during fallback

* ci: include session tools in critical codeql scans

* fix: keep Cloudflare Anthropic provider auth header

* docs: redirect legacy pi runtime pages

* fix: honor bundled web provider compat discovery

* fix: protect session output spill files

* fix: keep legacy agent dir env blocked

* fix: contain auto-discovered skill symlinks

* fix: harden agent core sdk proxy surfaces

* fix: restore approval reaction sdk compat

* fix: keep live docker runs bounded

* fix: keep codex oauth redirect host aligned

* fix: resolve post-rebase agent runtime drift

* fix: redact anthropic oauth parse failures

* fix: preserve responses strict tool shaping

* fix: repair agent runtime rebase cleanup

* docs: redirect retired parity pages

* fix: bound auto-discovered resources to roots

* fix: repair post-rebase agent test drift

* fix: preserve bundled provider allowlist migration

* fix: preserve manifest-owned provider aliases

* fix: declare photon image dependency

* fix: keep provider headers out of proxy body

* fix: preserve shipped env aliases

* fix: refresh control ui i18n generated state

* fix: quote read fallback paths

* fix: preview edits through configured backend

* test: satisfy core test typecheck

* fix: preserve ZAI usage auth fallback

* test: repair codex diagnostic test

* fix: repair agent runtime rebase drift

* test: finish embedded runner import rename

* fix: repair agent runtime rebase integrations

* test: align compaction oauth fallback expectations

* fix: allow sdk-auth session models

* fix: update doctor tool schema import

* fix: preserve bedrock plugin region

* fix: stream harmony-like prose immediately

* ci: include session runtime in codeql shards

* fix: repair latest rebase integrations

* fix: honor explicit codex websocket transport

* fix: keep openai-compatible credentials provider-scoped

* fix: refresh sdk api baseline after rebase

* fix: route cli runtime aliases through openclaw harness

* test: rename stale harness mock expectation

* test: rename embedded agent overflow calls

* test: clean embedded auth test wording

* test: use openclaw stream types in deepinfra cache test

* fix: refresh sdk api baseline on latest main

* fix: honor bundled discovery compat allowlists

* fix: refresh sdk api baseline after latest rebase

* fix: remove stale rebase imports

* test: rename stale model catalog mock

* test: mock renamed doctor runtime modules

* fix: map canonical kimi env auth

* fix: use internal model registry in bench script

* fix: migrate deepinfra provider catalog entry

* fix: enforce builtin tool suppression

* fix: route compaction auth and proxy payloads safely

* refactor: prune unused llm registry leftovers

* test: update codex hooks session import

* test: fix model picker ci coverage

* test: align model picker auth mock types
2026-05-27 19:24:04 +01:00

645 lines
20 KiB
TypeScript

import {
hasNonEmptyString as hasNonEmptyStringField,
normalizeOptionalString,
readStringValue,
} from "../shared/string-coerce.js";
import type { AgentMessage } from "./runtime/index.js";
import {
extractToolCallsFromAssistant,
extractToolResultId,
extractToolResultIds,
} from "./tool-call-id.js";
import { isAllowedToolCallName, normalizeAllowedToolNames } from "./tool-call-shared.js";
type RawToolCallBlock = {
type?: unknown;
id?: unknown;
call_id?: unknown;
toolCallId?: unknown;
toolUseId?: unknown;
tool_call_id?: unknown;
tool_use_id?: unknown;
name?: unknown;
input?: unknown;
arguments?: unknown;
};
const RAW_TOOL_CALL_BLOCK_TYPES = new Set([
"toolCall",
"toolUse",
"functionCall",
"tool_call",
"tool_use",
"function_call",
]);
function isThinkingLikeBlock(block: unknown): boolean {
if (!block || typeof block !== "object") {
return false;
}
const type = (block as { type?: unknown }).type;
return type === "thinking" || type === "redacted_thinking";
}
function isRawToolCallBlock(block: unknown): block is RawToolCallBlock {
if (!block || typeof block !== "object") {
return false;
}
const type = (block as { type?: unknown }).type;
return typeof type === "string" && RAW_TOOL_CALL_BLOCK_TYPES.has(type);
}
function hasToolCallInput(block: RawToolCallBlock): boolean {
const hasInput = "input" in block ? block.input !== undefined && block.input !== null : false;
const hasArguments =
"arguments" in block ? block.arguments !== undefined && block.arguments !== null : false;
return hasInput || hasArguments;
}
function hasToolCallId(block: RawToolCallBlock): boolean {
return (
hasNonEmptyStringField(block.id) ||
hasNonEmptyStringField(block.call_id) ||
hasNonEmptyStringField(block.toolCallId) ||
hasNonEmptyStringField(block.toolUseId) ||
hasNonEmptyStringField(block.tool_call_id) ||
hasNonEmptyStringField(block.tool_use_id)
);
}
function sanitizeToolCallBlock(block: RawToolCallBlock): RawToolCallBlock {
// This repair path normalizes replay shape only. Tool payloads are local
// trusted-operator transcript state per SECURITY.md, so do not redact or
// rewrite sessions_spawn arguments here.
const rawName = readStringValue(block.name);
const trimmedName = rawName?.trim();
const hasTrimmedName = typeof trimmedName === "string" && trimmedName.length > 0;
const normalizedName = hasTrimmedName ? trimmedName : undefined;
const nameChanged = hasTrimmedName && rawName !== trimmedName;
if (!nameChanged) {
return block;
}
const next = { ...(block as Record<string, unknown>) };
if (nameChanged && normalizedName) {
next.name = normalizedName;
}
return next as RawToolCallBlock;
}
function countRawToolCallBlocks(content: unknown[]): number {
let count = 0;
for (const block of content) {
if (isRawToolCallBlock(block)) {
count += 1;
}
}
return count;
}
function isReplaySafeThinkingAssistantTurn(
content: unknown[],
allowedToolNames: Set<string> | null,
): boolean {
let sawToolCall = false;
const seenToolCallIds = new Set<string>();
for (const block of content) {
if (!isRawToolCallBlock(block)) {
continue;
}
sawToolCall = true;
const toolCallId = typeof block.id === "string" ? block.id.trim() : "";
if (
!hasToolCallInput(block) ||
!toolCallId ||
seenToolCallIds.has(toolCallId) ||
!isAllowedToolCallName(block.name, allowedToolNames)
) {
return false;
}
seenToolCallIds.add(toolCallId);
if (sanitizeToolCallBlock(block) !== block) {
return false;
}
}
return sawToolCall;
}
function hasSessionsSpawnAttachmentToolCall(content: unknown[]): boolean {
for (const block of content) {
if (!isRawToolCallBlock(block) || block.name !== "sessions_spawn") {
continue;
}
const input = block.input;
if (!input || typeof input !== "object") {
continue;
}
const attachments = (input as { attachments?: unknown }).attachments;
if (Array.isArray(attachments) && attachments.length > 0) {
return true;
}
}
return false;
}
function makeMissingToolResult(params: {
toolCallId: string;
toolName?: string;
// OpenAI Responses/Codex replay should match upstream Codex's "aborted"
// function_call_output normalization; live coverage in
// openai-reasoning-compat.live.test.ts and tool-replay-repair.live.test.ts
// sends this repaired history to real models. Other providers keep the older,
// explicit OpenClaw diagnostic text unless the caller opts in.
text?: string;
}): Extract<AgentMessage, { role: "toolResult" }> {
return {
role: "toolResult",
toolCallId: params.toolCallId,
toolName: params.toolName ?? "unknown",
content: [
{
type: "text",
text:
params.text ??
"[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
},
],
isError: true,
timestamp: Date.now(),
} as Extract<AgentMessage, { role: "toolResult" }>;
}
function normalizeToolResultName(
message: Extract<AgentMessage, { role: "toolResult" }>,
fallbackName?: string,
): Extract<AgentMessage, { role: "toolResult" }> {
const rawToolName = (message as { toolName?: unknown }).toolName;
const normalizedToolName = normalizeOptionalString(rawToolName);
if (normalizedToolName) {
if (rawToolName === normalizedToolName) {
return message;
}
return { ...message, toolName: normalizedToolName };
}
const normalizedFallback = normalizeOptionalString(fallbackName);
if (normalizedFallback) {
return { ...message, toolName: normalizedFallback };
}
if (typeof rawToolName === "string") {
return { ...message, toolName: "unknown" };
}
return message;
}
function normalizeLegacyToolResultId(
message: Extract<AgentMessage, { role: "toolResult" }>,
toolCalls: Array<{ id: string; name?: string }>,
): Extract<AgentMessage, { role: "toolResult" }> {
if (extractToolResultId(message) || toolCalls.length !== 1) {
return message;
}
const [toolCall] = toolCalls;
const toolResultName = normalizeOptionalString((message as { toolName?: unknown }).toolName);
const toolCallName = normalizeOptionalString(toolCall.name);
if (toolResultName && toolCallName && toolResultName !== toolCallName) {
return message;
}
return { ...message, toolCallId: toolCall.id, isError: true };
}
export { makeMissingToolResult };
type ToolCallInputRepairReport = {
messages: AgentMessage[];
droppedToolCalls: number;
droppedAssistantMessages: number;
};
type ToolCallInputRepairOptions = {
allowedToolNames?: Iterable<string>;
allowProviderOwnedThinkingReplay?: boolean;
};
type ErroredAssistantResultPolicy = "preserve" | "drop";
type ToolUseResultPairingOptions = {
erroredAssistantResultPolicy?: ErroredAssistantResultPolicy;
missingToolResultText?: string;
};
export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] {
let touched = false;
const out: AgentMessage[] = [];
for (const msg of messages) {
if (!msg || typeof msg !== "object" || (msg as { role?: unknown }).role !== "toolResult") {
out.push(msg);
continue;
}
if (!("details" in msg)) {
out.push(msg);
continue;
}
const sanitized = { ...(msg as object) } as { details?: unknown };
delete sanitized.details;
touched = true;
out.push(sanitized as unknown as AgentMessage);
}
return touched ? out : messages;
}
function collectFollowingToolResults(
messages: AgentMessage[],
index: number,
): { ids: Set<string>; displaced: boolean } {
const ids = new Set<string>();
const assistant = messages[index];
const currentToolCalls =
assistant && typeof assistant === "object" && assistant.role === "assistant"
? extractToolCallsFromAssistant(assistant)
: [];
let sawNonToolResult = false;
let displaced = false;
for (let nextIndex = index + 1; nextIndex < messages.length; nextIndex += 1) {
const message = messages[nextIndex];
if (!message || typeof message !== "object") {
sawNonToolResult = true;
continue;
}
if (message.role === "assistant" && assistantHasToolCalls(message)) {
break;
}
if (message.role === "toolResult") {
const normalizedLegacyResult = normalizeLegacyToolResultId(message, currentToolCalls);
const resultIds = extractToolResultIds(normalizedLegacyResult);
for (const id of resultIds) {
ids.add(id);
}
displaced ||= resultIds.length > 0 && sawNonToolResult;
continue;
}
sawNonToolResult = true;
}
return { ids, displaced };
}
function repairToolCallInputs(
messages: AgentMessage[],
options?: ToolCallInputRepairOptions,
): ToolCallInputRepairReport {
let droppedToolCalls = 0;
let droppedAssistantMessages = 0;
let changed = false;
const out: AgentMessage[] = [];
const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
const allowProviderOwnedThinkingReplay = options?.allowProviderOwnedThinkingReplay === true;
const preservedThinkingToolCallIds = new Set<string>();
const priorToolCallIds = new Set<string>();
for (let index = 0; index < messages.length; index += 1) {
const msg = messages[index];
if (!msg || typeof msg !== "object") {
out.push(msg);
continue;
}
if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
out.push(msg);
continue;
}
if (
allowProviderOwnedThinkingReplay &&
msg.content.some((block) => isThinkingLikeBlock(block)) &&
countRawToolCallBlocks(msg.content) > 0
) {
// Signed Anthropic thinking blocks must remain byte-for-byte stable on
// replay. Preserve the turn when every sibling tool call is already valid;
// the later pairing repair can synthesize missing legacy tool results
// without mutating provider-owned assistant content.
const replaySafeToolCalls = extractToolCallsFromAssistant(msg);
const followingToolResults = collectFollowingToolResults(messages, index);
if (
isReplaySafeThinkingAssistantTurn(msg.content, allowedToolNames) &&
replaySafeToolCalls.every(
(toolCall) =>
!preservedThinkingToolCallIds.has(toolCall.id) &&
(!hasSessionsSpawnAttachmentToolCall(msg.content) ||
followingToolResults.ids.has(toolCall.id)) &&
(!followingToolResults.displaced || !priorToolCallIds.has(toolCall.id)),
)
) {
for (const toolCall of replaySafeToolCalls) {
preservedThinkingToolCallIds.add(toolCall.id);
priorToolCallIds.add(toolCall.id);
}
changed ||= followingToolResults.displaced;
out.push(msg);
} else {
droppedToolCalls += countRawToolCallBlocks(msg.content);
droppedAssistantMessages += 1;
changed = true;
}
continue;
}
const nextContent: typeof msg.content = [];
let droppedInMessage = 0;
let messageChanged = false;
for (const block of msg.content) {
if (
isRawToolCallBlock(block) &&
(!hasToolCallInput(block) ||
!hasToolCallId(block) ||
!isAllowedToolCallName((block as RawToolCallBlock).name, allowedToolNames))
) {
droppedToolCalls += 1;
droppedInMessage += 1;
changed = true;
messageChanged = true;
continue;
}
if (isRawToolCallBlock(block)) {
if (RAW_TOOL_CALL_BLOCK_TYPES.has((block as { type?: string }).type ?? "")) {
const sanitized = sanitizeToolCallBlock(block);
if (sanitized !== block) {
changed = true;
messageChanged = true;
}
nextContent.push(sanitized as typeof block);
continue;
}
} else {
nextContent.push(block);
}
}
if (droppedInMessage > 0) {
if (nextContent.length === 0) {
droppedAssistantMessages += 1;
changed = true;
continue;
}
const nextMessage = { ...msg, content: nextContent };
for (const toolCall of extractToolCallsFromAssistant(nextMessage)) {
priorToolCallIds.add(toolCall.id);
}
out.push(nextMessage);
continue;
}
if (messageChanged) {
const nextMessage = { ...msg, content: nextContent };
for (const toolCall of extractToolCallsFromAssistant(nextMessage)) {
priorToolCallIds.add(toolCall.id);
}
out.push(nextMessage);
continue;
}
for (const toolCall of extractToolCallsFromAssistant(msg)) {
priorToolCallIds.add(toolCall.id);
}
out.push(msg);
}
return {
messages: changed ? out : messages,
droppedToolCalls,
droppedAssistantMessages,
};
}
export function sanitizeToolCallInputs(
messages: AgentMessage[],
options?: ToolCallInputRepairOptions,
): AgentMessage[] {
return repairToolCallInputs(messages, options).messages;
}
export function sanitizeToolUseResultPairing(
messages: AgentMessage[],
options?: ToolUseResultPairingOptions,
): AgentMessage[] {
return repairToolUseResultPairing(messages, options).messages;
}
type ToolUseRepairReport = {
messages: AgentMessage[];
added: Array<Extract<AgentMessage, { role: "toolResult" }>>;
droppedDuplicateCount: number;
droppedOrphanCount: number;
moved: boolean;
};
function shouldDropErroredAssistantResults(options?: ToolUseResultPairingOptions): boolean {
return options?.erroredAssistantResultPolicy === "drop";
}
function assistantHasToolCalls(message: AgentMessage): boolean {
if (!message || typeof message !== "object" || message.role !== "assistant") {
return false;
}
return extractToolCallsFromAssistant(message).length > 0;
}
export function repairToolUseResultPairing(
messages: AgentMessage[],
options?: ToolUseResultPairingOptions,
): ToolUseRepairReport {
// Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
// immediately followed by matching tool results. Session files can end up with results
// displaced (e.g. after user turns) or duplicated. Repair by:
// - moving matching toolResult messages directly after their assistant toolCall turn
// - inserting synthetic error toolResults for missing ids
// - dropping duplicate toolResults for the same id (anywhere in the transcript)
const out: AgentMessage[] = [];
const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
const seenToolResultIds = new Set<string>();
let droppedDuplicateCount = 0;
let droppedOrphanCount = 0;
let moved = false;
let changed = false;
const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
const id = extractToolResultId(msg);
if (id && seenToolResultIds.has(id)) {
droppedDuplicateCount += 1;
changed = true;
return;
}
if (id) {
seenToolResultIds.add(id);
}
out.push(msg);
};
for (let i = 0; i < messages.length; i += 1) {
const msg = messages[i];
if (!msg || typeof msg !== "object") {
out.push(msg);
continue;
}
const role = (msg as { role?: unknown }).role;
if (role !== "assistant") {
// Tool results must only appear directly after the matching assistant tool call turn.
// Any "free-floating" toolResult entries in session history can make strict providers
// (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request.
if (role !== "toolResult") {
out.push(msg);
} else {
droppedOrphanCount += 1;
changed = true;
}
continue;
}
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
const toolCalls = extractToolCallsFromAssistant(assistant);
if (toolCalls.length === 0) {
out.push(msg);
continue;
}
const toolCallIds = new Set<string>();
const toolCallNamesById = new Map<string, string>();
for (const toolCall of toolCalls) {
toolCallIds.add(toolCall.id);
if (typeof toolCall.name === "string") {
toolCallNamesById.set(toolCall.id, toolCall.name);
}
}
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
const remainder: AgentMessage[] = [];
let j = i + 1;
for (; j < messages.length; j += 1) {
const next = messages[j];
if (!next || typeof next !== "object") {
remainder.push(next);
continue;
}
const nextRole = (next as { role?: unknown }).role;
if (nextRole === "assistant") {
if (assistantHasToolCalls(next)) {
break;
}
remainder.push(next);
continue;
}
if (nextRole === "toolResult") {
const toolResult = normalizeLegacyToolResultId(
next as Extract<AgentMessage, { role: "toolResult" }>,
toolCalls,
);
const id = extractToolResultId(toolResult);
if (id && toolCallIds.has(id)) {
if (seenToolResultIds.has(id)) {
droppedDuplicateCount += 1;
changed = true;
continue;
}
if (toolResult !== next) {
changed = true;
}
const normalizedToolResult = normalizeToolResultName(
toolResult,
toolCallNamesById.get(id),
);
if (normalizedToolResult !== toolResult) {
changed = true;
}
if (!spanResultsById.has(id)) {
spanResultsById.set(id, normalizedToolResult);
}
continue;
}
}
// Drop tool results that don't match the current assistant tool calls.
if (nextRole !== "toolResult") {
remainder.push(next);
} else {
droppedOrphanCount += 1;
changed = true;
}
}
// Aborted/errored assistant turns should never synthesize missing tool results, but
// the replay sanitizer can still legitimately retain real tool results for surviving
// tool calls in the same turn after malformed siblings are dropped.
const stopReason = (assistant as { stopReason?: string }).stopReason;
if (stopReason === "error" || stopReason === "aborted") {
if (!shouldDropErroredAssistantResults(options)) {
out.push(msg);
for (const toolCall of toolCalls) {
const result = spanResultsById.get(toolCall.id);
if (!result) {
continue;
}
pushToolResult(result);
}
} else if (spanResultsById.size > 0) {
changed = true;
} else {
changed = true;
}
for (const rem of remainder) {
out.push(rem);
}
i = j - 1;
continue;
}
out.push(msg);
if (spanResultsById.size > 0 && remainder.length > 0) {
// Preserve real late-arriving results before synthesizing missing siblings;
// otherwise parallel tool replay can replace useful output with repair noise.
moved = true;
changed = true;
}
for (const call of toolCalls) {
const existing = spanResultsById.get(call.id);
if (existing) {
pushToolResult(existing);
} else {
const missing = makeMissingToolResult({
toolCallId: call.id,
toolName: call.name,
text: options?.missingToolResultText,
});
added.push(missing);
changed = true;
pushToolResult(missing);
}
}
for (const rem of remainder) {
if (!rem || typeof rem !== "object") {
out.push(rem);
continue;
}
out.push(rem);
}
i = j - 1;
}
const changedOrMoved = changed || moved;
return {
messages: changedOrMoved ? out : messages,
added,
droppedDuplicateCount,
droppedOrphanCount,
moved: changedOrMoved,
};
}