fix(agents): repair strict provider tool replay

This commit is contained in:
Peter Steinberger
2026-04-25 05:52:33 +01:00
parent e31aef7e19
commit 7f6452897e
16 changed files with 1091 additions and 73 deletions

View File

@@ -75,6 +75,7 @@ Docs: https://docs.openclaw.ai
- Gateway/tools: allow `POST /tools/invoke` to reach plugin-backed catalog tools such as `browser` when no core implementation exists, while still preferring built-in tools for real core names. Thanks @chat2way.
- Browser/security: require `operator.admin` for the `browser.request` gateway method, matching the host/browser-node control authority exposed by that route. Thanks @RichardCao.
- Browser/profiles: allow local managed profiles to override `browser.executablePath`, so different profiles can launch different Chromium-based browsers. Thanks @nobrainer-tech.
- Agents/replay: repair displaced or missing tool results before strict provider replay, use Codex-compatible `aborted` outputs for OpenAI Responses history, and drop partial aborted/error transport turns before retries.
- Reply media: allow sandboxed replies to deliver OpenClaw-managed `media/outbound` and `media/tool-*` attachments without treating them as sandbox escapes, while keeping alias-escape checks on the managed media root. Fixes #71138. Thanks @mayor686, @truffle-dev, and @neeravmakwana.
- CLI/agent: keep `openclaw agent --json` stdout reserved for the JSON response by routing gateway, plugin, and embedded-fallback diagnostics to stderr before execution starts. Fixes #71319.
- Agents/Gemini: retry reasoning-only, empty, and planning-only Gemini turns instead of letting sessions silently stall. Fixes #71074. (#71362) Thanks @neeravmakwana.

View File

@@ -114,9 +114,9 @@ external end-user instructions.
- Image sanitization only.
- Drop orphaned reasoning signatures (standalone reasoning items without a following content block) for OpenAI Responses/Codex transcripts, and drop replayable OpenAI reasoning after a model route switch.
- No tool call id sanitization.
- No tool result pairing repair.
- Tool result pairing repair may move real matched outputs and synthesize Codex-style `aborted` outputs for missing tool calls.
- No turn validation or reordering.
- No synthetic tool results.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping.
**Google (Generative AI / Gemini CLI / Antigravity)**

View File

@@ -1,10 +1,14 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { completeSimple, type Api, type Model } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
import { ensureOpenClawModelsJson } from "./models-config.js";
import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js";
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
const LIVE = isLiveTestEnabled();
@@ -169,4 +173,141 @@ describeLive("openai reasoning compat live", () => {
},
3 * 60 * 1000,
);
it(
"accepts repaired OpenAI Codex parallel tool replay with aborted missing results",
async () => {
const { provider, modelId } = resolveTargetModelRef();
const cfg = loadConfig();
await ensureOpenClawModelsJson(cfg);
const agentDir = resolveOpenClawAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
const model = modelRegistry.find(provider, modelId) as Model<Api> | null;
if (!model) {
logProgress(`[openai-reasoning-compat] model missing from registry: ${TARGET_MODEL_REF}`);
return;
}
let apiKeyInfo;
try {
apiKeyInfo = await getApiKeyForModel({
model,
cfg,
credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
});
} catch (error) {
logProgress(`[openai-reasoning-compat] skip (${String(error)})`);
return;
}
if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
logProgress(
`[openai-reasoning-compat] skip (non-profile credential source: ${apiKeyInfo.source})`,
);
return;
}
const messages = [
{
role: "user",
content: "Use noop.",
timestamp: Date.now(),
},
{
role: "assistant",
provider: model.provider,
api: model.api,
model: model.id,
stopReason: "toolUse",
timestamp: Date.now(),
content: [
{ type: "toolCall", id: "call_keep", name: "noop", arguments: {} },
{ type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} },
{ type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} },
],
},
{
role: "user",
content: "Reply with exactly: replay ok.",
timestamp: Date.now(),
},
{
role: "toolResult",
toolCallId: "call_keep",
toolName: "noop",
content: [{ type: "text", text: "ok" }],
isError: false,
timestamp: Date.now(),
},
] as unknown as AgentMessage[];
const sanitized = await sanitizeSessionHistory({
messages,
modelApi: model.api,
provider: model.provider,
modelId: model.id,
sessionManager: SessionManager.inMemory(),
sessionId: "openai-codex-tool-replay-live",
});
expect(sanitized.map((message) => message.role)).toEqual([
"user",
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(
sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId),
).toEqual(["call_keep", "call_missing_a", "call_missing_b"]);
expect(
sanitized
.slice(3, 5)
.map((message) => (message as Extract<AgentMessage, { role: "toolResult" }>).content),
).toEqual([[{ type: "text", text: "aborted" }], [{ type: "text", text: "aborted" }]]);
expect(JSON.stringify(sanitized)).not.toContain("missing tool result");
const response = await completeSimpleWithTimeout(
model,
{
systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
messages: sanitized as never,
tools: [
{
name: "noop",
description: "Return ok.",
parameters: Type.Object({}, { additionalProperties: false }),
},
],
},
{
apiKey: requireApiKey(apiKeyInfo, model.provider),
reasoning: "low",
maxTokens: 64,
},
120_000,
);
const text = response.content
.filter((block) => block.type === "text")
.map((block) => block.text.trim())
.join(" ")
.trim();
const errorMessage =
typeof (response as { errorMessage?: unknown }).errorMessage === "string"
? ((response as { errorMessage?: string }).errorMessage ?? "")
: "";
if (errorMessage && isKnownLiveBlocker(errorMessage)) {
logProgress(`[openai-reasoning-compat] skip (${errorMessage})`);
return;
}
expect(text).toMatch(/^replay ok\.?$/i);
},
3 * 60 * 1000,
);
});

View File

@@ -688,20 +688,181 @@ describe("sanitizeSessionHistory", () => {
expect(result[1]?.role).toBe("assistant");
});
it("synthesizes missing tool results for openai-responses after repair", async () => {
it("synthesizes Codex-style aborted tool results for openai-responses after repair", async () => {
const messages: AgentMessage[] = [
makeUserMessage("start"),
makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
makeUserMessage("continue"),
];
const result = await sanitizeOpenAIHistory(messages);
expect(result.map((message) => message.role)).toEqual([
"user",
"assistant",
"toolResult",
"user",
]);
expect((result[2] as { toolCallId?: string }).toolCallId).toBe("call1");
expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
expect(JSON.stringify(result)).not.toContain("missing tool result");
});
it("synthesizes Codex-style aborted tool results for openai-codex-responses", async () => {
const messages: AgentMessage[] = [
makeAssistantMessage(
[
{ type: "toolCall", id: "call_a", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_b", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_c", name: "exec", arguments: {} },
],
{ stopReason: "toolUse" },
),
makeUserMessage("status?"),
];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-codex-responses",
provider: "openai-codex",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result.map((message) => message.role)).toEqual([
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(
result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId),
).toEqual(["calla", "callb", "callc"]);
for (const message of result.slice(1, 4)) {
expect((message as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
}
expect(JSON.stringify(result)).not.toContain("missing tool result");
});
it("keeps real parallel tool results for openai-responses and aborts missing siblings", async () => {
const messages: AgentMessage[] = [
makeAssistantMessage(
[
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_3", name: "write", arguments: {} },
],
{ stopReason: "toolUse" },
),
makeUserMessage("continue"),
castAgentMessage({
role: "toolResult",
toolCallId: "call_2",
toolName: "exec",
content: [{ type: "text", text: "ok" }],
isError: false,
}),
];
const result = await sanitizeOpenAIHistory(messages);
// repairToolUseResultPairing now runs for all providers (including OpenAI)
// to fix orphaned function_call_output items that OpenAI would reject.
expect(result).toHaveLength(2);
expect(result[0]?.role).toBe("assistant");
expect(result[1]?.role).toBe("toolResult");
expect(result.map((message) => message.role)).toEqual([
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(
extractToolCallsFromAssistant(result[0] as Extract<AgentMessage, { role: "assistant" }>),
).toMatchObject([
{ id: "call1", name: "read" },
{ id: "call2", name: "exec" },
{ id: "call3", name: "write" },
]);
expect(
result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId),
).toEqual(["call1", "call2", "call3"]);
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "ok" },
]);
expect((result[3] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
expect(JSON.stringify(result)).not.toContain("missing tool result");
});
it("applies aborted missing-result repair to azure-openai-responses", async () => {
const messages: AgentMessage[] = [
makeAssistantMessage([{ type: "toolCall", id: "call_azure", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
makeUserMessage("continue"),
];
const result = await sanitizeSessionHistory({
messages,
modelApi: "azure-openai-responses",
provider: "azure-openai-responses",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]);
expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callazure");
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "aborted" },
]);
});
it("drops duplicate and orphan OpenAI outputs while preserving the first real result", async () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "toolResult",
toolCallId: "call_orphan",
toolName: "read",
content: [{ type: "text", text: "orphan" }],
isError: false,
}),
makeAssistantMessage([{ type: "toolCall", id: "call_keep", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call_keep",
toolName: "read",
content: [{ type: "text", text: "first" }],
isError: false,
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call_keep",
toolName: "read",
content: [{ type: "text", text: "duplicate" }],
isError: false,
}),
makeUserMessage("continue"),
];
const result = await sanitizeOpenAIHistory(messages);
expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]);
expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callkeep");
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
{ type: "text", text: "first" },
]);
expect(JSON.stringify(result)).not.toContain("orphan");
expect(JSON.stringify(result)).not.toContain("duplicate");
});
it.each([

View File

@@ -810,6 +810,12 @@ export async function compactEmbeddedPiSessionDirect(
config: params.config,
contextWindowTokens: ctxInfo.tokens,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
missingToolResultText:
model.api === "openai-responses" ||
model.api === "azure-openai-responses" ||
model.api === "openai-codex-responses"
? "aborted"
: undefined,
allowedToolNames,
});
checkpointSnapshot = captureCompactionCheckpointSnapshot({
@@ -965,6 +971,11 @@ export async function compactEmbeddedPiSessionDirect(
const limited = transcriptPolicy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(truncated, {
erroredAssistantResultPolicy: "drop",
...(model.api === "openai-responses" ||
model.api === "azure-openai-responses" ||
model.api === "openai-codex-responses"
? { missingToolResultText: "aborted" }
: {}),
})
: truncated;
if (limited.length > 0) {

View File

@@ -493,13 +493,17 @@ export async function sanitizeSessionHistory(params: {
allowedToolNames: params.allowedToolNames,
allowProviderOwnedThinkingReplay,
});
// OpenAI's fc_* pairing downgrade needs the raw call_id|fc_id separator intact,
// but displaced tool results must first be repaired back next to their
// assistant turn so the downgrade can rewrite both sides consistently.
// OpenAI Responses rejects orphan/missing function_call_output items. Upstream
// Codex repairs those gaps with "aborted"; keep that before the fc_* downgrade
// so both call and result ids are rewritten together. Covered by unit replay
// tests plus live OpenAI/Codex and generic replay-repair model tests.
const openAIRepairedToolCalls =
isOpenAIResponsesApi && policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolCalls, {
erroredAssistantResultPolicy: "drop",
// Match upstream Codex history normalization for OpenAI Responses:
// missing function_call_output entries are model-visible "aborted".
missingToolResultText: "aborted",
})
: sanitizedToolCalls;
const openAISafeToolCalls = isOpenAIResponsesApi
@@ -517,6 +521,9 @@ export async function sanitizeSessionHistory(params: {
allowedToolNames: params.allowedToolNames,
})
: openAISafeToolCalls;
// Gemini/Anthropic-class providers also require tool results to stay adjacent
// to their assistant tool calls. They do not use Codex's "aborted" text, but
// the same ordering repair is live-tested with Gemini 3 Flash.
const repairedTools =
!isOpenAIResponsesApi && policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolIds, {

View File

@@ -61,6 +61,65 @@ describe("sanitizeReplayToolCallIdsForStream", () => {
]);
});
it("synthesizes missing tool results after strict id sanitization", () => {
const rawId = "call_function_av7cbkigmk7x1";
const out = sanitizeReplayToolCallIdsForStream({
messages: [
{
role: "assistant",
content: [
{ type: "toolUse", id: rawId, name: "read", input: { path: "." } },
{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } },
],
} as never,
{
role: "toolResult",
toolCallId: rawId,
toolUseId: rawId,
toolName: "read",
content: [{ type: "text", text: "ok" }],
isError: false,
} as never,
],
mode: "strict",
repairToolUseResultPairing: true,
});
expect(out.map((message) => message.role)).toEqual(["assistant", "toolResult", "toolResult"]);
expect((out[0] as Extract<AgentMessage, { role: "assistant" }>).content).toMatchObject([
{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" },
{ type: "toolUse", id: "callmissing", name: "exec" },
]);
expect(out[1]).toMatchObject({
role: "toolResult",
toolCallId: "callfunctionav7cbkigmk7x1",
toolUseId: "callfunctionav7cbkigmk7x1",
});
expect(out[2]).toMatchObject({
role: "toolResult",
toolCallId: "callmissing",
isError: true,
});
});
it("synthesizes missing tool results when repair is enabled", () => {
const out = sanitizeReplayToolCallIdsForStream({
messages: [
{
role: "assistant",
content: [{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } }],
} as never,
],
mode: "strict",
repairToolUseResultPairing: true,
});
expect(out).toMatchObject([
{ role: "assistant" },
{ role: "toolResult", toolCallId: "callmissing", isError: true },
]);
});
it("keeps real tool results for aborted assistant spans", () => {
const rawId = "call_function_av7cbkigmk7x1";
const out = sanitizeReplayToolCallIdsForStream({

View File

@@ -1193,6 +1193,12 @@ export async function runEmbeddedAttempt(
contextWindowTokens: params.contextTokenBudget,
inputProvenance: params.inputProvenance,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
missingToolResultText:
params.model.api === "openai-responses" ||
params.model.api === "azure-openai-responses" ||
params.model.api === "openai-codex-responses"
? "aborted"
: undefined,
allowedToolNames,
});
trackSessionManagerAccess(params.sessionFile);
@@ -1840,6 +1846,7 @@ export async function runEmbeddedAttempt(
const limited = transcriptPolicy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(truncated, {
erroredAssistantResultPolicy: "drop",
...(isOpenAIResponsesApi ? { missingToolResultText: "aborted" } : {}),
})
: truncated;
cacheTrace?.recordStage("session:limited", { messages: limited });

View File

@@ -29,6 +29,7 @@ export function guardSessionManager(
contextWindowTokens?: number;
inputProvenance?: InputProvenance;
allowSyntheticToolResults?: boolean;
missingToolResultText?: string;
allowedToolNames?: Iterable<string>;
},
): GuardedSessionManager {
@@ -75,6 +76,7 @@ export function guardSessionManager(
applyInputProvenanceToUserMessage(message, opts?.inputProvenance),
transformToolResultForPersistence: transform,
allowSyntheticToolResults: opts?.allowSyntheticToolResults,
missingToolResultText: opts?.missingToolResultText,
allowedToolNames: opts?.allowedToolNames,
beforeMessageWriteHook: beforeMessageWrite,
maxToolResultChars:

View File

@@ -111,6 +111,18 @@ describe("installSessionToolResultGuard", () => {
expectPersistedRoles(sm, ["assistant", "toolResult"]);
});
it("uses configured text for synthetic tool results", () => {
const sm = SessionManager.inMemory();
const guard = installSessionToolResultGuard(sm, {
missingToolResultText: "aborted",
});
sm.appendMessage(toolCallMessage);
guard.flushPendingToolResults();
expect(getToolResultText(getPersistedMessages(sm))).toBe("aborted");
});
it("clears pending tool calls without inserting synthetic tool results", () => {
const sm = SessionManager.inMemory();
const guard = installSessionToolResultGuard(sm);

View File

@@ -90,6 +90,7 @@ export function installSessionToolResultGuard(
* Defaults to true.
*/
allowSyntheticToolResults?: boolean;
missingToolResultText?: string;
/**
* Optional set/list of tool names accepted for assistant toolCall/toolUse blocks.
* When set, tool calls with unknown names are dropped before persistence.
@@ -127,6 +128,7 @@ export function installSessionToolResultGuard(
};
const allowSyntheticToolResults = opts?.allowSyntheticToolResults ?? true;
const missingToolResultText = opts?.missingToolResultText;
const beforeWrite = opts?.beforeMessageWriteHook;
const maxToolResultChars = resolveMaxToolResultChars(opts);
@@ -154,7 +156,11 @@ export function installSessionToolResultGuard(
}
if (allowSyntheticToolResults) {
for (const [id, name] of pendingState.entries()) {
const synthetic = makeMissingToolResult({ toolCallId: id, toolName: name });
const synthetic = makeMissingToolResult({
toolCallId: id,
toolName: name,
text: missingToolResultText,
});
const flushed = applyBeforeWriteHook(
persistToolResult(persistMessage(synthetic), {
toolCallId: id,

View File

@@ -76,6 +76,68 @@ describe("sanitizeToolUseResultPairing", () => {
expect(out[3]?.role).toBe("user");
});
it("uses custom text for synthesized missing tool results", () => {
const input = castAgentMessages([
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
},
{ role: "user", content: "user message that should come after tool use" },
]);
const result = repairToolUseResultPairing(input, {
missingToolResultText: "aborted",
});
expect(result.added).toHaveLength(1);
expect(result.messages.map((m) => m.role)).toEqual(["assistant", "toolResult", "user"]);
expect(result.added[0]?.content).toEqual([{ type: "text", text: "aborted" }]);
});
it("keeps matched parallel tool results and synthesizes only missing siblings", () => {
const input = castAgentMessages([
{
role: "assistant",
content: [
{ type: "text", text: "checking" },
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
{ type: "toolCall", id: "call_3", name: "write", arguments: {} },
],
},
{ role: "user", content: "user message that should come after tool use" },
{
role: "toolResult",
toolCallId: "call_2",
toolName: "exec",
content: [{ type: "text", text: "ok" }],
isError: false,
},
]);
const result = repairToolUseResultPairing(input, {
missingToolResultText: "aborted",
});
expect(result.added.map((message) => message.toolCallId)).toEqual(["call_1", "call_3"]);
expect(result.messages.map((m) => m.role)).toEqual([
"assistant",
"toolResult",
"toolResult",
"toolResult",
"user",
]);
expect(getAssistantToolCallBlocks(result.messages)).toMatchObject([
{ id: "call_1", name: "read" },
{ id: "call_2", name: "exec" },
{ id: "call_3", name: "write" },
]);
expect((result.messages[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
expect((result.messages[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
expect((result.messages[3] as { toolCallId?: string }).toolCallId).toBe("call_3");
expect(JSON.stringify(result.added)).not.toContain("missing tool result");
});
it("repairs blank tool result names from matching tool calls", () => {
const input = castAgentMessages([
{
@@ -248,9 +310,8 @@ describe("sanitizeToolUseResultPairing", () => {
});
expect(result.droppedOrphanCount).toBe(0);
expect(result.messages).toHaveLength(2);
expect(result.messages[0]?.role).toBe("assistant");
expect(result.messages[1]?.role).toBe("user");
expect(result.messages).toHaveLength(1);
expect(result.messages[0]?.role).toBe("user");
expect(result.added).toHaveLength(0);
});
});

View File

@@ -175,6 +175,12 @@ function isReplaySafeThinkingAssistantTurn(
function makeMissingToolResult(params: {
toolCallId: string;
toolName?: string;
// OpenAI Responses/Codex replay should match upstream Codex's "aborted"
// function_call_output normalization; live coverage in
// openai-reasoning-compat.live.test.ts and tool-replay-repair.live.test.ts
// sends this repaired history to real models. Other providers keep the older,
// explicit OpenClaw diagnostic text unless the caller opts in.
text?: string;
}): Extract<AgentMessage, { role: "toolResult" }> {
return {
role: "toolResult",
@@ -183,7 +189,9 @@ function makeMissingToolResult(params: {
content: [
{
type: "text",
text: "[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
text:
params.text ??
"[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
},
],
isError: true,
@@ -232,6 +240,7 @@ export type ErroredAssistantResultPolicy = "preserve" | "drop";
export type ToolUseResultPairingOptions = {
erroredAssistantResultPolicy?: ErroredAssistantResultPolicy;
missingToolResultText?: string;
};
export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] {
@@ -529,8 +538,8 @@ export function repairToolUseResultPairing(
// tool calls in the same turn after malformed siblings are dropped.
const stopReason = (assistant as { stopReason?: string }).stopReason;
if (stopReason === "error" || stopReason === "aborted") {
out.push(msg);
if (!shouldDropErroredAssistantResults(options)) {
out.push(msg);
for (const toolCall of toolCalls) {
const result = spanResultsById.get(toolCall.id);
if (!result) {
@@ -540,6 +549,8 @@ export function repairToolUseResultPairing(
}
} else if (spanResultsById.size > 0) {
changed = true;
} else {
changed = true;
}
for (const rem of remainder) {
out.push(rem);
@@ -551,6 +562,8 @@ export function repairToolUseResultPairing(
out.push(msg);
if (spanResultsById.size > 0 && remainder.length > 0) {
// Preserve real late-arriving results before synthesizing missing siblings;
// otherwise parallel tool replay can replace useful output with repair noise.
moved = true;
changed = true;
}
@@ -563,6 +576,7 @@ export function repairToolUseResultPairing(
const missing = makeMissingToolResult({
toolCallId: call.id,
toolName: call.name,
text: options?.missingToolResultText,
});
added.push(missing);
changed = true;

View File

@@ -0,0 +1,386 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { completeSimple, type Api, type Context, type Model } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
import { ensureOpenClawModelsJson } from "./models-config.js";
import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js";
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
import { transformTransportMessages } from "./transport-message-transform.js";
// Live-test gating: the whole suite is skipped unless live testing is enabled.
const LIVE = isLiveTestEnabled();
// When profile-key mode is on, tests that resolve a non-profile credential skip
// themselves instead of running against env-provided keys.
const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first";
// Default provider/model pairs exercised by this suite; override with a
// comma-separated list in OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS.
const DEFAULT_TARGET_MODEL_REFS = "openai-codex/gpt-5.5,google/gemini-3-flash-preview";
const TARGET_MODEL_REFS = parseTargetModelRefs(
  process.env.OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS ?? DEFAULT_TARGET_MODEL_REFS,
);
const describeLive = LIVE ? describe : describe.skip;
// One "provider/modelId" target parsed from the env override or the default list.
type TargetModelRef = {
  ref: string;
  provider: string;
  modelId: string;
};
/**
 * Parse a comma-separated list of "provider/modelId" refs into structured
 * targets. Blank entries are ignored; an entry without both a provider and a
 * model id is a configuration error and throws.
 */
function parseTargetModelRefs(raw: string | undefined): TargetModelRef[] {
  const targets: TargetModelRef[] = [];
  for (const candidate of (raw ?? "").split(",")) {
    const ref = candidate.trim();
    if (!ref) {
      continue;
    }
    // Only the first "/" separates provider from model id; model ids may
    // themselves contain slashes.
    const slash = ref.indexOf("/");
    const provider = slash === -1 ? ref : ref.slice(0, slash);
    const modelId = slash === -1 ? "" : ref.slice(slash + 1).trim();
    if (!provider.trim() || !modelId) {
      throw new Error(
        `Invalid OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS entry: ${JSON.stringify(ref)}`,
      );
    }
    targets.push({ ref, provider: provider.trim(), modelId });
  }
  return targets;
}
/** Write a live-test progress line to stderr so stdout stays clean for results. */
function logProgress(message: string): void {
  const line = "[live] " + message + "\n";
  process.stderr.write(line);
}
/**
 * Run `completeSimple` under a hard wall-clock timeout.
 *
 * Two timers run in parallel: one fires the AbortController so the transport
 * can cancel cleanly, and one rejects the race in case the provider ignores
 * the abort signal. Both timers are cleared once the race settles — the
 * original only cleared the abort timer, leaving the hard-timeout timer
 * pending for up to `timeoutMs` after every successful call.
 *
 * @param model - model to invoke
 * @param context - `completeSimple` context (system prompt, messages, tools)
 * @param options - `completeSimple` options; the timeout abort signal is merged in
 * @param timeoutMs - wall-clock budget in milliseconds
 * @returns the `completeSimple` response
 * @throws Error when the call does not settle within `timeoutMs`
 */
async function completeSimpleWithTimeout<TApi extends Api>(
  model: Model<TApi>,
  context: Parameters<typeof completeSimple<TApi>>[1],
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
): Promise<Awaited<ReturnType<typeof completeSimple<TApi>>>> {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  abortTimer.unref?.();
  // Hoisted so the finally block can clear it; assigned inside the race below.
  let hardTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    return await Promise.race([
      completeSimple(model, context, {
        ...options,
        signal: controller.signal,
      }),
      new Promise<never>((_, reject) => {
        hardTimer = setTimeout(() => {
          reject(new Error(`model call timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        hardTimer.unref?.();
      }),
    ]);
  } finally {
    clearTimeout(abortTimer);
    if (hardTimer !== undefined) {
      clearTimeout(hardTimer);
    }
  }
}
/** True for the OpenAI Responses family of APIs (plain, Codex, and Azure). */
function isOpenAIResponsesFamily(api: string): boolean {
  const responsesApis = ["openai-responses", "openai-codex-responses", "azure-openai-responses"];
  return responsesApis.includes(api);
}
/**
 * Build a replay fixture with one real tool result (call_keep) displaced after
 * a user turn, plus two tool calls with no results at all — the exact shape
 * the pairing repair must fix before a strict provider accepts the transcript.
 */
function buildReplayMessages(model: Model<Api>): AgentMessage[] {
  const baseTime = Date.now();
  // For Google targets, pretend the assistant turn came from a different
  // provider-owned transcript (a simulated model switch). That forces the id
  // sanitization and replay-repair path that failed in real session replays,
  // not just the happy path of a same-provider synthetic fixture.
  let source: { api: string; provider: string; model: string };
  if (model.provider === "google") {
    source = {
      api: "google-gemini-cli",
      provider: "google-antigravity",
      model: "claude-sonnet-4-20250514",
    };
  } else {
    source = {
      api: model.api,
      provider: model.provider,
      model: model.id,
    };
  }
  const userIntro = {
    role: "user",
    content: "Use noop.",
    timestamp: baseTime,
  };
  const assistantToolTurn = {
    role: "assistant",
    provider: source.provider,
    api: source.api,
    model: source.model,
    stopReason: "toolUse",
    timestamp: baseTime + 1,
    content: [
      { type: "toolCall", id: "call_keep", name: "noop", arguments: {} },
      { type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} },
      { type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} },
    ],
  };
  const userFollowUp = {
    role: "user",
    content: "Reply with exactly: replay repair ok.",
    timestamp: baseTime + 2,
  };
  // Deliberately displaced: the real result arrives after the user turn.
  const displacedResult = {
    role: "toolResult",
    toolCallId: "call_keep",
    toolName: "noop",
    content: [{ type: "text", text: "ok" }],
    isError: false,
    timestamp: baseTime + 3,
  };
  return [
    userIntro,
    assistantToolTurn,
    userFollowUp,
    displacedResult,
  ] as unknown as AgentMessage[];
}
/**
 * Build a transport fixture whose first turn is an aborted assistant tool call
 * with no result — the partial turn the transport transform must drop before a
 * retry can be replayed.
 */
function buildAbortedTransportMessages(model: Model<Api>): Context["messages"] {
  const baseTime = Date.now();
  const abortedAssistantTurn = {
    role: "assistant",
    provider: model.provider,
    api: model.api,
    model: model.id,
    stopReason: "aborted",
    timestamp: baseTime,
    content: [{ type: "toolCall", id: "call_transport_aborted", name: "noop", arguments: {} }],
  };
  const followUp = {
    role: "user",
    content: "Reply with exactly: transport replay ok.",
    timestamp: baseTime + 1,
  };
  return [abortedAssistantTurn, followUp] as Context["messages"];
}
/**
 * Extract the text of a tool-result message's first content block, or
 * undefined when the message is not a toolResult or its first block is not text.
 */
function syntheticToolResultText(message: AgentMessage): string | undefined {
  if (message.role === "toolResult") {
    const head = message.content[0] as { type?: unknown; text?: unknown } | undefined;
    if (head?.type === "text" && typeof head.text === "string") {
      return head.text;
    }
  }
  return undefined;
}
/** Collect the ids of all toolCall blocks on an assistant message (else []). */
function assistantToolCallIds(message: AgentMessage): string[] {
  if (message.role !== "assistant") {
    return [];
  }
  const ids: string[] = [];
  for (const block of message.content) {
    if (block.type === "toolCall") {
      ids.push(block.id);
    }
  }
  return ids;
}
/**
 * True for provider error messages that are known environment limitations
 * (ChatGPT-account restrictions or usage limits), not test failures — the
 * caller skips the live test instead of failing it.
 */
function isKnownLiveBlocker(errorMessage: string): boolean {
  const blockers = [
    /not supported when using codex with a chatgpt account/i,
    /hit your chatgpt usage limit/i,
  ];
  return blockers.some((pattern) => pattern.test(errorMessage));
}
// Live regression coverage for strict-provider tool replay repair. Providers
// such as OpenAI Responses reject transcripts whose assistant(toolCall) turns
// lack adjacent matching results, so a non-error completion from the real API
// is the end-to-end proof that the repaired transcript shape is valid.
describeLive("tool replay repair live", () => {
  /**
   * Resolves the config, registry model, and credentials for one live target.
   *
   * Returns null after logging the reason when the target must be skipped:
   * the model is absent from the registry, credential resolution throws, or
   * REQUIRE_PROFILE_KEYS is set and the credential is not profile-sourced.
   * Extracted because both live tests below previously duplicated this setup.
   */
  async function resolveLiveTarget(target: { ref: string; provider: string; modelId: string }) {
    const cfg = loadConfig();
    await ensureOpenClawModelsJson(cfg);
    const agentDir = resolveOpenClawAgentDir();
    const authStorage = discoverAuthStorage(agentDir);
    const modelRegistry = discoverModels(authStorage, agentDir);
    const model = modelRegistry.find(target.provider, target.modelId) as Model<Api> | null;
    if (!model) {
      logProgress(`[tool-replay-repair] model missing from registry: ${target.ref}`);
      return null;
    }
    let apiKeyInfo;
    try {
      apiKeyInfo = await getApiKeyForModel({
        model,
        cfg,
        credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
      });
    } catch (error) {
      logProgress(`[tool-replay-repair] skip ${target.ref} (${String(error)})`);
      return null;
    }
    if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
      logProgress(
        `[tool-replay-repair] skip ${target.ref} (non-profile credential source: ${apiKeyInfo.source})`,
      );
      return null;
    }
    return { cfg, model, apiKeyInfo };
  }

  for (const target of TARGET_MODEL_REFS) {
    it(
      `accepts repaired displaced and missing tool results with ${target.ref}`,
      async () => {
        const resolved = await resolveLiveTarget(target);
        if (!resolved) {
          return;
        }
        const { model, apiKeyInfo } = resolved;
        logProgress(`[tool-replay-repair] target=${target.ref} auth source=${apiKeyInfo.source}`);
        const sanitized = await sanitizeSessionHistory({
          messages: buildReplayMessages(model),
          modelApi: model.api,
          provider: model.provider,
          modelId: model.id,
          sessionManager: SessionManager.inMemory(),
          sessionId: `tool-replay-repair-live-${target.provider}-${target.modelId}`,
        });
        // Repair must leave every assistant toolCall immediately followed by
        // a matching toolResult, in the same order as the calls.
        expect(sanitized.map((message) => message.role)).toEqual([
          "user",
          "assistant",
          "toolResult",
          "toolResult",
          "toolResult",
          "user",
        ]);
        const assistantMessage = sanitized[1];
        expect(assistantMessage?.role).toBe("assistant");
        expect(
          sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId),
        ).toEqual(assistantToolCallIds(assistantMessage));
        // These assertions are the model-visible contract: OpenAI Responses
        // gets Codex-compatible "aborted" outputs, while Gemini proves the
        // generic repair does not leak OpenAI wording into other providers.
        const insertedTexts = sanitized.slice(3, 5).map(syntheticToolResultText);
        if (isOpenAIResponsesFamily(model.api)) {
          expect(insertedTexts).toEqual(["aborted", "aborted"]);
        } else {
          expect(insertedTexts).not.toContain("aborted");
        }
        // Sending the repaired transcript to the real model is the live proof:
        // providers reject malformed tool-call adjacency before generation, so
        // any non-error response here validates the repair shape end to end.
        const response = await completeSimpleWithTimeout(
          model,
          {
            systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
            messages: sanitized as never,
            tools: [
              {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              },
            ],
          },
          {
            apiKey: requireApiKey(apiKeyInfo, model.provider),
            reasoning: "low",
            maxTokens: 96,
          },
          120_000,
        );
        const text = response.content
          .filter((block) => block.type === "text")
          .map((block) => block.text.trim())
          .join(" ")
          .trim();
        const errorMessage =
          typeof (response as { errorMessage?: unknown }).errorMessage === "string"
            ? ((response as { errorMessage?: string }).errorMessage ?? "")
            : "";
        // Known account/limit blockers are environment problems, not repair
        // regressions: skip instead of failing.
        if (errorMessage && isKnownLiveBlocker(errorMessage)) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`);
          return;
        }
        expect(response.stopReason).not.toBe("error");
        if (text.length > 0) {
          expect(text).toMatch(/^replay repair ok\.?$/i);
        }
      },
      3 * 60 * 1000,
    );
    it(
      `accepts transport replay after dropping aborted assistant tool calls with ${target.ref}`,
      async () => {
        const resolved = await resolveLiveTarget(target);
        if (!resolved) {
          return;
        }
        const { model, apiKeyInfo } = resolved;
        const transformed = transformTransportMessages(buildAbortedTransportMessages(model), model);
        // The aborted assistant tool-call turn must be dropped entirely.
        expect(transformed.map((message) => message.role)).toEqual(["user"]);
        expect(JSON.stringify(transformed)).not.toContain("call_transport_aborted");
        // This is the transport replay regression proof: providers reject
        // assistant(tool_call)->user replays without a matching result, so the
        // dropped transcript must still be accepted by real model APIs.
        const response = await completeSimpleWithTimeout(
          model,
          {
            systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
            messages: transformed as never,
            tools: [
              {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              },
            ],
          },
          {
            apiKey: requireApiKey(apiKeyInfo, model.provider),
            reasoning: "low",
            maxTokens: 96,
          },
          120_000,
        );
        const text = response.content
          .filter((block) => block.type === "text")
          .map((block) => block.text.trim())
          .join(" ")
          .trim();
        const errorMessage =
          typeof (response as { errorMessage?: unknown }).errorMessage === "string"
            ? ((response as { errorMessage?: string }).errorMessage ?? "")
            : "";
        if (errorMessage && isKnownLiveBlocker(errorMessage)) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`);
          return;
        }
        expect(response.stopReason).not.toBe("error");
        if (text.length > 0) {
          expect(text).toMatch(/^transport replay ok\.?$/i);
        }
      },
      3 * 60 * 1000,
    );
  }
});

View File

@@ -9,20 +9,21 @@ function makeModel(api: Api, provider: string, id: string): Model<Api> {
/**
 * Builds an assistant turn carrying a single toolCall block for transport
 * replay fixtures. `stopReason` defaults to "toolUse" and can be overridden
 * (e.g. "aborted" or "error") to model failed streamed turns.
 */
function assistantToolCall(
  id: string,
  name = "read",
  stopReason: Extract<Context["messages"][number], { role: "assistant" }>["stopReason"] = "toolUse",
): Extract<Context["messages"][number], { role: "assistant" }> {
  return {
    role: "assistant",
    provider: "openai",
    api: "openai-responses",
    model: "gpt-5.4",
    // Only the parameter shorthand: the stale literal `stopReason: "toolUse"`
    // alongside it was a duplicate object property (TS1117) left over from
    // the diff and would silently shadow intent if the order ever changed.
    stopReason,
    timestamp: Date.now(),
    content: [{ type: "toolCall", id, name, arguments: {} }],
  } as Extract<Context["messages"][number], { role: "assistant" }>;
}
describe("transformTransportMessages synthetic tool-result policy", () => {
it("does not synthesize missing tool results for OpenAI-compatible transports", () => {
it("synthesizes Codex-style aborted tool results for OpenAI Responses transports", () => {
const messages: Context["messages"] = [
assistantToolCall("call_openai_1"),
{ role: "user", content: "continue", timestamp: Date.now() },
@@ -33,7 +34,166 @@ describe("transformTransportMessages synthetic tool-result policy", () => {
makeModel("openai-responses", "openai", "gpt-5.4"),
);
expect(result.map((msg) => msg.role)).toEqual(["assistant", "user"]);
expect(result.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
expect(result[1]).toMatchObject({
role: "toolResult",
toolCallId: "call_openai_1",
isError: true,
content: [{ type: "text", text: "aborted" }],
});
});
// A real result for call_keep must survive untouched while the missing
// parallel sibling call_missing receives a synthetic "aborted" result.
it("preserves real OpenAI transport results and aborts missing parallel siblings", () => {
  const transportModel = makeModel(
    "openclaw-openai-responses-transport" as Api,
    "openai",
    "gpt-5.4",
  );
  const history: Context["messages"] = [
    {
      ...assistantToolCall("call_keep"),
      content: [
        { type: "toolCall", id: "call_keep", name: "read", arguments: {} },
        { type: "toolCall", id: "call_missing", name: "exec", arguments: {} },
      ],
    },
    {
      role: "toolResult",
      toolCallId: "call_keep",
      toolName: "read",
      content: [{ type: "text", text: "ok" }],
      isError: false,
      timestamp: Date.now(),
    },
    { role: "user", content: "continue", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(history, transportModel);
  const roles = replayed.map((msg) => msg.role);
  expect(roles).toEqual(["assistant", "toolResult", "toolResult", "user"]);
  expect(replayed.slice(1, 3)).toMatchObject([
    { role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "ok" }] },
    {
      role: "toolResult",
      toolCallId: "call_missing",
      content: [{ type: "text", text: "aborted" }],
    },
  ]);
});
// A displaced real result (recorded after an intervening user turn) must be
// moved back next to its call before the missing sibling is synthesized.
it("moves displaced OpenAI transport results before synthesizing missing siblings", () => {
  const responsesModel = makeModel("openai-responses", "openai", "gpt-5.4");
  const history: Context["messages"] = [
    {
      ...assistantToolCall("call_keep"),
      content: [
        { type: "toolCall", id: "call_keep", name: "read", arguments: {} },
        { type: "toolCall", id: "call_missing", name: "exec", arguments: {} },
      ],
    },
    { role: "user", content: "continue", timestamp: Date.now() },
    {
      role: "toolResult",
      toolCallId: "call_keep",
      toolName: "read",
      content: [{ type: "text", text: "late ok" }],
      isError: false,
      timestamp: Date.now(),
    },
  ];
  const replayed = transformTransportMessages(history, responsesModel);
  const roles = replayed.map((msg) => msg.role);
  expect(roles).toEqual(["assistant", "toolResult", "toolResult", "user"]);
  expect(replayed.slice(1, 3)).toMatchObject([
    { role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "late ok" }] },
    {
      role: "toolResult",
      toolCallId: "call_missing",
      content: [{ type: "text", text: "aborted" }],
    },
  ]);
});
// An aborted assistant tool-call turn must vanish entirely before replay, so
// strict providers never see a dangling tool call without a result.
it("drops aborted OpenAI transport assistant tool calls before replay", () => {
  const responsesModel = makeModel("openai-responses", "openai", "gpt-5.4");
  const history: Context["messages"] = [
    assistantToolCall("call_aborted", "exec", "aborted"),
    { role: "user", content: "retry after abort", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(history, responsesModel);
  expect(replayed.map((msg) => msg.role)).toEqual(["user"]);
  const serialized = JSON.stringify(replayed);
  expect(serialized).not.toContain("call_aborted");
});
// Failed streamed turns that produced only partial text (no tool calls) must
// also be dropped wholesale before replaying against strict providers.
it("drops text-only aborted and errored transport assistant turns before replay", () => {
  const failedTextTurn = (
    stopReason: "aborted" | "error",
    text: string,
  ): Extract<Context["messages"][number], { role: "assistant" }> =>
    ({
      role: "assistant",
      provider: "openai",
      api: "openai-responses",
      model: "gpt-5.4",
      stopReason,
      timestamp: Date.now(),
      content: [{ type: "text", text }],
    }) as Extract<Context["messages"][number], { role: "assistant" }>;
  const history: Context["messages"] = [
    failedTextTurn("aborted", "partial aborted output"),
    failedTextTurn("error", "partial error output"),
    { role: "user", content: "retry after failed text turns", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(
    history,
    makeModel("openai-responses", "openai", "gpt-5.4"),
  );
  expect(replayed.map((msg) => msg.role)).toEqual(["user"]);
  const serialized = JSON.stringify(replayed);
  expect(serialized).not.toContain("partial aborted output");
  expect(serialized).not.toContain("partial error output");
});
// When the errored assistant tool-call turn is dropped, its recorded result
// becomes an orphan and must be removed with it, not replayed alone.
it("drops errored Anthropic transport assistant tool calls and matching results before replay", () => {
  const anthropicModel = makeModel("anthropic-messages", "anthropic", "claude-opus-4-6");
  const history: Context["messages"] = [
    assistantToolCall("call_error", "exec", "error"),
    {
      role: "toolResult",
      toolCallId: "call_error",
      toolName: "exec",
      content: [{ type: "text", text: "partial" }],
      isError: true,
      timestamp: Date.now(),
    },
    { role: "user", content: "retry after error", timestamp: Date.now() },
  ];
  const replayed = transformTransportMessages(history, anthropicModel);
  expect(replayed.map((msg) => msg.role)).toEqual(["user"]);
  const serialized = JSON.stringify(replayed);
  expect(serialized).not.toContain("call_error");
});
it("still synthesizes missing tool results for Anthropic transports", () => {
@@ -72,6 +232,10 @@ describe("transformTransportMessages synthetic tool-result policy", () => {
makeModel("openclaw-google-generative-ai-transport" as Api, "google", "gemini-2.5-pro"),
);
expect(googleAlias.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
expect(googleAlias[1]).toMatchObject({
role: "toolResult",
content: [{ type: "text", text: "No result provided" }],
});
const bedrockCanonical = transformTransportMessages(
messages,

View File

@@ -1,4 +1,5 @@
import type { Api, Context, Model } from "@mariozechner/pi-ai";
import { repairToolUseResultPairing } from "./session-transcript-repair.js";
const SYNTHETIC_TOOL_RESULT_APIS = new Set<string>([
"anthropic-messages",
@@ -6,31 +7,34 @@ const SYNTHETIC_TOOL_RESULT_APIS = new Set<string>([
"bedrock-converse-stream",
"google-generative-ai",
"openclaw-google-generative-ai-transport",
"openai-responses",
"openai-codex-responses",
"azure-openai-responses",
"openclaw-openai-responses-transport",
"openclaw-azure-openai-responses-transport",
]);
// Identifier pair for an assistant toolCall block still awaiting its result.
type PendingToolCall = { id: string; name: string };

// "aborted" is an OpenAI Responses-family convention from upstream Codex
// history normalization. Gemini/Anthropic transports use their own text while
// still needing synthetic results to satisfy provider turn-shape contracts;
// tool-replay-repair.live.test.ts exercises both paths against real models.
const CODEX_STYLE_ABORTED_OUTPUT_APIS = new Set<string>([
  "openai-responses",
  "openai-codex-responses",
  "azure-openai-responses",
  "openclaw-openai-responses-transport",
  "openclaw-azure-openai-responses-transport",
]);
/**
 * True when `modelApi` belongs to a family that requires synthetic tool
 * results so replayed transcripts keep toolCall/toolResult adjacency intact.
 */
function defaultAllowSyntheticToolResults(modelApi: Api): boolean {
  return SYNTHETIC_TOOL_RESULT_APIS.has(modelApi);
}
function appendMissingToolResults(
result: Context["messages"],
pendingToolCalls: PendingToolCall[],
existingToolResultIds: ReadonlySet<string>,
): void {
for (const toolCall of pendingToolCalls) {
if (!existingToolResultIds.has(toolCall.id)) {
result.push({
role: "toolResult",
toolCallId: toolCall.id,
toolName: toolCall.name,
content: [{ type: "text", text: "No result provided" }],
isError: true,
timestamp: Date.now(),
});
}
/**
 * A failed streamed turn: an assistant message whose stream ended in
 * "aborted" or "error". Such turns are dropped before strict-provider replay.
 */
function isFailedAssistantTurn(message: Context["messages"][number]): boolean {
  if (message.role === "assistant") {
    return message.stopReason === "aborted" || message.stopReason === "error";
  }
  return false;
}
export function transformTransportMessages(
@@ -43,6 +47,9 @@ export function transformTransportMessages(
) => string,
): Context["messages"] {
const allowSyntheticToolResults = defaultAllowSyntheticToolResults(model.api);
const syntheticToolResultText = CODEX_STYLE_ABORTED_OUTPUT_APIS.has(model.api)
? "aborted"
: "No result provided";
const toolCallIdMap = new Map<string, string>();
const transformed = messages.map((msg) => {
if (msg.role === "user") {
@@ -102,42 +109,21 @@ export function transformTransportMessages(
}
return { ...msg, content };
});
// Preserve the old transport replay filter: failed streamed turns can contain
// partial text, partial tool calls, or both, and strict providers can treat
// them as valid assistant context on retry unless we drop the whole turn.
const replayable = transformed.filter((msg) => !isFailedAssistantTurn(msg));
const result: Context["messages"] = [];
let pendingToolCalls: PendingToolCall[] = [];
let existingToolResultIds = new Set<string>();
for (const msg of transformed) {
if (msg.role === "assistant") {
if (allowSyntheticToolResults && pendingToolCalls.length > 0) {
appendMissingToolResults(result, pendingToolCalls, existingToolResultIds);
}
pendingToolCalls = [];
existingToolResultIds = new Set();
if (msg.stopReason === "error" || msg.stopReason === "aborted") {
continue;
}
const toolCalls = msg.content.filter(
(block): block is Extract<(typeof msg.content)[number], { type: "toolCall" }> =>
block.type === "toolCall",
);
if (toolCalls.length > 0) {
pendingToolCalls = toolCalls.map((block) => ({ id: block.id, name: block.name }));
existingToolResultIds = new Set();
}
result.push(msg);
continue;
}
if (msg.role === "toolResult") {
existingToolResultIds.add(msg.toolCallId);
result.push(msg);
continue;
}
if (allowSyntheticToolResults && pendingToolCalls.length > 0) {
appendMissingToolResults(result, pendingToolCalls, existingToolResultIds);
}
pendingToolCalls = [];
existingToolResultIds = new Set();
result.push(msg);
if (!allowSyntheticToolResults) {
return replayable;
}
return result;
// PI's local transform can synthesize missing results, but it does not move
// displaced real results back before an intervening user turn. Shared repair
// handles both, while preserving the previous transport behavior of dropping
// aborted/error assistant tool-call turns before replaying strict providers.
return repairToolUseResultPairing(replayable, {
erroredAssistantResultPolicy: "drop",
missingToolResultText: syntheticToolResultText,
}).messages as Context["messages"];
}