mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 11:50:43 +00:00
fix(agents): repair strict provider tool replay
This commit is contained in:
@@ -75,6 +75,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/tools: allow `POST /tools/invoke` to reach plugin-backed catalog tools such as `browser` when no core implementation exists, while still preferring built-in tools for real core names. Thanks @chat2way.
|
||||
- Browser/security: require `operator.admin` for the `browser.request` gateway method, matching the host/browser-node control authority exposed by that route. Thanks @RichardCao.
|
||||
- Browser/profiles: allow local managed profiles to override `browser.executablePath`, so different profiles can launch different Chromium-based browsers. Thanks @nobrainer-tech.
|
||||
- Agents/replay: repair displaced or missing tool results before strict provider replay, use Codex-compatible `aborted` outputs for OpenAI Responses history, and drop partial aborted/error transport turns before retries.
|
||||
- Reply media: allow sandboxed replies to deliver OpenClaw-managed `media/outbound` and `media/tool-*` attachments without treating them as sandbox escapes, while keeping alias-escape checks on the managed media root. Fixes #71138. Thanks @mayor686, @truffle-dev, and @neeravmakwana.
|
||||
- CLI/agent: keep `openclaw agent --json` stdout reserved for the JSON response by routing gateway, plugin, and embedded-fallback diagnostics to stderr before execution starts. Fixes #71319.
|
||||
- Agents/Gemini: retry reasoning-only, empty, and planning-only Gemini turns instead of letting sessions silently stall. Fixes #71074. (#71362) Thanks @neeravmakwana.
|
||||
|
||||
@@ -114,9 +114,9 @@ external end-user instructions.
|
||||
- Image sanitization only.
|
||||
- Drop orphaned reasoning signatures (standalone reasoning items without a following content block) for OpenAI Responses/Codex transcripts, and drop replayable OpenAI reasoning after a model route switch.
|
||||
- No tool call id sanitization.
|
||||
- No tool result pairing repair.
|
||||
- Tool result pairing repair may move real matched outputs and synthesize Codex-style `aborted` outputs for missing tool calls.
|
||||
- No turn validation or reordering.
|
||||
- No synthetic tool results.
|
||||
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
|
||||
- No thought signature stripping.
|
||||
|
||||
**Google (Generative AI / Gemini CLI / Antigravity)**
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { completeSimple, type Api, type Model } from "@mariozechner/pi-ai";
|
||||
import { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||
import { Type } from "typebox";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { resolveOpenClawAgentDir } from "./agent-paths.js";
|
||||
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
|
||||
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
|
||||
import { ensureOpenClawModelsJson } from "./models-config.js";
|
||||
import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js";
|
||||
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
|
||||
|
||||
const LIVE = isLiveTestEnabled();
|
||||
@@ -169,4 +173,141 @@ describeLive("openai reasoning compat live", () => {
|
||||
},
|
||||
3 * 60 * 1000,
|
||||
);
|
||||
|
||||
it(
|
||||
"accepts repaired OpenAI Codex parallel tool replay with aborted missing results",
|
||||
async () => {
|
||||
const { provider, modelId } = resolveTargetModelRef();
|
||||
const cfg = loadConfig();
|
||||
await ensureOpenClawModelsJson(cfg);
|
||||
|
||||
const agentDir = resolveOpenClawAgentDir();
|
||||
const authStorage = discoverAuthStorage(agentDir);
|
||||
const modelRegistry = discoverModels(authStorage, agentDir);
|
||||
const model = modelRegistry.find(provider, modelId) as Model<Api> | null;
|
||||
|
||||
if (!model) {
|
||||
logProgress(`[openai-reasoning-compat] model missing from registry: ${TARGET_MODEL_REF}`);
|
||||
return;
|
||||
}
|
||||
|
||||
let apiKeyInfo;
|
||||
try {
|
||||
apiKeyInfo = await getApiKeyForModel({
|
||||
model,
|
||||
cfg,
|
||||
credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
|
||||
});
|
||||
} catch (error) {
|
||||
logProgress(`[openai-reasoning-compat] skip (${String(error)})`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
|
||||
logProgress(
|
||||
`[openai-reasoning-compat] skip (non-profile credential source: ${apiKeyInfo.source})`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const messages = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Use noop.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
{
|
||||
role: "assistant",
|
||||
provider: model.provider,
|
||||
api: model.api,
|
||||
model: model.id,
|
||||
stopReason: "toolUse",
|
||||
timestamp: Date.now(),
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_keep", name: "noop", arguments: {} },
|
||||
{ type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} },
|
||||
{ type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} },
|
||||
],
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: "Reply with exactly: replay ok.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_keep",
|
||||
toolName: "noop",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
] as unknown as AgentMessage[];
|
||||
|
||||
const sanitized = await sanitizeSessionHistory({
|
||||
messages,
|
||||
modelApi: model.api,
|
||||
provider: model.provider,
|
||||
modelId: model.id,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
sessionId: "openai-codex-tool-replay-live",
|
||||
});
|
||||
|
||||
expect(sanitized.map((message) => message.role)).toEqual([
|
||||
"user",
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect(
|
||||
sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId),
|
||||
).toEqual(["call_keep", "call_missing_a", "call_missing_b"]);
|
||||
expect(
|
||||
sanitized
|
||||
.slice(3, 5)
|
||||
.map((message) => (message as Extract<AgentMessage, { role: "toolResult" }>).content),
|
||||
).toEqual([[{ type: "text", text: "aborted" }], [{ type: "text", text: "aborted" }]]);
|
||||
expect(JSON.stringify(sanitized)).not.toContain("missing tool result");
|
||||
|
||||
const response = await completeSimpleWithTimeout(
|
||||
model,
|
||||
{
|
||||
systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
|
||||
messages: sanitized as never,
|
||||
tools: [
|
||||
{
|
||||
name: "noop",
|
||||
description: "Return ok.",
|
||||
parameters: Type.Object({}, { additionalProperties: false }),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
apiKey: requireApiKey(apiKeyInfo, model.provider),
|
||||
reasoning: "low",
|
||||
maxTokens: 64,
|
||||
},
|
||||
120_000,
|
||||
);
|
||||
|
||||
const text = response.content
|
||||
.filter((block) => block.type === "text")
|
||||
.map((block) => block.text.trim())
|
||||
.join(" ")
|
||||
.trim();
|
||||
const errorMessage =
|
||||
typeof (response as { errorMessage?: unknown }).errorMessage === "string"
|
||||
? ((response as { errorMessage?: string }).errorMessage ?? "")
|
||||
: "";
|
||||
if (errorMessage && isKnownLiveBlocker(errorMessage)) {
|
||||
logProgress(`[openai-reasoning-compat] skip (${errorMessage})`);
|
||||
return;
|
||||
}
|
||||
|
||||
expect(text).toMatch(/^replay ok\.?$/i);
|
||||
},
|
||||
3 * 60 * 1000,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -688,20 +688,181 @@ describe("sanitizeSessionHistory", () => {
|
||||
expect(result[1]?.role).toBe("assistant");
|
||||
});
|
||||
|
||||
it("synthesizes missing tool results for openai-responses after repair", async () => {
|
||||
it("synthesizes Codex-style aborted tool results for openai-responses after repair", async () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUserMessage("start"),
|
||||
makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], {
|
||||
stopReason: "toolUse",
|
||||
}),
|
||||
makeUserMessage("continue"),
|
||||
];
|
||||
|
||||
const result = await sanitizeOpenAIHistory(messages);
|
||||
|
||||
expect(result.map((message) => message.role)).toEqual([
|
||||
"user",
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect((result[2] as { toolCallId?: string }).toolCallId).toBe("call1");
|
||||
expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "aborted" },
|
||||
]);
|
||||
expect(JSON.stringify(result)).not.toContain("missing tool result");
|
||||
});
|
||||
|
||||
it("synthesizes Codex-style aborted tool results for openai-codex-responses", async () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeAssistantMessage(
|
||||
[
|
||||
{ type: "toolCall", id: "call_a", name: "exec", arguments: {} },
|
||||
{ type: "toolCall", id: "call_b", name: "exec", arguments: {} },
|
||||
{ type: "toolCall", id: "call_c", name: "exec", arguments: {} },
|
||||
],
|
||||
{ stopReason: "toolUse" },
|
||||
),
|
||||
makeUserMessage("status?"),
|
||||
];
|
||||
|
||||
const result = await sanitizeSessionHistory({
|
||||
messages,
|
||||
modelApi: "openai-codex-responses",
|
||||
provider: "openai-codex",
|
||||
sessionManager: mockSessionManager,
|
||||
sessionId: TEST_SESSION_ID,
|
||||
});
|
||||
|
||||
expect(result.map((message) => message.role)).toEqual([
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect(
|
||||
result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId),
|
||||
).toEqual(["calla", "callb", "callc"]);
|
||||
for (const message of result.slice(1, 4)) {
|
||||
expect((message as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "aborted" },
|
||||
]);
|
||||
}
|
||||
expect(JSON.stringify(result)).not.toContain("missing tool result");
|
||||
});
|
||||
|
||||
it("keeps real parallel tool results for openai-responses and aborts missing siblings", async () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeAssistantMessage(
|
||||
[
|
||||
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
|
||||
{ type: "toolCall", id: "call_3", name: "write", arguments: {} },
|
||||
],
|
||||
{ stopReason: "toolUse" },
|
||||
),
|
||||
makeUserMessage("continue"),
|
||||
castAgentMessage({
|
||||
role: "toolResult",
|
||||
toolCallId: "call_2",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = await sanitizeOpenAIHistory(messages);
|
||||
|
||||
// repairToolUseResultPairing now runs for all providers (including OpenAI)
|
||||
// to fix orphaned function_call_output items that OpenAI would reject.
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result[0]?.role).toBe("assistant");
|
||||
expect(result[1]?.role).toBe("toolResult");
|
||||
expect(result.map((message) => message.role)).toEqual([
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect(
|
||||
extractToolCallsFromAssistant(result[0] as Extract<AgentMessage, { role: "assistant" }>),
|
||||
).toMatchObject([
|
||||
{ id: "call1", name: "read" },
|
||||
{ id: "call2", name: "exec" },
|
||||
{ id: "call3", name: "write" },
|
||||
]);
|
||||
expect(
|
||||
result.slice(1, 4).map((message) => (message as { toolCallId?: string }).toolCallId),
|
||||
).toEqual(["call1", "call2", "call3"]);
|
||||
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "aborted" },
|
||||
]);
|
||||
expect((result[2] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "ok" },
|
||||
]);
|
||||
expect((result[3] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "aborted" },
|
||||
]);
|
||||
expect(JSON.stringify(result)).not.toContain("missing tool result");
|
||||
});
|
||||
|
||||
it("applies aborted missing-result repair to azure-openai-responses", async () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeAssistantMessage([{ type: "toolCall", id: "call_azure", name: "read", arguments: {} }], {
|
||||
stopReason: "toolUse",
|
||||
}),
|
||||
makeUserMessage("continue"),
|
||||
];
|
||||
|
||||
const result = await sanitizeSessionHistory({
|
||||
messages,
|
||||
modelApi: "azure-openai-responses",
|
||||
provider: "azure-openai-responses",
|
||||
sessionManager: mockSessionManager,
|
||||
sessionId: TEST_SESSION_ID,
|
||||
});
|
||||
|
||||
expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]);
|
||||
expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callazure");
|
||||
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "aborted" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("drops duplicate and orphan OpenAI outputs while preserving the first real result", async () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "toolResult",
|
||||
toolCallId: "call_orphan",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "orphan" }],
|
||||
isError: false,
|
||||
}),
|
||||
makeAssistantMessage([{ type: "toolCall", id: "call_keep", name: "read", arguments: {} }], {
|
||||
stopReason: "toolUse",
|
||||
}),
|
||||
castAgentMessage({
|
||||
role: "toolResult",
|
||||
toolCallId: "call_keep",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "first" }],
|
||||
isError: false,
|
||||
}),
|
||||
castAgentMessage({
|
||||
role: "toolResult",
|
||||
toolCallId: "call_keep",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "duplicate" }],
|
||||
isError: false,
|
||||
}),
|
||||
makeUserMessage("continue"),
|
||||
];
|
||||
|
||||
const result = await sanitizeOpenAIHistory(messages);
|
||||
|
||||
expect(result.map((message) => message.role)).toEqual(["assistant", "toolResult", "user"]);
|
||||
expect((result[1] as { toolCallId?: string }).toolCallId).toBe("callkeep");
|
||||
expect((result[1] as Extract<AgentMessage, { role: "toolResult" }>).content).toEqual([
|
||||
{ type: "text", text: "first" },
|
||||
]);
|
||||
expect(JSON.stringify(result)).not.toContain("orphan");
|
||||
expect(JSON.stringify(result)).not.toContain("duplicate");
|
||||
});
|
||||
|
||||
it.each([
|
||||
|
||||
@@ -810,6 +810,12 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
config: params.config,
|
||||
contextWindowTokens: ctxInfo.tokens,
|
||||
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
|
||||
missingToolResultText:
|
||||
model.api === "openai-responses" ||
|
||||
model.api === "azure-openai-responses" ||
|
||||
model.api === "openai-codex-responses"
|
||||
? "aborted"
|
||||
: undefined,
|
||||
allowedToolNames,
|
||||
});
|
||||
checkpointSnapshot = captureCompactionCheckpointSnapshot({
|
||||
@@ -965,6 +971,11 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
const limited = transcriptPolicy.repairToolUseResultPairing
|
||||
? sanitizeToolUseResultPairing(truncated, {
|
||||
erroredAssistantResultPolicy: "drop",
|
||||
...(model.api === "openai-responses" ||
|
||||
model.api === "azure-openai-responses" ||
|
||||
model.api === "openai-codex-responses"
|
||||
? { missingToolResultText: "aborted" }
|
||||
: {}),
|
||||
})
|
||||
: truncated;
|
||||
if (limited.length > 0) {
|
||||
|
||||
@@ -493,13 +493,17 @@ export async function sanitizeSessionHistory(params: {
|
||||
allowedToolNames: params.allowedToolNames,
|
||||
allowProviderOwnedThinkingReplay,
|
||||
});
|
||||
// OpenAI's fc_* pairing downgrade needs the raw call_id|fc_id separator intact,
|
||||
// but displaced tool results must first be repaired back next to their
|
||||
// assistant turn so the downgrade can rewrite both sides consistently.
|
||||
// OpenAI Responses rejects orphan/missing function_call_output items. Upstream
|
||||
// Codex repairs those gaps with "aborted"; keep that before the fc_* downgrade
|
||||
// so both call and result ids are rewritten together. Covered by unit replay
|
||||
// tests plus live OpenAI/Codex and generic replay-repair model tests.
|
||||
const openAIRepairedToolCalls =
|
||||
isOpenAIResponsesApi && policy.repairToolUseResultPairing
|
||||
? sanitizeToolUseResultPairing(sanitizedToolCalls, {
|
||||
erroredAssistantResultPolicy: "drop",
|
||||
// Match upstream Codex history normalization for OpenAI Responses:
|
||||
// missing function_call_output entries are model-visible "aborted".
|
||||
missingToolResultText: "aborted",
|
||||
})
|
||||
: sanitizedToolCalls;
|
||||
const openAISafeToolCalls = isOpenAIResponsesApi
|
||||
@@ -517,6 +521,9 @@ export async function sanitizeSessionHistory(params: {
|
||||
allowedToolNames: params.allowedToolNames,
|
||||
})
|
||||
: openAISafeToolCalls;
|
||||
// Gemini/Anthropic-class providers also require tool results to stay adjacent
|
||||
// to their assistant tool calls. They do not use Codex's "aborted" text, but
|
||||
// the same ordering repair is live-tested with Gemini 3 Flash.
|
||||
const repairedTools =
|
||||
!isOpenAIResponsesApi && policy.repairToolUseResultPairing
|
||||
? sanitizeToolUseResultPairing(sanitizedToolIds, {
|
||||
|
||||
@@ -61,6 +61,65 @@ describe("sanitizeReplayToolCallIdsForStream", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("synthesizes missing tool results after strict id sanitization", () => {
|
||||
const rawId = "call_function_av7cbkigmk7x1";
|
||||
const out = sanitizeReplayToolCallIdsForStream({
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "toolUse", id: rawId, name: "read", input: { path: "." } },
|
||||
{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } },
|
||||
],
|
||||
} as never,
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: rawId,
|
||||
toolUseId: rawId,
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
} as never,
|
||||
],
|
||||
mode: "strict",
|
||||
repairToolUseResultPairing: true,
|
||||
});
|
||||
|
||||
expect(out.map((message) => message.role)).toEqual(["assistant", "toolResult", "toolResult"]);
|
||||
expect((out[0] as Extract<AgentMessage, { role: "assistant" }>).content).toMatchObject([
|
||||
{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" },
|
||||
{ type: "toolUse", id: "callmissing", name: "exec" },
|
||||
]);
|
||||
expect(out[1]).toMatchObject({
|
||||
role: "toolResult",
|
||||
toolCallId: "callfunctionav7cbkigmk7x1",
|
||||
toolUseId: "callfunctionav7cbkigmk7x1",
|
||||
});
|
||||
expect(out[2]).toMatchObject({
|
||||
role: "toolResult",
|
||||
toolCallId: "callmissing",
|
||||
isError: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("synthesizes missing tool results when repair is enabled", () => {
|
||||
const out = sanitizeReplayToolCallIdsForStream({
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolUse", id: "call_missing", name: "exec", input: { cmd: "true" } }],
|
||||
} as never,
|
||||
],
|
||||
mode: "strict",
|
||||
repairToolUseResultPairing: true,
|
||||
});
|
||||
|
||||
expect(out).toMatchObject([
|
||||
{ role: "assistant" },
|
||||
{ role: "toolResult", toolCallId: "callmissing", isError: true },
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps real tool results for aborted assistant spans", () => {
|
||||
const rawId = "call_function_av7cbkigmk7x1";
|
||||
const out = sanitizeReplayToolCallIdsForStream({
|
||||
|
||||
@@ -1193,6 +1193,12 @@ export async function runEmbeddedAttempt(
|
||||
contextWindowTokens: params.contextTokenBudget,
|
||||
inputProvenance: params.inputProvenance,
|
||||
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
|
||||
missingToolResultText:
|
||||
params.model.api === "openai-responses" ||
|
||||
params.model.api === "azure-openai-responses" ||
|
||||
params.model.api === "openai-codex-responses"
|
||||
? "aborted"
|
||||
: undefined,
|
||||
allowedToolNames,
|
||||
});
|
||||
trackSessionManagerAccess(params.sessionFile);
|
||||
@@ -1840,6 +1846,7 @@ export async function runEmbeddedAttempt(
|
||||
const limited = transcriptPolicy.repairToolUseResultPairing
|
||||
? sanitizeToolUseResultPairing(truncated, {
|
||||
erroredAssistantResultPolicy: "drop",
|
||||
...(isOpenAIResponsesApi ? { missingToolResultText: "aborted" } : {}),
|
||||
})
|
||||
: truncated;
|
||||
cacheTrace?.recordStage("session:limited", { messages: limited });
|
||||
|
||||
@@ -29,6 +29,7 @@ export function guardSessionManager(
|
||||
contextWindowTokens?: number;
|
||||
inputProvenance?: InputProvenance;
|
||||
allowSyntheticToolResults?: boolean;
|
||||
missingToolResultText?: string;
|
||||
allowedToolNames?: Iterable<string>;
|
||||
},
|
||||
): GuardedSessionManager {
|
||||
@@ -75,6 +76,7 @@ export function guardSessionManager(
|
||||
applyInputProvenanceToUserMessage(message, opts?.inputProvenance),
|
||||
transformToolResultForPersistence: transform,
|
||||
allowSyntheticToolResults: opts?.allowSyntheticToolResults,
|
||||
missingToolResultText: opts?.missingToolResultText,
|
||||
allowedToolNames: opts?.allowedToolNames,
|
||||
beforeMessageWriteHook: beforeMessageWrite,
|
||||
maxToolResultChars:
|
||||
|
||||
@@ -111,6 +111,18 @@ describe("installSessionToolResultGuard", () => {
|
||||
expectPersistedRoles(sm, ["assistant", "toolResult"]);
|
||||
});
|
||||
|
||||
it("uses configured text for synthetic tool results", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
const guard = installSessionToolResultGuard(sm, {
|
||||
missingToolResultText: "aborted",
|
||||
});
|
||||
|
||||
sm.appendMessage(toolCallMessage);
|
||||
guard.flushPendingToolResults();
|
||||
|
||||
expect(getToolResultText(getPersistedMessages(sm))).toBe("aborted");
|
||||
});
|
||||
|
||||
it("clears pending tool calls without inserting synthetic tool results", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
const guard = installSessionToolResultGuard(sm);
|
||||
|
||||
@@ -90,6 +90,7 @@ export function installSessionToolResultGuard(
|
||||
* Defaults to true.
|
||||
*/
|
||||
allowSyntheticToolResults?: boolean;
|
||||
missingToolResultText?: string;
|
||||
/**
|
||||
* Optional set/list of tool names accepted for assistant toolCall/toolUse blocks.
|
||||
* When set, tool calls with unknown names are dropped before persistence.
|
||||
@@ -127,6 +128,7 @@ export function installSessionToolResultGuard(
|
||||
};
|
||||
|
||||
const allowSyntheticToolResults = opts?.allowSyntheticToolResults ?? true;
|
||||
const missingToolResultText = opts?.missingToolResultText;
|
||||
const beforeWrite = opts?.beforeMessageWriteHook;
|
||||
const maxToolResultChars = resolveMaxToolResultChars(opts);
|
||||
|
||||
@@ -154,7 +156,11 @@ export function installSessionToolResultGuard(
|
||||
}
|
||||
if (allowSyntheticToolResults) {
|
||||
for (const [id, name] of pendingState.entries()) {
|
||||
const synthetic = makeMissingToolResult({ toolCallId: id, toolName: name });
|
||||
const synthetic = makeMissingToolResult({
|
||||
toolCallId: id,
|
||||
toolName: name,
|
||||
text: missingToolResultText,
|
||||
});
|
||||
const flushed = applyBeforeWriteHook(
|
||||
persistToolResult(persistMessage(synthetic), {
|
||||
toolCallId: id,
|
||||
|
||||
@@ -76,6 +76,68 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
expect(out[3]?.role).toBe("user");
|
||||
});
|
||||
|
||||
it("uses custom text for synthesized missing tool results", () => {
|
||||
const input = castAgentMessages([
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
|
||||
},
|
||||
{ role: "user", content: "user message that should come after tool use" },
|
||||
]);
|
||||
|
||||
const result = repairToolUseResultPairing(input, {
|
||||
missingToolResultText: "aborted",
|
||||
});
|
||||
|
||||
expect(result.added).toHaveLength(1);
|
||||
expect(result.messages.map((m) => m.role)).toEqual(["assistant", "toolResult", "user"]);
|
||||
expect(result.added[0]?.content).toEqual([{ type: "text", text: "aborted" }]);
|
||||
});
|
||||
|
||||
it("keeps matched parallel tool results and synthesizes only missing siblings", () => {
|
||||
const input = castAgentMessages([
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "text", text: "checking" },
|
||||
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
|
||||
{ type: "toolCall", id: "call_3", name: "write", arguments: {} },
|
||||
],
|
||||
},
|
||||
{ role: "user", content: "user message that should come after tool use" },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_2",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
},
|
||||
]);
|
||||
|
||||
const result = repairToolUseResultPairing(input, {
|
||||
missingToolResultText: "aborted",
|
||||
});
|
||||
|
||||
expect(result.added.map((message) => message.toolCallId)).toEqual(["call_1", "call_3"]);
|
||||
expect(result.messages.map((m) => m.role)).toEqual([
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect(getAssistantToolCallBlocks(result.messages)).toMatchObject([
|
||||
{ id: "call_1", name: "read" },
|
||||
{ id: "call_2", name: "exec" },
|
||||
{ id: "call_3", name: "write" },
|
||||
]);
|
||||
expect((result.messages[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
|
||||
expect((result.messages[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
|
||||
expect((result.messages[3] as { toolCallId?: string }).toolCallId).toBe("call_3");
|
||||
expect(JSON.stringify(result.added)).not.toContain("missing tool result");
|
||||
});
|
||||
|
||||
it("repairs blank tool result names from matching tool calls", () => {
|
||||
const input = castAgentMessages([
|
||||
{
|
||||
@@ -248,9 +310,8 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
});
|
||||
|
||||
expect(result.droppedOrphanCount).toBe(0);
|
||||
expect(result.messages).toHaveLength(2);
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
expect(result.messages).toHaveLength(1);
|
||||
expect(result.messages[0]?.role).toBe("user");
|
||||
expect(result.added).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -175,6 +175,12 @@ function isReplaySafeThinkingAssistantTurn(
|
||||
function makeMissingToolResult(params: {
|
||||
toolCallId: string;
|
||||
toolName?: string;
|
||||
// OpenAI Responses/Codex replay should match upstream Codex's "aborted"
|
||||
// function_call_output normalization; live coverage in
|
||||
// openai-reasoning-compat.live.test.ts and tool-replay-repair.live.test.ts
|
||||
// sends this repaired history to real models. Other providers keep the older,
|
||||
// explicit OpenClaw diagnostic text unless the caller opts in.
|
||||
text?: string;
|
||||
}): Extract<AgentMessage, { role: "toolResult" }> {
|
||||
return {
|
||||
role: "toolResult",
|
||||
@@ -183,7 +189,9 @@ function makeMissingToolResult(params: {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
|
||||
text:
|
||||
params.text ??
|
||||
"[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.",
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
@@ -232,6 +240,7 @@ export type ErroredAssistantResultPolicy = "preserve" | "drop";
|
||||
|
||||
export type ToolUseResultPairingOptions = {
|
||||
erroredAssistantResultPolicy?: ErroredAssistantResultPolicy;
|
||||
missingToolResultText?: string;
|
||||
};
|
||||
|
||||
export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] {
|
||||
@@ -529,8 +538,8 @@ export function repairToolUseResultPairing(
|
||||
// tool calls in the same turn after malformed siblings are dropped.
|
||||
const stopReason = (assistant as { stopReason?: string }).stopReason;
|
||||
if (stopReason === "error" || stopReason === "aborted") {
|
||||
out.push(msg);
|
||||
if (!shouldDropErroredAssistantResults(options)) {
|
||||
out.push(msg);
|
||||
for (const toolCall of toolCalls) {
|
||||
const result = spanResultsById.get(toolCall.id);
|
||||
if (!result) {
|
||||
@@ -540,6 +549,8 @@ export function repairToolUseResultPairing(
|
||||
}
|
||||
} else if (spanResultsById.size > 0) {
|
||||
changed = true;
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
for (const rem of remainder) {
|
||||
out.push(rem);
|
||||
@@ -551,6 +562,8 @@ export function repairToolUseResultPairing(
|
||||
out.push(msg);
|
||||
|
||||
if (spanResultsById.size > 0 && remainder.length > 0) {
|
||||
// Preserve real late-arriving results before synthesizing missing siblings;
|
||||
// otherwise parallel tool replay can replace useful output with repair noise.
|
||||
moved = true;
|
||||
changed = true;
|
||||
}
|
||||
@@ -563,6 +576,7 @@ export function repairToolUseResultPairing(
|
||||
const missing = makeMissingToolResult({
|
||||
toolCallId: call.id,
|
||||
toolName: call.name,
|
||||
text: options?.missingToolResultText,
|
||||
});
|
||||
added.push(missing);
|
||||
changed = true;
|
||||
|
||||
386
src/agents/tool-replay-repair.live.test.ts
Normal file
386
src/agents/tool-replay-repair.live.test.ts
Normal file
@@ -0,0 +1,386 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { completeSimple, type Api, type Context, type Model } from "@mariozechner/pi-ai";
|
||||
import { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||
import { Type } from "typebox";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { resolveOpenClawAgentDir } from "./agent-paths.js";
|
||||
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
|
||||
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
|
||||
import { ensureOpenClawModelsJson } from "./models-config.js";
|
||||
import { sanitizeSessionHistory } from "./pi-embedded-runner/replay-history.js";
|
||||
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
|
||||
import { transformTransportMessages } from "./transport-message-transform.js";
|
||||
|
||||
const LIVE = isLiveTestEnabled();
|
||||
const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
|
||||
const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first";
|
||||
const DEFAULT_TARGET_MODEL_REFS = "openai-codex/gpt-5.5,google/gemini-3-flash-preview";
|
||||
const TARGET_MODEL_REFS = parseTargetModelRefs(
|
||||
process.env.OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS ?? DEFAULT_TARGET_MODEL_REFS,
|
||||
);
|
||||
const describeLive = LIVE ? describe : describe.skip;
|
||||
|
||||
type TargetModelRef = {
|
||||
ref: string;
|
||||
provider: string;
|
||||
modelId: string;
|
||||
};
|
||||
|
||||
function parseTargetModelRefs(raw: string | undefined): TargetModelRef[] {
|
||||
return (raw ?? "")
|
||||
.split(",")
|
||||
.map((item) => item.trim())
|
||||
.filter(Boolean)
|
||||
.map((ref) => {
|
||||
const [provider, ...rest] = ref.split("/");
|
||||
const modelId = rest.join("/").trim();
|
||||
if (!provider?.trim() || !modelId) {
|
||||
throw new Error(
|
||||
`Invalid OPENCLAW_LIVE_TOOL_REPLAY_REPAIR_MODELS entry: ${JSON.stringify(ref)}`,
|
||||
);
|
||||
}
|
||||
return { ref, provider: provider.trim(), modelId };
|
||||
});
|
||||
}
|
||||
|
||||
function logProgress(message: string): void {
|
||||
process.stderr.write(`[live] ${message}\n`);
|
||||
}
|
||||
|
||||
/**
 * Runs `completeSimple` with a hard timeout. The abort signal asks the model
 * call to stop cooperatively, while a racing rejection guarantees the await
 * settles even if the transport ignores the abort.
 */
async function completeSimpleWithTimeout<TApi extends Api>(
  model: Model<TApi>,
  context: Parameters<typeof completeSimple<TApi>>[1],
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
): Promise<Awaited<ReturnType<typeof completeSimple<TApi>>>> {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  // unref (when available) keeps the timer from holding the process open.
  abortTimer.unref?.();
  try {
    return await Promise.race([
      completeSimple(model, context, {
        ...options,
        signal: controller.signal,
      }),
      // Hard backstop: reject after the same timeout so the race always ends.
      new Promise<never>((_, reject) => {
        const hardTimer = setTimeout(() => {
          reject(new Error(`model call timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        hardTimer.unref?.();
      }),
    ]);
  } finally {
    clearTimeout(abortTimer);
  }
}
|
||||
|
||||
function isOpenAIResponsesFamily(api: string): boolean {
|
||||
return (
|
||||
api === "openai-responses" ||
|
||||
api === "openai-codex-responses" ||
|
||||
api === "azure-openai-responses"
|
||||
);
|
||||
}
|
||||
|
||||
function buildReplayMessages(model: Model<Api>): AgentMessage[] {
|
||||
const now = Date.now();
|
||||
// Gemini source metadata deliberately simulates a model switch from a
|
||||
// provider-owned transcript. That forces the same id sanitization and replay
|
||||
// repair path that failed in real session replays, not just the happy path for
|
||||
// a same-provider synthetic fixture.
|
||||
const source =
|
||||
model.provider === "google"
|
||||
? {
|
||||
api: "google-gemini-cli",
|
||||
provider: "google-antigravity",
|
||||
model: "claude-sonnet-4-20250514",
|
||||
}
|
||||
: {
|
||||
api: model.api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
};
|
||||
|
||||
return [
|
||||
{
|
||||
role: "user",
|
||||
content: "Use noop.",
|
||||
timestamp: now,
|
||||
},
|
||||
{
|
||||
role: "assistant",
|
||||
provider: source.provider,
|
||||
api: source.api,
|
||||
model: source.model,
|
||||
stopReason: "toolUse",
|
||||
timestamp: now + 1,
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_keep", name: "noop", arguments: {} },
|
||||
{ type: "toolCall", id: "call_missing_a", name: "noop", arguments: {} },
|
||||
{ type: "toolCall", id: "call_missing_b", name: "noop", arguments: {} },
|
||||
],
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: "Reply with exactly: replay repair ok.",
|
||||
timestamp: now + 2,
|
||||
},
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_keep",
|
||||
toolName: "noop",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
timestamp: now + 3,
|
||||
},
|
||||
] as unknown as AgentMessage[];
|
||||
}
|
||||
|
||||
function buildAbortedTransportMessages(model: Model<Api>): Context["messages"] {
|
||||
const now = Date.now();
|
||||
return [
|
||||
{
|
||||
role: "assistant",
|
||||
provider: model.provider,
|
||||
api: model.api,
|
||||
model: model.id,
|
||||
stopReason: "aborted",
|
||||
timestamp: now,
|
||||
content: [{ type: "toolCall", id: "call_transport_aborted", name: "noop", arguments: {} }],
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: "Reply with exactly: transport replay ok.",
|
||||
timestamp: now + 1,
|
||||
},
|
||||
] as Context["messages"];
|
||||
}
|
||||
|
||||
function syntheticToolResultText(message: AgentMessage): string | undefined {
|
||||
if (message.role !== "toolResult") {
|
||||
return undefined;
|
||||
}
|
||||
const first = message.content[0] as { type?: unknown; text?: unknown } | undefined;
|
||||
return first?.type === "text" && typeof first.text === "string" ? first.text : undefined;
|
||||
}
|
||||
|
||||
function assistantToolCallIds(message: AgentMessage): string[] {
|
||||
if (message.role !== "assistant") {
|
||||
return [];
|
||||
}
|
||||
return message.content.filter((block) => block.type === "toolCall").map((block) => block.id);
|
||||
}
|
||||
|
||||
function isKnownLiveBlocker(errorMessage: string): boolean {
|
||||
return (
|
||||
/not supported when using codex with a chatgpt account/i.test(errorMessage) ||
|
||||
/hit your chatgpt usage limit/i.test(errorMessage)
|
||||
);
|
||||
}
|
||||
|
||||
// Live end-to-end suite: for every configured target model we prove two
// replay repairs against the real provider API — (1) displaced/missing tool
// results are repaired into a transcript the provider accepts, and (2)
// aborted assistant tool-call turns are dropped before transport replay.
describeLive("tool replay repair live", () => {
  for (const target of TARGET_MODEL_REFS) {
    it(
      `accepts repaired displaced and missing tool results with ${target.ref}`,
      async () => {
        const cfg = loadConfig();
        await ensureOpenClawModelsJson(cfg);

        // Resolve the target model from the locally discovered registry; a
        // missing model is an environment gap, so log and skip, not fail.
        const agentDir = resolveOpenClawAgentDir();
        const authStorage = discoverAuthStorage(agentDir);
        const modelRegistry = discoverModels(authStorage, agentDir);
        const model = modelRegistry.find(target.provider, target.modelId) as Model<Api> | null;

        if (!model) {
          logProgress(`[tool-replay-repair] model missing from registry: ${target.ref}`);
          return;
        }

        // Credential lookup failures are skips too: live runs only exercise
        // providers the local environment is actually set up for.
        let apiKeyInfo;
        try {
          apiKeyInfo = await getApiKeyForModel({
            model,
            cfg,
            credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
          });
        } catch (error) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${String(error)})`);
          return;
        }

        if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
          logProgress(
            `[tool-replay-repair] skip ${target.ref} (non-profile credential source: ${apiKeyInfo.source})`,
          );
          return;
        }

        logProgress(`[tool-replay-repair] target=${target.ref} auth source=${apiKeyInfo.source}`);
        // Sanitize the deliberately broken fixture transcript the same way a
        // real session replay would before contacting the provider.
        const sanitized = await sanitizeSessionHistory({
          messages: buildReplayMessages(model),
          modelApi: model.api,
          provider: model.provider,
          modelId: model.id,
          sessionManager: SessionManager.inMemory(),
          sessionId: `tool-replay-repair-live-${target.provider}-${target.modelId}`,
        });

        // Structural contract: the displaced result is moved back next to its
        // assistant turn and the two missing results are synthesized, in
        // assistant tool-call order.
        expect(sanitized.map((message) => message.role)).toEqual([
          "user",
          "assistant",
          "toolResult",
          "toolResult",
          "toolResult",
          "user",
        ]);
        const assistantMessage = sanitized[1];
        expect(assistantMessage?.role).toBe("assistant");
        expect(
          sanitized.slice(2, 5).map((message) => (message as { toolCallId?: string }).toolCallId),
        ).toEqual(assistantToolCallIds(assistantMessage));

        // These assertions are the model-visible contract: OpenAI Responses
        // gets Codex-compatible "aborted" outputs, while Gemini proves the
        // generic repair does not leak OpenAI wording into other providers.
        const insertedTexts = sanitized.slice(3, 5).map(syntheticToolResultText);
        if (isOpenAIResponsesFamily(model.api)) {
          expect(insertedTexts).toEqual(["aborted", "aborted"]);
        } else {
          expect(insertedTexts).not.toContain("aborted");
        }

        // Sending the repaired transcript to the real model is the live proof:
        // providers reject malformed tool-call adjacency before generation, so
        // any non-error response here validates the repair shape end to end.
        const response = await completeSimpleWithTimeout(
          model,
          {
            systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
            messages: sanitized as never,
            tools: [
              {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              },
            ],
          },
          {
            apiKey: requireApiKey(apiKeyInfo, model.provider),
            reasoning: "low",
            maxTokens: 96,
          },
          120_000,
        );

        const text = response.content
          .filter((block) => block.type === "text")
          .map((block) => block.text.trim())
          .join(" ")
          .trim();
        const errorMessage =
          typeof (response as { errorMessage?: unknown }).errorMessage === "string"
            ? ((response as { errorMessage?: string }).errorMessage ?? "")
            : "";
        // Known account/quota blockers skip the test instead of failing it.
        if (errorMessage && isKnownLiveBlocker(errorMessage)) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`);
          return;
        }

        expect(response.stopReason).not.toBe("error");
        if (text.length > 0) {
          expect(text).toMatch(/^replay repair ok\.?$/i);
        }
      },
      3 * 60 * 1000,
    );

    it(
      `accepts transport replay after dropping aborted assistant tool calls with ${target.ref}`,
      async () => {
        const cfg = loadConfig();
        await ensureOpenClawModelsJson(cfg);

        const agentDir = resolveOpenClawAgentDir();
        const authStorage = discoverAuthStorage(agentDir);
        const modelRegistry = discoverModels(authStorage, agentDir);
        const model = modelRegistry.find(target.provider, target.modelId) as Model<Api> | null;

        if (!model) {
          logProgress(`[tool-replay-repair] model missing from registry: ${target.ref}`);
          return;
        }

        let apiKeyInfo;
        try {
          apiKeyInfo = await getApiKeyForModel({
            model,
            cfg,
            credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
          });
        } catch (error) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${String(error)})`);
          return;
        }

        if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
          logProgress(
            `[tool-replay-repair] skip ${target.ref} (non-profile credential source: ${apiKeyInfo.source})`,
          );
          return;
        }

        // The aborted assistant tool-call turn must be dropped entirely, and
        // no trace of its call id may survive into the replayed transcript.
        const transformed = transformTransportMessages(buildAbortedTransportMessages(model), model);
        expect(transformed.map((message) => message.role)).toEqual(["user"]);
        expect(JSON.stringify(transformed)).not.toContain("call_transport_aborted");

        // This is the transport replay regression proof: providers reject
        // assistant(tool_call)->user replays without a matching result, so the
        // dropped transcript must still be accepted by real model APIs.
        const response = await completeSimpleWithTimeout(
          model,
          {
            systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.",
            messages: transformed as never,
            tools: [
              {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              },
            ],
          },
          {
            apiKey: requireApiKey(apiKeyInfo, model.provider),
            reasoning: "low",
            maxTokens: 96,
          },
          120_000,
        );

        const text = response.content
          .filter((block) => block.type === "text")
          .map((block) => block.text.trim())
          .join(" ")
          .trim();
        const errorMessage =
          typeof (response as { errorMessage?: unknown }).errorMessage === "string"
            ? ((response as { errorMessage?: string }).errorMessage ?? "")
            : "";
        if (errorMessage && isKnownLiveBlocker(errorMessage)) {
          logProgress(`[tool-replay-repair] skip ${target.ref} (${errorMessage})`);
          return;
        }

        expect(response.stopReason).not.toBe("error");
        if (text.length > 0) {
          expect(text).toMatch(/^transport replay ok\.?$/i);
        }
      },
      3 * 60 * 1000,
    );
  }
});
|
||||
@@ -9,20 +9,21 @@ function makeModel(api: Api, provider: string, id: string): Model<Api> {
|
||||
function assistantToolCall(
|
||||
id: string,
|
||||
name = "read",
|
||||
stopReason: Extract<Context["messages"][number], { role: "assistant" }>["stopReason"] = "toolUse",
|
||||
): Extract<Context["messages"][number], { role: "assistant" }> {
|
||||
return {
|
||||
role: "assistant",
|
||||
provider: "openai",
|
||||
api: "openai-responses",
|
||||
model: "gpt-5.4",
|
||||
stopReason: "toolUse",
|
||||
stopReason,
|
||||
timestamp: Date.now(),
|
||||
content: [{ type: "toolCall", id, name, arguments: {} }],
|
||||
} as Extract<Context["messages"][number], { role: "assistant" }>;
|
||||
}
|
||||
|
||||
describe("transformTransportMessages synthetic tool-result policy", () => {
|
||||
it("does not synthesize missing tool results for OpenAI-compatible transports", () => {
|
||||
it("synthesizes Codex-style aborted tool results for OpenAI Responses transports", () => {
|
||||
const messages: Context["messages"] = [
|
||||
assistantToolCall("call_openai_1"),
|
||||
{ role: "user", content: "continue", timestamp: Date.now() },
|
||||
@@ -33,7 +34,166 @@ describe("transformTransportMessages synthetic tool-result policy", () => {
|
||||
makeModel("openai-responses", "openai", "gpt-5.4"),
|
||||
);
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual(["assistant", "user"]);
|
||||
expect(result.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
|
||||
expect(result[1]).toMatchObject({
|
||||
role: "toolResult",
|
||||
toolCallId: "call_openai_1",
|
||||
isError: true,
|
||||
content: [{ type: "text", text: "aborted" }],
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves real OpenAI transport results and aborts missing parallel siblings", () => {
|
||||
const messages: Context["messages"] = [
|
||||
{
|
||||
...assistantToolCall("call_keep"),
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_keep", name: "read", arguments: {} },
|
||||
{ type: "toolCall", id: "call_missing", name: "exec", arguments: {} },
|
||||
],
|
||||
},
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_keep",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
{ role: "user", content: "continue", timestamp: Date.now() },
|
||||
];
|
||||
|
||||
const result = transformTransportMessages(
|
||||
messages,
|
||||
makeModel("openclaw-openai-responses-transport" as Api, "openai", "gpt-5.4"),
|
||||
);
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual([
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect(result.slice(1, 3)).toMatchObject([
|
||||
{ role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "ok" }] },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_missing",
|
||||
content: [{ type: "text", text: "aborted" }],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("moves displaced OpenAI transport results before synthesizing missing siblings", () => {
|
||||
const messages: Context["messages"] = [
|
||||
{
|
||||
...assistantToolCall("call_keep"),
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_keep", name: "read", arguments: {} },
|
||||
{ type: "toolCall", id: "call_missing", name: "exec", arguments: {} },
|
||||
],
|
||||
},
|
||||
{ role: "user", content: "continue", timestamp: Date.now() },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_keep",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "late ok" }],
|
||||
isError: false,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
];
|
||||
|
||||
const result = transformTransportMessages(
|
||||
messages,
|
||||
makeModel("openai-responses", "openai", "gpt-5.4"),
|
||||
);
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual([
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"toolResult",
|
||||
"user",
|
||||
]);
|
||||
expect(result.slice(1, 3)).toMatchObject([
|
||||
{ role: "toolResult", toolCallId: "call_keep", content: [{ type: "text", text: "late ok" }] },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_missing",
|
||||
content: [{ type: "text", text: "aborted" }],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("drops aborted OpenAI transport assistant tool calls before replay", () => {
|
||||
const messages: Context["messages"] = [
|
||||
assistantToolCall("call_aborted", "exec", "aborted"),
|
||||
{ role: "user", content: "retry after abort", timestamp: Date.now() },
|
||||
];
|
||||
|
||||
const result = transformTransportMessages(
|
||||
messages,
|
||||
makeModel("openai-responses", "openai", "gpt-5.4"),
|
||||
);
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual(["user"]);
|
||||
expect(JSON.stringify(result)).not.toContain("call_aborted");
|
||||
});
|
||||
|
||||
it("drops text-only aborted and errored transport assistant turns before replay", () => {
|
||||
const messages: Context["messages"] = [
|
||||
{
|
||||
role: "assistant",
|
||||
provider: "openai",
|
||||
api: "openai-responses",
|
||||
model: "gpt-5.4",
|
||||
stopReason: "aborted",
|
||||
timestamp: Date.now(),
|
||||
content: [{ type: "text", text: "partial aborted output" }],
|
||||
} as Extract<Context["messages"][number], { role: "assistant" }>,
|
||||
{
|
||||
role: "assistant",
|
||||
provider: "openai",
|
||||
api: "openai-responses",
|
||||
model: "gpt-5.4",
|
||||
stopReason: "error",
|
||||
timestamp: Date.now(),
|
||||
content: [{ type: "text", text: "partial error output" }],
|
||||
} as Extract<Context["messages"][number], { role: "assistant" }>,
|
||||
{ role: "user", content: "retry after failed text turns", timestamp: Date.now() },
|
||||
];
|
||||
|
||||
const result = transformTransportMessages(
|
||||
messages,
|
||||
makeModel("openai-responses", "openai", "gpt-5.4"),
|
||||
);
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual(["user"]);
|
||||
expect(JSON.stringify(result)).not.toContain("partial aborted output");
|
||||
expect(JSON.stringify(result)).not.toContain("partial error output");
|
||||
});
|
||||
|
||||
it("drops errored Anthropic transport assistant tool calls and matching results before replay", () => {
|
||||
const messages: Context["messages"] = [
|
||||
assistantToolCall("call_error", "exec", "error"),
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_error",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "partial" }],
|
||||
isError: true,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
{ role: "user", content: "retry after error", timestamp: Date.now() },
|
||||
];
|
||||
|
||||
const result = transformTransportMessages(
|
||||
messages,
|
||||
makeModel("anthropic-messages", "anthropic", "claude-opus-4-6"),
|
||||
);
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual(["user"]);
|
||||
expect(JSON.stringify(result)).not.toContain("call_error");
|
||||
});
|
||||
|
||||
it("still synthesizes missing tool results for Anthropic transports", () => {
|
||||
@@ -72,6 +232,10 @@ describe("transformTransportMessages synthetic tool-result policy", () => {
|
||||
makeModel("openclaw-google-generative-ai-transport" as Api, "google", "gemini-2.5-pro"),
|
||||
);
|
||||
expect(googleAlias.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
|
||||
expect(googleAlias[1]).toMatchObject({
|
||||
role: "toolResult",
|
||||
content: [{ type: "text", text: "No result provided" }],
|
||||
});
|
||||
|
||||
const bedrockCanonical = transformTransportMessages(
|
||||
messages,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { Api, Context, Model } from "@mariozechner/pi-ai";
|
||||
import { repairToolUseResultPairing } from "./session-transcript-repair.js";
|
||||
|
||||
const SYNTHETIC_TOOL_RESULT_APIS = new Set<string>([
|
||||
"anthropic-messages",
|
||||
@@ -6,31 +7,34 @@ const SYNTHETIC_TOOL_RESULT_APIS = new Set<string>([
|
||||
"bedrock-converse-stream",
|
||||
"google-generative-ai",
|
||||
"openclaw-google-generative-ai-transport",
|
||||
"openai-responses",
|
||||
"openai-codex-responses",
|
||||
"azure-openai-responses",
|
||||
"openclaw-openai-responses-transport",
|
||||
"openclaw-azure-openai-responses-transport",
|
||||
]);
|
||||
|
||||
type PendingToolCall = { id: string; name: string };
|
||||
// "aborted" is an OpenAI Responses-family convention from upstream Codex
// history normalization. Gemini/Anthropic transports use their own text while
// still needing synthetic results to satisfy provider turn-shape contracts;
// tool-replay-repair.live.test.ts exercises both paths against real models.
// Membership covers the core Responses APIs plus the OpenClaw transport
// aliases that wrap them.
const CODEX_STYLE_ABORTED_OUTPUT_APIS = new Set<string>([
  "openai-responses",
  "openai-codex-responses",
  "azure-openai-responses",
  "openclaw-openai-responses-transport",
  "openclaw-azure-openai-responses-transport",
]);
|
||||
|
||||
// Whether this API family tolerates synthesized tool results by default;
// membership is defined by SYNTHETIC_TOOL_RESULT_APIS above.
function defaultAllowSyntheticToolResults(modelApi: Api): boolean {
  return SYNTHETIC_TOOL_RESULT_APIS.has(modelApi);
}
|
||||
|
||||
function appendMissingToolResults(
|
||||
result: Context["messages"],
|
||||
pendingToolCalls: PendingToolCall[],
|
||||
existingToolResultIds: ReadonlySet<string>,
|
||||
): void {
|
||||
for (const toolCall of pendingToolCalls) {
|
||||
if (!existingToolResultIds.has(toolCall.id)) {
|
||||
result.push({
|
||||
role: "toolResult",
|
||||
toolCallId: toolCall.id,
|
||||
toolName: toolCall.name,
|
||||
content: [{ type: "text", text: "No result provided" }],
|
||||
isError: true,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
function isFailedAssistantTurn(message: Context["messages"][number]): boolean {
|
||||
if (message.role !== "assistant") {
|
||||
return false;
|
||||
}
|
||||
return message.stopReason === "error" || message.stopReason === "aborted";
|
||||
}
|
||||
|
||||
export function transformTransportMessages(
|
||||
@@ -43,6 +47,9 @@ export function transformTransportMessages(
|
||||
) => string,
|
||||
): Context["messages"] {
|
||||
const allowSyntheticToolResults = defaultAllowSyntheticToolResults(model.api);
|
||||
const syntheticToolResultText = CODEX_STYLE_ABORTED_OUTPUT_APIS.has(model.api)
|
||||
? "aborted"
|
||||
: "No result provided";
|
||||
const toolCallIdMap = new Map<string, string>();
|
||||
const transformed = messages.map((msg) => {
|
||||
if (msg.role === "user") {
|
||||
@@ -102,42 +109,21 @@ export function transformTransportMessages(
|
||||
}
|
||||
return { ...msg, content };
|
||||
});
|
||||
// Preserve the old transport replay filter: failed streamed turns can contain
|
||||
// partial text, partial tool calls, or both, and strict providers can treat
|
||||
// them as valid assistant context on retry unless we drop the whole turn.
|
||||
const replayable = transformed.filter((msg) => !isFailedAssistantTurn(msg));
|
||||
|
||||
const result: Context["messages"] = [];
|
||||
let pendingToolCalls: PendingToolCall[] = [];
|
||||
let existingToolResultIds = new Set<string>();
|
||||
for (const msg of transformed) {
|
||||
if (msg.role === "assistant") {
|
||||
if (allowSyntheticToolResults && pendingToolCalls.length > 0) {
|
||||
appendMissingToolResults(result, pendingToolCalls, existingToolResultIds);
|
||||
}
|
||||
pendingToolCalls = [];
|
||||
existingToolResultIds = new Set();
|
||||
if (msg.stopReason === "error" || msg.stopReason === "aborted") {
|
||||
continue;
|
||||
}
|
||||
const toolCalls = msg.content.filter(
|
||||
(block): block is Extract<(typeof msg.content)[number], { type: "toolCall" }> =>
|
||||
block.type === "toolCall",
|
||||
);
|
||||
if (toolCalls.length > 0) {
|
||||
pendingToolCalls = toolCalls.map((block) => ({ id: block.id, name: block.name }));
|
||||
existingToolResultIds = new Set();
|
||||
}
|
||||
result.push(msg);
|
||||
continue;
|
||||
}
|
||||
if (msg.role === "toolResult") {
|
||||
existingToolResultIds.add(msg.toolCallId);
|
||||
result.push(msg);
|
||||
continue;
|
||||
}
|
||||
if (allowSyntheticToolResults && pendingToolCalls.length > 0) {
|
||||
appendMissingToolResults(result, pendingToolCalls, existingToolResultIds);
|
||||
}
|
||||
pendingToolCalls = [];
|
||||
existingToolResultIds = new Set();
|
||||
result.push(msg);
|
||||
if (!allowSyntheticToolResults) {
|
||||
return replayable;
|
||||
}
|
||||
return result;
|
||||
|
||||
// PI's local transform can synthesize missing results, but it does not move
|
||||
// displaced real results back before an intervening user turn. Shared repair
|
||||
// handles both, while preserving the previous transport behavior of dropping
|
||||
// aborted/error assistant tool-call turns before replaying strict providers.
|
||||
return repairToolUseResultPairing(replayable, {
|
||||
erroredAssistantResultPolicy: "drop",
|
||||
missingToolResultText: syntheticToolResultText,
|
||||
}).messages as Context["messages"];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user