Agents: validate persisted tool-call names

This commit is contained in:
Vignesh Natarajan
2026-02-21 23:06:44 -08:00
parent 29a782b9cd
commit cdfe45eeb8
11 changed files with 248 additions and 8 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
- TUI/Input: enable multiline-paste burst coalescing on macOS Terminal.app and iTerm so pasted blocks no longer submit line-by-line as separate messages. (#18809) Thanks @fwends.
- TUI/Status: request immediate renders after setting `sending`/`waiting` activity states so in-flight runs always show visible progress indicators instead of appearing idle until completion. (#21549) Thanks @13Guinness.
- Agents/Fallbacks: treat JSON payloads with `type: "api_error"` + `"Internal server error"` as transient failover errors so Anthropic 500-style failures trigger model fallback. (#23193) Thanks @jarvis-lane.
- Agents/Transcripts: validate assistant tool-call names (syntax/length + registered tool allowlist) before transcript persistence and during replay sanitization so malformed failover tool names no longer poison sessions with repeated provider HTTP 400 errors. (#23324) Thanks @johnsantry.
- Agents/Diagnostics: include resolved lifecycle error text in `embedded run agent end` warnings so UI/TUI “Connection error” runs expose actionable provider failure reasons in gateway logs. (#23054) Thanks @Raize.
- Plugins/Hooks: run legacy `before_agent_start` once per agent turn and reuse that result across model-resolve and prompt-build compatibility paths, preventing duplicate hook side effects (for example duplicate external API calls). (#23289) Thanks @ksato8710.
- Models/Config: default missing Anthropic provider/model `api` fields to `anthropic-messages` during config validation so custom relay model entries are preserved instead of being dropped by runtime model registry validation. (#23332) Thanks @bigbigmonkey123.

View File

@@ -203,6 +203,54 @@ describe("sanitizeSessionHistory", () => {
expect(result.map((msg) => msg.role)).toEqual(["user"]);
});
it("drops malformed tool calls with invalid/overlong names", async () => {
const messages = [
{
role: "assistant",
content: [
{
type: "toolCall",
id: "call_bad",
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
arguments: {},
},
{ type: "toolCall", id: "call_long", name: `read_${"x".repeat(80)}`, arguments: {} },
],
},
{ role: "user", content: "hello" },
] as unknown as AgentMessage[];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result.map((msg) => msg.role)).toEqual(["user"]);
});
it("drops tool calls that are not in the allowed tool set", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }],
},
] as unknown as AgentMessage[];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
allowedToolNames: ["read"],
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result).toEqual([]);
});
it("downgrades orphaned openai reasoning even when the model has not changed", async () => {
const sessionEntries = [
makeModelSnapshotEntry({

View File

@@ -78,6 +78,7 @@ import {
buildEmbeddedSystemPrompt,
createSystemPromptOverride,
} from "./system-prompt.js";
import { collectAllowedToolNames } from "./tool-name-allowlist.js";
import { splitSdkTools } from "./tool-split.js";
import type { EmbeddedPiCompactResult } from "./types.js";
import { describeUnknownError, mapThinkingLevel } from "./utils.js";
@@ -383,6 +384,7 @@ export async function compactEmbeddedPiSessionDirect(
modelAuthMode: resolveModelAuthMode(model.provider, params.config),
});
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider });
const allowedToolNames = collectAllowedToolNames({ tools });
logToolSchemasForGoogle({ tools, provider });
const machineName = await getMachineDisplayName();
const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider);
@@ -532,6 +534,7 @@ export async function compactEmbeddedPiSessionDirect(
agentId: sessionAgentId,
sessionKey: params.sessionKey,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
allowedToolNames,
});
trackSessionManagerAccess(params.sessionFile);
const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir);
@@ -587,6 +590,7 @@ export async function compactEmbeddedPiSessionDirect(
modelApi: model.api,
modelId,
provider,
allowedToolNames,
config: params.config,
sessionManager,
sessionId: params.sessionId,

View File

@@ -426,6 +426,7 @@ export async function sanitizeSessionHistory(params: {
modelApi?: string | null;
modelId?: string;
provider?: string;
allowedToolNames?: Iterable<string>;
config?: OpenClawConfig;
sessionManager: SessionManager;
sessionId: string;
@@ -458,7 +459,9 @@ export async function sanitizeSessionHistory(params: {
const sanitizedThinking = policy.sanitizeThinkingSignatures
? sanitizeAntigravityThinkingBlocks(droppedThinking)
: droppedThinking;
const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking);
const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking, {
allowedToolNames: params.allowedToolNames,
});
const repairedTools = policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedToolCalls)
: sanitizedToolCalls;

View File

@@ -105,6 +105,7 @@ import {
createSystemPromptOverride,
} from "../system-prompt.js";
import { dropThinkingBlocks } from "../thinking.js";
import { collectAllowedToolNames } from "../tool-name-allowlist.js";
import { installToolResultContextGuard } from "../tool-result-context-guard.js";
import { splitSdkTools } from "../tool-split.js";
import { describeUnknownError, mapThinkingLevel } from "../utils.js";
@@ -395,6 +396,10 @@ export async function runEmbeddedAttempt(
disableMessageTool: params.disableMessageTool,
});
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider });
const allowedToolNames = collectAllowedToolNames({
tools,
clientTools: params.clientTools,
});
logToolSchemasForGoogle({ tools, provider: params.provider });
const machineName = await getMachineDisplayName();
@@ -591,6 +596,7 @@ export async function runEmbeddedAttempt(
sessionKey: params.sessionKey,
inputProvenance: params.inputProvenance,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
allowedToolNames,
});
trackSessionManagerAccess(params.sessionFile);
@@ -777,6 +783,7 @@ export async function runEmbeddedAttempt(
modelApi: params.model.api,
modelId: params.modelId,
provider: params.provider,
allowedToolNames,
config: params.config,
sessionManager,
sessionId: params.sessionId,

View File

@@ -0,0 +1,26 @@
import type { AgentTool } from "@mariozechner/pi-agent-core";
import type { ClientToolDefinition } from "./run/params.js";
function addName(names: Set<string>, value: unknown): void {
if (typeof value !== "string") {
return;
}
const trimmed = value.trim();
if (trimmed) {
names.add(trimmed);
}
}
export function collectAllowedToolNames(params: {
tools: AgentTool[];
clientTools?: ClientToolDefinition[];
}): Set<string> {
const names = new Set<string>();
for (const tool of params.tools) {
addName(names, tool.name);
}
for (const tool of params.clientTools ?? []) {
addName(names, tool.function?.name);
}
return names;
}

View File

@@ -22,6 +22,7 @@ export function guardSessionManager(
sessionKey?: string;
inputProvenance?: InputProvenance;
allowSyntheticToolResults?: boolean;
allowedToolNames?: Iterable<string>;
},
): GuardedSessionManager {
if (typeof (sessionManager as GuardedSessionManager).flushPendingToolResults === "function") {
@@ -64,6 +65,7 @@ export function guardSessionManager(
applyInputProvenanceToUserMessage(message, opts?.inputProvenance),
transformToolResultForPersistence: transform,
allowSyntheticToolResults: opts?.allowSyntheticToolResults,
allowedToolNames: opts?.allowedToolNames,
beforeMessageWriteHook: beforeMessageWrite,
});
(sessionManager as GuardedSessionManager).flushPendingToolResults = guard.flushPendingToolResults;

View File

@@ -191,6 +191,43 @@ describe("installSessionToolResultGuard", () => {
expect(messages).toHaveLength(0);
});
it("drops malformed tool calls with invalid name tokens before persistence", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);
sm.appendMessage(
asAppendMessage({
role: "assistant",
content: [
{
type: "toolCall",
id: "call_bad_name",
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
arguments: {},
},
],
}),
);
expect(getPersistedMessages(sm)).toHaveLength(0);
});
it("drops tool calls not present in allowedToolNames", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm, {
allowedToolNames: ["read"],
});
sm.appendMessage(
asAppendMessage({
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }],
}),
);
expect(getPersistedMessages(sm)).toHaveLength(0);
});
it("flushes pending tool results when a sanitized assistant message is dropped", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);

View File

@@ -96,6 +96,11 @@ export function installSessionToolResultGuard(
* Defaults to true.
*/
allowSyntheticToolResults?: boolean;
/**
* Optional set/list of tool names accepted for assistant toolCall/toolUse blocks.
* When set, tool calls with unknown names are dropped before persistence.
*/
allowedToolNames?: Iterable<string>;
/**
* Synchronous hook invoked before any message is written to the session JSONL.
* If the hook returns { block: true }, the message is silently dropped.
@@ -171,7 +176,9 @@ export function installSessionToolResultGuard(
let nextMessage = message;
const role = (message as { role?: unknown }).role;
if (role === "assistant") {
const sanitized = sanitizeToolCallInputs([message]);
const sanitized = sanitizeToolCallInputs([message], {
allowedToolNames: opts?.allowedToolNames,
});
if (sanitized.length === 0) {
if (allowSyntheticToolResults && pending.size > 0) {
flushPendingToolResults();

View File

@@ -241,6 +241,65 @@ describe("sanitizeToolCallInputs", () => {
expect((toolCalls[0] as { id?: unknown }).id).toBe("call_ok");
});
it("drops tool calls with malformed or overlong names", () => {
const input = [
{
role: "assistant",
content: [
{ type: "toolCall", id: "call_ok", name: "read", arguments: {} },
{
type: "toolCall",
id: "call_bad_chars",
name: 'toolu_01abc <|tool_call_argument_begin|> {"command"',
arguments: {},
},
{
type: "toolUse",
id: "call_too_long",
name: `read_${"x".repeat(80)}`,
input: {},
},
],
},
] as unknown as AgentMessage[];
const out = sanitizeToolCallInputs(input);
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
const toolCalls = Array.isArray(assistant.content)
? assistant.content.filter((block) => {
const type = (block as { type?: unknown }).type;
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
})
: [];
expect(toolCalls).toHaveLength(1);
expect((toolCalls[0] as { name?: unknown }).name).toBe("read");
});
it("drops unknown tool names when an allowlist is provided", () => {
const input = [
{
role: "assistant",
content: [
{ type: "toolCall", id: "call_ok", name: "read", arguments: {} },
{ type: "toolCall", id: "call_unknown", name: "write", arguments: {} },
],
},
] as unknown as AgentMessage[];
const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] });
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
const toolCalls = Array.isArray(assistant.content)
? assistant.content.filter((block) => {
const type = (block as { type?: unknown }).type;
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
})
: [];
expect(toolCalls).toHaveLength(1);
expect((toolCalls[0] as { name?: unknown }).name).toBe("read");
});
it("keeps valid tool calls and preserves text blocks", () => {
const input = [
{

View File

@@ -1,6 +1,9 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js";
const TOOL_CALL_NAME_MAX_CHARS = 64;
const TOOL_CALL_NAME_RE = /^[A-Za-z0-9_-]+$/;
type ToolCallBlock = {
type?: unknown;
id?: unknown;
@@ -35,8 +38,38 @@ function hasToolCallId(block: ToolCallBlock): boolean {
return hasNonEmptyStringField(block.id);
}
function hasToolCallName(block: ToolCallBlock): boolean {
return hasNonEmptyStringField(block.name);
function normalizeAllowedToolNames(allowedToolNames?: Iterable<string>): Set<string> | null {
if (!allowedToolNames) {
return null;
}
const normalized = new Set<string>();
for (const name of allowedToolNames) {
if (typeof name !== "string") {
continue;
}
const trimmed = name.trim();
if (trimmed) {
normalized.add(trimmed.toLowerCase());
}
}
return normalized.size > 0 ? normalized : null;
}
function hasToolCallName(block: ToolCallBlock, allowedToolNames: Set<string> | null): boolean {
if (typeof block.name !== "string") {
return false;
}
const trimmed = block.name.trim();
if (!trimmed || trimmed !== block.name) {
return false;
}
if (trimmed.length > TOOL_CALL_NAME_MAX_CHARS || !TOOL_CALL_NAME_RE.test(trimmed)) {
return false;
}
if (!allowedToolNames) {
return true;
}
return allowedToolNames.has(trimmed.toLowerCase());
}
function makeMissingToolResult(params: {
@@ -66,6 +99,10 @@ export type ToolCallInputRepairReport = {
droppedAssistantMessages: number;
};
export type ToolCallInputRepairOptions = {
allowedToolNames?: Iterable<string>;
};
export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] {
let touched = false;
const out: AgentMessage[] = [];
@@ -85,11 +122,15 @@ export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[]
return touched ? out : messages;
}
export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRepairReport {
export function repairToolCallInputs(
messages: AgentMessage[],
options?: ToolCallInputRepairOptions,
): ToolCallInputRepairReport {
let droppedToolCalls = 0;
let droppedAssistantMessages = 0;
let changed = false;
const out: AgentMessage[] = [];
const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
for (const msg of messages) {
if (!msg || typeof msg !== "object") {
@@ -108,7 +149,9 @@ export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRep
for (const block of msg.content) {
if (
isToolCallBlock(block) &&
(!hasToolCallInput(block) || !hasToolCallId(block) || !hasToolCallName(block))
(!hasToolCallInput(block) ||
!hasToolCallId(block) ||
!hasToolCallName(block, allowedToolNames))
) {
droppedToolCalls += 1;
droppedInMessage += 1;
@@ -138,8 +181,11 @@ export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRep
};
}
export function sanitizeToolCallInputs(messages: AgentMessage[]): AgentMessage[] {
return repairToolCallInputs(messages).messages;
export function sanitizeToolCallInputs(
messages: AgentMessage[],
options?: ToolCallInputRepairOptions,
): AgentMessage[] {
return repairToolCallInputs(messages, options).messages;
}
export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMessage[] {