mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 15:30:39 +00:00
Agents: validate persisted tool-call names
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
- TUI/Input: enable multiline-paste burst coalescing on macOS Terminal.app and iTerm so pasted blocks no longer submit line-by-line as separate messages. (#18809) Thanks @fwends.
|
||||
- TUI/Status: request immediate renders after setting `sending`/`waiting` activity states so in-flight runs always show visible progress indicators instead of appearing idle until completion. (#21549) Thanks @13Guinness.
|
||||
- Agents/Fallbacks: treat JSON payloads with `type: "api_error"` + `"Internal server error"` as transient failover errors so Anthropic 500-style failures trigger model fallback. (#23193) Thanks @jarvis-lane.
|
||||
- Agents/Transcripts: validate assistant tool-call names (syntax/length + registered tool allowlist) before transcript persistence and during replay sanitization so malformed failover tool names no longer poison sessions with repeated provider HTTP 400 errors. (#23324) Thanks @johnsantry.
|
||||
- Agents/Diagnostics: include resolved lifecycle error text in `embedded run agent end` warnings so UI/TUI “Connection error” runs expose actionable provider failure reasons in gateway logs. (#23054) Thanks @Raize.
|
||||
- Plugins/Hooks: run legacy `before_agent_start` once per agent turn and reuse that result across model-resolve and prompt-build compatibility paths, preventing duplicate hook side effects (for example duplicate external API calls). (#23289) Thanks @ksato8710.
|
||||
- Models/Config: default missing Anthropic provider/model `api` fields to `anthropic-messages` during config validation so custom relay model entries are preserved instead of being dropped by runtime model registry validation. (#23332) Thanks @bigbigmonkey123.
|
||||
|
||||
@@ -203,6 +203,54 @@ describe("sanitizeSessionHistory", () => {
|
||||
expect(result.map((msg) => msg.role)).toEqual(["user"]);
|
||||
});
|
||||
|
||||
it("drops malformed tool calls with invalid/overlong names", async () => {
|
||||
const messages = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_bad",
|
||||
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
|
||||
arguments: {},
|
||||
},
|
||||
{ type: "toolCall", id: "call_long", name: `read_${"x".repeat(80)}`, arguments: {} },
|
||||
],
|
||||
},
|
||||
{ role: "user", content: "hello" },
|
||||
] as unknown as AgentMessage[];
|
||||
|
||||
const result = await sanitizeSessionHistory({
|
||||
messages,
|
||||
modelApi: "openai-responses",
|
||||
provider: "openai",
|
||||
sessionManager: mockSessionManager,
|
||||
sessionId: TEST_SESSION_ID,
|
||||
});
|
||||
|
||||
expect(result.map((msg) => msg.role)).toEqual(["user"]);
|
||||
});
|
||||
|
||||
it("drops tool calls that are not in the allowed tool set", async () => {
|
||||
const messages = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }],
|
||||
},
|
||||
] as unknown as AgentMessage[];
|
||||
|
||||
const result = await sanitizeSessionHistory({
|
||||
messages,
|
||||
modelApi: "openai-responses",
|
||||
provider: "openai",
|
||||
allowedToolNames: ["read"],
|
||||
sessionManager: mockSessionManager,
|
||||
sessionId: TEST_SESSION_ID,
|
||||
});
|
||||
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it("downgrades orphaned openai reasoning even when the model has not changed", async () => {
|
||||
const sessionEntries = [
|
||||
makeModelSnapshotEntry({
|
||||
|
||||
@@ -78,6 +78,7 @@ import {
|
||||
buildEmbeddedSystemPrompt,
|
||||
createSystemPromptOverride,
|
||||
} from "./system-prompt.js";
|
||||
import { collectAllowedToolNames } from "./tool-name-allowlist.js";
|
||||
import { splitSdkTools } from "./tool-split.js";
|
||||
import type { EmbeddedPiCompactResult } from "./types.js";
|
||||
import { describeUnknownError, mapThinkingLevel } from "./utils.js";
|
||||
@@ -383,6 +384,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
modelAuthMode: resolveModelAuthMode(model.provider, params.config),
|
||||
});
|
||||
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider });
|
||||
const allowedToolNames = collectAllowedToolNames({ tools });
|
||||
logToolSchemasForGoogle({ tools, provider });
|
||||
const machineName = await getMachineDisplayName();
|
||||
const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider);
|
||||
@@ -532,6 +534,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
agentId: sessionAgentId,
|
||||
sessionKey: params.sessionKey,
|
||||
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
|
||||
allowedToolNames,
|
||||
});
|
||||
trackSessionManagerAccess(params.sessionFile);
|
||||
const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir);
|
||||
@@ -587,6 +590,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
modelApi: model.api,
|
||||
modelId,
|
||||
provider,
|
||||
allowedToolNames,
|
||||
config: params.config,
|
||||
sessionManager,
|
||||
sessionId: params.sessionId,
|
||||
|
||||
@@ -426,6 +426,7 @@ export async function sanitizeSessionHistory(params: {
|
||||
modelApi?: string | null;
|
||||
modelId?: string;
|
||||
provider?: string;
|
||||
allowedToolNames?: Iterable<string>;
|
||||
config?: OpenClawConfig;
|
||||
sessionManager: SessionManager;
|
||||
sessionId: string;
|
||||
@@ -458,7 +459,9 @@ export async function sanitizeSessionHistory(params: {
|
||||
const sanitizedThinking = policy.sanitizeThinkingSignatures
|
||||
? sanitizeAntigravityThinkingBlocks(droppedThinking)
|
||||
: droppedThinking;
|
||||
const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking);
|
||||
const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking, {
|
||||
allowedToolNames: params.allowedToolNames,
|
||||
});
|
||||
const repairedTools = policy.repairToolUseResultPairing
|
||||
? sanitizeToolUseResultPairing(sanitizedToolCalls)
|
||||
: sanitizedToolCalls;
|
||||
|
||||
@@ -105,6 +105,7 @@ import {
|
||||
createSystemPromptOverride,
|
||||
} from "../system-prompt.js";
|
||||
import { dropThinkingBlocks } from "../thinking.js";
|
||||
import { collectAllowedToolNames } from "../tool-name-allowlist.js";
|
||||
import { installToolResultContextGuard } from "../tool-result-context-guard.js";
|
||||
import { splitSdkTools } from "../tool-split.js";
|
||||
import { describeUnknownError, mapThinkingLevel } from "../utils.js";
|
||||
@@ -395,6 +396,10 @@ export async function runEmbeddedAttempt(
|
||||
disableMessageTool: params.disableMessageTool,
|
||||
});
|
||||
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider });
|
||||
const allowedToolNames = collectAllowedToolNames({
|
||||
tools,
|
||||
clientTools: params.clientTools,
|
||||
});
|
||||
logToolSchemasForGoogle({ tools, provider: params.provider });
|
||||
|
||||
const machineName = await getMachineDisplayName();
|
||||
@@ -591,6 +596,7 @@ export async function runEmbeddedAttempt(
|
||||
sessionKey: params.sessionKey,
|
||||
inputProvenance: params.inputProvenance,
|
||||
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
|
||||
allowedToolNames,
|
||||
});
|
||||
trackSessionManagerAccess(params.sessionFile);
|
||||
|
||||
@@ -777,6 +783,7 @@ export async function runEmbeddedAttempt(
|
||||
modelApi: params.model.api,
|
||||
modelId: params.modelId,
|
||||
provider: params.provider,
|
||||
allowedToolNames,
|
||||
config: params.config,
|
||||
sessionManager,
|
||||
sessionId: params.sessionId,
|
||||
|
||||
26
src/agents/pi-embedded-runner/tool-name-allowlist.ts
Normal file
26
src/agents/pi-embedded-runner/tool-name-allowlist.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import type { ClientToolDefinition } from "./run/params.js";
|
||||
|
||||
function addName(names: Set<string>, value: unknown): void {
|
||||
if (typeof value !== "string") {
|
||||
return;
|
||||
}
|
||||
const trimmed = value.trim();
|
||||
if (trimmed) {
|
||||
names.add(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
export function collectAllowedToolNames(params: {
|
||||
tools: AgentTool[];
|
||||
clientTools?: ClientToolDefinition[];
|
||||
}): Set<string> {
|
||||
const names = new Set<string>();
|
||||
for (const tool of params.tools) {
|
||||
addName(names, tool.name);
|
||||
}
|
||||
for (const tool of params.clientTools ?? []) {
|
||||
addName(names, tool.function?.name);
|
||||
}
|
||||
return names;
|
||||
}
|
||||
@@ -22,6 +22,7 @@ export function guardSessionManager(
|
||||
sessionKey?: string;
|
||||
inputProvenance?: InputProvenance;
|
||||
allowSyntheticToolResults?: boolean;
|
||||
allowedToolNames?: Iterable<string>;
|
||||
},
|
||||
): GuardedSessionManager {
|
||||
if (typeof (sessionManager as GuardedSessionManager).flushPendingToolResults === "function") {
|
||||
@@ -64,6 +65,7 @@ export function guardSessionManager(
|
||||
applyInputProvenanceToUserMessage(message, opts?.inputProvenance),
|
||||
transformToolResultForPersistence: transform,
|
||||
allowSyntheticToolResults: opts?.allowSyntheticToolResults,
|
||||
allowedToolNames: opts?.allowedToolNames,
|
||||
beforeMessageWriteHook: beforeMessageWrite,
|
||||
});
|
||||
(sessionManager as GuardedSessionManager).flushPendingToolResults = guard.flushPendingToolResults;
|
||||
|
||||
@@ -191,6 +191,43 @@ describe("installSessionToolResultGuard", () => {
|
||||
expect(messages).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("drops malformed tool calls with invalid name tokens before persistence", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
installSessionToolResultGuard(sm);
|
||||
|
||||
sm.appendMessage(
|
||||
asAppendMessage({
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_bad_name",
|
||||
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
|
||||
arguments: {},
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(getPersistedMessages(sm)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("drops tool calls not present in allowedToolNames", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
installSessionToolResultGuard(sm, {
|
||||
allowedToolNames: ["read"],
|
||||
});
|
||||
|
||||
sm.appendMessage(
|
||||
asAppendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(getPersistedMessages(sm)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("flushes pending tool results when a sanitized assistant message is dropped", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
installSessionToolResultGuard(sm);
|
||||
|
||||
@@ -96,6 +96,11 @@ export function installSessionToolResultGuard(
|
||||
* Defaults to true.
|
||||
*/
|
||||
allowSyntheticToolResults?: boolean;
|
||||
/**
|
||||
* Optional set/list of tool names accepted for assistant toolCall/toolUse blocks.
|
||||
* When set, tool calls with unknown names are dropped before persistence.
|
||||
*/
|
||||
allowedToolNames?: Iterable<string>;
|
||||
/**
|
||||
* Synchronous hook invoked before any message is written to the session JSONL.
|
||||
* If the hook returns { block: true }, the message is silently dropped.
|
||||
@@ -171,7 +176,9 @@ export function installSessionToolResultGuard(
|
||||
let nextMessage = message;
|
||||
const role = (message as { role?: unknown }).role;
|
||||
if (role === "assistant") {
|
||||
const sanitized = sanitizeToolCallInputs([message]);
|
||||
const sanitized = sanitizeToolCallInputs([message], {
|
||||
allowedToolNames: opts?.allowedToolNames,
|
||||
});
|
||||
if (sanitized.length === 0) {
|
||||
if (allowSyntheticToolResults && pending.size > 0) {
|
||||
flushPendingToolResults();
|
||||
|
||||
@@ -241,6 +241,65 @@ describe("sanitizeToolCallInputs", () => {
|
||||
expect((toolCalls[0] as { id?: unknown }).id).toBe("call_ok");
|
||||
});
|
||||
|
||||
it("drops tool calls with malformed or overlong names", () => {
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_ok", name: "read", arguments: {} },
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_bad_chars",
|
||||
name: 'toolu_01abc <|tool_call_argument_begin|> {"command"',
|
||||
arguments: {},
|
||||
},
|
||||
{
|
||||
type: "toolUse",
|
||||
id: "call_too_long",
|
||||
name: `read_${"x".repeat(80)}`,
|
||||
input: {},
|
||||
},
|
||||
],
|
||||
},
|
||||
] as unknown as AgentMessage[];
|
||||
|
||||
const out = sanitizeToolCallInputs(input);
|
||||
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
const toolCalls = Array.isArray(assistant.content)
|
||||
? assistant.content.filter((block) => {
|
||||
const type = (block as { type?: unknown }).type;
|
||||
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
|
||||
})
|
||||
: [];
|
||||
|
||||
expect(toolCalls).toHaveLength(1);
|
||||
expect((toolCalls[0] as { name?: unknown }).name).toBe("read");
|
||||
});
|
||||
|
||||
it("drops unknown tool names when an allowlist is provided", () => {
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_ok", name: "read", arguments: {} },
|
||||
{ type: "toolCall", id: "call_unknown", name: "write", arguments: {} },
|
||||
],
|
||||
},
|
||||
] as unknown as AgentMessage[];
|
||||
|
||||
const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] });
|
||||
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
const toolCalls = Array.isArray(assistant.content)
|
||||
? assistant.content.filter((block) => {
|
||||
const type = (block as { type?: unknown }).type;
|
||||
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
|
||||
})
|
||||
: [];
|
||||
|
||||
expect(toolCalls).toHaveLength(1);
|
||||
expect((toolCalls[0] as { name?: unknown }).name).toBe("read");
|
||||
});
|
||||
|
||||
it("keeps valid tool calls and preserves text blocks", () => {
|
||||
const input = [
|
||||
{
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js";
|
||||
|
||||
const TOOL_CALL_NAME_MAX_CHARS = 64;
|
||||
const TOOL_CALL_NAME_RE = /^[A-Za-z0-9_-]+$/;
|
||||
|
||||
type ToolCallBlock = {
|
||||
type?: unknown;
|
||||
id?: unknown;
|
||||
@@ -35,8 +38,38 @@ function hasToolCallId(block: ToolCallBlock): boolean {
|
||||
return hasNonEmptyStringField(block.id);
|
||||
}
|
||||
|
||||
function hasToolCallName(block: ToolCallBlock): boolean {
|
||||
return hasNonEmptyStringField(block.name);
|
||||
function normalizeAllowedToolNames(allowedToolNames?: Iterable<string>): Set<string> | null {
|
||||
if (!allowedToolNames) {
|
||||
return null;
|
||||
}
|
||||
const normalized = new Set<string>();
|
||||
for (const name of allowedToolNames) {
|
||||
if (typeof name !== "string") {
|
||||
continue;
|
||||
}
|
||||
const trimmed = name.trim();
|
||||
if (trimmed) {
|
||||
normalized.add(trimmed.toLowerCase());
|
||||
}
|
||||
}
|
||||
return normalized.size > 0 ? normalized : null;
|
||||
}
|
||||
|
||||
function hasToolCallName(block: ToolCallBlock, allowedToolNames: Set<string> | null): boolean {
|
||||
if (typeof block.name !== "string") {
|
||||
return false;
|
||||
}
|
||||
const trimmed = block.name.trim();
|
||||
if (!trimmed || trimmed !== block.name) {
|
||||
return false;
|
||||
}
|
||||
if (trimmed.length > TOOL_CALL_NAME_MAX_CHARS || !TOOL_CALL_NAME_RE.test(trimmed)) {
|
||||
return false;
|
||||
}
|
||||
if (!allowedToolNames) {
|
||||
return true;
|
||||
}
|
||||
return allowedToolNames.has(trimmed.toLowerCase());
|
||||
}
|
||||
|
||||
function makeMissingToolResult(params: {
|
||||
@@ -66,6 +99,10 @@ export type ToolCallInputRepairReport = {
|
||||
droppedAssistantMessages: number;
|
||||
};
|
||||
|
||||
export type ToolCallInputRepairOptions = {
|
||||
allowedToolNames?: Iterable<string>;
|
||||
};
|
||||
|
||||
export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] {
|
||||
let touched = false;
|
||||
const out: AgentMessage[] = [];
|
||||
@@ -85,11 +122,15 @@ export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[]
|
||||
return touched ? out : messages;
|
||||
}
|
||||
|
||||
export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRepairReport {
|
||||
export function repairToolCallInputs(
|
||||
messages: AgentMessage[],
|
||||
options?: ToolCallInputRepairOptions,
|
||||
): ToolCallInputRepairReport {
|
||||
let droppedToolCalls = 0;
|
||||
let droppedAssistantMessages = 0;
|
||||
let changed = false;
|
||||
const out: AgentMessage[] = [];
|
||||
const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames);
|
||||
|
||||
for (const msg of messages) {
|
||||
if (!msg || typeof msg !== "object") {
|
||||
@@ -108,7 +149,9 @@ export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRep
|
||||
for (const block of msg.content) {
|
||||
if (
|
||||
isToolCallBlock(block) &&
|
||||
(!hasToolCallInput(block) || !hasToolCallId(block) || !hasToolCallName(block))
|
||||
(!hasToolCallInput(block) ||
|
||||
!hasToolCallId(block) ||
|
||||
!hasToolCallName(block, allowedToolNames))
|
||||
) {
|
||||
droppedToolCalls += 1;
|
||||
droppedInMessage += 1;
|
||||
@@ -138,8 +181,11 @@ export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRep
|
||||
};
|
||||
}
|
||||
|
||||
export function sanitizeToolCallInputs(messages: AgentMessage[]): AgentMessage[] {
|
||||
return repairToolCallInputs(messages).messages;
|
||||
export function sanitizeToolCallInputs(
|
||||
messages: AgentMessage[],
|
||||
options?: ToolCallInputRepairOptions,
|
||||
): AgentMessage[] {
|
||||
return repairToolCallInputs(messages, options).messages;
|
||||
}
|
||||
|
||||
export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMessage[] {
|
||||
|
||||
Reference in New Issue
Block a user