fix: canonicalize replay tool names

Regeneration-Prompt: |
  Address the PR review findings that the replay-only tool-call validator is stricter than the normal dispatch path: it incorrectly rejects allowlisted tool names with punctuation and strips mixed-case allowlisted names on replay. Keep the replay sanitizer scoped to live provider replays, but resolve replayed tool names through the same canonical exact/structured/case-insensitive allowlist helpers used elsewhere in attempt.ts. Add regression tests for a dotted allowlisted tool name and a mixed-case allowlisted tool name so those tool calls survive replay and keep their canonical names.
This commit is contained in:
Josh Lehman
2026-03-18 13:42:14 -07:00
parent 42c9a1e6cf
commit d96aaf6d59
2 changed files with 71 additions and 17 deletions

View File

@@ -875,10 +875,59 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
name?: string;
input?: { attachments?: Array<{ content?: string }> };
};
expect(toolCall.name).toBe("SESSIONS_SPAWN");
expect(toolCall.name).toBe("sessions_spawn");
expect(toolCall.input?.attachments?.[0]?.content).toBe(attachmentContent);
});
it("preserves allowlisted tool names that contain punctuation", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolUse", id: "call_1", name: "admin.export", input: { scope: "all" } }],
},
];
const baseFn = vi.fn((_model, _context) =>
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
baseFn as never,
new Set(["admin.export"]),
);
const stream = wrapped({} as never, { messages } as never, {} as never) as
| FakeWrappedStream
| Promise<FakeWrappedStream>;
await Promise.resolve(stream);
expect(baseFn).toHaveBeenCalledTimes(1);
const seenContext = baseFn.mock.calls[0]?.[1] as { messages: unknown[] };
expect(seenContext.messages).toBe(messages);
});
it("canonicalizes mixed-case allowlisted tool names on replay", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "readfile", arguments: {} }],
},
];
const baseFn = vi.fn((_model, _context) =>
createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
);
const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["ReadFile"]));
const stream = wrapped({} as never, { messages } as never, {} as never) as
| FakeWrappedStream
| Promise<FakeWrappedStream>;
await Promise.resolve(stream);
expect(baseFn).toHaveBeenCalledTimes(1);
const seenContext = baseFn.mock.calls[0]?.[1] as {
messages: Array<{ content?: Array<{ name?: string }> }>;
};
expect(seenContext.messages[0]?.content?.[0]?.name).toBe("ReadFile");
});
it("drops orphaned tool results after replay sanitization removes a tool-call turn", async () => {
const messages = [
{

View File

@@ -649,7 +649,6 @@ function isToolCallBlockType(type: unknown): boolean {
}
const REPLAY_TOOL_CALL_NAME_MAX_CHARS = 64;
const REPLAY_TOOL_CALL_NAME_RE = /^[A-Za-z0-9_-]+$/;
type ReplayToolCallBlock = {
type?: unknown;
@@ -677,21 +676,21 @@ function replayToolCallNonEmptyString(value: unknown): value is string {
return typeof value === "string" && value.trim().length > 0;
}
function replayToolCallHasName(
block: ReplayToolCallBlock,
function resolveReplayAllowedToolName(
rawName: string,
allowedToolNames?: Set<string>,
): block is ReplayToolCallBlock & { name: string } {
if (!replayToolCallNonEmptyString(block.name)) {
return false;
}
const trimmed = block.name.trim();
if (trimmed.length > REPLAY_TOOL_CALL_NAME_MAX_CHARS || !REPLAY_TOOL_CALL_NAME_RE.test(trimmed)) {
return false;
): string | null {
const trimmed = rawName.trim();
if (!trimmed || trimmed.length > REPLAY_TOOL_CALL_NAME_MAX_CHARS || /\s/.test(trimmed)) {
return null;
}
if (!allowedToolNames || allowedToolNames.size === 0) {
return true;
return trimmed;
}
return allowedToolNames.has(trimmed.toLowerCase());
return (
resolveExactAllowedToolName(trimmed, allowedToolNames) ??
resolveStructuredAllowedToolName(trimmed, allowedToolNames)
);
}
function sanitizeReplayToolCallInputs(
@@ -723,16 +722,22 @@ function sanitizeReplayToolCallInputs(
if (
!replayToolCallHasInput(block) ||
!replayToolCallNonEmptyString(block.id) ||
!replayToolCallHasName(block, allowedToolNames)
!replayToolCallNonEmptyString(block.name)
) {
changed = true;
messageChanged = true;
continue;
}
const trimmedName = block.name.trim();
if (block.name !== trimmedName) {
nextContent.push({ ...(block as object), name: trimmedName } as typeof block);
const resolvedName = resolveReplayAllowedToolName(block.name, allowedToolNames);
if (!resolvedName) {
changed = true;
messageChanged = true;
continue;
}
if (block.name !== resolvedName) {
nextContent.push({ ...(block as object), name: resolvedName } as typeof block);
changed = true;
messageChanged = true;
continue;