fix: canonicalize replay tool names

Regeneration-Prompt: | Address the PR review findings that the replay-only tool-call validator is stricter than the normal dispatch path: it incorrectly rejects allowlisted tool names with punctuation and strips mixed-case allowlisted names on replay. Keep the replay sanitizer scoped to live provider replays, but resolve replayed tool names through the same canonical exact/structured/case-insensitive allowlist helpers used elsewhere in attempt.ts. Add regression tests for a dotted allowlisted tool name and a mixed-case allowlisted tool name so those tool calls survive replay and keep their canonical names.
2026-04-01 04:11:03 +00:00 · 2026-03-18 13:42:14 -07:00
parent 42c9a1e6cf
commit d96aaf6d59
2 changed files with 71 additions and 17 deletions
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -875,10 +875,59 @@ describe("wrapStreamFnSanitizeMalformedToolCalls", () => {
      name?: string;
      input?: { attachments?: Array<{ content?: string }> };
    };
-    expect(toolCall.name).toBe("SESSIONS_SPAWN");
+    expect(toolCall.name).toBe("sessions_spawn");
    expect(toolCall.input?.attachments?.[0]?.content).toBe(attachmentContent);
  });

+  it("preserves allowlisted tool names that contain punctuation", async () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [{ type: "toolUse", id: "call_1", name: "admin.export", input: { scope: "all" } }],
+      },
+    ];
+    const baseFn = vi.fn((_model, _context) =>
+      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+    );
+
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(
+      baseFn as never,
+      new Set(["admin.export"]),
+    );
+    const stream = wrapped({} as never, { messages } as never, {} as never) as
+      | FakeWrappedStream
+      | Promise<FakeWrappedStream>;
+    await Promise.resolve(stream);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    const seenContext = baseFn.mock.calls[0]?.[1] as { messages: unknown[] };
+    expect(seenContext.messages).toBe(messages);
+  });
+
+  it("canonicalizes mixed-case allowlisted tool names on replay", async () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "call_1", name: "readfile", arguments: {} }],
+      },
+    ];
+    const baseFn = vi.fn((_model, _context) =>
+      createFakeStream({ events: [], resultMessage: { role: "assistant", content: [] } }),
+    );
+
+    const wrapped = wrapStreamFnSanitizeMalformedToolCalls(baseFn as never, new Set(["ReadFile"]));
+    const stream = wrapped({} as never, { messages } as never, {} as never) as
+      | FakeWrappedStream
+      | Promise<FakeWrappedStream>;
+    await Promise.resolve(stream);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    const seenContext = baseFn.mock.calls[0]?.[1] as {
+      messages: Array<{ content?: Array<{ name?: string }> }>;
+    };
+    expect(seenContext.messages[0]?.content?.[0]?.name).toBe("ReadFile");
+  });
+
  it("drops orphaned tool results after replay sanitization removes a tool-call turn", async () => {
    const messages = [
      {
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -649,7 +649,6 @@ function isToolCallBlockType(type: unknown): boolean {
 }

 const REPLAY_TOOL_CALL_NAME_MAX_CHARS = 64;
-const REPLAY_TOOL_CALL_NAME_RE = /^[A-Za-z0-9_-]+$/;

 type ReplayToolCallBlock = {
  type?: unknown;
@@ -677,21 +676,21 @@ function replayToolCallNonEmptyString(value: unknown): value is string {
  return typeof value === "string" && value.trim().length > 0;
 }

-function replayToolCallHasName(
-  block: ReplayToolCallBlock,
+function resolveReplayAllowedToolName(
+  rawName: string,
  allowedToolNames?: Set<string>,
-): block is ReplayToolCallBlock & { name: string } {
-  if (!replayToolCallNonEmptyString(block.name)) {
-    return false;
-  }
-  const trimmed = block.name.trim();
-  if (trimmed.length > REPLAY_TOOL_CALL_NAME_MAX_CHARS || !REPLAY_TOOL_CALL_NAME_RE.test(trimmed)) {
-    return false;
+): string | null {
+  const trimmed = rawName.trim();
+  if (!trimmed || trimmed.length > REPLAY_TOOL_CALL_NAME_MAX_CHARS || /\s/.test(trimmed)) {
+    return null;
  }
  if (!allowedToolNames || allowedToolNames.size === 0) {
-    return true;
+    return trimmed;
  }
-  return allowedToolNames.has(trimmed.toLowerCase());
+  return (
+    resolveExactAllowedToolName(trimmed, allowedToolNames) ??
+    resolveStructuredAllowedToolName(trimmed, allowedToolNames)
+  );
 }

 function sanitizeReplayToolCallInputs(
@@ -723,16 +722,22 @@ function sanitizeReplayToolCallInputs(
      if (
        !replayToolCallHasInput(block) ||
        !replayToolCallNonEmptyString(block.id) ||
-        !replayToolCallHasName(block, allowedToolNames)
+        !replayToolCallNonEmptyString(block.name)
      ) {
        changed = true;
        messageChanged = true;
        continue;
      }

-      const trimmedName = block.name.trim();
-      if (block.name !== trimmedName) {
-        nextContent.push({ ...(block as object), name: trimmedName } as typeof block);
+      const resolvedName = resolveReplayAllowedToolName(block.name, allowedToolNames);
+      if (!resolvedName) {
+        changed = true;
+        messageChanged = true;
+        continue;
+      }
+
+      if (block.name !== resolvedName) {
+        nextContent.push({ ...(block as object), name: resolvedName } as typeof block);
        changed = true;
        messageChanged = true;
        continue;