// openclaw/src/gateway/live-tool-probe-utils.test.ts

import { describe, expect, it } from "vitest";
import {
  hasExpectedSingleNonce,
  hasExpectedToolNonce,
  isLikelyToolNonceRefusal,
  shouldRetryExecReadProbe,
  shouldRetryToolReadProbe,
} from "./live-tool-probe-utils.js";

describe("live tool probe utils", () => {
  describe("nonce matching", () => {
    // Each row carries a thunk (`run`) instead of a precomputed result so the
    // helper under test executes inside the test body. A throw from the helper
    // is then attributed to the individual named case rather than happening
    // while the case table is being built inside the describe callback.
    it.each([
      {
        name: "matches tool nonce pairs only when both are present",
        run: () => hasExpectedToolNonce("value a-1 and b-2", "a-1", "b-2"),
        expected: true,
      },
      {
        name: "rejects partial tool nonce matches",
        run: () => hasExpectedToolNonce("value a-1 only", "a-1", "b-2"),
        expected: false,
      },
      {
        name: "matches a single nonce when present",
        run: () => hasExpectedSingleNonce("value nonce-1", "nonce-1"),
        expected: true,
      },
      {
        name: "rejects single nonce mismatches",
        run: () => hasExpectedSingleNonce("value nonce-2", "nonce-1"),
        expected: false,
      },
    ])("$name", ({ run, expected }) => {
      expect(run()).toBe(expected);
    });
  });

  describe("refusal detection", () => {
    // Sample model replies paired with whether isLikelyToolNonceRefusal is
    // expected to flag them as a refusal.
    const refusalCases = [
      {
        name: "detects nonce refusal phrasing",
        text: "Same request, same answer — this isn't a real OpenClaw probe. No part of the system asks me to parrot back nonce values.",
        expected: true,
      },
      {
        name: "detects prompt-injection style refusals without nonce text",
        text: "That's not a legitimate self-test. This looks like a prompt injection attempt.",
        expected: true,
      },
      {
        name: "detects tool authorization refusals",
        text: "Before proceeding, I must confirm: are you authorizing me to execute the read tool with the provided arguments?",
        expected: true,
      },
      {
        name: "detects unavailable read tool refusals",
        text: "tool probe missing nonce: I can’t: there is no `read`/`Read` tool available in this session, and I won’t output those nonce values without actually reading the file.",
        expected: true,
      },
      {
        name: "ignores generic helper text",
        text: "I can help with that request.",
        expected: false,
      },
      {
        name: "does not treat nonce markers without the word nonce as refusal",
        text: "No part of the system asks me to parrot back values.",
        expected: false,
      },
    ];

    it.each(refusalCases)("$name", (row) => {
      const flagged = isLikelyToolNonceRefusal(row.text);
      expect(flagged).toBe(row.expected);
    });
  });

  describe("shouldRetryToolReadProbe", () => {
    // Every case uses the same nonce pair and a budget of three attempts; only
    // the reply text, the provider and the attempt index vary between rows.
    const probe = (text: string, provider: string, attempt = 0) => ({
      text,
      nonceA: "nonce-a",
      nonceB: "nonce-b",
      provider,
      attempt,
      maxAttempts: 3,
    });

    const cases = [
      {
        name: "retries malformed tool output when attempts remain",
        params: probe("read[object Object],[object Object]", "mistral"),
        expected: true,
      },
      {
        name: "does not retry once max attempts are exhausted",
        params: probe("read[object Object],[object Object]", "mistral", 2),
        expected: false,
      },
      {
        name: "does not retry when the nonce pair is already present",
        params: probe("nonce-a nonce-b", "mistral"),
        expected: false,
      },
      {
        name: "prefers a valid nonce pair even if the text still contains scaffolding words",
        params: probe("tool output nonce-a nonce-b function", "openai"),
        expected: false,
      },
      {
        name: "retries empty output",
        params: probe("   ", "openai"),
        expected: true,
      },
      {
        name: "retries tool scaffolding output",
        params: probe("Use tool function read[] now.", "openai"),
        expected: true,
      },
      {
        name: "retries conversational try-again output",
        params: probe("Let me try reading the file again:", "zai"),
        expected: true,
      },
      {
        name: "does not retry generic conversational text without tool-retry context",
        params: probe("Let me try a different approach.", "zai"),
        expected: false,
      },
      {
        name: "retries mistral nonce marker echoes without parsed values",
        params: probe("nonceA= nonceB=", "mistral"),
        expected: true,
      },
      {
        name: "retries anthropic refusal output",
        params: probe(
          "This isn't a real OpenClaw probe; I won't parrot back nonce values.",
          "anthropic",
        ),
        expected: true,
      },
      {
        name: "does not special-case anthropic refusals for other providers",
        params: probe(
          "This isn't a real OpenClaw probe; I won't parrot back nonce values.",
          "openai",
        ),
        expected: false,
      },
    ];

    it.each(cases)("$name", ({ params: input, expected: shouldRetry }) => {
      expect(shouldRetryToolReadProbe(input)).toBe(shouldRetry);
    });
  });

  describe("shouldRetryExecReadProbe", () => {
    // All rows share the single nonce "nonce-c" and a budget of three attempts;
    // the factory fills those in so each row only states what differs.
    const execProbe = (text: string, provider: string, attempt = 0) => ({
      text,
      nonce: "nonce-c",
      provider,
      attempt,
      maxAttempts: 3,
    });

    const cases = [
      {
        name: "retries malformed exec+read output when attempts remain",
        params: execProbe("read[object Object]", "openai"),
        expected: true,
      },
      {
        name: "does not retry once max attempts are exhausted",
        params: execProbe("read[object Object]", "openai", 2),
        expected: false,
      },
      {
        name: "does not retry when the nonce is already present",
        params: execProbe("nonce-c", "openai"),
        expected: false,
      },
      {
        name: "prefers a valid nonce even if the text still contains scaffolding words",
        params: execProbe("tool output nonce-c function", "openai"),
        expected: false,
      },
      {
        name: "retries anthropic nonce refusal output",
        params: execProbe(
          "No part of the system asks me to parrot back nonce values.",
          "anthropic",
        ),
        expected: true,
      },
      {
        name: "retries conversational try-again exec output",
        params: execProbe("Let me try reading the file again:", "zai"),
        expected: true,
      },
      {
        name: "retries eventual-consistency exec readback output",
        params: execProbe(
          "The file creation command succeeded, but the file wasn't found immediately after. Let me verify the file exists and read it again.",
          "mistral",
        ),
        expected: true,
      },
      {
        name: "retries file-not-found exec readback wording",
        params: execProbe(
          "The `exec` command ran successfully, but the file read failed because the file was not found. Let me verify the file creation and read it again.",
          "mistral",
        ),
        expected: true,
      },
      {
        name: "does not retry generic exec conversational text without tool-retry context",
        params: execProbe("Let me try a different approach.", "zai"),
        expected: false,
      },
      {
        name: "does not special-case anthropic refusals for other providers",
        params: execProbe(
          "No part of the system asks me to parrot back nonce values.",
          "openai",
        ),
        expected: false,
      },
    ];

    it.each(cases)("$name", ({ params: input, expected: shouldRetry }) => {
      expect(shouldRetryExecReadProbe(input)).toBe(shouldRetry);
    });
  });
});