openclaw/test/scripts/real-behavior-proof-policy.test.ts

import { describe, expect, it, vi } from "vitest";
import {
  MOCK_ONLY_PROOF_LABEL,
  NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
  PROOF_OVERRIDE_LABEL,
  PROOF_SUPPLIED_LABEL,
  evaluateClawSweeperExactHeadProof,
  evaluateRealBehaviorProof,
  hasClawSweeperExactHeadProof,
  isMaintainerTeamMember,
  labelsForRealBehaviorProof,
} from "../../scripts/github/real-behavior-proof-policy.mjs";

/**
 * Builds a pull-request stub that looks like it came from an external contributor.
 *
 * @param body - Text used as the pull request description.
 * @param overrides - Properties layered on top of the contributor defaults;
 *   any key given here replaces the default with the same name.
 * @returns A plain object with `body`, `author_association`, `user`, and
 *   `labels`, plus whatever `overrides` supplied.
 */
function externalPr(body: string, overrides: Record<string, unknown> = {}) {
  // Rebuilt on every call so each stub owns its own `user` and `labels` values.
  const contributorDefaults = {
    body,
    author_association: "CONTRIBUTOR",
    user: { login: "external-contributor", type: "User" },
    labels: [],
  };

  return { ...contributorDefaults, ...overrides };
}

/**
 * Renders a "Real behavior proof" markdown section for use as a PR body.
 *
 * @param evidence - Text placed on the "Evidence after fix" bullet.
 * @param overrides - Replacement values keyed by field name (`behavior`,
 *   `environment`, `steps`, `evidence`, `observedResult`, `notTested`).
 *   Overrides are applied last, so an `evidence` override beats the argument.
 * @returns The heading, a blank line, and six bullets joined with `\n`.
 */
function proofBody(evidence: string, overrides: Record<string, string> = {}) {
  const values = {
    behavior: "Gateway startup no longer drops the configured Discord channel.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
    steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and the Discord channel showed ready.",
    notTested: "No known gaps.",
    ...overrides,
  };

  // Bullet label paired with its value, in the order the section lists them.
  const bullets: Array<[string, string]> = [
    ["Behavior or issue addressed", values.behavior],
    ["Real environment tested", values.environment],
    ["Exact steps or command run after this patch", values.steps],
    ["Evidence after fix", values.evidence],
    ["Observed result after fix", values.observedResult],
    ["What was not tested", values.notTested],
  ];

  const lines = ["## Real behavior proof", ""];
  for (const [label, value] of bullets) {
    lines.push(`- ${label}: ${value}`);
  }
  return lines.join("\n");
}

describe("real-behavior-proof-policy", () => {
  // Step/result overrides shared by the tests that expect a "mock_only" verdict.
  const testOnlyOverrides = { steps: "pnpm test", observedResult: "CI passes." };

  // Head commit and verdict marker shared by the ClawSweeper comment tests;
  // the marker names the same PR number and SHA as the stub below.
  const reviewedHeadSha = "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f";
  const passVerdictMarker =
    "<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->";

  /** Returns a fresh PR stub carrying the number and head SHA named in the marker. */
  const reviewedPullRequest = () => ({
    number: 83581,
    head: { sha: reviewedHeadSha },
  });

  /**
   * Evaluates an external-contributor PR with the given body, then asserts the
   * resulting status and the single label derived from that evaluation.
   */
  const expectExternalOutcome = (body: string, status: string, label: string) => {
    const outcome = evaluateRealBehaviorProof({ pullRequest: externalPr(body) });

    expect(outcome.status).toBe(status);
    expect(labelsForRealBehaviorProof(outcome)).toEqual([label]);
  };

  // Evidence values that are each expected to yield a "passed" evaluation.
  const realEvidenceSamples = [
    "![after](https://github.com/user-attachments/assets/abc123)",
    "Linked artifact: https://github.com/openclaw/openclaw/actions/runs/123456789/artifacts/987654321",
    "Redacted runtime log: gateway connected Discord channel and delivered the reply.",
    ["Terminal transcript:", "```text", "$ openclaw gateway status", "discord ready", "```"].join(
      "\n",
    ),
  ];

  it.each(realEvidenceSamples)(
    "passes external PRs with real after-fix evidence: %s",
    (evidence) => {
      expectExternalOutcome(proofBody(evidence), "passed", PROOF_SUPPLIED_LABEL);
    },
  );

  it("passes CRLF-formatted external PRs with screenshot proof", () => {
    const screenshot = "![after](https://github.com/user-attachments/assets/gateway-ready)";
    // Convert every line ending to CRLF before handing the body to the policy.
    const crlfBody = proofBody(screenshot).replace(/\n/g, "\r\n");
    const outcome = evaluateRealBehaviorProof({ pullRequest: externalPr(crlfBody) });

    expect(outcome.status).toBe("passed");
    // Parsed fields must match the values written into the body exactly.
    expect(outcome.fields).toStrictEqual({
      behavior: "Gateway startup no longer drops the configured Discord channel.",
      environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
      steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
      evidence: screenshot,
      observedResult: "The gateway stayed connected and the Discord channel showed ready.",
      notTested: "No known gaps.",
    });
    expect(labelsForRealBehaviorProof(outcome)).toEqual([PROOF_SUPPLIED_LABEL]);
  });

  it("fails external PRs without a real behavior proof section", () => {
    expectExternalOutcome(
      "## Summary\n\n- Fixed startup.",
      "missing",
      NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
    );
  });

  it("fails external PRs that say the changed behavior was not tested", () => {
    expectExternalOutcome(proofBody("not tested"), "missing", NEEDS_REAL_BEHAVIOR_PROOF_LABEL);
  });

  it("fails external PRs whose proof is only tests, mocks, snapshots, lint, typecheck, or CI", () => {
    const body = proofBody(
      "pnpm test passed and Vitest mocks cover the branch.",
      testOnlyOverrides,
    );

    expectExternalOutcome(body, "mock_only", MOCK_ONLY_PROOF_LABEL);
  });

  it("fails external PRs whose only copied output is a fenced test or CI transcript", () => {
    const fencedTranscript = ["```text", "$ pnpm test", "CI passed with Vitest mocks", "```"].join(
      "\n",
    );

    expectExternalOutcome(
      proofBody(fencedTranscript, testOnlyOverrides),
      "mock_only",
      MOCK_ONLY_PROOF_LABEL,
    );
  });

  it("fails external PRs whose terminal label only contains test or CI output", () => {
    const labelledTranscript = [
      "Terminal transcript:",
      "```text",
      "$ pnpm test",
      "CI passed with Vitest mocks",
      "```",
    ].join("\n");

    expectExternalOutcome(
      proofBody(labelledTranscript, testOnlyOverrides),
      "mock_only",
      MOCK_ONLY_PROOF_LABEL,
    );
  });

  it("passes maintainer, bot, and override cases", () => {
    // Every case uses an empty body, so only the PR metadata drives the status.
    const statusWith = (overrides: Record<string, unknown>) =>
      evaluateRealBehaviorProof({ pullRequest: externalPr("", overrides) }).status;

    expect(statusWith({ author_association: "MEMBER" })).toBe("skipped");
    expect(statusWith({ user: { login: "renovate[bot]", type: "Bot" } })).toBe("skipped");
    expect(statusWith({ labels: [{ name: PROOF_OVERRIDE_LABEL }] })).toBe("override");
  });

  it("accepts ClawSweeper pass verdict comments only for the exact PR head", () => {
    const pullRequest = reviewedPullRequest();
    const appComment = {
      user: { login: "clawsweeper[bot]", type: "Bot" },
      performed_via_github_app: { slug: "clawsweeper" },
      body: ["Codex review: passed.", passVerdictMarker].join("\n"),
    };
    const comments = [appComment];
    // Same PR, but its head now points at a commit the verdict does not name.
    const movedHeadPullRequest = {
      ...pullRequest,
      head: { sha: "d0215b2d67a45a783277fc7d2949ac4a30f63ec6" },
    };

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);
    expect(
      hasClawSweeperExactHeadProof({ pullRequest: movedHeadPullRequest, comments }),
    ).toBe(false);
  });

  it("rejects forged ClawSweeper pass verdict markers from contributor comments", () => {
    const pullRequest = reviewedPullRequest();
    // A regular user account posting the marker verbatim.
    const forgedComment = {
      user: { login: "external-contributor", type: "User" },
      body: passVerdictMarker,
    };
    const comments = [forgedComment];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(false);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false);
  });

  it("rejects bot-shaped ClawSweeper pass verdict markers without the GitHub App source", () => {
    const pullRequest = reviewedPullRequest();
    // Bot login and type, but no `performed_via_github_app` on the comment.
    const botShapedComment = {
      user: { login: "clawsweeper[bot]", type: "Bot" },
      body: passVerdictMarker,
    };
    const comments = [botShapedComment];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(false);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false);
  });
});

describe("isMaintainerTeamMember", () => {
  /**
   * Minimal stand-in for a fetch response: `ok` is true for 2xx statuses and
   * `json()` resolves to the supplied body (an empty object by default).
   */
  const jsonResponse = (status: number, body: unknown = {}) => ({
    ok: status >= 200 && status < 300,
    status,
    json: () => Promise.resolve(body),
  });

  /** Creates a mock fetch that resolves to a canned response with the given status and body. */
  const fetchResolving = (status: number, body?: unknown) =>
    vi.fn().mockResolvedValue(jsonResponse(status, body));

  it("returns true for active members", async () => {
    const fetchMock = fetchResolving(200, { state: "active" });

    const isMember = await isMaintainerTeamMember({
      token: "tok",
      org: "openclaw",
      login: "private-maint",
      fetch: fetchMock,
    });

    expect(isMember).toBe(true);
    // The lookup must target the maintainer team membership URL with bearer auth.
    const expectedHeaders = expect.objectContaining({
      Authorization: "Bearer tok",
      Accept: "application/vnd.github+json",
    });
    expect(fetchMock).toHaveBeenCalledWith(
      "https://api.github.com/orgs/openclaw/teams/maintainer/memberships/private-maint",
      expect.objectContaining({ headers: expectedHeaders }),
    );
  });

  it("returns false for non-active membership states", async () => {
    const fetchMock = fetchResolving(200, { state: "pending" });

    const isMember = await isMaintainerTeamMember({
      token: "t",
      org: "o",
      login: "u",
      fetch: fetchMock,
    });

    expect(isMember).toBe(false);
  });

  it("returns false when GitHub returns 404", async () => {
    const fetchMock = fetchResolving(404);

    const isMember = await isMaintainerTeamMember({
      token: "t",
      org: "o",
      login: "u",
      fetch: fetchMock,
    });

    expect(isMember).toBe(false);
  });

  it("returns false when the token, org, or login is missing", async () => {
    const fetchMock = vi.fn();

    const withoutToken = await isMaintainerTeamMember({ org: "o", login: "u", fetch: fetchMock });
    expect(withoutToken).toBe(false);

    const withoutOrg = await isMaintainerTeamMember({ token: "t", login: "u", fetch: fetchMock });
    expect(withoutOrg).toBe(false);

    const withoutLogin = await isMaintainerTeamMember({ token: "t", org: "o", fetch: fetchMock });
    expect(withoutLogin).toBe(false);

    // None of the incomplete calls may reach the network layer.
    expect(fetchMock).not.toHaveBeenCalled();
  });

  it("throws on unexpected HTTP errors so the caller can warn and fall back", async () => {
    const fetchMock = fetchResolving(500);

    const lookup = isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch: fetchMock });

    await expect(lookup).rejects.toThrow(/500/);
  });
});