Files
openclaw/test/scripts/real-behavior-proof-policy.test.ts
Tak Hoffman 06a39015f2 fix(ci): authenticate proof verdict markers (#83692)
Summary:
- The branch restricts exact-head ClawSweeper proof markers to GitHub App-authored comments, adds read-only issue-comment token fallback for the proof workflow, and adds focused regression tests plus a changelog entry.
- Reproducibility: yes. Source inspection of current main shows any issue comment body with a matching `clawsweeper-verdict:pass` marker and head SHA is accepted without author/App authentication; the PR adds focused negative tests for forged comments.

Automerge notes:
- PR branch already contained follow-up commit before automerge: fix(ci): authenticate proof verdict markers

Validation:
- ClawSweeper review passed for head f4c375eaa7.
- Required merge gates passed before the squash merge.

Prepared head SHA: f4c375eaa7
Review: https://github.com/openclaw/openclaw/pull/83692#issuecomment-4479843682

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: takhoffman
Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
2026-05-18 17:42:10 +00:00

314 lines
10 KiB
TypeScript

import { describe, expect, it, vi } from "vitest";
import {
MOCK_ONLY_PROOF_LABEL,
NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
PROOF_OVERRIDE_LABEL,
PROOF_SUPPLIED_LABEL,
evaluateClawSweeperExactHeadProof,
evaluateRealBehaviorProof,
hasClawSweeperExactHeadProof,
isMaintainerTeamMember,
labelsForRealBehaviorProof,
} from "../../scripts/github/real-behavior-proof-policy.mjs";
/**
 * Builds a pull-request fixture authored by an outside contributor.
 *
 * @param body - Pull request description text.
 * @param overrides - Properties merged over the defaults; a key given here
 *   replaces the default of the same name (shallow merge).
 * @returns A fresh fixture object on every call.
 */
function externalPr(body: string, overrides: Record<string, unknown> = {}) {
  const contributorDefaults = {
    body,
    author_association: "CONTRIBUTOR",
    user: { login: "external-contributor", type: "User" },
    labels: [],
  };
  // Overrides are spread last so they win over the defaults.
  return { ...contributorDefaults, ...overrides };
}
/**
 * Renders a "Real behavior proof" markdown section for use as a PR body.
 *
 * @param evidence - Text placed on the "Evidence after fix" bullet.
 * @param overrides - Replacement values keyed by field name (`behavior`,
 *   `environment`, `steps`, `evidence`, `observedResult`, `notTested`).
 * @returns The heading, a blank line, and one bullet per field, joined with "\n".
 */
function proofBody(evidence: string, overrides: Record<string, string> = {}) {
  const answers = {
    behavior: "Gateway startup no longer drops the configured Discord channel.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
    steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and the Discord channel showed ready.",
    notTested: "No known gaps.",
    // Spread last so callers can replace any field, including `evidence`.
    ...overrides,
  };
  const bullets: Array<[string, string]> = [
    ["Behavior or issue addressed", answers.behavior],
    ["Real environment tested", answers.environment],
    ["Exact steps or command run after this patch", answers.steps],
    ["Evidence after fix", answers.evidence],
    ["Observed result after fix", answers.observedResult],
    ["What was not tested", answers.notTested],
  ];
  const rendered = ["## Real behavior proof", ""];
  for (const [label, answer] of bullets) {
    rendered.push(`- ${label}: ${answer}`);
  }
  return rendered.join("\n");
}
describe("real-behavior-proof-policy", () => {
  // Field overrides that describe nothing but a test run / CI result.
  const testOnlyOverrides = { steps: "pnpm test", observedResult: "CI passes." };
  // Lines of a fenced transcript that only shows a test run.
  const fencedTestTranscript = ["```text", "$ pnpm test", "CI passed with Vitest mocks", "```"];

  // Head commit and verdict marker shared by the ClawSweeper verdict tests.
  const verdictHeadSha = "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f";
  const verdictMarker = `<!-- clawsweeper-verdict:pass item=83581 sha=${verdictHeadSha} confidence=high -->`;

  /** Fresh pull request fixture whose head matches the verdict marker. */
  const verdictPullRequest = () => ({
    number: 83581,
    head: { sha: verdictHeadSha },
  });

  /**
   * Evaluates an external contributor PR with the given body, then asserts the
   * resulting status and the single label derived from that evaluation.
   */
  const expectExternalOutcome = (body: string, status: string, label: string) => {
    const evaluation = evaluateRealBehaviorProof({ pullRequest: externalPr(body) });
    expect(evaluation.status).toBe(status);
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([label]);
  };

  /** Asserts that test-only evidence is classified as mock-only proof. */
  const expectMockOnly = (evidence: string) => {
    expectExternalOutcome(proofBody(evidence, testOnlyOverrides), "mock_only", MOCK_ONLY_PROOF_LABEL);
  };

  /**
   * Asserts that a bare verdict marker posted by `user`, with no
   * `performed_via_github_app` field on the comment, is not accepted as proof.
   */
  const expectMarkerRejected = (user: { login: string; type: string }) => {
    const pullRequest = verdictPullRequest();
    const comments = [{ user, body: verdictMarker }];
    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(false);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false);
  };

  it.each([
    "![after](https://github.com/user-attachments/assets/abc123)",
    "Linked artifact: https://github.com/openclaw/openclaw/actions/runs/123456789/artifacts/987654321",
    "Redacted runtime log: gateway connected Discord channel and delivered the reply.",
    ["Terminal transcript:", "```text", "$ openclaw gateway status", "discord ready", "```"].join(
      "\n",
    ),
  ])("passes external PRs with real after-fix evidence: %s", (evidence: string) => {
    expectExternalOutcome(proofBody(evidence), "passed", PROOF_SUPPLIED_LABEL);
  });

  it("passes CRLF-formatted external PRs with screenshot proof", () => {
    const screenshot = "![after](https://github.com/user-attachments/assets/gateway-ready)";
    // Convert every LF in the generated body to CRLF.
    const crlfBody = proofBody(screenshot).split("\n").join("\r\n");
    const evaluation = evaluateRealBehaviorProof({ pullRequest: externalPr(crlfBody) });

    expect(evaluation.status).toBe("passed");
    // Parsed fields must come back without any trailing carriage returns.
    expect(evaluation.fields).toStrictEqual({
      behavior: "Gateway startup no longer drops the configured Discord channel.",
      environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
      steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
      evidence: screenshot,
      observedResult: "The gateway stayed connected and the Discord channel showed ready.",
      notTested: "No known gaps.",
    });
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
  });

  it("fails external PRs without a real behavior proof section", () => {
    expectExternalOutcome("## Summary\n\n- Fixed startup.", "missing", NEEDS_REAL_BEHAVIOR_PROOF_LABEL);
  });

  it("fails external PRs that say the changed behavior was not tested", () => {
    expectExternalOutcome(proofBody("not tested"), "missing", NEEDS_REAL_BEHAVIOR_PROOF_LABEL);
  });

  it("fails external PRs whose proof is only tests, mocks, snapshots, lint, typecheck, or CI", () => {
    expectMockOnly("pnpm test passed and Vitest mocks cover the branch.");
  });

  it("fails external PRs whose only copied output is a fenced test or CI transcript", () => {
    expectMockOnly(fencedTestTranscript.join("\n"));
  });

  it("fails external PRs whose terminal label only contains test or CI output", () => {
    expectMockOnly(["Terminal transcript:", ...fencedTestTranscript].join("\n"));
  });

  it("passes maintainer, bot, and override cases", () => {
    // Author association MEMBER: the check is skipped.
    const maintainerPr = externalPr("", { author_association: "MEMBER" });
    expect(evaluateRealBehaviorProof({ pullRequest: maintainerPr }).status).toBe("skipped");

    // Bot-authored PR: the check is skipped.
    const botPr = externalPr("", {
      user: { login: "renovate[bot]", type: "Bot" },
    });
    expect(evaluateRealBehaviorProof({ pullRequest: botPr }).status).toBe("skipped");

    // Override label present: reported as an override.
    const overriddenPr = externalPr("", { labels: [{ name: PROOF_OVERRIDE_LABEL }] });
    expect(evaluateRealBehaviorProof({ pullRequest: overriddenPr }).status).toBe("override");
  });

  it("accepts ClawSweeper pass verdict comments only for the exact PR head", () => {
    const pullRequest = verdictPullRequest();
    const comments = [
      {
        user: { login: "clawsweeper[bot]", type: "Bot" },
        performed_via_github_app: { slug: "clawsweeper" },
        body: ["Codex review: passed.", verdictMarker].join("\n"),
      },
    ];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);

    // Same comments, but the PR head no longer matches the SHA in the marker.
    const movedHeadPullRequest = {
      ...pullRequest,
      head: { sha: "d0215b2d67a45a783277fc7d2949ac4a30f63ec6" },
    };
    expect(hasClawSweeperExactHeadProof({ pullRequest: movedHeadPullRequest, comments })).toBe(
      false,
    );
  });

  it("rejects forged ClawSweeper pass verdict markers from contributor comments", () => {
    expectMarkerRejected({ login: "external-contributor", type: "User" });
  });

  it("rejects bot-shaped ClawSweeper pass verdict markers without the GitHub App source", () => {
    expectMarkerRejected({ login: "clawsweeper[bot]", type: "Bot" });
  });
});
describe("isMaintainerTeamMember", () => {
  /**
   * Minimal stand-in for a fetch response: `ok` is true for 2xx statuses and
   * `json()` resolves to the supplied body (an empty object by default).
   */
  const jsonResponse = (status: number, body: unknown = {}) => ({
    ok: status >= 200 && status < 300,
    status,
    json: async () => body,
  });

  it("returns true for active members", async () => {
    const fetchMock = vi.fn().mockResolvedValue(jsonResponse(200, { state: "active" }));

    const isMember = await isMaintainerTeamMember({
      token: "tok",
      org: "openclaw",
      login: "private-maint",
      fetch: fetchMock,
    });

    expect(isMember).toBe(true);
    // The request must hit the team-membership URL with bearer auth headers.
    const expectedHeaders = expect.objectContaining({
      Authorization: "Bearer tok",
      Accept: "application/vnd.github+json",
    });
    expect(fetchMock).toHaveBeenCalledWith(
      "https://api.github.com/orgs/openclaw/teams/maintainer/memberships/private-maint",
      expect.objectContaining({ headers: expectedHeaders }),
    );
  });

  it("returns false for non-active membership states", async () => {
    const fetchMock = vi.fn().mockResolvedValue(jsonResponse(200, { state: "pending" }));
    await expect(
      isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch: fetchMock }),
    ).resolves.toBe(false);
  });

  it("returns false when GitHub returns 404", async () => {
    const fetchMock = vi.fn().mockResolvedValue(jsonResponse(404));
    await expect(
      isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch: fetchMock }),
    ).resolves.toBe(false);
  });

  it("returns false when the token, org, or login is missing", async () => {
    const fetchMock = vi.fn();
    // Each entry omits exactly one of the required identifiers.
    const incompleteArgs = [
      { org: "o", login: "u" },
      { token: "t", login: "u" },
      { token: "t", org: "o" },
    ];
    for (const partial of incompleteArgs) {
      await expect(isMaintainerTeamMember({ ...partial, fetch: fetchMock })).resolves.toBe(false);
    }
    // No request may be issued for any of the incomplete inputs.
    expect(fetchMock).not.toHaveBeenCalled();
  });

  it("throws on unexpected HTTP errors so the caller can warn and fall back", async () => {
    const fetchMock = vi.fn().mockResolvedValue(jsonResponse(500));
    const pending = isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch: fetchMock });
    await expect(pending).rejects.toThrow(/500/);
  });
});