fix(ci): honor exact-head proof verdicts (#83688)

This commit is contained in:
Tak Hoffman
2026-05-18 11:39:30 -05:00
committed by GitHub
parent 9dc7bd4d05
commit e4fba78d81
6 changed files with 170 additions and 8 deletions

View File

@@ -18,6 +18,7 @@ jobs:
name: Real behavior proof
permissions:
contents: read
issues: read
pull-requests: read
runs-on: ubuntu-24.04
steps:

View File

@@ -7,6 +7,7 @@ import {
PROOF_SUFFICIENT_LABEL,
PROOF_SUPPLIED_LABEL,
evaluateRealBehaviorProof,
hasClawSweeperExactHeadProof,
labelsForRealBehaviorProof,
} from "./real-behavior-proof-policy.mjs";
@@ -767,6 +768,15 @@ async function listPullRequestFiles(github, context, pullRequest) {
});
}
/**
 * Collect the comments of an issue or pull request through `github.paginate`.
 *
 * @param github Client exposing `paginate` and `rest.issues.listComments`.
 * @param context Workflow context; `context.repo` supplies `owner` and `repo`.
 * @param issueNumber Number of the issue or pull request to read comments from.
 * @returns Whatever `github.paginate` resolves to for the comment listing.
 */
async function listIssueComments(github, context, issueNumber) {
  const { owner, repo } = context.repo;
  // Request the largest page size used elsewhere in this file's listings.
  const query = { owner, repo, issue_number: issueNumber, per_page: 100 };
  return github.paginate(github.rest.issues.listComments, query);
}
async function addMissingLabels(github, context, core, issueNumber, labels, labelSet) {
const missingLabels = labels.filter((label) => !labelSet.has(label));
if (missingLabels.length === 0) {
@@ -784,7 +794,10 @@ async function addMissingLabels(github, context, core, issueNumber, labels, labe
core.info(`Added candidate labels to #${issueNumber}: ${missingLabels.join(", ")}`);
}
function shouldRemoveProofSufficientLabel(context, proofEvaluation) {
function shouldRemoveProofSufficientLabel(context, proofEvaluation, hasExactHeadClawSweeperProof) {
if (hasExactHeadClawSweeperProof) {
return false;
}
if (proofEvaluation.status !== "passed") {
return true;
}
@@ -793,6 +806,12 @@ function shouldRemoveProofSufficientLabel(context, proofEvaluation) {
async function applyPullRequestCandidateLabels(github, context, core, pullRequest, labelSet) {
const files = await listPullRequestFiles(github, context, pullRequest);
const hasExactHeadClawSweeperProof =
labelSet.has(PROOF_SUFFICIENT_LABEL) &&
hasClawSweeperExactHeadProof({
pullRequest,
comments: await listIssueComments(github, context, pullRequest.number),
});
const proofEvaluation = evaluateRealBehaviorProof({
pullRequest: {
...pullRequest,
@@ -811,7 +830,7 @@ async function applyPullRequestCandidateLabels(github, context, core, pullReques
);
if (
labelSet.has(PROOF_SUFFICIENT_LABEL) &&
shouldRemoveProofSufficientLabel(context, proofEvaluation)
shouldRemoveProofSufficientLabel(context, proofEvaluation, hasExactHeadClawSweeperProof)
) {
staleProofLabels.push(PROOF_SUFFICIENT_LABEL);
}

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env node
import { readFileSync } from "node:fs";
import {
evaluateClawSweeperExactHeadProof,
evaluateRealBehaviorProof,
isMaintainerTeamMember,
} from "./real-behavior-proof-policy.mjs";
@@ -26,12 +27,12 @@ if (!pullRequest) {
process.exit(0);
}
const token = process.env.GH_APP_TOKEN;
const appToken = process.env.GH_APP_TOKEN;
const org = event.repository?.owner?.login;
const authorLogin = pullRequest.user?.login;
if (token && org && authorLogin) {
if (appToken && org && authorLogin) {
try {
if (await isMaintainerTeamMember({ token, org, login: authorLogin })) {
if (await isMaintainerTeamMember({ token: appToken, org, login: authorLogin })) {
console.log(
`PR author @${authorLogin} is an active member of the ${org}/maintainer team; skipping real behavior proof gate.`,
);
@@ -50,6 +51,44 @@ if (evaluation.passed) {
process.exit(0);
}
// Fallback when the PR body itself did not pass: look through the PR's comments for a
// ClawSweeper pass verdict that names this PR and its current head.
// The app token is preferred; the workflow's GITHUB_TOKEN is used when it is absent.
const token = appToken || process.env.GITHUB_TOKEN;
const repository = process.env.GITHUB_REPOSITORY;
if (token && repository && pullRequest.number) {
  const [owner, repo] = repository.split("/");
  const requestHeaders = {
    Accept: "application/vnd.github+json",
    Authorization: `Bearer ${token}`,
    "X-GitHub-Api-Version": "2022-11-28",
  };
  const comments = [];
  // At most 10 pages of 100 comments are read; later comments are not inspected.
  let page = 1;
  while (page <= 10) {
    const url = new URL(
      `https://api.github.com/repos/${owner}/${repo}/issues/${pullRequest.number}/comments`,
    );
    url.searchParams.set("per_page", "100");
    url.searchParams.set("page", String(page));
    const response = await fetch(url, { headers: requestHeaders });
    if (!response.ok) {
      // A failed lookup aborts the script instead of falling through to the proof error below.
      throw new Error(`Failed to fetch PR comments for proof verdicts: ${response.status}`);
    }
    const pageComments = await response.json();
    comments.push(...pageComments);
    // A short page means there is nothing further to fetch.
    if (pageComments.length < 100) {
      break;
    }
    page += 1;
  }
  const { passed, reason } = evaluateClawSweeperExactHeadProof({ pullRequest, comments });
  if (passed) {
    console.log(reason);
    process.exit(0);
  }
}
// No accepted proof was found: emit a workflow error annotation and fail the job.
const message = `${evaluation.reason} Add after-fix evidence from a real OpenClaw setup in the PR body. Screenshots, recordings, terminal screenshots, console output, redacted runtime logs, linked artifacts, or copied live output count. Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental only. A maintainer can apply proof: override when appropriate.`;
console.error(`::error title=Real behavior proof required::${escapeCommandValue(message)}`);
process.exit(1);

View File

@@ -5,6 +5,8 @@ export const NEEDS_REAL_BEHAVIOR_PROOF_LABEL = "triage: needs-real-behavior-proo
export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof";
export const MAINTAINER_TEAM_SLUG = "maintainer";
export const CLAWSWEEPER_PROOF_VERDICT_STATUS = "clawsweeper_exact_head_pass";
const privilegedAuthorAssociations = new Set(["OWNER", "MEMBER", "COLLABORATOR"]);
const requiredProofFields = [
@@ -230,11 +232,47 @@ function result(status, reason, details = {}) {
status,
reason,
applies: ["passed", "missing", "mock_only", "insufficient", "override"].includes(status),
passed: ["passed", "skipped", "override"].includes(status),
passed: ["passed", "skipped", "override", CLAWSWEEPER_PROOF_VERDICT_STATUS].includes(status),
...details,
};
}
/**
 * Read a `name=value` field out of a verdict marker string.
 *
 * The field name is matched case-insensitively at a word boundary. The value is the run of
 * characters up to the next whitespace or `>`, so a value written directly against the closing
 * `-->` keeps the trailing dashes.
 *
 * @param marker Marker text to search.
 * @param name Field name to look for.
 * @returns The captured value, or an empty string when the field is absent.
 */
function extractMarkerField(marker, name) {
  const fieldPattern = new RegExp(`\\b${escapeRegex(name)}=([^\\s>]+)`, "i");
  const [, value = ""] = marker.match(fieldPattern) ?? [];
  return value;
}
/**
 * Report whether any comment carries a ClawSweeper pass-verdict marker for this exact PR head.
 *
 * A marker is an HTML comment starting with `clawsweeper-verdict:pass`. It counts only when its
 * `item` field equals the PR number and its `sha` field equals the 40-hex head SHA
 * (compared case-insensitively).
 *
 * NOTE(review): this function does not look at who wrote a comment unless `trustedAuthorLogins`
 * is supplied, so by default a matching marker posted by any commenter is honored. Callers that
 * feed it unfiltered PR comments should pass the login(s) of the account that posts verdicts.
 *
 * @param {object} [options]
 * @param {object} [options.pullRequest] PR payload; reads `number` and `head.sha` (or `head_sha`).
 * @param {Array<object>} [options.comments] Comments to scan; reads `body` and, when an allow-list
 *   is given, `user.login`.
 * @param {Iterable<string>} [options.trustedAuthorLogins] Optional allow-list (array or Set) of
 *   comment author logins, matched case-insensitively. When omitted, comments from any author are
 *   accepted; when given but empty, no comment is accepted.
 * @returns {boolean} True when a matching marker is found, false otherwise.
 */
export function hasClawSweeperExactHeadProof({
  pullRequest,
  comments = [],
  trustedAuthorLogins,
} = {}) {
  const pullNumber = String(pullRequest?.number ?? "");
  const headSha = String(pullRequest?.head?.sha ?? pullRequest?.head_sha ?? "").toLowerCase();
  // Without a PR number and a full-length SHA there is nothing a marker could be matched against.
  if (!pullNumber || !/^[0-9a-f]{40}$/.test(headSha)) {
    return false;
  }
  const trustedLogins =
    trustedAuthorLogins == null
      ? null
      : new Set(Array.from(trustedAuthorLogins, (login) => String(login).toLowerCase()));
  // `comments ?? []` also tolerates an explicit null, which the parameter default does not cover.
  for (const comment of comments ?? []) {
    if (trustedLogins) {
      const authorLogin = String(comment?.user?.login ?? "").toLowerCase();
      if (!trustedLogins.has(authorLogin)) {
        continue;
      }
    }
    const body = String(comment?.body ?? "");
    const markers = body.match(/<!--\s*clawsweeper-verdict:pass\b[\s\S]*?-->/gi) ?? [];
    for (const marker of markers) {
      const item = extractMarkerField(marker, "item");
      const sha = extractMarkerField(marker, "sha").toLowerCase();
      if (item === pullNumber && sha === headSha) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Turn the exact-head verdict lookup into an evaluation result.
 *
 * @param {object} [options]
 * @param {object} [options.pullRequest] PR payload forwarded to the verdict lookup.
 * @param {Array<object>} [options.comments] Comments forwarded to the verdict lookup.
 * @returns A `result(...)` object whose status is `CLAWSWEEPER_PROOF_VERDICT_STATUS` when a
 *   matching verdict exists and `"insufficient"` otherwise.
 */
export function evaluateClawSweeperExactHeadProof({ pullRequest, comments = [] } = {}) {
  const verdictFound = hasClawSweeperExactHeadProof({ pullRequest, comments });
  return verdictFound
    ? result(
        CLAWSWEEPER_PROOF_VERDICT_STATUS,
        "ClawSweeper accepted real behavior proof for the exact PR head.",
      )
    : result("insufficient", "No exact-head ClawSweeper proof verdict was found.");
}
export function evaluateRealBehaviorProof({ pullRequest, labels } = {}) {
const currentLabels = labels ?? pullRequest?.labels ?? [];
if (hasProofOverride(currentLabels)) {

View File

@@ -135,6 +135,7 @@ function barnacleGithub(
maintainerLogins?: string[];
removeLabelNotFound?: string[];
repositoryRoles?: Record<string, string>;
comments?: Array<{ body: string }>;
} = {},
) {
const maintainerLogins = new Set(
@@ -154,8 +155,10 @@ function barnacleGithub(
removeLabel: [] as Array<{ issue_number: number; name: string }>,
update: [] as Array<{ issue_number: number; state?: string }>,
};
const listFiles = async () => files;
const listComments = async () => options.comments ?? [];
const github = {
paginate: async () => files,
paginate: async (fn: unknown) => (fn === listComments ? (options.comments ?? []) : files),
rest: {
issues: {
addLabels: async (params: { issue_number: number; labels: string[] }) => {
@@ -173,6 +176,7 @@ function barnacleGithub(
managedLabelSpecs[params.name as keyof typeof managedLabelSpecs]?.description ?? "",
},
}),
listComments,
lock: async (params: { issue_number: number; lock_reason?: string }) => {
calls.lock.push(params);
},
@@ -190,7 +194,7 @@ function barnacleGithub(
updateLabel: async () => undefined,
},
pulls: {
listFiles: async () => files,
listFiles,
},
repos: {
getCollaboratorPermissionLevel: async ({ username }: { username: string }) => {
@@ -784,6 +788,36 @@ describe("barnacle-auto-response", () => {
},
);
it("preserves sufficient proof on synchronize when ClawSweeper passed the exact head", async () => {
  // The verdict marker and the PR payload share this SHA, so the verdict targets the current head.
  const headSha = "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f";
  // item=123 presumably matches the PR number produced by barnacleContext — TODO confirm.
  const verdictComment = {
    body: `<!-- clawsweeper-verdict:pass item=123 sha=${headSha} confidence=high -->`,
  };
  const { calls, github } = barnacleGithub([file("src/gateway/server.ts")], {
    comments: [verdictComment],
  });
  const context = barnacleContext(
    { body: blankTemplateBody, head: { sha: headSha } },
    [PROOF_SUFFICIENT_LABEL],
    { action: "synchronize" },
  );
  const core = { info: () => undefined };

  await runBarnacleAutoResponse({ github, context, core });

  // The sufficient-proof label must survive even though the PR body is the blank template.
  const sufficientLabelRemoval = expect.objectContaining({ name: PROOF_SUFFICIENT_LABEL });
  expect(calls.removeLabel).not.toContainEqual(sufficientLabelRemoval);
});
it("preserves ClawSweeper's sufficient proof label on ordinary label events", async () => {
const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);

View File

@@ -4,7 +4,9 @@ import {
NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
PROOF_OVERRIDE_LABEL,
PROOF_SUPPLIED_LABEL,
evaluateClawSweeperExactHeadProof,
evaluateRealBehaviorProof,
hasClawSweeperExactHeadProof,
isMaintainerTeamMember,
labelsForRealBehaviorProof,
} from "../../scripts/github/real-behavior-proof-policy.mjs";
@@ -174,6 +176,35 @@ describe("real-behavior-proof-policy", () => {
}).status,
).toBe("override");
});
it("accepts ClawSweeper pass verdict comments only for the exact PR head", () => {
  const headSha = "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f";
  const pullRequest = {
    number: 83581,
    head: {
      sha: headSha,
    },
  };
  const comments = [
    {
      body: [
        "Codex review: passed.",
        `<!-- clawsweeper-verdict:pass item=83581 sha=${headSha} confidence=high -->`,
      ].join("\n"),
    },
  ];

  // Marker names this PR and this head: both entry points accept it.
  expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
  expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);

  // The head moved after the verdict was posted: the old verdict no longer applies.
  const movedHead = {
    ...pullRequest,
    head: { sha: "d0215b2d67a45a783277fc7d2949ac4a30f63ec6" },
  };
  expect(hasClawSweeperExactHeadProof({ pullRequest: movedHead, comments })).toBe(false);
  expect(evaluateClawSweeperExactHeadProof({ pullRequest: movedHead, comments }).passed).toBe(
    false,
  );

  // Same head SHA but the marker names a different PR number: it must not count either.
  const otherPullRequest = { ...pullRequest, number: 83582 };
  expect(hasClawSweeperExactHeadProof({ pullRequest: otherPullRequest, comments })).toBe(false);
});
});
describe("isMaintainerTeamMember", () => {