Files
openclaw/scripts/github/real-behavior-proof-policy.mjs
Dallin Romney cf194419c3 ci(proof): skip real-behavior-proof gate for private maintainers (#83418)
* ci(proof): trust maintainer label for private org members

Private organization memberships report author_association=CONTRIBUTOR
on PRs, so the real-behavior-proof gate currently demands proof from
maintainers whose membership is private. The labeler workflow already
applies the 'maintainer' label via the team-membership API (which sees
private members), so treat that label as an equivalent privileged
signal in evaluateRealBehaviorProof.

* ci(proof): drop noisy comments

* ci(proof): check maintainer team membership via GitHub App token

Replace the label-based private-maintainer skip with a direct
getMembershipForUserInOrg call using a minted GitHub App token, mirroring
the pattern labeler.yml already uses for the same lookup. Removes the
race against the labeler workflow and the implicit dependency on the
'maintainer' label having landed first.

The App-token steps are continue-on-error so the gate still runs (using
the existing author_association path) when the App key secrets are
absent or both mints fail.

* ci(proof): narrow App token to members:read

ClawSweeper review #83418: actions/create-github-app-token defaults to
the full installation permission set, but the proof gate only needs the
org-members read scope used by teams.getMembershipForUserInOrg. Set
permission-members: read on both the primary and fallback mint steps.

* docs(changelog): private maintainers skip the real-behavior-proof gate
2026-05-18 09:22:59 -07:00

326 lines
11 KiB
JavaScript

export const PROOF_OVERRIDE_LABEL = "proof: override";
export const PROOF_SUPPLIED_LABEL = "proof: supplied";
export const PROOF_SUFFICIENT_LABEL = "proof: sufficient";
export const NEEDS_REAL_BEHAVIOR_PROOF_LABEL = "triage: needs-real-behavior-proof";
export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof";
export const MAINTAINER_TEAM_SLUG = "maintainer";
const privilegedAuthorAssociations = new Set(["OWNER", "MEMBER", "COLLABORATOR"]);
const requiredProofFields = [
{
key: "behavior",
names: ["Behavior or issue addressed", "Issue addressed", "Behavior addressed"],
},
{
key: "environment",
names: ["Real environment tested", "Environment tested", "Real setup tested"],
},
{
key: "steps",
names: [
"Exact steps or command run after this patch",
"Exact steps or command run after the patch",
"Exact steps or command run after fix",
"Steps run after the patch",
"Command run after the patch",
],
},
{
key: "evidence",
names: [
"Evidence after fix",
"After-fix evidence",
"Evidence link or embedded proof",
"Evidence",
],
},
{
key: "observedResult",
names: ["Observed result after fix", "Observed result after the fix", "Observed result"],
},
{
key: "notTested",
names: ["What was not tested", "Not tested"],
allowNone: true,
},
];
const allProofFieldNames = requiredProofFields
.flatMap((field) => field.names)
.concat(["Before evidence", "Before evidence optional"]);
const missingValueRegex =
/^(?:n\/?a|not applicable|tbd|todo|unknown|unsure|none provided|no evidence|not tested|untested|-|\[[^\]]*\])$/i;
const standaloneMissingProofRegex =
/^\s*(?:[-*]\s*)?(?:n\/?a|not applicable|not tested|untested|no evidence|did not test|didn't test|could not test|couldn't test)\s*\.?\s*$/im;
const mockOnlyEvidenceRegex =
/\b(?:pnpm|npm|yarn|bun)\s+(?:run\s+)?(?:test|vitest|lint|typecheck|tsgo|build|check)\b|\b(?:vitest|unit tests?|mock(?:ed|s)?|snapshots?|lint|typechecks?|tsgo|ci(?:\s+passes?)?)\b/i;
const artifactEvidenceRegex =
/!\[[^\]]*\]\([^)]+\)|github\.com\/user-attachments\/assets\/|github\.com\/[^/\s]+\/[^/\s]+\/actions\/runs\/\d+\/artifacts\/\d+|https?:\/\/\S+\.(?:png|jpe?g|gif|webp|mp4|mov|webm)\b/i;
const evidenceDescriptorRegex =
/\b(?:screenshot|screen\s*recording|recording|terminal\s+(?:capture|screenshot|transcript|output)|console\s+(?:output|log)|runtime\s+logs?|redacted\s+logs?|live\s+output|actual\s+output|observed\s+output|stdout|stderr|stack trace|trace excerpt|log excerpt|linked\s+artifacts?|artifact\s+links?)\b|```[\s\S]*\n[\s\S]*\n```/i;
const liveCommandRegex =
/\b(?:openclaw|node|docker|curl|gh|ssh|adb|xcrun|xcodebuild|open|npm\s+run|pnpm\s+openclaw)\b/i;
const mockOnlyEvidenceStripRegex =
/\b(?:pnpm|npm|yarn|bun)\s+(?:run\s+)?(?:test|vitest|lint|typecheck|tsgo|build|check)\b|\b(?:vitest|unit tests?|mock(?:ed|s)?|snapshots?|lint|typechecks?|tsgo|ci(?:\s+passes?)?|tests?|passed|passes|green|success|succeeded|with|and|the|branch|only|output|transcript|capture|fenced)\b/gi;
const evidenceDescriptorStripRegex =
/\b(?:screenshot|screen\s*recording|recording|terminal\s+(?:capture|screenshot|transcript|output)|console\s+(?:output|log)|runtime\s+logs?|redacted\s+logs?|live\s+output|actual\s+output|observed\s+output|stdout|stderr|stack trace|trace excerpt|log excerpt|linked\s+artifacts?|artifact\s+links?)\b/gi;
function escapeRegex(text) {
return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
function normalizeLineEndings(text = "") {
return text.replace(/\r\n?/g, "\n");
}
function labelNames(labels) {
return new Set(
(labels ?? [])
.map((label) => (typeof label === "string" ? label : label?.name))
.filter((label) => typeof label === "string"),
);
}
function isAutomationUser(user = {}, fallbackLogin = "") {
const login = user?.login ?? fallbackLogin;
return user?.type === "Bot" || /\[bot\]$/i.test(login) || login.startsWith("app/");
}
export function isExternalPullRequest(pullRequest) {
if (!pullRequest) {
return false;
}
if (isAutomationUser(pullRequest.user)) {
return false;
}
const authorAssociation = String(
pullRequest.author_association ?? pullRequest.authorAssociation ?? "",
).toUpperCase();
return !privilegedAuthorAssociations.has(authorAssociation);
}
export function hasProofOverride(labels) {
return labelNames(labels).has(PROOF_OVERRIDE_LABEL);
}
export async function isMaintainerTeamMember({
token,
org,
login,
teamSlug = MAINTAINER_TEAM_SLUG,
fetch = globalThis.fetch,
} = {}) {
if (!token || !org || !login) {
return false;
}
const url = `https://api.github.com/orgs/${encodeURIComponent(org)}/teams/${encodeURIComponent(teamSlug)}/memberships/${encodeURIComponent(login)}`;
const response = await fetch(url, {
headers: {
Authorization: `Bearer ${token}`,
Accept: "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
},
});
if (response.status === 404) {
return false;
}
if (!response.ok) {
throw new Error(`Team membership lookup failed: ${response.status}`);
}
const body = await response.json();
return body?.state === "active";
}
export function extractRealBehaviorProofSection(body = "") {
// Normalize CRLF → LF so regexes and section slicing see GitHub web-editor PR
// bodies the same way as locally-authored Markdown.
const normalizedBody = normalizeLineEndings(body);
const headingRegex = /^#{2,6}\s+real behavior proof\b[^\n]*$/gim;
const match = headingRegex.exec(normalizedBody);
if (!match) {
return "";
}
const sectionStart = match.index + match[0].length;
const rest = normalizedBody.slice(sectionStart);
const nextHeading = rest.match(/\n#{1,6}\s+\S/);
return (nextHeading ? rest.slice(0, nextHeading.index) : rest).trim();
}
function fieldLineRegex(name) {
return new RegExp(
`^\\s*(?:[-*]\\s*)?(?:\\*\\*)?${escapeRegex(name)}(?:\\s*\\([^)]*\\))?(?:\\*\\*)?\\s*:\\s*(.*)$`,
"i",
);
}
function isAnyProofFieldLine(line) {
return allProofFieldNames.some((name) => fieldLineRegex(name).test(line));
}
function extractFieldValue(section, field) {
const lines = normalizeLineEndings(section).split("\n");
for (let index = 0; index < lines.length; index += 1) {
const matchingName = field.names.find((name) => fieldLineRegex(name).test(lines[index]));
if (!matchingName) {
continue;
}
const match = lines[index].match(fieldLineRegex(matchingName));
const valueLines = [match?.[1] ?? ""];
for (let next = index + 1; next < lines.length; next += 1) {
const line = lines[next];
if (/^#{1,6}\s+\S/.test(line) || isAnyProofFieldLine(line)) {
break;
}
valueLines.push(line);
}
return valueLines.join("\n").trim();
}
return "";
}
function stripProofFieldLabels(section) {
return normalizeLineEndings(section)
.split("\n")
.map((line) => {
if (!isAnyProofFieldLine(line)) {
return line;
}
const matchingName = allProofFieldNames.find((name) => fieldLineRegex(name).test(line));
const match = matchingName ? line.match(fieldLineRegex(matchingName)) : null;
return match?.[1] ?? "";
})
.join("\n");
}
function isMissingValue(value, field) {
const trimmed = value.trim();
if (!trimmed) {
return true;
}
if (
field.allowNone &&
/^(?:none|nothing else|no known gaps|no additional gaps)$/i.test(trimmed)
) {
return false;
}
return missingValueRegex.test(trimmed);
}
function hasNonMockEvidencePayload(value) {
const payload = value
.replace(evidenceDescriptorStripRegex, "")
.replace(mockOnlyEvidenceStripRegex, "")
.replace(/```(?:\w+)?|```/g, "")
.replace(/[`$>:\-_.()[\]\s]+/g, "");
return Boolean(payload);
}
function result(status, reason, details = {}) {
return {
status,
reason,
applies: ["passed", "missing", "mock_only", "insufficient", "override"].includes(status),
passed: ["passed", "skipped", "override"].includes(status),
...details,
};
}
export function evaluateRealBehaviorProof({ pullRequest, labels } = {}) {
const currentLabels = labels ?? pullRequest?.labels ?? [];
if (hasProofOverride(currentLabels)) {
return result("override", `Maintainer override label ${PROOF_OVERRIDE_LABEL} is present.`);
}
if (!isExternalPullRequest(pullRequest)) {
return result("skipped", "Maintainer, collaborator, or bot PRs do not require this gate.");
}
const section = extractRealBehaviorProofSection(pullRequest?.body ?? "");
if (!section) {
return result(
"missing",
"External PRs must include a Real behavior proof section with after-fix evidence from a real setup.",
);
}
const fields = Object.fromEntries(
requiredProofFields.map((field) => [field.key, extractFieldValue(section, field)]),
);
const missingFields = requiredProofFields
.filter((field) => isMissingValue(fields[field.key] ?? "", field))
.map((field) => field.key);
if (missingFields.length > 0) {
return result(
"missing",
`Real behavior proof is missing required field content: ${missingFields.join(", ")}.`,
{ fields, missingFields },
);
}
const proofContent = stripProofFieldLabels(section);
if (standaloneMissingProofRegex.test(proofContent)) {
return result("insufficient", "Real behavior proof says the changed behavior was not tested.", {
fields,
});
}
const evidenceContent = [fields.evidence, fields.observedResult].join("\n");
const proofContentForMockDetection = [fields.evidence, fields.observedResult, fields.steps].join(
"\n",
);
const hasArtifactEvidence = artifactEvidenceRegex.test(evidenceContent);
const hasNonMockPayload = hasNonMockEvidencePayload(evidenceContent);
const hasMockEvidenceSignal = mockOnlyEvidenceRegex.test(proofContentForMockDetection);
if (hasMockEvidenceSignal && !hasArtifactEvidence && !hasNonMockPayload) {
return result(
"mock_only",
"Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental and do not count as real behavior proof.",
{ fields },
);
}
const hasRealEvidence =
hasArtifactEvidence ||
(evidenceDescriptorRegex.test(evidenceContent) && hasNonMockPayload) ||
liveCommandRegex.test(evidenceContent);
if (hasMockEvidenceSignal && !hasRealEvidence) {
return result(
"mock_only",
"Unit tests, mocks, snapshots, lint, typechecks, and CI are supplemental and do not count as real behavior proof.",
{ fields },
);
}
if (!hasRealEvidence) {
return result(
"insufficient",
"Real behavior proof must include an after-fix screenshot, recording, terminal capture, console output, redacted runtime log, linked artifact, or copied live output.",
{ fields },
);
}
return result("passed", "External PR includes after-fix real behavior proof.", { fields });
}
export function labelsForRealBehaviorProof(evaluation) {
if (evaluation.status === "passed") {
return [PROOF_SUPPLIED_LABEL];
}
if (evaluation.status === "mock_only") {
return [MOCK_ONLY_PROOF_LABEL];
}
if (evaluation.status === "missing" || evaluation.status === "insufficient") {
return [NEEDS_REAL_BEHAVIOR_PROOF_LABEL];
}
return [];
}