From 33c42c8d3b65dad62a7317345356aa2cb3bd5056 Mon Sep 17 00:00:00 2001 From: pashpashpash Date: Tue, 5 May 2026 16:10:17 -0700 Subject: [PATCH] chore: add positive proof labels (#78117) --- CHANGELOG.md | 1 + scripts/github/barnacle-auto-response.mjs | 37 ++++- scripts/github/real-behavior-proof-policy.mjs | 18 ++- test/scripts/barnacle-auto-response.test.ts | 128 +++++++++++++++++- .../real-behavior-proof-policy.test.ts | 22 ++- 5 files changed, 195 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b8153ca856..0f1966a5e82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Changes +- PR triage: mark external pull requests with `proof: supplied` when Barnacle finds structured real behavior proof, keep stale negative proof labels in sync across CRLF-edited PR bodies, and let ClawSweeper own the stronger `proof: sufficient` judgement. - Google Meet/Voice Call: make Twilio dial-in joins speak through the realtime Gemini voice bridge with paced audio streaming, backpressure-aware buffering, barge-in queue clearing, same-session agent consult routing, duplicate-consult coalescing, and no TwiML fallback during realtime speech, giving Meet participants a much snappier OpenClaw voice agent. (#77064) Thanks @scoootscooob. - Voice Call/realtime: add opt-in OpenClaw agent voice context capsules and consult-cadence guidance so Gemini/OpenAI realtime calls can sound like the configured agent without consulting the full agent on every ordinary turn. Thanks @scoootscooob. - Docker/Gateway: harden the gateway container by dropping `NET_RAW` and `NET_ADMIN` capabilities and enabling `no-new-privileges` in the bundled `docker-compose.yml`. Thanks @VintageAyu. diff --git a/scripts/github/barnacle-auto-response.mjs b/scripts/github/barnacle-auto-response.mjs index 90e056014c2..69941a0120f 100644 --- a/scripts/github/barnacle-auto-response.mjs +++ b/scripts/github/barnacle-auto-response.mjs @@ -4,6 +4,8 @@ import { MOCK_ONLY_PROOF_LABEL, NEEDS_REAL_BEHAVIOR_PROOF_LABEL, PROOF_OVERRIDE_LABEL, + PROOF_SUFFICIENT_LABEL, + PROOF_SUPPLIED_LABEL, evaluateRealBehaviorProof, labelsForRealBehaviorProof, } from "./real-behavior-proof-policy.mjs"; @@ -150,6 +152,14 @@ export const managedLabelSpecs = { color: "C5DEF5", description: "Candidate: PR proof only shows tests, mocks, snapshots, lint, typecheck, or CI.", }, + [PROOF_SUPPLIED_LABEL]: { + color: "C2E0C6", + description: "External PR includes structured after-fix real behavior proof.", + }, + [PROOF_SUFFICIENT_LABEL]: { + color: "0E8A16", + description: "ClawSweeper judged the real behavior proof convincing.", + }, [PROOF_OVERRIDE_LABEL]: { color: "C2E0C6", description: "Maintainer override for the external PR real behavior proof gate.", @@ -218,7 +228,11 @@ const maintainerAuthorLabel = "maintainer"; const privilegedAuthorAssociations = new Set(["OWNER", "MEMBER", "COLLABORATOR"]); const privilegedRepositoryRoles = new Set(["admin", "maintain", "write"]); const candidateLabelValues = Object.values(candidateLabels); -const proofCandidateLabelValues = [NEEDS_REAL_BEHAVIOR_PROOF_LABEL, MOCK_ONLY_PROOF_LABEL]; +const structuralProofLabelValues = [ + NEEDS_REAL_BEHAVIOR_PROOF_LABEL, + MOCK_ONLY_PROOF_LABEL, + PROOF_SUPPLIED_LABEL, +]; const noisyPrMessage = "Closing this PR because it looks dirty (too many unrelated or unexpected changes). This usually happens when a branch picks up unrelated commits or a merge went sideways. Please recreate the PR from a clean branch."; @@ -759,8 +773,21 @@ async function addMissingLabels(github, context, core, issueNumber, labels, labe core.info(`Added candidate labels to #${issueNumber}: ${missingLabels.join(", ")}`); } +function shouldRemoveProofSufficientLabel(context, proofEvaluation) { + if (proofEvaluation.status !== "passed") { + return true; + } + return ["edited", "synchronize"].includes(context.payload.action); +} + async function applyPullRequestCandidateLabels(github, context, core, pullRequest, labelSet) { const files = await listPullRequestFiles(github, context, pullRequest); + const proofEvaluation = evaluateRealBehaviorProof({ + pullRequest: { + ...pullRequest, + labels: [...labelSet].map((name) => ({ name })), + }, + }); const classifiedLabels = classifyPullRequestCandidateLabels( { ...pullRequest, @@ -768,9 +795,15 @@ async function applyPullRequestCandidateLabels(github, context, core, pullReques }, files, ); - const staleProofLabels = proofCandidateLabelValues.filter( + const staleProofLabels = structuralProofLabelValues.filter( (label) => labelSet.has(label) && !classifiedLabels.includes(label), ); + if ( + labelSet.has(PROOF_SUFFICIENT_LABEL) && + shouldRemoveProofSufficientLabel(context, proofEvaluation) + ) { + staleProofLabels.push(PROOF_SUFFICIENT_LABEL); + } await removeLabels(github, context, pullRequest.number, staleProofLabels, labelSet); await addMissingLabels(github, context, core, pullRequest.number, classifiedLabels, labelSet); } diff --git a/scripts/github/real-behavior-proof-policy.mjs b/scripts/github/real-behavior-proof-policy.mjs index a3645808626..b8e8e6d9af6 100644 --- a/scripts/github/real-behavior-proof-policy.mjs +++ b/scripts/github/real-behavior-proof-policy.mjs @@ -1,4 +1,6 @@ export const PROOF_OVERRIDE_LABEL = "proof: override"; +export const PROOF_SUPPLIED_LABEL = "proof: supplied"; +export const PROOF_SUFFICIENT_LABEL = "proof: sufficient"; export const NEEDS_REAL_BEHAVIOR_PROOF_LABEL = "triage: needs-real-behavior-proof"; export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof"; @@ -75,6 +77,10 @@ function escapeRegex(text) { return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } +function normalizeLineEndings(text = "") { + return text.replace(/\r\n?/g, "\n"); +} + function labelNames(labels) { return new Set( (labels ?? []) @@ -106,13 +112,14 @@ export function hasProofOverride(labels) { } export function extractRealBehaviorProofSection(body = "") { + const normalizedBody = normalizeLineEndings(body); const headingRegex = /^#{2,6}\s+real behavior proof\b[^\n]*$/gim; - const match = headingRegex.exec(body); + const match = headingRegex.exec(normalizedBody); if (!match) { return ""; } const sectionStart = match.index + match[0].length; - const rest = body.slice(sectionStart); + const rest = normalizedBody.slice(sectionStart); const nextHeading = rest.match(/\n#{1,6}\s+\S/); return (nextHeading ? rest.slice(0, nextHeading.index) : rest).trim(); } @@ -129,7 +136,7 @@ function isAnyProofFieldLine(line) { } function extractFieldValue(section, field) { - const lines = section.split("\n"); + const lines = normalizeLineEndings(section).split("\n"); for (let index = 0; index < lines.length; index += 1) { const matchingName = field.names.find((name) => fieldLineRegex(name).test(lines[index])); if (!matchingName) { @@ -151,7 +158,7 @@ function extractFieldValue(section, field) { } function stripProofFieldLabels(section) { - return section + return normalizeLineEndings(section) .split("\n") .map((line) => { if (!isAnyProofFieldLine(line)) { @@ -274,6 +281,9 @@ export function evaluateRealBehaviorProof({ pullRequest, labels } = {}) { } export function labelsForRealBehaviorProof(evaluation) { + if (evaluation.status === "passed") { + return [PROOF_SUPPLIED_LABEL]; + } if (evaluation.status === "mock_only") { return [MOCK_ONLY_PROOF_LABEL]; } diff --git a/test/scripts/barnacle-auto-response.test.ts b/test/scripts/barnacle-auto-response.test.ts index 1b27fc27b68..fa888bf58cb 100644 --- a/test/scripts/barnacle-auto-response.test.ts +++ b/test/scripts/barnacle-auto-response.test.ts @@ -5,6 +5,10 @@ import { managedLabelSpecs, runBarnacleAutoResponse, } from "../../scripts/github/barnacle-auto-response.mjs"; +import { + PROOF_SUFFICIENT_LABEL, + PROOF_SUPPLIED_LABEL, +} from "../../scripts/github/real-behavior-proof-policy.mjs"; const blankTemplateBody = [ "## Summary", @@ -227,6 +231,8 @@ describe("barnacle-auto-response", () => { expect(managedLabelSpecs["r: false-positive"].description).toContain("false positive"); expect(managedLabelSpecs["r: third-party-extension"].description).toContain("ClawHub"); expect(managedLabelSpecs["r: too-many-prs"].description).toContain("twenty active PRs"); + expect(managedLabelSpecs[PROOF_SUPPLIED_LABEL].color).toBe("C2E0C6"); + expect(managedLabelSpecs[PROOF_SUFFICIENT_LABEL].color).toBe("0E8A16"); for (const label of Object.values(candidateLabels)) { expect(managedLabelSpecs[label]).toBeDefined(); @@ -283,7 +289,7 @@ describe("barnacle-auto-response", () => { expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof); }); - it("does not label external PRs that include real behavior proof", () => { + it("labels external PRs that include real behavior proof as supplied", () => { const labels = classifyPullRequestCandidateLabels( pr( "Fix gateway startup", @@ -292,6 +298,23 @@ describe("barnacle-auto-response", () => { [file("src/gateway/server.ts")], ); + expect(labels).toContain(PROOF_SUPPLIED_LABEL); + expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof); + expect(labels).not.toContain(candidateLabels.mockOnlyProof); + }); + + it("labels CRLF-formatted external PRs with screenshot proof as supplied", () => { + const labels = classifyPullRequestCandidateLabels( + pr( + "Fix gateway startup", + realBehaviorProofBody( + "![after](https://github.com/user-attachments/assets/gateway-ready)", + ).replace(/\n/g, "\r\n"), + ), + [file("src/gateway/server.ts")], + ); + + expect(labels).toContain(PROOF_SUPPLIED_LABEL); expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof); expect(labels).not.toContain(candidateLabels.mockOnlyProof); }); @@ -662,18 +685,115 @@ describe("barnacle-auto-response", () => { await runBarnacleAutoResponse({ github, - context: barnacleContext({}, [candidateLabels.needsRealBehaviorProof, "proof: override"]), + context: barnacleContext({}, [ + candidateLabels.needsRealBehaviorProof, + candidateLabels.mockOnlyProof, + PROOF_SUPPLIED_LABEL, + PROOF_SUFFICIENT_LABEL, + "proof: override", + ]), core: { info: () => undefined, }, }); - expect(calls.removeLabel).toContainEqual( - expect.objectContaining({ name: candidateLabels.needsRealBehaviorProof }), + expect(calls.removeLabel.map((call) => call.name)).toEqual( + expect.arrayContaining([ + candidateLabels.needsRealBehaviorProof, + candidateLabels.mockOnlyProof, + PROOF_SUPPLIED_LABEL, + PROOF_SUFFICIENT_LABEL, + ]), ); expect(calls.update).toEqual([]); }); + it("removes stale negative proof labels and adds supplied when proof is present", async () => { + const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]); + + await runBarnacleAutoResponse({ + github, + context: barnacleContext( + { + body: realBehaviorProofBody( + "![after](https://github.com/user-attachments/assets/gateway-ready)", + ), + }, + [candidateLabels.needsRealBehaviorProof, candidateLabels.mockOnlyProof], + ), + core: { + info: () => undefined, + }, + }); + + expect(calls.removeLabel.map((call) => call.name)).toEqual( + expect.arrayContaining([ + candidateLabels.needsRealBehaviorProof, + candidateLabels.mockOnlyProof, + ]), + ); + expect(calls.addLabels).toContainEqual( + expect.objectContaining({ + labels: expect.arrayContaining([PROOF_SUPPLIED_LABEL]), + }), + ); + }); + + it.each(["edited", "synchronize"])( + "removes stale sufficient proof label after PR %s events", + async (action) => { + const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]); + + await runBarnacleAutoResponse({ + github, + context: barnacleContext( + { + body: realBehaviorProofBody( + "![after](https://github.com/user-attachments/assets/gateway-ready)", + ), + }, + [PROOF_SUPPLIED_LABEL, PROOF_SUFFICIENT_LABEL], + { action }, + ), + core: { + info: () => undefined, + }, + }); + + expect(calls.removeLabel).toContainEqual( + expect.objectContaining({ name: PROOF_SUFFICIENT_LABEL }), + ); + }, + ); + + it("preserves ClawSweeper's sufficient proof label on ordinary label events", async () => { + const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]); + + await runBarnacleAutoResponse({ + github, + context: barnacleContext( + { + body: realBehaviorProofBody( + "![after](https://github.com/user-attachments/assets/gateway-ready)", + ), + }, + [PROOF_SUPPLIED_LABEL, PROOF_SUFFICIENT_LABEL], + { + action: "labeled", + label: { name: PROOF_SUFFICIENT_LABEL }, + sender: { login: "openclaw-clawsweeper[bot]", type: "Bot" }, + }, + ), + core: { + info: () => undefined, + }, + }); + + expect(calls.removeLabel).not.toContainEqual( + expect.objectContaining({ name: PROOF_SUFFICIENT_LABEL }), + ); + }); + it("actions manually applied candidate labels", async () => { const { calls, github } = barnacleGithub([file("extensions/example/openclaw.plugin.json")]); diff --git a/test/scripts/real-behavior-proof-policy.test.ts b/test/scripts/real-behavior-proof-policy.test.ts index 43cdfbc54e1..25127d0d1e6 100644 --- a/test/scripts/real-behavior-proof-policy.test.ts +++ b/test/scripts/real-behavior-proof-policy.test.ts @@ -3,6 +3,7 @@ import { MOCK_ONLY_PROOF_LABEL, NEEDS_REAL_BEHAVIOR_PROOF_LABEL, PROOF_OVERRIDE_LABEL, + PROOF_SUPPLIED_LABEL, evaluateRealBehaviorProof, labelsForRealBehaviorProof, } from "../../scripts/github/real-behavior-proof-policy.mjs"; @@ -56,7 +57,26 @@ describe("real-behavior-proof-policy", () => { }); expect(evaluation.status).toBe("passed"); - expect(labelsForRealBehaviorProof(evaluation)).toEqual([]); + expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]); + }); + + it("passes CRLF-formatted external PRs with screenshot proof", () => { + const evaluation = evaluateRealBehaviorProof({ + pullRequest: externalPr( + proofBody("![after](https://github.com/user-attachments/assets/gateway-ready)").replace( + /\n/g, + "\r\n", + ), + ), + }); + + expect(evaluation.status).toBe("passed"); + expect(evaluation.fields).toMatchObject({ + behavior: "Gateway startup no longer drops the configured Discord channel.", + evidence: "![after](https://github.com/user-attachments/assets/gateway-ready)", + observedResult: "The gateway stayed connected and the Discord channel showed ready.", + }); + expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]); }); it("fails external PRs without a real behavior proof section", () => {