From 33c42c8d3b65dad62a7317345356aa2cb3bd5056 Mon Sep 17 00:00:00 2001
From: pashpashpash <nik@vault77.ai>
Date: Tue, 5 May 2026 16:10:17 -0700
Subject: [PATCH] chore: add positive proof labels (#78117)

---
 CHANGELOG.md                                  |   1 +
 scripts/github/barnacle-auto-response.mjs     |  37 ++++-
 scripts/github/real-behavior-proof-policy.mjs |  18 ++-
 test/scripts/barnacle-auto-response.test.ts   | 128 +++++++++++++++++-
 .../real-behavior-proof-policy.test.ts        |  22 ++-
 5 files changed, 195 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8b8153ca856..0f1966a5e82 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
 
 ### Changes
 
+- PR triage: mark external pull requests with `proof: supplied` when Barnacle finds structured real behavior proof, keep stale negative proof labels in sync across CRLF-edited PR bodies, and let ClawSweeper own the stronger `proof: sufficient` judgement.
 - Google Meet/Voice Call: make Twilio dial-in joins speak through the realtime Gemini voice bridge with paced audio streaming, backpressure-aware buffering, barge-in queue clearing, same-session agent consult routing, duplicate-consult coalescing, and no TwiML fallback during realtime speech, giving Meet participants a much snappier OpenClaw voice agent. (#77064) Thanks @scoootscooob.
 - Voice Call/realtime: add opt-in OpenClaw agent voice context capsules and consult-cadence guidance so Gemini/OpenAI realtime calls can sound like the configured agent without consulting the full agent on every ordinary turn. Thanks @scoootscooob.
 - Docker/Gateway: harden the gateway container by dropping `NET_RAW` and `NET_ADMIN` capabilities and enabling `no-new-privileges` in the bundled `docker-compose.yml`. Thanks @VintageAyu.
diff --git a/scripts/github/barnacle-auto-response.mjs b/scripts/github/barnacle-auto-response.mjs
index 90e056014c2..69941a0120f 100644
--- a/scripts/github/barnacle-auto-response.mjs
+++ b/scripts/github/barnacle-auto-response.mjs
@@ -4,6 +4,8 @@ import {
   MOCK_ONLY_PROOF_LABEL,
   NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
   PROOF_OVERRIDE_LABEL,
+  PROOF_SUFFICIENT_LABEL,
+  PROOF_SUPPLIED_LABEL,
   evaluateRealBehaviorProof,
   labelsForRealBehaviorProof,
 } from "./real-behavior-proof-policy.mjs";
@@ -150,6 +152,14 @@ export const managedLabelSpecs = {
     color: "C5DEF5",
     description: "Candidate: PR proof only shows tests, mocks, snapshots, lint, typecheck, or CI.",
   },
+  [PROOF_SUPPLIED_LABEL]: {
+    color: "C2E0C6",
+    description: "External PR includes structured after-fix real behavior proof.",
+  },
+  [PROOF_SUFFICIENT_LABEL]: {
+    color: "0E8A16",
+    description: "ClawSweeper judged the real behavior proof convincing.",
+  },
   [PROOF_OVERRIDE_LABEL]: {
     color: "C2E0C6",
     description: "Maintainer override for the external PR real behavior proof gate.",
@@ -218,7 +228,11 @@ const maintainerAuthorLabel = "maintainer";
 const privilegedAuthorAssociations = new Set(["OWNER", "MEMBER", "COLLABORATOR"]);
 const privilegedRepositoryRoles = new Set(["admin", "maintain", "write"]);
 const candidateLabelValues = Object.values(candidateLabels);
-const proofCandidateLabelValues = [NEEDS_REAL_BEHAVIOR_PROOF_LABEL, MOCK_ONLY_PROOF_LABEL];
+const structuralProofLabelValues = [
+  NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
+  MOCK_ONLY_PROOF_LABEL,
+  PROOF_SUPPLIED_LABEL,
+];
 const noisyPrMessage =
   "Closing this PR because it looks dirty (too many unrelated or unexpected changes). This usually happens when a branch picks up unrelated commits or a merge went sideways. Please recreate the PR from a clean branch.";
 
@@ -759,8 +773,21 @@ async function addMissingLabels(github, context, core, issueNumber, labels, labe
   core.info(`Added candidate labels to #${issueNumber}: ${missingLabels.join(", ")}`);
 }
 
+function shouldRemoveProofSufficientLabel(context, proofEvaluation) {
+  if (proofEvaluation.status !== "passed") {
+    return true;
+  }
+  return ["edited", "synchronize"].includes(context.payload.action);
+}
+
 async function applyPullRequestCandidateLabels(github, context, core, pullRequest, labelSet) {
   const files = await listPullRequestFiles(github, context, pullRequest);
+  const proofEvaluation = evaluateRealBehaviorProof({
+    pullRequest: {
+      ...pullRequest,
+      labels: [...labelSet].map((name) => ({ name })),
+    },
+  });
   const classifiedLabels = classifyPullRequestCandidateLabels(
     {
       ...pullRequest,
@@ -768,9 +795,15 @@ async function applyPullRequestCandidateLabels(github, context, core, pullReques
     },
     files,
   );
-  const staleProofLabels = proofCandidateLabelValues.filter(
+  const staleProofLabels = structuralProofLabelValues.filter(
     (label) => labelSet.has(label) && !classifiedLabels.includes(label),
   );
+  if (
+    labelSet.has(PROOF_SUFFICIENT_LABEL) &&
+    shouldRemoveProofSufficientLabel(context, proofEvaluation)
+  ) {
+    staleProofLabels.push(PROOF_SUFFICIENT_LABEL);
+  }
   await removeLabels(github, context, pullRequest.number, staleProofLabels, labelSet);
   await addMissingLabels(github, context, core, pullRequest.number, classifiedLabels, labelSet);
 }
diff --git a/scripts/github/real-behavior-proof-policy.mjs b/scripts/github/real-behavior-proof-policy.mjs
index a3645808626..b8e8e6d9af6 100644
--- a/scripts/github/real-behavior-proof-policy.mjs
+++ b/scripts/github/real-behavior-proof-policy.mjs
@@ -1,4 +1,6 @@
 export const PROOF_OVERRIDE_LABEL = "proof: override";
+export const PROOF_SUPPLIED_LABEL = "proof: supplied";
+export const PROOF_SUFFICIENT_LABEL = "proof: sufficient";
 export const NEEDS_REAL_BEHAVIOR_PROOF_LABEL = "triage: needs-real-behavior-proof";
 export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof";
 
@@ -75,6 +77,10 @@ function escapeRegex(text) {
   return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 }
 
+function normalizeLineEndings(text = "") {
+  return text.replace(/\r\n?/g, "\n");
+}
+
 function labelNames(labels) {
   return new Set(
     (labels ?? [])
@@ -106,13 +112,14 @@ export function hasProofOverride(labels) {
 }
 
 export function extractRealBehaviorProofSection(body = "") {
+  const normalizedBody = normalizeLineEndings(body);
   const headingRegex = /^#{2,6}\s+real behavior proof\b[^\n]*$/gim;
-  const match = headingRegex.exec(body);
+  const match = headingRegex.exec(normalizedBody);
   if (!match) {
     return "";
   }
   const sectionStart = match.index + match[0].length;
-  const rest = body.slice(sectionStart);
+  const rest = normalizedBody.slice(sectionStart);
   const nextHeading = rest.match(/\n#{1,6}\s+\S/);
   return (nextHeading ? rest.slice(0, nextHeading.index) : rest).trim();
 }
@@ -129,7 +136,7 @@ function isAnyProofFieldLine(line) {
 }
 
 function extractFieldValue(section, field) {
-  const lines = section.split("\n");
+  const lines = normalizeLineEndings(section).split("\n");
   for (let index = 0; index < lines.length; index += 1) {
     const matchingName = field.names.find((name) => fieldLineRegex(name).test(lines[index]));
     if (!matchingName) {
@@ -151,7 +158,7 @@ function extractFieldValue(section, field) {
 }
 
 function stripProofFieldLabels(section) {
-  return section
+  return normalizeLineEndings(section)
     .split("\n")
     .map((line) => {
       if (!isAnyProofFieldLine(line)) {
@@ -274,6 +281,9 @@ export function evaluateRealBehaviorProof({ pullRequest, labels } = {}) {
 }
 
 export function labelsForRealBehaviorProof(evaluation) {
+  if (evaluation.status === "passed") {
+    return [PROOF_SUPPLIED_LABEL];
+  }
   if (evaluation.status === "mock_only") {
     return [MOCK_ONLY_PROOF_LABEL];
   }
diff --git a/test/scripts/barnacle-auto-response.test.ts b/test/scripts/barnacle-auto-response.test.ts
index 1b27fc27b68..fa888bf58cb 100644
--- a/test/scripts/barnacle-auto-response.test.ts
+++ b/test/scripts/barnacle-auto-response.test.ts
@@ -5,6 +5,10 @@ import {
   managedLabelSpecs,
   runBarnacleAutoResponse,
 } from "../../scripts/github/barnacle-auto-response.mjs";
+import {
+  PROOF_SUFFICIENT_LABEL,
+  PROOF_SUPPLIED_LABEL,
+} from "../../scripts/github/real-behavior-proof-policy.mjs";
 
 const blankTemplateBody = [
   "## Summary",
@@ -227,6 +231,8 @@ describe("barnacle-auto-response", () => {
     expect(managedLabelSpecs["r: false-positive"].description).toContain("false positive");
     expect(managedLabelSpecs["r: third-party-extension"].description).toContain("ClawHub");
     expect(managedLabelSpecs["r: too-many-prs"].description).toContain("twenty active PRs");
+    expect(managedLabelSpecs[PROOF_SUPPLIED_LABEL].color).toBe("C2E0C6");
+    expect(managedLabelSpecs[PROOF_SUFFICIENT_LABEL].color).toBe("0E8A16");
 
     for (const label of Object.values(candidateLabels)) {
       expect(managedLabelSpecs[label]).toBeDefined();
@@ -283,7 +289,7 @@ describe("barnacle-auto-response", () => {
     expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
   });
 
-  it("does not label external PRs that include real behavior proof", () => {
+  it("labels external PRs that include real behavior proof as supplied", () => {
     const labels = classifyPullRequestCandidateLabels(
       pr(
         "Fix gateway startup",
@@ -292,6 +298,23 @@ describe("barnacle-auto-response", () => {
       [file("src/gateway/server.ts")],
     );
 
+    expect(labels).toContain(PROOF_SUPPLIED_LABEL);
+    expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
+    expect(labels).not.toContain(candidateLabels.mockOnlyProof);
+  });
+
+  it("labels CRLF-formatted external PRs with screenshot proof as supplied", () => {
+    const labels = classifyPullRequestCandidateLabels(
+      pr(
+        "Fix gateway startup",
+        realBehaviorProofBody(
+          "![after](https://github.com/user-attachments/assets/gateway-ready)",
+        ).replace(/\n/g, "\r\n"),
+      ),
+      [file("src/gateway/server.ts")],
+    );
+
+    expect(labels).toContain(PROOF_SUPPLIED_LABEL);
     expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
     expect(labels).not.toContain(candidateLabels.mockOnlyProof);
   });
@@ -662,18 +685,115 @@ describe("barnacle-auto-response", () => {
 
     await runBarnacleAutoResponse({
       github,
-      context: barnacleContext({}, [candidateLabels.needsRealBehaviorProof, "proof: override"]),
+      context: barnacleContext({}, [
+        candidateLabels.needsRealBehaviorProof,
+        candidateLabels.mockOnlyProof,
+        PROOF_SUPPLIED_LABEL,
+        PROOF_SUFFICIENT_LABEL,
+        "proof: override",
+      ]),
       core: {
         info: () => undefined,
       },
     });
 
-    expect(calls.removeLabel).toContainEqual(
-      expect.objectContaining({ name: candidateLabels.needsRealBehaviorProof }),
+    expect(calls.removeLabel.map((call) => call.name)).toEqual(
+      expect.arrayContaining([
+        candidateLabels.needsRealBehaviorProof,
+        candidateLabels.mockOnlyProof,
+        PROOF_SUPPLIED_LABEL,
+        PROOF_SUFFICIENT_LABEL,
+      ]),
     );
     expect(calls.update).toEqual([]);
   });
 
+  it("removes stale negative proof labels and adds supplied when proof is present", async () => {
+    const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);
+
+    await runBarnacleAutoResponse({
+      github,
+      context: barnacleContext(
+        {
+          body: realBehaviorProofBody(
+            "![after](https://github.com/user-attachments/assets/gateway-ready)",
+          ),
+        },
+        [candidateLabels.needsRealBehaviorProof, candidateLabels.mockOnlyProof],
+      ),
+      core: {
+        info: () => undefined,
+      },
+    });
+
+    expect(calls.removeLabel.map((call) => call.name)).toEqual(
+      expect.arrayContaining([
+        candidateLabels.needsRealBehaviorProof,
+        candidateLabels.mockOnlyProof,
+      ]),
+    );
+    expect(calls.addLabels).toContainEqual(
+      expect.objectContaining({
+        labels: expect.arrayContaining([PROOF_SUPPLIED_LABEL]),
+      }),
+    );
+  });
+
+  it.each(["edited", "synchronize"])(
+    "removes stale sufficient proof label after PR %s events",
+    async (action) => {
+      const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);
+
+      await runBarnacleAutoResponse({
+        github,
+        context: barnacleContext(
+          {
+            body: realBehaviorProofBody(
+              "![after](https://github.com/user-attachments/assets/gateway-ready)",
+            ),
+          },
+          [PROOF_SUPPLIED_LABEL, PROOF_SUFFICIENT_LABEL],
+          { action },
+        ),
+        core: {
+          info: () => undefined,
+        },
+      });
+
+      expect(calls.removeLabel).toContainEqual(
+        expect.objectContaining({ name: PROOF_SUFFICIENT_LABEL }),
+      );
+    },
+  );
+
+  it("preserves ClawSweeper's sufficient proof label on ordinary label events", async () => {
+    const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);
+
+    await runBarnacleAutoResponse({
+      github,
+      context: barnacleContext(
+        {
+          body: realBehaviorProofBody(
+            "![after](https://github.com/user-attachments/assets/gateway-ready)",
+          ),
+        },
+        [PROOF_SUPPLIED_LABEL, PROOF_SUFFICIENT_LABEL],
+        {
+          action: "labeled",
+          label: { name: PROOF_SUFFICIENT_LABEL },
+          sender: { login: "openclaw-clawsweeper[bot]", type: "Bot" },
+        },
+      ),
+      core: {
+        info: () => undefined,
+      },
+    });
+
+    expect(calls.removeLabel).not.toContainEqual(
+      expect.objectContaining({ name: PROOF_SUFFICIENT_LABEL }),
+    );
+  });
+
   it("actions manually applied candidate labels", async () => {
     const { calls, github } = barnacleGithub([file("extensions/example/openclaw.plugin.json")]);
 
diff --git a/test/scripts/real-behavior-proof-policy.test.ts b/test/scripts/real-behavior-proof-policy.test.ts
index 43cdfbc54e1..25127d0d1e6 100644
--- a/test/scripts/real-behavior-proof-policy.test.ts
+++ b/test/scripts/real-behavior-proof-policy.test.ts
@@ -3,6 +3,7 @@ import {
   MOCK_ONLY_PROOF_LABEL,
   NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
   PROOF_OVERRIDE_LABEL,
+  PROOF_SUPPLIED_LABEL,
   evaluateRealBehaviorProof,
   labelsForRealBehaviorProof,
 } from "../../scripts/github/real-behavior-proof-policy.mjs";
@@ -56,7 +57,26 @@ describe("real-behavior-proof-policy", () => {
     });
 
     expect(evaluation.status).toBe("passed");
-    expect(labelsForRealBehaviorProof(evaluation)).toEqual([]);
+    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
+  });
+
+  it("passes CRLF-formatted external PRs with screenshot proof", () => {
+    const evaluation = evaluateRealBehaviorProof({
+      pullRequest: externalPr(
+        proofBody("![after](https://github.com/user-attachments/assets/gateway-ready)").replace(
+          /\n/g,
+          "\r\n",
+        ),
+      ),
+    });
+
+    expect(evaluation.status).toBe("passed");
+    expect(evaluation.fields).toMatchObject({
+      behavior: "Gateway startup no longer drops the configured Discord channel.",
+      evidence: "![after](https://github.com/user-attachments/assets/gateway-ready)",
+      observedResult: "The gateway stayed connected and the Discord channel showed ready.",
+    });
+    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
   });
 
   it("fails external PRs without a real behavior proof section", () => {