diff --git a/scripts/github/barnacle-auto-response.mjs b/scripts/github/barnacle-auto-response.mjs index d36fdf249ec..d132b114a23 100644 --- a/scripts/github/barnacle-auto-response.mjs +++ b/scripts/github/barnacle-auto-response.mjs @@ -833,6 +833,15 @@ function shouldRemoveProofSufficientLabel( return true; } +/* Proof labels that are filtered out of classifiedLabels while an existing PROOF_SUFFICIENT_LABEL is being preserved (see shouldPreserveClawSweeperProofJudgment). */const negativeProofLabels = new Set([NEEDS_REAL_BEHAVIOR_PROOF_LABEL, MOCK_ONLY_PROOF_LABEL]); + +/** Returns true when labelSet already contains PROOF_SUFFICIENT_LABEL and context.payload.action is neither edited nor synchronize. NOTE(review): presumably those two actions signal changed PR content that should be re-judged - confirm. */function shouldPreserveClawSweeperProofJudgment(context, labelSet) { + return ( + labelSet.has(PROOF_SUFFICIENT_LABEL) && + !["edited", "synchronize"].includes(context.payload.action) + ); +} + async function applyPullRequestCandidateLabels(github, context, core, pullRequest, labelSet) { const files = await listPullRequestFiles(github, context, pullRequest); const hasExactHeadClawSweeperProof = @@ -854,8 +863,11 @@ async function applyPullRequestCandidateLabels(github, context, core, pullReques }, files, ); + /* Labels to add in this run: classifiedLabels minus negativeProofLabels when the preserve check passes, otherwise classifiedLabels unchanged. NOTE(review): decided before the later branch that can push PROOF_SUFFICIENT_LABEL onto staleProofLabels - confirm one run cannot strip both the sufficient and the negative labels. */const candidateLabelsToApply = shouldPreserveClawSweeperProofJudgment(context, labelSet) + ? 
classifiedLabels.filter((label) => !negativeProofLabels.has(label)) + : classifiedLabels; /* Structural proof labels currently in labelSet that are absent from candidateLabelsToApply; passed to removeLabels below. */const staleProofLabels = structuralProofLabelValues.filter( - (label) => labelSet.has(label) && !classifiedLabels.includes(label), + (label) => labelSet.has(label) && !candidateLabelsToApply.includes(label), ); if ( labelSet.has(PROOF_SUFFICIENT_LABEL) && @@ -870,7 +882,14 @@ async function applyPullRequestCandidateLabels(github, context, core, pullReques staleProofLabels.push(PROOF_SUFFICIENT_LABEL); } await removeLabels(github, context, pullRequest.number, staleProofLabels, labelSet); - await addMissingLabels(github, context, core, pullRequest.number, classifiedLabels, labelSet); + await addMissingLabels( + github, + context, + core, + pullRequest.number, + candidateLabelsToApply, + labelSet, + ); } function isAutomationUser(user, fallbackLogin = "") { diff --git a/test/scripts/barnacle-auto-response.test.ts b/test/scripts/barnacle-auto-response.test.ts index 8d1976ebbba..6634a134beb 100644 --- a/test/scripts/barnacle-auto-response.test.ts +++ b/test/scripts/barnacle-auto-response.test.ts @@ -1011,6 +1011,57 @@ describe("barnacle-auto-response", () => { }); expect(calls.removeLabel).toEqual([]); + expect(calls.addLabels.flatMap((call) => call.labels)).not.toContain( + candidateLabels.needsRealBehaviorProof, + ); + }); + + /* unlabeled event, only PROOF_SUFFICIENT_LABEL present: expects no removals and no add of needsRealBehaviorProof. */it("does not re-add negative proof labels while sufficient proof is present", async () => { + const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]); + + await runBarnacleAutoResponse({ + github, + context: barnacleContext({}, [PROOF_SUFFICIENT_LABEL], { + action: "unlabeled", + label: { name: candidateLabels.needsRealBehaviorProof }, + sender: { login: "maintainer", type: "User" }, + }), + core: { + info: () => undefined, + }, + }); + + expect(calls.removeLabel).toEqual([]); + expect(calls.addLabels.flatMap((call) => call.labels)).not.toContain( + candidateLabels.needsRealBehaviorProof, + ); + }); + + /* labeled event with PROOF_SUFFICIENT_LABEL and needsRealBehaviorProof both present: expects needsRealBehaviorProof to be removed and not re-added. */it("removes negative proof labels 
when sufficient proof is already present", async () => { + const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]); + + await runBarnacleAutoResponse({ + github, + context: barnacleContext( + {}, + [PROOF_SUFFICIENT_LABEL, candidateLabels.needsRealBehaviorProof], + { + action: "labeled", + label: { name: "status: ready for maintainer look" }, + sender: { login: "openclaw-clawsweeper[bot]", type: "Bot" }, + }, + ), + core: { + info: () => undefined, + }, + }); + + expect(calls.removeLabel).toEqual([ + expectedRemoveLabel(123, candidateLabels.needsRealBehaviorProof), + ]); + expect(calls.addLabels.flatMap((call) => call.labels)).not.toContain( + candidateLabels.needsRealBehaviorProof, + ); }); it("does not let Barnacle veto ClawSweeper's sufficient proof label add", async () => {