Require real behavior proof for external PRs (#77622)

* ci: require real behavior proof for external PRs

* fix: tighten real behavior proof heuristics

* fix: reject test-only real behavior proof labels

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
pashpashpash
2026-05-04 21:45:30 -07:00
committed by GitHub
parent d02fbc6116
commit 70f34bf177
10 changed files with 671 additions and 11 deletions

View File

@@ -37,6 +37,28 @@ function pr(title: string, body = blankTemplateBody) {
};
}
/**
 * Builds a PR body containing a filled-in "Real behavior proof" section.
 *
 * @param evidence - Text placed on the "Evidence after fix" bullet.
 * @param overrides - Optional replacements for any field value (behavior,
 *   environment, steps, evidence, observedResult, notTested). Overrides are
 *   applied last, so they win over both the defaults and `evidence`.
 * @returns The markdown section as a single newline-joined string.
 */
function realBehaviorProofBody(evidence: string, overrides: Record<string, string> = {}) {
  const values = {
    behavior: "Gateway status now reports the Discord channel as ready.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway, redacted Discord token.",
    steps: "pnpm openclaw gateway restart and pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and Discord reported ready.",
    notTested: "No known gaps.",
    ...overrides,
  };

  // One row per template bullet, in the order the section lists them.
  const rows: ReadonlyArray<readonly [string, string]> = [
    ["Behavior or issue addressed", values.behavior],
    ["Real environment tested", values.environment],
    ["Exact steps or command run after this patch", values.steps],
    ["Evidence after fix", values.evidence],
    ["Observed result after fix", values.observedResult],
    ["What was not tested", values.notTested],
  ];

  const bullets = rows.map(([label, value]) => `- ${label}: ${value}`);
  return ["## Real behavior proof", "", ...bullets].join("\n");
}
function file(filename: string, status = "modified") {
return {
filename,
@@ -236,6 +258,44 @@ describe("barnacle-auto-response", () => {
);
});
// A PR created with only a title (so `pr` falls back to its default body)
// should be flagged as needing proof, not as mock-only proof.
it("labels external PRs that are missing real behavior proof", () => {
  const pullRequest = pr("Fix gateway startup");
  const changedFiles = [file("src/gateway/server.ts")];

  const labels = classifyPullRequestCandidateLabels(pullRequest, changedFiles);

  expect(labels).toContain(candidateLabels.needsRealBehaviorProof);
  expect(labels).not.toContain(candidateLabels.mockOnlyProof);
});
// A proof section whose steps, evidence, and result only cite test runs,
// mocks, and CI should get the mock-only label instead of the missing-proof one.
it("labels external PRs whose proof is only tests or mocks", () => {
  const body = realBehaviorProofBody("pnpm test passed with Vitest mocks.", {
    steps: "pnpm test",
    observedResult: "CI passes.",
  });
  const pullRequest = pr("Fix gateway startup", body);
  const changedFiles = [file("src/gateway/server.ts")];

  const labels = classifyPullRequestCandidateLabels(pullRequest, changedFiles);

  expect(labels).toContain(candidateLabels.mockOnlyProof);
  expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
});
// An attached screenshot link as evidence should produce neither proof label.
it("does not label external PRs that include real behavior proof", () => {
  const body = realBehaviorProofBody(
    "![after](https://github.com/user-attachments/assets/gateway-ready)",
  );
  const pullRequest = pr("Fix gateway startup", body);
  const changedFiles = [file("src/gateway/server.ts")];

  const labels = classifyPullRequestCandidateLabels(pullRequest, changedFiles);

  expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
  expect(labels).not.toContain(candidateLabels.mockOnlyProof);
});
it("uses linked issues as context and suppresses low-signal docs labels", () => {
const labels = classifyPullRequestCandidateLabels(
pr("Update docs", `${blankTemplateBody}\n\nRelated #12345`),
@@ -577,6 +637,43 @@ describe("barnacle-auto-response", () => {
expect(calls.update).toEqual([]);
});
// With a default context, the run should only add the needs-proof label:
// no comment is posted and the PR is not updated (so it is not closed).
it("adds proof labels to external PRs without auto-closing by default", async () => {
  const { calls: recorded, github: client } = barnacleGithub([file("src/gateway/server.ts")]);

  await runBarnacleAutoResponse({
    github: client,
    context: barnacleContext({}),
    core: {
      info: () => undefined,
    },
  });

  const needsProofLabelCall = expect.objectContaining({
    labels: expect.arrayContaining([candidateLabels.needsRealBehaviorProof]),
  });
  expect(recorded.addLabels).toContainEqual(needsProofLabelCall);
  expect(recorded.createComment).toEqual([]);
  expect(recorded.update).toEqual([]);
});
// When the context carries both the needs-proof label and "proof: override",
// the run should remove the needs-proof label and leave the PR itself untouched.
// NOTE(review): the second barnacleContext argument looks like the PR's
// existing labels — confirm against the helper's definition.
it("removes stale proof labels when override is present", async () => {
  const { calls: recorded, github: client } = barnacleGithub([file("src/gateway/server.ts")]);
  const existingLabels = [candidateLabels.needsRealBehaviorProof, "proof: override"];

  await runBarnacleAutoResponse({
    github: client,
    context: barnacleContext({}, existingLabels),
    core: {
      info: () => undefined,
    },
  });

  const staleLabelRemoval = expect.objectContaining({
    name: candidateLabels.needsRealBehaviorProof,
  });
  expect(recorded.removeLabel).toContainEqual(staleLabelRemoval);
  expect(recorded.update).toEqual([]);
});
it("actions manually applied candidate labels", async () => {
const { calls, github } = barnacleGithub([file("extensions/example/openclaw.plugin.json")]);
@@ -637,7 +734,7 @@ describe("barnacle-auto-response", () => {
expect(calls.removeLabel).toContainEqual(expect.objectContaining({ name: "trigger-response" }));
expect(calls.createComment).toContainEqual(
expect.objectContaining({
body: expect.stringContaining("only changes tests"),
body: expect.stringContaining("does not include real behavior proof"),
}),
);
expect(calls.update).toContainEqual(expect.objectContaining({ state: "closed" }));

View File

@@ -0,0 +1,153 @@
import { describe, expect, it } from "vitest";
import {
MOCK_ONLY_PROOF_LABEL,
NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
PROOF_OVERRIDE_LABEL,
evaluateRealBehaviorProof,
labelsForRealBehaviorProof,
} from "../../scripts/github/real-behavior-proof-policy.mjs";
/**
 * Creates a pull-request fixture authored by an outside contributor.
 *
 * @param body - The PR description text.
 * @param overrides - Properties spread over the defaults; any key given here
 *   (including `body`) replaces the default value.
 * @returns A fresh object with `body`, `author_association` ("CONTRIBUTOR"),
 *   a human `user`, and an empty `labels` array, plus the overrides.
 */
function externalPr(body: string, overrides: Record<string, unknown> = {}) {
  const author = {
    login: "external-contributor",
    type: "User",
  };
  const defaults = {
    body,
    author_association: "CONTRIBUTOR",
    user: author,
    labels: [],
  };
  return { ...defaults, ...overrides };
}
/**
 * Renders a completed "Real behavior proof" markdown section for a PR body.
 *
 * @param evidence - Text for the "Evidence after fix" bullet.
 * @param overrides - Optional replacement values keyed by field name
 *   (behavior, environment, steps, evidence, observedResult, notTested);
 *   spread last, so they take precedence over the defaults and `evidence`.
 * @returns The section heading, a blank line, and six bullets joined by "\n".
 */
function proofBody(evidence: string, overrides: Record<string, string> = {}) {
  const fields = {
    behavior: "Gateway startup no longer drops the configured Discord channel.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
    steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and the Discord channel showed ready.",
    notTested: "No known gaps.",
    ...overrides,
  };

  // Heading followed by an empty line, then the bullets in template order.
  const lines: string[] = ["## Real behavior proof", ""];
  lines.push(`- Behavior or issue addressed: ${fields.behavior}`);
  lines.push(`- Real environment tested: ${fields.environment}`);
  lines.push(`- Exact steps or command run after this patch: ${fields.steps}`);
  lines.push(`- Evidence after fix: ${fields.evidence}`);
  lines.push(`- Observed result after fix: ${fields.observedResult}`);
  lines.push(`- What was not tested: ${fields.notTested}`);
  return lines.join("\n");
}
// Exercises evaluateRealBehaviorProof / labelsForRealBehaviorProof against PR
// fixtures: real evidence passes, missing or untested proof is flagged,
// test-only proof is flagged as mock-only, and maintainer, bot, and override
// cases bypass the check.
describe("real-behavior-proof-policy", () => {
  // Evaluates the policy for a fixture PR built from `body` (and optional overrides).
  const evaluateBody = (body: string, overrides?: Record<string, unknown>) =>
    evaluateRealBehaviorProof({ pullRequest: externalPr(body, overrides) });

  // Checks the evaluation status first, then the labels derived from it.
  const expectOutcome = (
    evaluation: ReturnType<typeof evaluateRealBehaviorProof>,
    status: string,
    labels: readonly string[],
  ) => {
    expect(evaluation.status).toBe(status);
    expect(labelsForRealBehaviorProof(evaluation)).toEqual(labels);
  };

  // Steps/result overrides that describe nothing but a test run and CI.
  const testOnlyFields = {
    steps: "pnpm test",
    observedResult: "CI passes.",
  };

  // Fenced block whose only content is a test command and CI/mocks output.
  const testOnlyFence = ["```text", "$ pnpm test", "CI passed with Vitest mocks", "```"];

  const realEvidenceSamples = [
    "![after](https://github.com/user-attachments/assets/abc123)",
    "Linked artifact: https://github.com/openclaw/openclaw/actions/runs/123456789/artifacts/987654321",
    "Redacted runtime log: gateway connected Discord channel and delivered the reply.",
    ["Terminal transcript:", "```text", "$ openclaw gateway status", "discord ready", "```"].join(
      "\n",
    ),
  ];

  it.each(realEvidenceSamples)(
    "passes external PRs with real after-fix evidence: %s",
    (evidence) => {
      const evaluation = evaluateBody(proofBody(evidence));

      expectOutcome(evaluation, "passed", []);
    },
  );

  it("fails external PRs without a real behavior proof section", () => {
    const evaluation = evaluateBody("## Summary\n\n- Fixed startup.");

    expectOutcome(evaluation, "missing", [NEEDS_REAL_BEHAVIOR_PROOF_LABEL]);
  });

  it("fails external PRs that say the changed behavior was not tested", () => {
    const evaluation = evaluateBody(proofBody("not tested"));

    expectOutcome(evaluation, "missing", [NEEDS_REAL_BEHAVIOR_PROOF_LABEL]);
  });

  it("fails external PRs whose proof is only tests, mocks, snapshots, lint, typecheck, or CI", () => {
    const body = proofBody("pnpm test passed and Vitest mocks cover the branch.", testOnlyFields);

    const evaluation = evaluateBody(body);

    expectOutcome(evaluation, "mock_only", [MOCK_ONLY_PROOF_LABEL]);
  });

  it("fails external PRs whose only copied output is a fenced test or CI transcript", () => {
    const body = proofBody(testOnlyFence.join("\n"), testOnlyFields);

    const evaluation = evaluateBody(body);

    expectOutcome(evaluation, "mock_only", [MOCK_ONLY_PROOF_LABEL]);
  });

  it("fails external PRs whose terminal label only contains test or CI output", () => {
    // Same test-only fence, but introduced by a "Terminal transcript:" label.
    const labelledTranscript = ["Terminal transcript:", ...testOnlyFence].join("\n");
    const body = proofBody(labelledTranscript, testOnlyFields);

    const evaluation = evaluateBody(body);

    expectOutcome(evaluation, "mock_only", [MOCK_ONLY_PROOF_LABEL]);
  });

  it("passes maintainer, bot, and override cases", () => {
    // MEMBER author association: the check is skipped.
    const maintainer = evaluateBody("", { author_association: "MEMBER" });
    expect(maintainer.status).toBe("skipped");

    // Bot account: the check is skipped.
    const bot = evaluateBody("", {
      user: {
        login: "renovate[bot]",
        type: "Bot",
      },
    });
    expect(bot.status).toBe("skipped");

    // Override label present: the evaluation reports the override.
    const overridden = evaluateBody("", { labels: [{ name: PROOF_OVERRIDE_LABEL }] });
    expect(overridden.status).toBe("override");
  });
});