mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:40:43 +00:00
Require real behavior proof for external PRs (#77622)
* ci: require real behavior proof for external PRs
* fix: tighten real behavior proof heuristics
* fix: reject test-only real behavior proof labels

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -37,6 +37,28 @@ function pr(title: string, body = blankTemplateBody) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a PR body consisting of a filled-in "## Real behavior proof" section.
 *
 * @param evidence - Text placed after the "Evidence after fix:" label.
 * @param overrides - Replacement values keyed by field name (`behavior`,
 *   `environment`, `steps`, `evidence`, `observedResult`, `notTested`). They are
 *   spread last, so they win over the defaults and over `evidence`.
 * @returns The heading, a blank line, and six bullet lines joined with "\n".
 */
function realBehaviorProofBody(evidence: string, overrides: Record<string, string> = {}): string {
  const fields = {
    behavior: "Gateway status now reports the Discord channel as ready.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway, redacted Discord token.",
    steps: "pnpm openclaw gateway restart and pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and Discord reported ready.",
    notTested: "No known gaps.",
    // Spread last so a test can replace any single field.
    ...overrides,
  };
  return [
    "## Real behavior proof",
    "",
    `- Behavior or issue addressed: ${fields.behavior}`,
    `- Real environment tested: ${fields.environment}`,
    `- Exact steps or command run after this patch: ${fields.steps}`,
    `- Evidence after fix: ${fields.evidence}`,
    `- Observed result after fix: ${fields.observedResult}`,
    `- What was not tested: ${fields.notTested}`,
  ].join("\n");
}
|
||||
|
||||
function file(filename: string, status = "modified") {
|
||||
return {
|
||||
filename,
|
||||
@@ -236,6 +258,44 @@ describe("barnacle-auto-response", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// A PR built with the default (blank template) body has no proof section, so it
// must get the "needs real behavior proof" label and not the mock-only one.
it("labels external PRs that are missing real behavior proof", () => {
  const labels = classifyPullRequestCandidateLabels(pr("Fix gateway startup"), [
    file("src/gateway/server.ts"),
  ]);

  expect(labels).toContain(candidateLabels.needsRealBehaviorProof);
  expect(labels).not.toContain(candidateLabels.mockOnlyProof);
});
|
||||
|
||||
// The proof section is present, but its steps, evidence and result only mention
// `pnpm test`, Vitest mocks and CI, so the mock-only label is expected instead of
// the missing-proof label.
it("labels external PRs whose proof is only tests or mocks", () => {
  const labels = classifyPullRequestCandidateLabels(
    pr(
      "Fix gateway startup",
      realBehaviorProofBody("pnpm test passed with Vitest mocks.", {
        steps: "pnpm test",
        observedResult: "CI passes.",
      }),
    ),
    [file("src/gateway/server.ts")],
  );

  expect(labels).toContain(candidateLabels.mockOnlyProof);
  expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
});
|
||||
|
||||
// With a proof section in the body, neither proof label should be applied.
it("does not label external PRs that include real behavior proof", () => {
  const labels = classifyPullRequestCandidateLabels(
    pr(
      "Fix gateway startup",
      // NOTE(review): the evidence argument is an empty string here, which sits
      // oddly with a test about PRs that *include* proof. The literal may have
      // been lost when this text was extracted; confirm against the upstream file.
      realBehaviorProofBody(""),
    ),
    [file("src/gateway/server.ts")],
  );

  expect(labels).not.toContain(candidateLabels.needsRealBehaviorProof);
  expect(labels).not.toContain(candidateLabels.mockOnlyProof);
});
|
||||
|
||||
it("uses linked issues as context and suppresses low-signal docs labels", () => {
|
||||
const labels = classifyPullRequestCandidateLabels(
|
||||
pr("Update docs", `${blankTemplateBody}\n\nRelated #12345`),
|
||||
@@ -577,6 +637,43 @@ describe("barnacle-auto-response", () => {
|
||||
expect(calls.update).toEqual([]);
|
||||
});
|
||||
|
||||
// Running the auto-response with default context should add the missing-proof
// label while recording no comment and no PR update.
it("adds proof labels to external PRs without auto-closing by default", async () => {
  const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);

  await runBarnacleAutoResponse({
    github,
    context: barnacleContext({}),
    core: {
      info: () => undefined,
    },
  });

  expect(calls.addLabels).toContainEqual(
    expect.objectContaining({
      labels: expect.arrayContaining([candidateLabels.needsRealBehaviorProof]),
    }),
  );
  expect(calls.createComment).toEqual([]);
  expect(calls.update).toEqual([]);
});
|
||||
|
||||
// The context carries both the missing-proof label and "proof: override"; the
// run is expected to remove the stale proof label and not update the PR.
it("removes stale proof labels when override is present", async () => {
  const { calls, github } = barnacleGithub([file("src/gateway/server.ts")]);

  await runBarnacleAutoResponse({
    github,
    context: barnacleContext({}, [candidateLabels.needsRealBehaviorProof, "proof: override"]),
    core: {
      info: () => undefined,
    },
  });

  expect(calls.removeLabel).toContainEqual(
    expect.objectContaining({ name: candidateLabels.needsRealBehaviorProof }),
  );
  expect(calls.update).toEqual([]);
});
|
||||
|
||||
it("actions manually applied candidate labels", async () => {
|
||||
const { calls, github } = barnacleGithub([file("extensions/example/openclaw.plugin.json")]);
|
||||
|
||||
@@ -637,7 +734,7 @@ describe("barnacle-auto-response", () => {
|
||||
expect(calls.removeLabel).toContainEqual(expect.objectContaining({ name: "trigger-response" }));
|
||||
expect(calls.createComment).toContainEqual(
|
||||
expect.objectContaining({
|
||||
body: expect.stringContaining("only changes tests"),
|
||||
body: expect.stringContaining("does not include real behavior proof"),
|
||||
}),
|
||||
);
|
||||
expect(calls.update).toContainEqual(expect.objectContaining({ state: "closed" }));
|
||||
|
||||
153
test/scripts/real-behavior-proof-policy.test.ts
Normal file
153
test/scripts/real-behavior-proof-policy.test.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
MOCK_ONLY_PROOF_LABEL,
|
||||
NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
|
||||
PROOF_OVERRIDE_LABEL,
|
||||
evaluateRealBehaviorProof,
|
||||
labelsForRealBehaviorProof,
|
||||
} from "../../scripts/github/real-behavior-proof-policy.mjs";
|
||||
|
||||
/**
 * Builds a pull-request fixture authored by an outside contributor.
 *
 * Defaults: `author_association` "CONTRIBUTOR", a `user` of type "User" with
 * login "external-contributor", and no labels.
 *
 * @param body - The PR description text.
 * @param overrides - Top-level properties to replace. The spread is shallow, so
 *   passing `user` or `labels` replaces that value wholesale.
 * @returns The fixture object with `overrides` applied on top of the defaults.
 */
function externalPr(body: string, overrides: Record<string, unknown> = {}) {
  return {
    body,
    author_association: "CONTRIBUTOR",
    user: {
      login: "external-contributor",
      type: "User",
    },
    labels: [],
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Builds a PR body consisting of a filled-in "## Real behavior proof" section.
 *
 * @param evidence - Text placed after the "Evidence after fix:" label.
 * @param overrides - Replacement values keyed by field name (`behavior`,
 *   `environment`, `steps`, `evidence`, `observedResult`, `notTested`). They are
 *   spread last, so they win over the defaults and over `evidence`.
 * @returns The heading, a blank line, and six bullet lines joined with "\n".
 */
function proofBody(evidence: string, overrides: Record<string, string> = {}): string {
  const fields = {
    behavior: "Gateway startup no longer drops the configured Discord channel.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
    steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and the Discord channel showed ready.",
    notTested: "No known gaps.",
    // Spread last so a test can replace any single field.
    ...overrides,
  };
  return [
    "## Real behavior proof",
    "",
    `- Behavior or issue addressed: ${fields.behavior}`,
    `- Real environment tested: ${fields.environment}`,
    `- Exact steps or command run after this patch: ${fields.steps}`,
    `- Evidence after fix: ${fields.evidence}`,
    `- Observed result after fix: ${fields.observedResult}`,
    `- What was not tested: ${fields.notTested}`,
  ].join("\n");
}
|
||||
|
||||
// Exercises evaluateRealBehaviorProof / labelsForRealBehaviorProof against PR
// fixtures. The statuses asserted below are "passed", "missing", "mock_only",
// "skipped" and "override".
describe("real-behavior-proof-policy", () => {
  it.each([
    // NOTE(review): this first evidence case is an empty string, which sits oddly
    // with a test about real after-fix evidence. The literal may have been lost
    // when this text was extracted; confirm against the upstream file.
    "",
    "Linked artifact: https://github.com/openclaw/openclaw/actions/runs/123456789/artifacts/987654321",
    "Redacted runtime log: gateway connected Discord channel and delivered the reply.",
    ["Terminal transcript:", "```text", "$ openclaw gateway status", "discord ready", "```"].join(
      "\n",
    ),
  ])("passes external PRs with real after-fix evidence: %s", (evidence) => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(proofBody(evidence)),
    });

    expect(evaluation.status).toBe("passed");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([]);
  });

  // A body with only a summary section has no proof section at all.
  it("fails external PRs without a real behavior proof section", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr("## Summary\n\n- Fixed startup."),
    });

    expect(evaluation.status).toBe("missing");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([NEEDS_REAL_BEHAVIOR_PROOF_LABEL]);
  });

  // The section exists, but the evidence field itself says "not tested".
  it("fails external PRs that say the changed behavior was not tested", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(proofBody("not tested")),
    });

    expect(evaluation.status).toBe("missing");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([NEEDS_REAL_BEHAVIOR_PROOF_LABEL]);
  });

  it("fails external PRs whose proof is only tests, mocks, snapshots, lint, typecheck, or CI", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        proofBody("pnpm test passed and Vitest mocks cover the branch.", {
          steps: "pnpm test",
          observedResult: "CI passes.",
        }),
      ),
    });

    expect(evaluation.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  // Wrapping test/CI output in a code fence must not make it count as proof.
  it("fails external PRs whose only copied output is a fenced test or CI transcript", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        proofBody(["```text", "$ pnpm test", "CI passed with Vitest mocks", "```"].join("\n"), {
          steps: "pnpm test",
          observedResult: "CI passes.",
        }),
      ),
    });

    expect(evaluation.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  // Same transcript as above, prefixed with a "Terminal transcript:" label; the
  // label alone must not turn test output into accepted proof.
  it("fails external PRs whose terminal label only contains test or CI output", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        proofBody(
          [
            "Terminal transcript:",
            "```text",
            "$ pnpm test",
            "CI passed with Vitest mocks",
            "```",
          ].join("\n"),
          {
            steps: "pnpm test",
            observedResult: "CI passes.",
          },
        ),
      ),
    });

    expect(evaluation.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  // All three fixtures have an empty body; they differ only in author
  // association, user type, or the override label.
  it("passes maintainer, bot, and override cases", () => {
    expect(
      evaluateRealBehaviorProof({
        pullRequest: externalPr("", { author_association: "MEMBER" }),
      }).status,
    ).toBe("skipped");
    expect(
      evaluateRealBehaviorProof({
        pullRequest: externalPr("", {
          user: {
            login: "renovate[bot]",
            type: "Bot",
          },
        }),
      }).status,
    ).toBe("skipped");
    expect(
      evaluateRealBehaviorProof({
        pullRequest: externalPr("", { labels: [{ name: PROOF_OVERRIDE_LABEL }] }),
      }).status,
    ).toBe("override");
  });
});
|
||||
Reference in New Issue
Block a user