// Mirror of https://github.com/openclaw/openclaw.git
// Synced 2026-05-28 09:42:16 +00:00 · 462 lines · 17 KiB · TypeScript
import { describe, expect, it, vi } from "vitest";
import {
  MOCK_ONLY_PROOF_LABEL,
  NEEDS_REAL_BEHAVIOR_PROOF_LABEL,
  PROOF_OVERRIDE_LABEL,
  PROOF_SUPPLIED_LABEL,
  evaluateClawSweeperExactHeadProof,
  evaluateRealBehaviorProof,
  hasClawSweeperExactHeadProof,
  isMaintainerTeamMember,
  labelsForRealBehaviorProof,
} from "../../scripts/github/real-behavior-proof-policy.mjs";

/**
 * Builds a minimal pull-request payload for the policy tests.
 *
 * The defaults describe a PR opened by a non-bot user with the `CONTRIBUTOR`
 * author association and no labels.
 *
 * @param body - Markdown body of the pull request.
 * @param overrides - Top-level fields that replace the defaults. The merge is
 *   shallow, so overriding `user` or `labels` replaces the whole value.
 * @returns A fresh object on every call; nested defaults are never shared.
 */
function externalPr(body: string, overrides: Record<string, unknown> = {}) {
  return {
    body,
    author_association: "CONTRIBUTOR",
    user: {
      login: "external-contributor",
      type: "User",
    },
    labels: [],
    // Spread last so callers can replace any default, including `body`.
    ...overrides,
  };
}

/**
 * Renders a `## Real behavior proof` markdown section with six bullet fields.
 *
 * @param evidence - Text placed after the "Evidence after fix" bullet.
 * @param overrides - Replacement values keyed by field name (`behavior`,
 *   `environment`, `steps`, `evidence`, `observedResult`, `notTested`).
 *   Overrides are applied after `evidence`, so an `evidence` key here wins
 *   over the positional argument.
 * @returns The section as a single string with `\n` line endings.
 */
function proofBody(evidence: string, overrides: Record<string, string> = {}) {
  const fields = {
    behavior: "Gateway startup no longer drops the configured Discord channel.",
    environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
    steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
    evidence,
    observedResult: "The gateway stayed connected and the Discord channel showed ready.",
    notTested: "No known gaps.",
    ...overrides,
  };
  return [
    "## Real behavior proof",
    "",
    `- Behavior or issue addressed: ${fields.behavior}`,
    `- Real environment tested: ${fields.environment}`,
    `- Exact steps or command run after this patch: ${fields.steps}`,
    `- Evidence after fix: ${fields.evidence}`,
    `- Observed result after fix: ${fields.observedResult}`,
    `- What was not tested: ${fields.notTested}`,
  ].join("\n");
}

// Exercises evaluateRealBehaviorProof / labelsForRealBehaviorProof against PR bodies built with
// externalPr() and proofBody(), and the ClawSweeper exact-head verdict helpers against
// hand-written comment payloads.
describe("real-behavior-proof-policy", () => {
  it.each([
    // NOTE(review): this first evidence value is empty even though the case is expected to pass
    // as "real after-fix evidence" — presumably an image/screenshot markup was lost when this
    // mirror was captured; confirm against upstream.
    "",
    "Linked artifact: https://github.com/openclaw/openclaw/actions/runs/123456789/artifacts/987654321",
    "Redacted runtime log: gateway connected Discord channel and delivered the reply.",
    ["Terminal transcript:", "```text", "$ openclaw gateway status", "discord ready", "```"].join(
      "\n",
    ),
  ])("passes external PRs with real after-fix evidence: %s", (evidence) => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(proofBody(evidence)),
    });

    expect(evaluation.status).toBe("passed");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
  });

  it("passes CRLF-formatted external PRs with screenshot proof", () => {
    // NOTE(review): the title mentions screenshot proof but the evidence passed here (and
    // asserted below) is the empty string — presumably the screenshot markup was stripped from
    // this mirror; confirm against upstream.
    const evaluation = evaluateRealBehaviorProof({
      // Convert every LF to CRLF so the body uses CRLF line endings.
      pullRequest: externalPr(proofBody("").replace(/\n/g, "\r\n")),
    });

    expect(evaluation.status).toBe("passed");
    expect(evaluation.fields).toStrictEqual({
      behavior: "Gateway startup no longer drops the configured Discord channel.",
      evidence: "",
      environment: "macOS 15.4, Node 24, local OpenClaw gateway with a redacted Discord token.",
      notTested: "No known gaps.",
      observedResult: "The gateway stayed connected and the Discord channel showed ready.",
      steps: "pnpm openclaw gateway restart, then pnpm openclaw gateway status",
    });
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
  });

  it("uses the latest real behavior proof section when duplicates exist", () => {
    const validProof = proofBody(
      [
        "Terminal transcript:",
        "```text",
        "$ openclaw doctor --non-interactive",
        "Discord external plugin is installed without explicit trust.",
        "Add plugins.entries.discord.enabled=true to trust it.",
        "```",
      ].join("\n"),
    );
    const mockOnlyProof = proofBody("Focused tests passed: 2 files, 36 tests.", {
      steps: "pnpm test",
      observedResult: "CI passes.",
    });

    // Same two sections in both bodies; only their order differs.
    const laterValid = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        [mockOnlyProof, "## Summary", "- Keep the detailed proof below.", validProof].join("\n\n"),
      ),
    });
    const laterInvalid = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        [validProof, "## Summary", "- Latest edit replaced proof with tests.", mockOnlyProof].join(
          "\n\n",
        ),
      ),
    });

    expect(laterValid.status).toBe("passed");
    expect(laterValid.fields?.evidence).toContain("openclaw doctor --non-interactive");
    expect(labelsForRealBehaviorProof(laterValid)).toEqual([PROOF_SUPPLIED_LABEL]);
    expect(laterInvalid.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(laterInvalid)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  it("accepts out-of-scope follow-ups as not-tested proof detail", () => {
    // The proof section has no "What was not tested" bullet; the gaps live under a separate
    // "## Out-of-scope Follow-ups" heading instead.
    const body = [
      "## Real behavior proof",
      "",
      "- Behavior addressed: Cron validation keeps Google Gemini 3 low thinking.",
      "- Real environment tested: Local macOS source checkout, Node 24.",
      "- Exact steps or command run after this patch:",
      "  1. Built the local checkout with `node scripts/build-all.mjs`.",
      "  2. Ran a redacted behavior probe for `provider=google`, `model=gemini-3-flash-preview`, and `catalogReasoning=false`.",
      '- Evidence after fix: `.artifacts/behavior-85156/after-installed.json` recorded `lowSupported: true` and `fallbackFromLow: "low"`.',
      "- Observed result after fix:",
      "  - `levels: off, minimal, low, medium, adaptive, high`",
      "  - `lowSupported: true`",
      "  - `fallbackFromLow: low`",
      "  - `local command version: OpenClaw 2026.5.21`",
      "",
      "## Out-of-scope Follow-ups",
      "- No live systemd cron schedule was tested.",
      "- No real Google provider request was sent.",
    ].join("\n");
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(body),
    });

    expect(evaluation.status).toBe("passed");
    expect(evaluation.fields?.notTested).toBe(
      "- No live systemd cron schedule was tested.\n- No real Google provider request was sent.",
    );
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
  });

  it("accepts source PR proof when explicit gaps live in out-of-scope follow-ups", () => {
    const body = [
      "## Real behavior proof",
      "",
      '- Behavior addressed: Cron/provider thinking validation no longer downgrades `google/gemini-3-flash-preview` `thinkingDefault: "low"` to `"off"` when cached catalog metadata says `reasoning:false` but the Google provider policy says Gemini 3 supports low thinking.',
      "- Real environment tested: Local macOS source checkout, Node v24.8.0, OpenClaw 2026.5.21 (c8a35c4), local `openclaw` shim pointed at the freshly built checkout. No channel credentials or provider API keys were used.",
      "- Exact steps or command run after this patch:",
      "  1. Built the local checkout with `node scripts/build-all.mjs`.",
      "  2. Updated `/Users/example/.local/bin/openclaw` to run this checkout's `openclaw.mjs` and verified `/Users/example/.local/bin/openclaw --version`.",
      "  3. Ran a redacted behavior probe for the reported cron validation decision with `provider=google`, `model=gemini-3-flash-preview`, `configuredThinkingDefault=low`, and `catalogReasoning=false`.",
      '- Evidence after fix: `.artifacts/behavior-85156/after-installed.json` from the local checkout recorded `lowSupported: true` and `fallbackFromLow: "low"`.',
      "- Observed result after fix:",
      "  - `levels: off, minimal, low, medium, adaptive, high`",
      "  - `lowSupported: true`",
      "  - `fallbackFromLow: low`",
      "  - `local command version: OpenClaw 2026.5.21 (c8a35c4)`",
      "",
      "## Out-of-scope Follow-ups",
      "- No live systemd cron schedule is added in this PR.",
      "- No real Google provider request is sent in this PR.",
      "- No catalog refresh or provider model-list behavior is changed in this PR.",
      "- No channel, gateway allowlist, credential, or auth-profile behavior is changed in this PR.",
    ].join("\n");
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(body),
    });

    expect(evaluation.status).toBe("passed");
    expect(evaluation.fields?.notTested).toBe(
      [
        "- No live systemd cron schedule is added in this PR.",
        "- No real Google provider request is sent in this PR.",
        "- No catalog refresh or provider model-list behavior is changed in this PR.",
        "- No channel, gateway allowlist, credential, or auth-profile behavior is changed in this PR.",
      ].join("\n"),
    );
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]);
  });

  it("fails external PRs without a real behavior proof section", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr("## Summary\n\n- Fixed startup."),
    });

    expect(evaluation.status).toBe("missing");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([NEEDS_REAL_BEHAVIOR_PROOF_LABEL]);
  });

  it("fails external PRs that say the changed behavior was not tested", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(proofBody("not tested")),
    });

    expect(evaluation.status).toBe("missing");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([NEEDS_REAL_BEHAVIOR_PROOF_LABEL]);
  });

  it("fails external PRs whose proof is only tests, mocks, snapshots, lint, typecheck, or CI", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        proofBody("pnpm test passed and Vitest mocks cover the branch.", {
          steps: "pnpm test",
          observedResult: "CI passes.",
        }),
      ),
    });

    expect(evaluation.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  it("fails external PRs whose only copied output is a fenced test or CI transcript", () => {
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        proofBody(["```text", "$ pnpm test", "CI passed with Vitest mocks", "```"].join("\n"), {
          steps: "pnpm test",
          observedResult: "CI passes.",
        }),
      ),
    });

    expect(evaluation.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  it("fails external PRs whose terminal label only contains test or CI output", () => {
    // Same fenced transcript as the previous case, but prefixed with a "Terminal transcript:"
    // label.
    const evaluation = evaluateRealBehaviorProof({
      pullRequest: externalPr(
        proofBody(
          [
            "Terminal transcript:",
            "```text",
            "$ pnpm test",
            "CI passed with Vitest mocks",
            "```",
          ].join("\n"),
          {
            steps: "pnpm test",
            observedResult: "CI passes.",
          },
        ),
      ),
    });

    expect(evaluation.status).toBe("mock_only");
    expect(labelsForRealBehaviorProof(evaluation)).toEqual([MOCK_ONLY_PROOF_LABEL]);
  });

  it("passes maintainer, bot, and override cases", () => {
    // All three PRs have an empty body; only the author association, the user, or the labels
    // differ from the externalPr() defaults.
    expect(
      evaluateRealBehaviorProof({
        pullRequest: externalPr("", { author_association: "MEMBER" }),
      }).status,
    ).toBe("skipped");
    expect(
      evaluateRealBehaviorProof({
        pullRequest: externalPr("", {
          user: {
            login: "renovate[bot]",
            type: "Bot",
          },
        }),
      }).status,
    ).toBe("skipped");
    expect(
      evaluateRealBehaviorProof({
        pullRequest: externalPr("", { labels: [{ name: PROOF_OVERRIDE_LABEL }] }),
      }).status,
    ).toBe("override");
  });

  it("accepts ClawSweeper pass verdict comments only for the exact PR head", () => {
    const pullRequest = {
      number: 83581,
      head: {
        sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f",
      },
    };
    const comments = [
      {
        user: {
          login: "clawsweeper[bot]",
          type: "Bot",
        },
        performed_via_github_app: {
          slug: "clawsweeper",
        },
        body: [
          "Codex review: passed.",
          "<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->",
        ].join("\n"),
      },
    ];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);
    // Same comment, different head SHA: the verdict no longer applies.
    expect(
      hasClawSweeperExactHeadProof({
        pullRequest: {
          ...pullRequest,
          head: { sha: "d0215b2d67a45a783277fc7d2949ac4a30f63ec6" },
        },
        comments,
      }),
    ).toBe(false);
  });

  it("rejects forged ClawSweeper pass verdict markers from contributor comments", () => {
    const pullRequest = {
      number: 83581,
      head: {
        sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f",
      },
    };
    // Marker matches the PR number and head SHA, but the author is a regular user.
    const comments = [
      {
        user: {
          login: "external-contributor",
          type: "User",
        },
        body: "<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->",
      },
    ];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(false);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false);
  });

  it("accepts exact ClawSweeper bot pass verdict markers when GitHub omits the app source", () => {
    const pullRequest = {
      number: 83581,
      head: {
        sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f",
      },
    };
    // No `performed_via_github_app` field on this comment.
    const comments = [
      {
        user: {
          login: "clawsweeper[bot]",
          type: "Bot",
        },
        body: "<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->",
      },
    ];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);
  });

  it("accepts exact OpenClaw ClawSweeper bot pass verdict markers when GitHub omits the app source", () => {
    const pullRequest = {
      number: 83581,
      head: {
        sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f",
      },
    };
    const comments = [
      {
        user: {
          login: "openclaw-clawsweeper[bot]",
          type: "Bot",
        },
        body: "<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->",
      },
    ];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true);
  });

  it("rejects bot-shaped pass verdict markers from other bot users", () => {
    const pullRequest = {
      number: 83581,
      head: {
        sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f",
      },
    };
    // A Bot-typed account whose login is not one of the accepted ClawSweeper logins above.
    const comments = [
      {
        user: {
          login: "not-clawsweeper[bot]",
          type: "Bot",
        },
        body: "<!-- clawsweeper-verdict:pass item=83581 sha=06ee95df6608d29a395c52ba8ab53fdd93a9dc4f confidence=high -->",
      },
    ];

    expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(false);
    expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false);
  });
});

// Covers isMaintainerTeamMember with an injected `fetch` mock, so no network call is made.
describe("isMaintainerTeamMember", () => {
  /**
   * Builds a minimal fetch-response stand-in exposing only `ok`, `status`, and `json()`.
   * `ok` is true for 2xx statuses.
   */
  function jsonResponse(status: number, body: unknown = {}) {
    return {
      ok: status >= 200 && status < 300,
      status,
      json: () => Promise.resolve(body),
    };
  }

  it("returns true for active members", async () => {
    const fetch = vi.fn().mockResolvedValue(jsonResponse(200, { state: "active" }));
    const result = await isMaintainerTeamMember({
      token: "tok",
      org: "openclaw",
      login: "private-maint",
      fetch,
    });

    expect(result).toBe(true);
    // The request targets the "maintainer" team membership endpoint for the given org and login
    // and carries the bearer token.
    expect(fetch).toHaveBeenCalledWith(
      "https://api.github.com/orgs/openclaw/teams/maintainer/memberships/private-maint",
      expect.objectContaining({
        headers: expect.objectContaining({
          Authorization: "Bearer tok",
          Accept: "application/vnd.github+json",
        }),
      }),
    );
  });

  it("returns false for non-active membership states", async () => {
    const fetch = vi.fn().mockResolvedValue(jsonResponse(200, { state: "pending" }));
    expect(await isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch })).toBe(false);
  });

  it("returns false when GitHub returns 404", async () => {
    const fetch = vi.fn().mockResolvedValue(jsonResponse(404));
    expect(await isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch })).toBe(false);
  });

  it("returns false when the token, org, or login is missing", async () => {
    const fetch = vi.fn();
    expect(await isMaintainerTeamMember({ org: "o", login: "u", fetch })).toBe(false);
    expect(await isMaintainerTeamMember({ token: "t", login: "u", fetch })).toBe(false);
    expect(await isMaintainerTeamMember({ token: "t", org: "o", fetch })).toBe(false);
    // Missing inputs must short-circuit before any request is issued.
    expect(fetch).not.toHaveBeenCalled();
  });

  it("throws on unexpected HTTP errors so the caller can warn and fall back", async () => {
    const fetch = vi.fn().mockResolvedValue(jsonResponse(500));
    // The rejection message is expected to mention the status code.
    await expect(
      isMaintainerTeamMember({ token: "t", org: "o", login: "u", fetch }),
    ).rejects.toThrow(/500/);
  });
});