diff --git a/scripts/github/real-behavior-proof-policy.mjs b/scripts/github/real-behavior-proof-policy.mjs index 51912642634..a9611c58fb8 100644 --- a/scripts/github/real-behavior-proof-policy.mjs +++ b/scripts/github/real-behavior-proof-policy.mjs @@ -58,7 +58,7 @@ const allProofFieldNames = requiredProofFields .concat(["Before evidence", "Before evidence optional"]); const missingValueRegex = - /^(?:n\/?a|not applicable|tbd|todo|unknown|unsure|none provided|no evidence|not tested|untested|-|\[[^\]]*\])$/i; + /^(?:n\/?a|not applicable|tbd|todo|unknown|unsure|none provided|no evidence|not tested|untested|did not test|didn't test|could not test|couldn't test|-|\[[^\]]*\])\.?$/i; const standaloneMissingProofRegex = /^\s*(?:[-*]\s*)?(?:n\/?a|not applicable|not tested|untested|no evidence|did not test|didn't test|could not test|couldn't test)\s*\.?\s*$/im; @@ -208,20 +208,51 @@ export async function isMaintainerTeamMember({ return body?.state === "active"; } +function nextFenceMarker(line, fenceMarker) { + const fence = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/); + const marker = fence?.[1] ?? ""; + const suffix = fence?.[2] ?? ""; + if (!fenceMarker && marker) { + return marker; + } + if ( + fenceMarker && + marker[0] === fenceMarker[0] && + marker.length >= fenceMarker.length && + suffix.trim() === "" + ) { + return ""; + } + return fenceMarker; +} + +function isMarkdownHeadingLine(line) { + return /^#{1,6}\s+\S/.test(line); +} + function extractMarkdownSections(headingRegex, body = "") { // Normalize CRLF → LF so regexes and section slicing see GitHub web-editor PR // bodies the same way as locally-authored Markdown. const normalizedBody = normalizeLineEndings(body); const sections = []; - const matcher = new RegExp( - headingRegex.source, - headingRegex.flags.includes("g") ? headingRegex.flags : `${headingRegex.flags}g`, - ); - for (const match of normalizedBody.matchAll(matcher)) { - const sectionStart = match.index + match[0].length; - const rest = normalizedBody.slice(sectionStart); - const nextHeading = rest.match(/\n#{1,6}\s+\S/); - sections.push((nextHeading ? rest.slice(0, nextHeading.index) : rest).trim()); + const matcher = new RegExp(headingRegex.source, headingRegex.flags.replaceAll("g", "")); + let fenceMarker = ""; + let sectionStart = -1; + let lineStart = 0; + for (const line of normalizedBody.split("\n")) { + const match = !fenceMarker ? line.match(matcher) : null; + if (sectionStart >= 0 && !fenceMarker && isMarkdownHeadingLine(line)) { + sections.push(normalizedBody.slice(sectionStart, lineStart === 0 ? 0 : lineStart - 1).trim()); + sectionStart = -1; + } + if (match) { + sectionStart = lineStart + (match.index ?? 0) + match[0].length; + } + fenceMarker = nextFenceMarker(line, fenceMarker); + lineStart += line.length + 1; + } + if (sectionStart >= 0) { + sections.push(normalizedBody.slice(sectionStart).trim()); } return sections; } @@ -249,48 +280,75 @@ function fieldLineRegex(name) { ); } +function proofFieldLineValue(line) { + const matchingName = allProofFieldNames.find((name) => fieldLineRegex(name).test(line)); + const match = matchingName ? line.match(fieldLineRegex(matchingName)) : null; + return match?.[1] ?? null; +} + function isAnyProofFieldLine(line) { - return allProofFieldNames.some((name) => fieldLineRegex(name).test(line)); + return proofFieldLineValue(line) !== null; } function extractFieldValue(section, field) { const lines = normalizeLineEndings(section).split("\n"); + let fenceMarker = ""; for (let index = 0; index < lines.length; index += 1) { - const matchingName = field.names.find((name) => fieldLineRegex(name).test(lines[index])); + const line = lines[index]; + const matchingName = !fenceMarker + ? field.names.find((name) => fieldLineRegex(name).test(line)) + : null; if (!matchingName) { + const fenceLine = !fenceMarker ? (proofFieldLineValue(line) ?? line) : line; + fenceMarker = nextFenceMarker(fenceLine, fenceMarker); continue; } - const match = lines[index].match(fieldLineRegex(matchingName)); + const match = line.match(fieldLineRegex(matchingName)); const valueLines = [match?.[1] ?? ""]; + fenceMarker = nextFenceMarker(valueLines[0], ""); for (let next = index + 1; next < lines.length; next += 1) { const line = lines[next]; - if (/^#{1,6}\s+\S/.test(line) || isAnyProofFieldLine(line)) { + if (!fenceMarker && (isMarkdownHeadingLine(line) || isAnyProofFieldLine(line))) { break; } valueLines.push(line); + fenceMarker = nextFenceMarker(line, fenceMarker); } return valueLines.join("\n").trim(); } return ""; } -function stripProofFieldLabels(section) { - return normalizeLineEndings(section) +function proofContentOutsideFences(section) { + let fenceMarker = ""; + const contentLines = []; + for (const line of normalizeLineEndings(section).split("\n")) { + if (fenceMarker) { + fenceMarker = nextFenceMarker(line, fenceMarker); + continue; + } + const contentLine = proofFieldLineValue(line) ?? line; + const nextMarker = nextFenceMarker(contentLine, fenceMarker); + const isFenceBoundary = nextMarker !== fenceMarker; + if (!isFenceBoundary) { + contentLines.push(contentLine); + } + fenceMarker = nextMarker; + } + return contentLines.join("\n"); +} + +function stripMarkdownFenceMarkers(value) { + return normalizeLineEndings(value) .split("\n") - .map((line) => { - if (!isAnyProofFieldLine(line)) { - return line; - } - const matchingName = allProofFieldNames.find((name) => fieldLineRegex(name).test(line)); - const match = matchingName ? line.match(fieldLineRegex(matchingName)) : null; - return match?.[1] ?? ""; - }) - .join("\n"); + .filter((line) => !/^ {0,3}(?:`{3,}|~{3,})(?:.*)?$/.test(line)) + .join("\n") + .trim(); } function isMissingValue(value, field) { - const trimmed = value.trim(); + const trimmed = stripMarkdownFenceMarkers(value).replace(/^\s*[-*]\s+/, ""); if (!trimmed) { return true; } @@ -393,7 +451,7 @@ function evaluateRealBehaviorProofSection(section, body) { ); } - const proofContent = stripProofFieldLabels(section); + const proofContent = proofContentOutsideFences(section); if (standaloneMissingProofRegex.test(proofContent)) { return result("insufficient", "Real behavior proof says the changed behavior was not tested.", { fields, diff --git a/test/scripts/real-behavior-proof-policy.test.ts b/test/scripts/real-behavior-proof-policy.test.ts index 94510ba0b7b..de2b2ad7b5e 100644 --- a/test/scripts/real-behavior-proof-policy.test.ts +++ b/test/scripts/real-behavior-proof-policy.test.ts @@ -139,6 +139,43 @@ describe("real-behavior-proof-policy", () => { expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]); }); + it("keeps proof fields after Markdown headings copied inside fenced output", () => { + const body = proofBody( + [ + "Terminal transcript:", + "```text", + "compiled system prompt:", + "```js", + "console.log('not a closing fence')", + "## Real behavior proof", + "Behavior addressed: copied prompt content, not a PR proof section.", + "## TOOLS.md", + "Observed result: ```", + "Observed result: not tested", + "What was not tested: copied template text", + "not tested", + "openclaw gateway status", + "discord ready", + "```", + ].join("\n"), + { + observedResult: "Plugin rules appear after the plugin boundary.", + notTested: "Live model attribution smoke.", + }, + ); + const evaluation = evaluateRealBehaviorProof({ + pullRequest: externalPr(body), + }); + + expect(evaluation.fields?.evidence).toContain("openclaw gateway status"); + expect(evaluation.status).toBe("passed"); + expect(evaluation.fields?.observedResult).toBe( + "Plugin rules appear after the plugin boundary.", + ); + expect(evaluation.fields?.notTested).toBe("Live model attribution smoke."); + expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]); + }); + it("uses the latest real behavior proof section when duplicates exist", () => { const validProof = proofBody( [ @@ -263,6 +300,37 @@ describe("real-behavior-proof-policy", () => { expect(labelsForRealBehaviorProof(evaluation)).toEqual([NEEDS_REAL_BEHAVIOR_PROOF_LABEL]); }); + it.each([ + "not tested", + "not tested.", + "- not tested", + "- not tested.", + "did not test", + "did not test.", + "could not test", + "could not test.", + ])( + "fails external PRs with fenced missing-proof field values: %s", + (missingProof) => { + const evidence = [ + "Terminal transcript:", + "```text", + "$ openclaw gateway status", + "discord ready", + "```", + ].join("\n"); + const notTested = ["```text", missingProof, "```"].join("\n"); + + const evaluation = evaluateRealBehaviorProof({ + pullRequest: externalPr(proofBody(evidence, { notTested })), + }); + + expect(evaluation.status).toBe("missing"); + expect(evaluation.missingFields).toEqual(["notTested"]); + expect(labelsForRealBehaviorProof(evaluation)).toEqual([NEEDS_REAL_BEHAVIOR_PROOF_LABEL]); + }, + ); + it("fails external PRs whose proof is only tests, mocks, snapshots, lint, typecheck, or CI", () => { const evaluation = evaluateRealBehaviorProof({ pullRequest: externalPr(