mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 09:41:11 +00:00
Tighten parity proof heuristics
This commit is contained in:
@@ -130,6 +130,27 @@ describe("qa agentic parity report", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("ignores neutral Failed and Blocked headings in passing protocol reports", () => {
|
||||
const summary: QaParitySuiteSummary = {
|
||||
scenarios: [
|
||||
{
|
||||
name: "Source and docs discovery report",
|
||||
status: "pass",
|
||||
details: `Worked:
|
||||
- Read the seeded QA material.
|
||||
Failed:
|
||||
- None observed.
|
||||
Blocked:
|
||||
- No live provider evidence in this lane.
|
||||
Follow-up:
|
||||
- Re-run with a real provider if needed.`,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
expect(computeQaAgenticParityMetrics(summary).fakeSuccessCount).toBe(0);
|
||||
});
|
||||
|
||||
it("renders a readable markdown parity report", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.4",
|
||||
|
||||
@@ -68,9 +68,11 @@ const SUSPICIOUS_PASS_PATTERNS = [
|
||||
/incomplete turn/i,
|
||||
/\btimed out\b/i,
|
||||
/\btimeout\b/i,
|
||||
/\bblocked\b/i,
|
||||
/\berror\b/i,
|
||||
/\bfailed\b/i,
|
||||
/\bfailed to\b/i,
|
||||
/\bcould not\b/i,
|
||||
/\bunable to\b/i,
|
||||
/did not continue/i,
|
||||
] as const;
|
||||
|
||||
function normalizeScenarioStatus(status: string | undefined): "pass" | "fail" | "skip" {
|
||||
|
||||
@@ -228,7 +228,7 @@ describe("qa mock openai server", () => {
|
||||
},
|
||||
{
|
||||
type: "function_call_output",
|
||||
output: "Replay safety: unsafe after write.\n",
|
||||
output: "Successfully wrote 41 bytes to compaction-retry-summary.txt.",
|
||||
},
|
||||
],
|
||||
}),
|
||||
|
||||
@@ -453,7 +453,12 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
|
||||
return `Protocol note: Lobster Invaders built at lobster-invaders.html.`;
|
||||
}
|
||||
if (toolOutput && /compaction retry mutating tool check/i.test(prompt)) {
|
||||
if (toolOutput.includes("Replay safety: unsafe after write.")) {
|
||||
if (
|
||||
toolOutput.includes("Replay safety: unsafe after write.") ||
|
||||
/compaction-retry-summary\.txt/i.test(toolOutput) ||
|
||||
/successfully (?:wrote|replaced)/i.test(toolOutput) ||
|
||||
/\bwrote\b.*\bcompaction-retry-summary\.txt\b/i.test(toolOutput)
|
||||
) {
|
||||
return "Protocol note: replay unsafe after write.";
|
||||
}
|
||||
return "";
|
||||
|
||||
Reference in New Issue
Block a user