From a6cd2e826e8d02027fa6e37e9da4816e1438a2d3 Mon Sep 17 00:00:00 2001 From: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Date: Fri, 22 May 2026 09:50:42 +0000 Subject: [PATCH] fix: preserve Google Gemini 3 cron thinking --- .../real-behavior-proof-policy.test.ts | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/scripts/real-behavior-proof-policy.test.ts b/test/scripts/real-behavior-proof-policy.test.ts index f32f600c6c6..489b76bb34c 100644 --- a/test/scripts/real-behavior-proof-policy.test.ts +++ b/test/scripts/real-behavior-proof-policy.test.ts @@ -116,6 +116,45 @@ describe("real-behavior-proof-policy", () => { expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]); }); + it("accepts source PR proof when explicit gaps live in out-of-scope follow-ups", () => { + const body = [ + "## Real behavior proof", + "", + '- Behavior addressed: Cron/provider thinking validation no longer downgrades `google/gemini-3-flash-preview` `thinkingDefault: "low"` to `"off"` when cached catalog metadata says `reasoning:false` but the Google provider policy says Gemini 3 supports low thinking.', + "- Real environment tested: Local macOS source checkout, Node v24.8.0, OpenClaw 2026.5.21 (c8a35c4), local `openclaw` shim pointed at the freshly built checkout. No channel credentials or provider API keys were used.", + "- Exact steps or command run after this patch:", + " 1. Built the local checkout with `node scripts/build-all.mjs`.", + " 2. Updated `/Users/example/.local/bin/openclaw` to run this checkout's `openclaw.mjs` and verified `/Users/example/.local/bin/openclaw --version`.", + " 3. Ran a redacted behavior probe for the reported cron validation decision with `provider=google`, `model=gemini-3-flash-preview`, `configuredThinkingDefault=low`, and `catalogReasoning=false`.", + '- Evidence after fix: `.artifacts/behavior-85156/after-installed.json` from the local checkout recorded `lowSupported: true` and `fallbackFromLow: "low"`.', + "- Observed result after fix:", + " - `levels: off, minimal, low, medium, adaptive, high`", + " - `lowSupported: true`", + " - `fallbackFromLow: low`", + " - `local command version: OpenClaw 2026.5.21 (c8a35c4)`", + "", + "## Out-of-scope Follow-ups", + "- No live systemd cron schedule is added in this PR.", + "- No real Google provider request is sent in this PR.", + "- No catalog refresh or provider model-list behavior is changed in this PR.", + "- No channel, gateway allowlist, credential, or auth-profile behavior is changed in this PR.", + ].join("\n"); + const evaluation = evaluateRealBehaviorProof({ + pullRequest: externalPr(body), + }); + + expect(evaluation.status).toBe("passed"); + expect(evaluation.fields?.notTested).toBe( + [ + "- No live systemd cron schedule is added in this PR.", + "- No real Google provider request is sent in this PR.", + "- No catalog refresh or provider model-list behavior is changed in this PR.", + "- No channel, gateway allowlist, credential, or auth-profile behavior is changed in this PR.", + ].join("\n"), + ); + expect(labelsForRealBehaviorProof(evaluation)).toEqual([PROOF_SUPPLIED_LABEL]); + }); + it("fails external PRs without a real behavior proof section", () => { const evaluation = evaluateRealBehaviorProof({ pullRequest: externalPr("## Summary\n\n- Fixed startup."),