diff --git a/scripts/github/real-behavior-proof-policy.mjs b/scripts/github/real-behavior-proof-policy.mjs index c76ab5b9bf1..b4242a91276 100644 --- a/scripts/github/real-behavior-proof-policy.mjs +++ b/scripts/github/real-behavior-proof-policy.mjs @@ -6,6 +6,7 @@ export const MOCK_ONLY_PROOF_LABEL = "triage: mock-only-proof"; export const MAINTAINER_TEAM_SLUG = "maintainer"; export const CLAWSWEEPER_PROOF_VERDICT_STATUS = "clawsweeper_exact_head_pass"; +const CLAWSWEEPER_BOT_LOGIN = "clawsweeper[bot]"; const privilegedAuthorAssociations = new Set(["OWNER", "MEMBER", "COLLABORATOR"]); @@ -246,7 +247,14 @@ function isTrustedClawSweeperComment(comment) { const appSlug = String( comment?.performed_via_github_app?.slug ?? comment?.performedViaGithubApp?.slug ?? "", ).toLowerCase(); - return appSlug === "clawsweeper"; + if (appSlug === "clawsweeper") { + return true; + } + // GitHub can omit performed_via_github_app on issue comments while still + // returning the reserved App bot identity. + const login = String(comment?.user?.login ?? "").toLowerCase(); + const userType = String(comment?.user?.type ?? ""); + return login === CLAWSWEEPER_BOT_LOGIN && userType === "Bot"; } export function hasClawSweeperExactHeadProof({ pullRequest, comments = [] } = {}) { diff --git a/src/cron/isolated-agent.model-overrides.test.ts b/src/cron/isolated-agent.model-overrides.test.ts index 0cf67de28ac..7d3d889c0ae 100644 --- a/src/cron/isolated-agent.model-overrides.test.ts +++ b/src/cron/isolated-agent.model-overrides.test.ts @@ -1,4 +1,5 @@ import "./isolated-agent.mocks.js"; +import path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { loadModelCatalog } from "../agents/model-catalog.js"; import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; @@ -21,7 +22,7 @@ import * as isolatedAgentRunRuntime from "./isolated-agent/run.runtime.js"; function installThinkingTestProviders() { const registry = createTestRegistry(); - registry.providers = ["anthropic", "openai", "openrouter"].map( + registry.providers = ["anthropic", "google", "openai", "openrouter"].map( (providerId): PluginProviderRegistration => ({ pluginId: providerId, source: "test", @@ -29,10 +30,18 @@ function installThinkingTestProviders() { id: providerId, label: providerId, auth: [], - resolveThinkingProfile: () => ({ - levels: BASE_THINKING_LEVELS.map((id) => ({ id })), - defaultLevel: "off", - }), + resolveThinkingProfile: ({ modelId }) => + providerId === "google" && modelId === "gemini-3-flash-preview" + ? { + levels: (["off", "minimal", "low", "medium", "adaptive", "high"] as const).map( + (id) => ({ id }), + ), + preserveWhenCatalogReasoningFalse: true, + } + : { + levels: BASE_THINKING_LEVELS.map((id) => ({ id })), + defaultLevel: "off", + }, }, }), ); @@ -253,4 +262,38 @@ describe("runCronIsolatedAgentTurn model overrides", () => { expect(callArgs?.thinkLevel).toBe("low"); }); }); + + it("keeps configured Gemini 3 cron thinking when catalog reasoning metadata is stale", async () => { + await withTempHome(async (home) => { + vi.mocked(isolatedAgentRunRuntime.resolveThinkingDefault).mockReturnValueOnce("low"); + vi.mocked(loadModelCatalog).mockResolvedValueOnce([ + { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview", + provider: "google", + reasoning: false, + }, + ]); + + await runCronTurn(home, { + cfgOverrides: { + agents: { + defaults: { + model: "google/gemini-3-flash-preview", + workspace: path.join(home, "openclaw"), + thinkingDefault: "low", + }, + }, + }, + jobPayload: DEFAULT_AGENT_TURN_PAYLOAD, + mockTexts: ["done"], + }); + + const calls = vi.mocked(runEmbeddedPiAgent).mock.calls; + const callArgs = calls[calls.length - 1]?.[0]; + expect(callArgs?.provider).toBe("google"); + expect(callArgs?.model).toBe("gemini-3-flash-preview"); + expect(callArgs?.thinkLevel).toBe("low"); + }); + }); }); diff --git a/test/scripts/real-behavior-proof-policy.test.ts b/test/scripts/real-behavior-proof-policy.test.ts index 01286df72cb..7ce18a4b24f 100644 --- a/test/scripts/real-behavior-proof-policy.test.ts +++ b/test/scripts/real-behavior-proof-policy.test.ts @@ -234,7 +234,7 @@ describe("real-behavior-proof-policy", () => { expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false); }); - it("rejects bot-shaped ClawSweeper pass verdict markers without the GitHub App source", () => { + it("accepts exact ClawSweeper bot pass verdict markers when GitHub omits the app source", () => { const pullRequest = { number: 83581, head: { @@ -251,6 +251,27 @@ describe("real-behavior-proof-policy", () => { }, ]; + expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(true); + expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(true); + }); + + it("rejects bot-shaped pass verdict markers from other bot users", () => { + const pullRequest = { + number: 83581, + head: { + sha: "06ee95df6608d29a395c52ba8ab53fdd93a9dc4f", + }, + }; + const comments = [ + { + user: { + login: "not-clawsweeper[bot]", + type: "Bot", + }, + body: "", + }, + ]; + expect(hasClawSweeperExactHeadProof({ pullRequest, comments })).toBe(false); expect(evaluateClawSweeperExactHeadProof({ pullRequest, comments }).passed).toBe(false); });