From 4cd0207519e00bc67cebeb880148fa5e9a3d250d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 9 May 2026 23:51:46 +0100 Subject: [PATCH] fix: stabilize Gemini default and Bedrock thinking policy --- CHANGELOG.md | 2 + docs/providers/google.md | 2 +- extensions/amazon-bedrock/index.test.ts | 49 ++++++++++++------- .../provider-policy-api.test.ts | 41 ++++++++++++++++ .../amazon-bedrock/provider-policy-api.ts | 9 ++++ .../amazon-bedrock/register.sync.runtime.ts | 34 +------------ extensions/amazon-bedrock/thinking-policy.ts | 32 ++++++++++++ extensions/google/onboard.ts | 2 +- src/commands/auth-choice.test.ts | 2 +- 9 files changed, 119 insertions(+), 54 deletions(-) create mode 100644 extensions/amazon-bedrock/provider-policy-api.test.ts create mode 100644 extensions/amazon-bedrock/provider-policy-api.ts create mode 100644 extensions/amazon-bedrock/thinking-policy.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 8daee590c60..7a3dd7ad565 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -181,6 +181,8 @@ Docs: https://docs.openclaw.ai ### Fixes +- Google/Gemini: default new API-key onboarding to stable `google/gemini-2.5-flash` instead of the preview Pro route, reducing surprise daily quota exhaustion. Fixes #79670. Thanks @HugeBunny. +- Amazon Bedrock: expose Claude thinking profiles through the lightweight provider policy surface so `/think:adaptive` validates before the Bedrock runtime plugin is loaded. Fixes #79754. Thanks @phoenixyy and @hclsys. - Codex/transcripts: mirror dynamic tool calls and outputs into Codex app-server transcripts so tool activity is visible alongside assistant text instead of being elided, with per-item output capped at 12,000 characters. (#79952) Thanks @scoootscooob. - Memory: close temp SQLite handles before failed atomic reindex cleanup and retry Windows EBUSY/EPERM/EACCES temp file removals, so `memory index --force` does not abort or leave temp sidecars on locked filesystems. Fixes #79708. Thanks @LobsterFarmerAmp and @hclsys. - Agents/CLI: add an explicit `reseedFromRawTranscriptWhenUncompacted` backend opt-in so safe invalidated CLI sessions can reseed from a bounded raw OpenClaw transcript tail before compaction while auth-boundary resets remain no-raw. Fixes #79713. (#79764) Thanks @hclsys. diff --git a/docs/providers/google.md b/docs/providers/google.md index 3426ff8599c..84dca3b83a2 100644 --- a/docs/providers/google.md +++ b/docs/providers/google.md @@ -44,7 +44,7 @@ Choose your preferred auth method and follow the setup steps. { agents: { defaults: { - model: { primary: "google/gemini-3.1-pro-preview" }, + model: { primary: "google/gemini-2.5-flash" }, }, }, } diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts index 240b78a09b9..c88f59a8d63 100644 --- a/extensions/amazon-bedrock/index.test.ts +++ b/extensions/amazon-bedrock/index.test.ts @@ -7,6 +7,7 @@ import { registerSingleProviderPlugin, } from "openclaw/plugin-sdk/plugin-test-runtime"; import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { withEnvAsync } from "../../src/test-utils/env.js"; import { setAwsSharedIniFileLoaderForTest } from "./aws-credential-refresh.js"; import { resetBedrockDiscoveryCacheForTest } from "./discovery.js"; import amazonBedrockPlugin from "./index.js"; @@ -338,28 +339,38 @@ describe("amazon-bedrock provider plugin", () => { }); it("refreshes AWS shared config cache before Bedrock sends", async () => { - const order: string[] = []; - refreshSharedConfigCache.mockImplementationOnce(async () => { - order.push("refresh"); - }); - const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); - const wrapped = provider.wrapStreamFn?.({ - provider: "amazon-bedrock", - modelId: ANTHROPIC_MODEL, - streamFn: spyStreamFn, - } as never); - const result = wrapped?.(ANTHROPIC_MODEL_DESCRIPTOR, { messages: [] } as never, { - onPayload: () => { - order.push("original"); + await withEnvAsync( + { + AWS_ACCESS_KEY_ID: undefined, + AWS_SECRET_ACCESS_KEY: undefined, + AWS_BEARER_TOKEN_BEDROCK: undefined, + AWS_BEDROCK_SKIP_AUTH: undefined, }, - }) as Record | undefined; + async () => { + const order: string[] = []; + refreshSharedConfigCache.mockImplementationOnce(async () => { + order.push("refresh"); + }); + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: ANTHROPIC_MODEL, + streamFn: spyStreamFn, + } as never); + const result = wrapped?.(ANTHROPIC_MODEL_DESCRIPTOR, { messages: [] } as never, { + onPayload: () => { + order.push("original"); + }, + }) as Record | undefined; - await ( - result?.onPayload as ((p: Record, model: unknown) => unknown) | undefined - )?.({}, ANTHROPIC_MODEL_DESCRIPTOR); + await ( + result?.onPayload as ((p: Record, model: unknown) => unknown) | undefined + )?.({}, ANTHROPIC_MODEL_DESCRIPTOR); - expect(refreshSharedConfigCache).toHaveBeenCalledWith({ ignoreCache: true }); - expect(order).toEqual(["refresh", "original"]); + expect(refreshSharedConfigCache).toHaveBeenCalledWith({ ignoreCache: true }); + expect(order).toEqual(["refresh", "original"]); + }, + ); }); it("omits temperature for Bedrock Opus 4.7 model ids", async () => { diff --git a/extensions/amazon-bedrock/provider-policy-api.test.ts b/extensions/amazon-bedrock/provider-policy-api.test.ts new file mode 100644 index 00000000000..2356b435725 --- /dev/null +++ b/extensions/amazon-bedrock/provider-policy-api.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from "vitest"; +import { resolveThinkingProfile } from "./provider-policy-api.js"; + +describe("amazon-bedrock provider-policy-api", () => { + it("exposes adaptive thinking for Bedrock Claude 4.6 before runtime registration", () => { + expect( + resolveThinkingProfile({ + provider: "amazon-bedrock", + modelId: "amazon-bedrock/global.anthropic.claude-opus-4-6-v1", + }), + ).toMatchObject({ + levels: expect.arrayContaining([{ id: "adaptive" }]), + defaultLevel: "adaptive", + }); + }); + + it("exposes max thinking for Bedrock Claude Opus 4.7 refs", () => { + expect( + resolveThinkingProfile({ + provider: "amazon-bedrock", + modelId: + "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-7", + })?.levels.map((level) => level.id), + ).toEqual(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]); + }); + + it.each(["bedrock", "aws-bedrock"])("accepts provider alias %s", (provider) => { + expect( + resolveThinkingProfile({ + provider, + modelId: "global.anthropic.claude-opus-4-6-v1", + })?.levels.map((level) => level.id), + ).toContain("adaptive"); + }); + + it("ignores unrelated providers", () => { + expect( + resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-6" }), + ).toBeNull(); + }); +}); diff --git a/extensions/amazon-bedrock/provider-policy-api.ts b/extensions/amazon-bedrock/provider-policy-api.ts new file mode 100644 index 00000000000..eeef3c1081c --- /dev/null +++ b/extensions/amazon-bedrock/provider-policy-api.ts @@ -0,0 +1,9 @@ +import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared"; +import { resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js"; + +export function resolveThinkingProfile(params: { provider: string; modelId: string }) { + if (normalizeProviderId(params.provider) !== "amazon-bedrock") { + return null; + } + return resolveBedrockClaudeThinkingProfile(params.modelId); +} diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts index 01740101e44..6bf00c08139 100644 --- a/extensions/amazon-bedrock/register.sync.runtime.ts +++ b/extensions/amazon-bedrock/register.sync.runtime.ts @@ -1,7 +1,7 @@ import type { StreamFn } from "@mariozechner/pi-agent-core"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types"; import { resolvePluginConfigObject } from "openclaw/plugin-sdk/plugin-config-runtime"; -import type { OpenClawPluginApi, ProviderThinkingProfile } from "openclaw/plugin-sdk/plugin-entry"; +import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry"; import { ANTHROPIC_BY_MODEL_REPLAY_HOOKS, normalizeProviderId, @@ -14,6 +14,7 @@ import { import { refreshAwsSharedConfigCacheForBedrock } from "./aws-credential-refresh.js"; import { mergeImplicitBedrockProvider, resolveBedrockConfigApiKey } from "./discovery-shared.js"; import { bedrockMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js"; +import { isOpus47BedrockModelRef, resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js"; type GuardrailConfig = { guardrailIdentifier: string; @@ -182,12 +183,6 @@ function resolvedModelSupportsCaching(modelArn: string): boolean { return matchesPiAiPromptCachingModelId(modelArn); } -function isOpus47BedrockModelRef(modelRef: string): boolean { - return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test( - modelRef, - ); -} - /** * Resolve the underlying foundation model for an application inference profile * via GetInferenceProfile. Results are cached so we only call the API once per @@ -344,14 +339,6 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { // Keep registration-local constants inside the function so partial module // initialization during test bootstrap cannot trip TDZ reads. const providerId = "amazon-bedrock"; - const claude46ModelRe = /claude-(?:opus|sonnet)-4(?:\.|-)6(?:$|[-.])/i; - const baseClaudeThinkingLevels = [ - { id: "off" }, - { id: "minimal" }, - { id: "low" }, - { id: "medium" }, - { id: "high" }, - ] as const satisfies ProviderThinkingProfile["levels"]; // Match region from bedrock-runtime (Converse API) URLs. // e.g. https://bedrock-runtime.us-east-1.amazonaws.com const bedrockRegionRe = /bedrock-runtime\.([a-z0-9-]+)\.amazonaws\./; @@ -365,23 +352,6 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { const anthropicByModelReplayHooks = ANTHROPIC_BY_MODEL_REPLAY_HOOKS; const startupPluginConfig = (api.pluginConfig ?? {}) as AmazonBedrockPluginConfig; - function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile { - const trimmed = modelId.trim(); - if (isOpus47BedrockModelRef(trimmed)) { - return { - levels: [...baseClaudeThinkingLevels, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], - defaultLevel: "off", - }; - } - if (claude46ModelRe.test(trimmed)) { - return { - levels: [...baseClaudeThinkingLevels, { id: "adaptive" }], - defaultLevel: "adaptive", - }; - } - return { levels: baseClaudeThinkingLevels }; - } - function resolveCurrentPluginConfig( config: OpenClawConfig | undefined, ): AmazonBedrockPluginConfig | undefined { diff --git a/extensions/amazon-bedrock/thinking-policy.ts b/extensions/amazon-bedrock/thinking-policy.ts new file mode 100644 index 00000000000..90a71f7d494 --- /dev/null +++ b/extensions/amazon-bedrock/thinking-policy.ts @@ -0,0 +1,32 @@ +import type { ProviderThinkingProfile } from "openclaw/plugin-sdk/plugin-entry"; + +const BASE_CLAUDE_THINKING_LEVELS = [ + { id: "off" }, + { id: "minimal" }, + { id: "low" }, + { id: "medium" }, + { id: "high" }, +] as const satisfies ProviderThinkingProfile["levels"]; + +export function isOpus47BedrockModelRef(modelRef: string): boolean { + return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test( + modelRef, + ); +} + +export function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile { + const trimmed = modelId.trim(); + if (isOpus47BedrockModelRef(trimmed)) { + return { + levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], + defaultLevel: "off", + }; + } + if (/claude-(?:opus|sonnet)-4(?:\.|-)6(?:$|[-.])/i.test(trimmed)) { + return { + levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }], + defaultLevel: "adaptive", + }; + } + return { levels: BASE_CLAUDE_THINKING_LEVELS }; +} diff --git a/extensions/google/onboard.ts b/extensions/google/onboard.ts index 5c6dd39d73f..e5a418d816d 100644 --- a/extensions/google/onboard.ts +++ b/extensions/google/onboard.ts @@ -3,7 +3,7 @@ import { type OpenClawConfig, } from "openclaw/plugin-sdk/provider-onboard"; -export const GOOGLE_GEMINI_DEFAULT_MODEL = "google/gemini-3.1-pro-preview"; +export const GOOGLE_GEMINI_DEFAULT_MODEL = "google/gemini-2.5-flash"; export function applyGoogleGeminiModelDefault(cfg: OpenClawConfig): { next: OpenClawConfig; diff --git a/src/commands/auth-choice.test.ts b/src/commands/auth-choice.test.ts index 54ca11c1075..2ea0636eb8c 100644 --- a/src/commands/auth-choice.test.ts +++ b/src/commands/auth-choice.test.ts @@ -19,7 +19,7 @@ import { type DetectZaiEndpoint = typeof import("../plugins/provider-zai-endpoint.js").detectZaiEndpoint; -const GOOGLE_GEMINI_DEFAULT_MODEL = "google/gemini-3.1-pro-preview"; +const GOOGLE_GEMINI_DEFAULT_MODEL = "google/gemini-2.5-flash"; const ZAI_CODING_GLOBAL_BASE_URL = "https://api.z.ai/api/coding/paas/v4"; const ZAI_CODING_CN_BASE_URL = "https://open.bigmodel.cn/api/coding/paas/v4";