From 771846c5fa8b7225a52ed051623f641609cb2922 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 20:36:05 +0100 Subject: [PATCH] fix(bedrock): omit Opus temperature for profiles --- extensions/amazon-bedrock/index.test.ts | 159 ++++++++++++++++++ .../amazon-bedrock/register.sync.runtime.ts | 117 ++++++++++--- 2 files changed, 252 insertions(+), 24 deletions(-) diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts index a29f0d2bda0..491d0476671 100644 --- a/extensions/amazon-bedrock/index.test.ts +++ b/extensions/amazon-bedrock/index.test.ts @@ -296,6 +296,105 @@ describe("amazon-bedrock provider plugin", () => { }); }); + it("omits temperature for Bedrock Opus 4.7 model ids", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-7", + streamFn: spyStreamFn, + } as never); + + expect( + wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-opus-4-7", + } as never, + { messages: [] } as never, + { temperature: 0.2, maxTokens: 10 }, + ), + ).toEqual({ maxTokens: 10 }); + }); + + it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4.7-v1:0", + streamFn: spyStreamFn, + } as never); + + expect( + wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-opus-4.7-v1:0", + } as never, + { messages: [] } as never, + { temperature: 0.2, maxTokens: 10 }, + ), + ).toEqual({ maxTokens: 10 }); + }); + + it("omits temperature for named Bedrock Opus 4.7 inference profile ARNs", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const modelId = + "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-7"; + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId, + streamFn: spyStreamFn, + } as never); + + expect( + wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: modelId, + } as never, + { messages: [] } as never, + { temperature: 0, region: "us-west-2" } as never, + ), + ).toEqual({ region: "us-west-2" }); + }); + + it("omits temperature for non-US Bedrock Opus 4.7 regional profiles", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "eu.anthropic.claude-opus-4-7", + streamFn: spyStreamFn, + } as never); + + expect( + wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "eu.anthropic.claude-opus-4-7", + } as never, + { messages: [] } as never, + { temperature: 0.4, maxTokens: 12 }, + ), + ).toEqual({ maxTokens: 12 }); + }); + + it("classifies nested Bedrock deprecated-temperature validation as format failover", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + + expect( + provider.classifyFailoverReason?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-7", + errorMessage: + 'ValidationException: The model returned the following errors: {"type":"error","error":{"type":"invalid_request_error","message":"`temperature` is deprecated for this model."}}', + } as never), + ).toBe("format"); + }); + describe("guardrail config schema", () => { it("defines discovery and guardrail objects with the expected shape", () => { const pluginJson = JSON.parse( @@ -747,6 +846,66 @@ describe("amazon-bedrock provider plugin", () => { expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]); }); + it("omits temperature for opaque application inference profile ARNs that resolve to Opus 4.7", async () => { + const modelId = + "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/z27qyso459dd"; + inferenceProfileGetResults.push({ + models: [ + { + modelArn: "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-opus-4.7-v1:0", + }, + ], + }); + const provider = await registerWithConfig(undefined); + const payload: Record = { + inferenceConfig: { temperature: 0.3, maxTokens: 10 }, + system: [{ text: "You are helpful." }], + messages: [{ role: "user", content: [{ text: "Hello" }] }], + }; + + await callWrappedStreamWithPayload( + provider, + modelId, + makeAppInferenceProfileDescriptor(modelId), + { temperature: 0.3, maxTokens: 10, cacheRetention: "none" }, + payload, + ); + + expect(payload.inferenceConfig).toEqual({ maxTokens: 10 }); + expect(sendBedrockCommand).toHaveBeenCalledTimes(1); + expect(bedrockClientConfigs).toEqual([{ region: "us-west-2" }]); + }); + + it("omits temperature for Claude-named application inference profile ARNs that resolve to Opus 4.7", async () => { + inferenceProfileGetResults.push({ + models: [ + { + modelArn: "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4-7-v1:0", + }, + ], + }); + const provider = await registerWithConfig(undefined); + const payload: Record = { + inferenceConfig: { temperature: 0.3, maxTokens: 10 }, + system: [{ text: "You are helpful." }], + messages: [{ role: "user", content: [{ text: "Hello" }] }], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + { temperature: 0.3, maxTokens: 10, cacheRetention: "short" }, + payload, + ); + + const system = payload.system as Array>; + expect(payload.inferenceConfig).toEqual({ maxTokens: 10 }); + expect(system[1]).toEqual({ cachePoint: { type: "default" } }); + expect(sendBedrockCommand).toHaveBeenCalledTimes(1); + expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]); + }); + it("does not inject cache points when any resolved profile target is not cacheable", async () => { const modelId = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db"; diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts index 603dffc3783..8fc6dc9885a 100644 --- a/extensions/amazon-bedrock/register.sync.runtime.ts +++ b/extensions/amazon-bedrock/register.sync.runtime.ts @@ -144,15 +144,27 @@ function resolvedModelSupportsCaching(modelArn: string): boolean { return matchesPiAiPromptCachingModelId(modelArn); } +function isOpus47BedrockModelRef(modelRef: string): boolean { + return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test( + modelRef, + ); +} + /** * Resolve the underlying foundation model for an application inference profile * via GetInferenceProfile. Results are cached so we only call the API once per - * profile ARN. Returns true if the underlying model supports prompt caching. + * profile ARN. Returns traits needed for request shaping when the model id is + * otherwise opaque. * * Region is extracted from the profile ARN itself to avoid mismatches when * the OpenClaw config region differs from the profile's home region. */ -const appProfileCacheEligibleCache = new Map(); +type BedrockAppProfileTraits = { + cacheEligible: boolean; + omitTemperature: boolean; +}; + +const appProfileTraitsCache = new Map(); type BedrockGetInferenceProfileResponse = { models?: Array<{ modelArn?: string }>; @@ -169,7 +181,7 @@ type BedrockControlPlaneFactory = (region: string | undefined) => BedrockControl let bedrockControlPlaneOverride: BedrockControlPlaneFactory | undefined; export function resetBedrockAppProfileCacheEligibilityForTest(): void { - appProfileCacheEligibleCache.clear(); + appProfileTraitsCache.clear(); } export function setBedrockAppProfileControlPlaneForTest( @@ -190,27 +202,34 @@ async function createBedrockControlPlane(region: string | undefined): Promise { - if (appProfileCacheEligibleCache.has(modelId)) { - return appProfileCacheEligibleCache.get(modelId)!; +): Promise { + const cached = appProfileTraitsCache.get(modelId); + if (cached) { + return cached; } try { const region = extractRegionFromArn(modelId) ?? fallbackRegion; const controlPlane = await createBedrockControlPlane(region); const resp = await controlPlane.getInferenceProfile({ inferenceProfileIdentifier: modelId }); const models = resp.models ?? []; - const eligible = - models.length > 0 && - models.every((m: { modelArn?: string }) => resolvedModelSupportsCaching(m.modelArn ?? "")); - appProfileCacheEligibleCache.set(modelId, eligible); - return eligible; + const modelArns = models.map((m: { modelArn?: string }) => m.modelArn ?? ""); + const traits = { + cacheEligible: + models.length > 0 && modelArns.every((modelArn) => resolvedModelSupportsCaching(modelArn)), + omitTemperature: modelArns.some(isOpus47BedrockModelRef), + }; + appProfileTraitsCache.set(modelId, traits); + return traits; } catch { // Transient failures (throttling, network, IAM) should not be cached — // return the heuristic fallback but allow retry on the next request. - return isAnthropicBedrockModel(modelId); + return { + cacheEligible: isAnthropicBedrockModel(modelId), + omitTemperature: isOpus47BedrockModelRef(modelId), + }; } } @@ -279,6 +298,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { /ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)/i, /ModelStreamErrorException.*(?:Input is too long|too many input tokens)/i, ] as const; + const deprecatedTemperatureValidationRe = + /ValidationException[\s\S]*(?:invalid_request_error[\s\S]*)?temperature[\s\S]*deprecated|ValidationException[\s\S]*deprecated[\s\S]*temperature/i; const anthropicByModelReplayHooks = ANTHROPIC_BY_MODEL_REPLAY_HOOKS; const startupPluginConfig = (api.pluginConfig ?? {}) as AmazonBedrockPluginConfig; @@ -306,6 +327,26 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { return createBedrockNoCacheWrapper(streamFn); }; + function omitDeprecatedOpus47Temperature( + modelId: string, + options: TOptions, + ): TOptions { + if (!isOpus47BedrockModelRef(modelId) || !("temperature" in options)) { + return options; + } + const next = { ...options } as typeof options & { temperature?: unknown }; + delete next.temperature; + return next; + } + + function omitDeprecatedOpus47PayloadTemperature(payload: Record): void { + const inferenceConfig = payload.inferenceConfig; + if (!inferenceConfig || typeof inferenceConfig !== "object") { + return; + } + delete (inferenceConfig as Record).temperature; + } + /** Extract the AWS region from a bedrock-runtime baseUrl. */ function extractRegionFromBaseUrl(baseUrl: string | undefined): string | undefined { if (!baseUrl) { @@ -386,12 +427,13 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl); const mayNeedCacheInjection = isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId); + const shouldOmitTemperature = isOpus47BedrockModelRef(modelId); // For known Anthropic models (heuristic match), enable injection immediately. // For opaque profile IDs, we'll resolve via GetInferenceProfile on first call. const heuristicMatch = needsCachePointInjection(modelId); - if (!region && !mayNeedCacheInjection) { + if (!region && !mayNeedCacheInjection && !shouldOmitTemperature) { return wrapped; } @@ -400,7 +442,10 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { return wrapped; } return (streamModel, context, options) => { - const merged = Object.assign({}, options, region ? { region } : {}); + const merged = omitDeprecatedOpus47Temperature( + modelId, + Object.assign({}, options, region ? { region } : {}), + ); if (!mayNeedCacheInjection) { return underlying(streamModel, context, merged); @@ -416,25 +461,46 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { // want caching enabled, so defaulting to "short" is the safer behavior. const cacheRetention = typeof merged.cacheRetention === "string" ? merged.cacheRetention : "short"; + const originalOnPayload = merged.onPayload as + | ((payload: unknown, model: unknown) => unknown) + | undefined; if (heuristicMatch) { - // Fast path: ARN heuristic already identified this as Claude. - return streamWithPayloadPatch(underlying, streamModel, context, merged, (payload) => { - injectBedrockCachePoints(payload, cacheRetention); + // Fast path: ARN heuristic already identified this as Claude, but the + // concrete target may still need profile traits for Opus 4.7 payloads. + const mayNeedTemperatureTrait = "temperature" in merged; + return underlying(streamModel, context, { + ...merged, + onPayload: async (payload: unknown, payloadModel: unknown) => { + if (payload && typeof payload === "object") { + const payloadRecord = payload as Record; + injectBedrockCachePoints(payloadRecord, cacheRetention); + if (mayNeedTemperatureTrait) { + const traits = await resolveAppProfileTraits(modelId, region); + if (traits.omitTemperature) { + omitDeprecatedOpus47PayloadTemperature(payloadRecord); + } + } + } + return originalOnPayload?.(payload, payloadModel); + }, }); } // Slow path: opaque profile ID — resolve underlying model via API (cached). // pi-ai's onPayload supports async, so we await the resolution inline. - const originalOnPayload = merged.onPayload as - | ((payload: unknown, model: unknown) => unknown) - | undefined; return underlying(streamModel, context, { ...merged, onPayload: async (payload: unknown, payloadModel: unknown) => { - const eligible = await resolveAppProfileCacheEligible(modelId, region); - if (eligible && payload && typeof payload === "object") { - injectBedrockCachePoints(payload as Record, cacheRetention); + const traits = await resolveAppProfileTraits(modelId, region); + if (payload && typeof payload === "object") { + const payloadRecord = payload as Record; + if (traits.cacheEligible) { + injectBedrockCachePoints(payloadRecord, cacheRetention); + } + if (traits.omitTemperature) { + omitDeprecatedOpus47PayloadTemperature(payloadRecord); + } } return originalOnPayload?.(payload, payloadModel); }, @@ -450,6 +516,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { if (/ModelNotReadyException/i.test(errorMessage)) { return "overloaded"; } + if (deprecatedTemperatureValidationRe.test(errorMessage)) { + return "format"; + } return undefined; }, resolveThinkingProfile: ({ modelId }) => ({