diff --git a/CHANGELOG.md b/CHANGELOG.md index a8543e358be..57836950455 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai - Discord: make auto-thread parent transcript inheritance opt-in via `channels.discord.thread.inheritParent`, keeping newly created Discord thread sessions isolated by default while preserving explicit inheritance for configured accounts. Fixes #69907. (#69986) Thanks @Blahdude. - Browser/Chrome MCP: reset cached existing-session control sessions when a `navigate_page` call times out, so one stuck navigation no longer poisons the browser profile until a gateway restart. (#69733) Thanks @ayeshakhalid192007-dev. - Browser/Chrome MCP: propagate click timeouts and abort signals to existing-session actions so a stuck click fails fast and reconnects instead of poisoning the browser tool until gateway restart. (#63524) Thanks @dongseok0. +- Amazon Bedrock/prompt caching: resolve opaque application inference profile targets before injecting Bedrock cache points, require every routed target to support explicit cache points, and retry transient profile lookups instead of caching a false negative for the rest of the process. (#69953) Thanks @anirudhmarc and @vincentkoc. - Gateway/channel health: base stale-socket recovery on provider-proven transport activity instead of inbound app-event freshness, preventing quiet Slack, Discord, Telegram, Matrix, and local-style channels from being restarted solely because no user traffic arrived. (#69833) Thanks @bek91. - OpenCode Go: canonicalize stale bundled `opencode-go` base URLs from `/go` or `/go/v1` to `/zen/go` or `/zen/go/v1`, so older generated model metadata stops hitting the 404 HTML endpoint. (#69898) - CLI/channels: honor `channels.<id>.enabled=false` as a hard read-only presence opt-out, so env vars, manifest env vars, or stale persisted auth state no longer make disabled channel plugins appear in status, doctor, or setup-only discovery. 
diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts index d21f3b867f2..ab574b77aba 100644 --- a/extensions/amazon-bedrock/index.test.ts +++ b/extensions/amazon-bedrock/index.test.ts @@ -1,12 +1,45 @@ import { readFileSync } from "node:fs"; import { resolve } from "node:path"; -import { describe, expect, it } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../src/config/config.js"; import { buildPluginApi } from "../../src/plugins/api-builder.js"; import type { PluginRuntime } from "../../src/plugins/runtime/types.js"; import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js"; import amazonBedrockPlugin from "./index.js"; +type InferenceProfileResult = + | { models?: Array<{ modelArn?: string }> } + | Error; + +const inferenceProfileResults: InferenceProfileResult[] = []; +const bedrockClientConfigs: Array> = []; +const sendGetInferenceProfile = vi.fn(async () => { + const next = inferenceProfileResults.shift(); + if (next instanceof Error) { + throw next; + } + return next ?? { models: [] }; +}); + +vi.mock("@aws-sdk/client-bedrock", () => { + class GetInferenceProfileCommand { + constructor(readonly input: { inferenceProfileIdentifier: string }) {} + } + + class BedrockClient { + constructor(config: Record = {}) { + bedrockClientConfigs.push(config); + } + + send = sendGetInferenceProfile; + } + + return { + BedrockClient, + GetInferenceProfileCommand, + }; +}); + type RegisteredProviderPlugin = Awaited>; /** Register the amazon-bedrock plugin with an optional pluginConfig override. 
*/ @@ -58,6 +91,22 @@ const ANTHROPIC_MODEL_DESCRIPTOR = { id: ANTHROPIC_MODEL, } as never; +const APP_INFERENCE_PROFILE_ARN = + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/my-claude-profile"; +const APP_INFERENCE_PROFILE_DESCRIPTOR = { + api: "openai-completions", + provider: "amazon-bedrock", + id: APP_INFERENCE_PROFILE_ARN, +} as never; + +function makeAppInferenceProfileDescriptor(modelId: string): never { + return { + api: "openai-completions", + provider: "amazon-bedrock", + id: modelId, + } as never; +} + /** * Call wrapStreamFn and then invoke the returned stream function, capturing * the payload via the onPayload hook that streamWithPayloadPatch installs. @@ -92,6 +141,12 @@ function callWrappedStream( } describe("amazon-bedrock provider plugin", () => { + beforeEach(() => { + inferenceProfileResults.length = 0; + bedrockClientConfigs.length = 0; + sendGetInferenceProfile.mockClear(); + }); + it("marks Claude 4.6 Bedrock models as adaptive by default", async () => { const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); @@ -302,4 +357,352 @@ describe("amazon-bedrock provider plugin", () => { expect(result).toMatchObject({ cacheRetention: "none" }); }); }); + + describe("application inference profile cache point injection", () => { + /** + * Invoke wrapStreamFn with a payload containing system/messages, then + * trigger onPayload to capture the patched payload. 
+ */ + async function callWrappedStreamWithPayload( + provider: RegisteredProviderPlugin, + modelId: string, + modelDescriptor: never, + options: Record, + payload: Record, + ): Promise> { + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId, + streamFn: spyStreamFn, + } as never); + + const result = wrapped?.(modelDescriptor, { messages: [] } as never, options) as unknown as Record< + string, + unknown + >; + + if (typeof result?.onPayload === "function") { + await ( + result.onPayload as (p: Record, model: unknown) => Promise + )(payload, modelDescriptor); + } + return payload; + } + + it("injects cache points for application inference profile ARNs", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." }], + messages: [ + { role: "user", content: [{ text: "Hello" }] }, + ], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + { cacheRetention: "short" }, + payload, + ); + + const system = payload.system as Array>; + expect(system).toHaveLength(2); + expect(system[1]).toEqual({ cachePoint: { type: "default" } }); + + const messages = payload.messages as Array<{ role: string; content: Array> }>; + const lastUserContent = messages[0].content; + expect(lastUserContent).toHaveLength(2); + expect(lastUserContent[1]).toEqual({ cachePoint: { type: "default" } }); + }); + + it("uses long TTL when cacheRetention is 'long'", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." 
}], + messages: [ + { role: "user", content: [{ text: "Hello" }] }, + ], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + { cacheRetention: "long" }, + payload, + ); + + const system = payload.system as Array>; + expect(system[1]).toEqual({ cachePoint: { type: "default", ttl: "1h" } }); + }); + + it("does not inject cache points when cacheRetention is 'none'", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." }], + messages: [ + { role: "user", content: [{ text: "Hello" }] }, + ], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + { cacheRetention: "none" }, + payload, + ); + + const system = payload.system as Array>; + expect(system).toHaveLength(1); + }); + + it("does not double-inject cache points if already present", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." }, { cachePoint: { type: "default" } }], + messages: [ + { role: "user", content: [{ text: "Hello" }, { cachePoint: { type: "default" } }] }, + ], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + { cacheRetention: "short" }, + payload, + ); + + const system = payload.system as Array>; + expect(system).toHaveLength(2); + + const messages = payload.messages as Array<{ role: string; content: Array> }>; + expect(messages[0].content).toHaveLength(2); + }); + + it("does not inject cache points for regular Anthropic model IDs (pi-ai handles them)", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." 
}], + messages: [ + { role: "user", content: [{ text: "Hello" }] }, + ], + }; + + // Regular model IDs contain "claude" so pi-ai handles caching natively. + // wrapStreamFn should not install an onPayload hook for these. + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: ANTHROPIC_MODEL, + streamFn: spyStreamFn, + } as never); + + const result = wrapped?.(ANTHROPIC_MODEL_DESCRIPTOR, { messages: [] } as never, { + cacheRetention: "short", + }) as unknown as Record; + + // For regular Anthropic models, no onPayload should be installed for cache injection. + if (typeof result?.onPayload === "function") { + (result.onPayload as (p: Record) => void)(payload); + } + + const system = payload.system as Array>; + expect(system).toHaveLength(1); + }); + + it("does not inject cache points for older Claude models not in pi-ai's cache list", async () => { + const provider = await registerWithConfig(undefined); + const oldClaudeModel = "anthropic.claude-3-opus-20240229-v1:0"; + const payload: Record = { + system: [{ text: "You are helpful." }], + messages: [ + { role: "user", content: [{ text: "Hello" }] }, + ], + }; + + // Claude 3 Opus is not in pi-ai's supportsPromptCaching list, but it's + // also not an application inference profile — we should not inject. + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: oldClaudeModel, + streamFn: spyStreamFn, + } as never); + + const result = wrapped?.({ id: oldClaudeModel } as never, { messages: [] } as never, { + cacheRetention: "short", + }) as unknown as Record; + + if (typeof result?.onPayload === "function") { + (result.onPayload as (p: Record) => void)(payload); + } + + const system = payload.system as Array>; + expect(system).toHaveLength(1); + }); + + it("defaults to 'short' cache retention when not explicitly set", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." 
}], + messages: [ + { role: "user", content: [{ text: "Hello" }] }, + ], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + {}, + payload, + ); + + const system = payload.system as Array>; + expect(system).toHaveLength(2); + // Default is "short" which means no ttl field + expect(system[1]).toEqual({ cachePoint: { type: "default" } }); + }); + + it("injects cache point only on last USER message", async () => { + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." }], + messages: [ + { role: "user", content: [{ text: "First question" }] }, + { role: "assistant", content: [{ text: "Answer" }] }, + { role: "user", content: [{ text: "Follow-up" }] }, + ], + }; + + await callWrappedStreamWithPayload( + provider, + APP_INFERENCE_PROFILE_ARN, + APP_INFERENCE_PROFILE_DESCRIPTOR, + { cacheRetention: "short" }, + payload, + ); + + const messages = payload.messages as Array<{ role: string; content: Array> }>; + // First user message should NOT have a cache point + expect(messages[0].content).toHaveLength(1); + // Assistant message untouched + expect(messages[1].content).toHaveLength(1); + // Last user message should have a cache point + expect(messages[2].content).toHaveLength(2); + expect(messages[2].content[1]).toEqual({ cachePoint: { type: "default" } }); + }); + + it("injects cache points for opaque application inference profile ARNs after profile lookup", async () => { + const modelId = + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da"; + inferenceProfileResults.push({ + models: [ + { + modelArn: + "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0", + }, + ], + }); + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." 
}], + messages: [{ role: "user", content: [{ text: "Hello" }] }], + }; + + await callWrappedStreamWithPayload( + provider, + modelId, + makeAppInferenceProfileDescriptor(modelId), + { cacheRetention: "short" }, + payload, + ); + + const system = payload.system as Array>; + expect(system[1]).toEqual({ cachePoint: { type: "default" } }); + expect(sendGetInferenceProfile).toHaveBeenCalledTimes(1); + expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]); + }); + + it("does not inject cache points when any resolved profile target is not cacheable", async () => { + const modelId = + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db"; + inferenceProfileResults.push({ + models: [ + { + modelArn: + "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0", + }, + { + modelArn: + "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-opus-20240229-v1:0", + }, + ], + }); + const provider = await registerWithConfig(undefined); + const payload: Record = { + system: [{ text: "You are helpful." }], + messages: [{ role: "user", content: [{ text: "Hello" }] }], + }; + + await callWrappedStreamWithPayload( + provider, + modelId, + makeAppInferenceProfileDescriptor(modelId), + { cacheRetention: "short" }, + payload, + ); + + expect(payload.system).toEqual([{ text: "You are helpful." 
}]); + expect(payload.messages).toEqual([{ role: "user", content: [{ text: "Hello" }] }]); + }); + + it("retries opaque profile lookup after a transient failure instead of caching the fallback", async () => { + const modelId = + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459dc"; + inferenceProfileResults.push( + new Error("throttled"), + { + models: [ + { + modelArn: + "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0", + }, + ], + }, + ); + const provider = await registerWithConfig(undefined); + const firstPayload: Record = { + system: [{ text: "You are helpful." }], + messages: [{ role: "user", content: [{ text: "Hello" }] }], + }; + const secondPayload: Record = { + system: [{ text: "You are helpful." }], + messages: [{ role: "user", content: [{ text: "Hello again" }] }], + }; + + await callWrappedStreamWithPayload( + provider, + modelId, + makeAppInferenceProfileDescriptor(modelId), + { cacheRetention: "short" }, + firstPayload, + ); + await callWrappedStreamWithPayload( + provider, + modelId, + makeAppInferenceProfileDescriptor(modelId), + { cacheRetention: "short" }, + secondPayload, + ); + + expect(firstPayload.system).toEqual([{ text: "You are helpful." }]); + expect(secondPayload.system).toEqual([ + { text: "You are helpful." }, + { cachePoint: { type: "default" } }, + ]); + expect(sendGetInferenceProfile).toHaveBeenCalledTimes(2); + }); + }); }); diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts index 31adb1dba3c..c930807f6f9 100644 --- a/extensions/amazon-bedrock/register.sync.runtime.ts +++ b/extensions/amazon-bedrock/register.sync.runtime.ts @@ -62,6 +62,180 @@ function createGuardrailWrapStreamFn( }; } +/** + * Mirrors the shipped pi-ai Bedrock `supportsPromptCaching` matcher. + * Keep this in sync with node_modules/@mariozechner/pi-ai/dist/providers/amazon-bedrock.js. 
+ */ +function matchesPiAiPromptCachingModelId(modelId: string): boolean { + const id = modelId.toLowerCase(); + if (!id.includes("claude")) { + return false; + } + // Claude 4.x + if (id.includes("-4-") || id.includes("-4.")) { + return true; + } + // Claude 3.7 Sonnet + if (id.includes("claude-3-7-sonnet")) { + return true; + } + // Claude 3.5 Haiku + if (id.includes("claude-3-5-haiku")) { + return true; + } + return false; +} + +function piAiWouldInjectCachePoints(modelId: string): boolean { + return matchesPiAiPromptCachingModelId(modelId); +} + +/** + * Detect Bedrock application inference profile ARNs — these are the only IDs + * where pi-ai's model-name-based checks fail because the ARN is opaque. + * System-defined profiles (us., eu., global.) and base model IDs always + * contain the model name and are handled by pi-ai natively. + */ +const BEDROCK_APP_INFERENCE_PROFILE_RE = /^arn:aws(-cn|-us-gov)?:bedrock:.*:application-inference-profile\//i; + +function isBedrockAppInferenceProfile(modelId: string): boolean { + return BEDROCK_APP_INFERENCE_PROFILE_RE.test(modelId); +} + +/** + * pi-ai's internal `supportsPromptCaching` checks `model.id` for specific Claude + * model name patterns, which fails for application inference profile ARNs (opaque + * IDs that may not contain the model name). When OpenClaw's `isAnthropicBedrockModel` + * identifies the model but pi-ai won't inject cache points, we do it via onPayload. + * + * Gated to application inference profile ARNs only — regular Claude model IDs and + * system-defined inference profiles (us.anthropic.claude-*) are left to pi-ai. + */ +function needsCachePointInjection(modelId: string): boolean { + // Only target application inference profile ARNs. + if (!isBedrockAppInferenceProfile(modelId)) { + return false; + } + // If pi-ai would already inject cache points, skip. 
+ if (piAiWouldInjectCachePoints(modelId)) { + return false; + } + // Check if OpenClaw identifies this as an Anthropic model via the ARN heuristic. + if (isAnthropicBedrockModel(modelId)) { + return true; + } + return false; +} + +/** + * Extract the region from a Bedrock ARN. + * e.g. "arn:aws:bedrock:us-east-1:123:application-inference-profile/abc" → "us-east-1" + */ +function extractRegionFromArn(arn: string): string | undefined { + const parts = arn.split(":"); + // ARN format: arn:partition:service:region:account:resource + return parts.length >= 4 && parts[3] ? parts[3] : undefined; +} + +/** + * Check if a resolved foundation model ARN supports prompt caching using the + * same matcher pi-ai uses for direct model IDs. + */ +function resolvedModelSupportsCaching(modelArn: string): boolean { + return matchesPiAiPromptCachingModelId(modelArn); +} + +/** + * Resolve the underlying foundation model for an application inference profile + * via GetInferenceProfile. Results are cached so we only call the API once per + * profile ARN. Returns true if the underlying model supports prompt caching. + * + * Region is extracted from the profile ARN itself to avoid mismatches when + * the OpenClaw config region differs from the profile's home region. + */ +const appProfileCacheEligibleCache = new Map(); + +async function resolveAppProfileCacheEligible( + modelId: string, + fallbackRegion: string | undefined, +): Promise { + if (appProfileCacheEligibleCache.has(modelId)) { + return appProfileCacheEligibleCache.get(modelId)!; + } + try { + const { BedrockClient, GetInferenceProfileCommand } = await import("@aws-sdk/client-bedrock"); + const region = extractRegionFromArn(modelId) ?? fallbackRegion; + const client = new BedrockClient(region ? { region } : {}); + const resp = await client.send( + new GetInferenceProfileCommand({ inferenceProfileIdentifier: modelId }), + ); + const models = resp.models ?? 
[]; + const eligible = + models.length > 0 && + models.every((m: { modelArn?: string }) => + resolvedModelSupportsCaching(m.modelArn ?? ""), + ); + appProfileCacheEligibleCache.set(modelId, eligible); + return eligible; + } catch { + // Transient failures (throttling, network, IAM) should not be cached — + // return the heuristic fallback but allow retry on the next request. + return isAnthropicBedrockModel(modelId); + } +} + +type BedrockCachePoint = { cachePoint: { type: "default"; ttl?: string } }; +type BedrockContentBlock = Record; +type BedrockMessage = { role?: string; content?: BedrockContentBlock[] }; + +function hasCachePoint(blocks: BedrockContentBlock[] | undefined): boolean { + return blocks?.some((b) => b.cachePoint != null) === true; +} + +function makeCachePoint(cacheRetention: string | undefined): BedrockCachePoint { + return { + cachePoint: { + type: "default", + ...(cacheRetention === "long" ? { ttl: "1h" } : {}), + }, + }; +} + +/** + * Inject Bedrock Converse cache points into the payload when pi-ai skipped them + * because it didn't recognize the model ID (application inference profiles). + */ +function injectBedrockCachePoints( + payload: Record, + cacheRetention: string | undefined, +): void { + if (!cacheRetention || cacheRetention === "none") { + return; + } + const point = makeCachePoint(cacheRetention); + + // Inject into system prompt if missing. + const system = payload.system as BedrockContentBlock[] | undefined; + if (Array.isArray(system) && system.length > 0 && !hasCachePoint(system)) { + system.push(point); + } + + // Inject into the last user message if missing. + // Bedrock Converse uses lowercase roles ("user" / "assistant"). 
+ const messages = payload.messages as BedrockMessage[] | undefined; + if (Array.isArray(messages) && messages.length > 0) { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg.role === "user" && Array.isArray(msg.content)) { + if (!hasCachePoint(msg.content)) { + msg.content.push(point); + } + break; + } + } + } +} + export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { // Keep registration-local constants inside the function so partial module // initialization during test bootstrap cannot trip TDZ reads. @@ -81,8 +255,17 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { api.registerMemoryEmbeddingProvider(bedrockMemoryEmbeddingProviderAdapter); - const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) => - isAnthropicBedrockModel(modelId) ? streamFn : createBedrockNoCacheWrapper(streamFn); + const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) => { + if (isAnthropicBedrockModel(modelId)) { + return streamFn; + } + // For app inference profiles with opaque IDs, don't force cacheRetention: "none" + // yet — we may resolve them as Claude later via GetInferenceProfile. + if (isBedrockAppInferenceProfile(modelId)) { + return streamFn; + } + return createBedrockNoCacheWrapper(streamFn); + }; const cacheWrapStreamFn = guardrail?.guardrailIdentifier && guardrail?.guardrailVersion @@ -161,23 +344,62 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { // Apply cache + guardrail wrapping. const wrapped = cacheWrapStreamFn({ modelId, streamFn }); const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl); + const mayNeedCacheInjection = + isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId); - if (!region) { + // For known Anthropic models (heuristic match), enable injection immediately. 
+ // For opaque profile IDs, we'll resolve via GetInferenceProfile on first call. + const heuristicMatch = needsCachePointInjection(modelId); + + if (!region && !mayNeedCacheInjection) { return wrapped; } - // Wrap to inject the region into every stream call so pi-ai's Bedrock - // client connects to the right region for inference profile IDs. const underlying = wrapped ?? streamFn; if (!underlying) { return wrapped; } return (streamModel, context, options) => { - // pi-ai's bedrock provider reads `options.region` at runtime but the - // StreamFn type does not declare it. Merge via Object.assign to avoid - // an unsafe type assertion. - const merged = Object.assign({}, options, { region }); - return underlying(streamModel, context, merged); + const merged = Object.assign({}, options, region ? { region } : {}); + + if (!mayNeedCacheInjection) { + return underlying(streamModel, context, merged); + } + + // Use the cacheRetention from options if explicitly set. + // When undefined, default to "short" to match pi-ai's internal default. + // Note: if the user set cacheRetention: "none" but the opaque ARN wasn't + // recognized by resolveAnthropicCacheRetentionFamily, the value may have + // been dropped upstream. This is a known limitation — the proper fix is + // to also teach resolveAnthropicCacheRetentionFamily about opaque profiles + // (tracked separately). In practice, users with app inference profiles + // want caching enabled, so defaulting to "short" is the safer behavior. + const cacheRetention = typeof merged.cacheRetention === "string" + ? merged.cacheRetention + : "short"; + + if (heuristicMatch) { + // Fast path: ARN heuristic already identified this as Claude. + return streamWithPayloadPatch(underlying, streamModel, context, merged, (payload) => { + injectBedrockCachePoints(payload, cacheRetention); + }); + } + + // Slow path: opaque profile ID — resolve underlying model via API (cached). 
+ // pi-ai's onPayload supports async, so we await the resolution inline. + const originalOnPayload = merged.onPayload as + | ((payload: unknown, model: unknown) => unknown) + | undefined; + return underlying(streamModel, context, { + ...merged, + onPayload: async (payload: unknown, payloadModel: unknown) => { + const eligible = await resolveAppProfileCacheEligible(modelId, region); + if (eligible && payload && typeof payload === "object") { + injectBedrockCachePoints(payload as Record, cacheRetention); + } + return originalOnPayload?.(payload, payloadModel); + }, + }); }; }, matchesContextOverflowError: ({ errorMessage }) => diff --git a/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts b/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts index 686babfef04..117562ee897 100644 --- a/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts +++ b/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts @@ -79,10 +79,25 @@ export function resolveAnthropicCacheRetentionFamily(params: { if ( normalizedProvider === "amazon-bedrock" && params.hasExplicitCacheConfig && - typeof params.modelId === "string" && - isAnthropicBedrockModel(params.modelId) + typeof params.modelId === "string" ) { - return "anthropic-bedrock"; + if (isAnthropicBedrockModel(params.modelId)) { + return "anthropic-bedrock"; + } + // Application inference profiles with opaque IDs (e.g. z27qyso459da) can't + // be identified as Claude from the ARN alone. When the user explicitly sets + // cacheRetention, honor it — the extension's GetInferenceProfile resolution + // handles the actual model detection at runtime. 
+ if ( + BEDROCK_APP_INFERENCE_PROFILE_ARN_RE.test( + normalizeLowercaseStringOrEmpty(params.modelId), + ) && + normalizeLowercaseStringOrEmpty(params.modelId).includes( + ":application-inference-profile/", + ) + ) { + return "anthropic-bedrock"; + } } if ( normalizedProvider !== "amazon-bedrock" && diff --git a/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts b/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts index 610239cca2e..b7b212c69b0 100644 --- a/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts +++ b/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts @@ -281,6 +281,39 @@ describe("cacheRetention default behavior", () => { ), ).toBe("none"); }); + + it("passes through explicit cacheRetention for opaque Bedrock app inference profile ARNs", () => { + expect( + resolveCacheRetention( + { cacheRetention: "long" }, + "amazon-bedrock", + "openai-completions", + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da", + ), + ).toBe("long"); + }); + + it("passes through explicit 'none' for opaque Bedrock app inference profile ARNs", () => { + expect( + resolveCacheRetention( + { cacheRetention: "none" }, + "amazon-bedrock", + "openai-completions", + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da", + ), + ).toBe("none"); + }); + + it("does not default cacheRetention for opaque Bedrock app inference profile ARNs", () => { + expect( + resolveCacheRetention( + undefined, + "amazon-bedrock", + "openai-completions", + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da", + ), + ).toBeUndefined(); + }); }); describe("anthropic-family cache semantics", () => {