From 24266af1ce1ba71c13354cb8978e541075c6c8ac Mon Sep 17 00:00:00 2001
From: anirudhmarc <43162556+anirudhmarc@users.noreply.github.com>
Date: Thu, 23 Apr 2026 03:19:29 +0800
Subject: [PATCH] fix(amazon-bedrock): inject cache points for application
 inference profile ARNs (#69953)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(amazon-bedrock): inject cache points for application inference profile ARNs

pi-ai's internal supportsPromptCaching checks model.id for specific Claude
model name patterns (e.g. "-4-", "claude-3-7-sonnet"), which fails for
application inference profile ARNs that don't contain the model name.
This causes prompt caching to silently break for Bedrock users with
application inference profiles.

Work around this by detecting when pi-ai would miss cache point injection
(via piAiWouldInjectCachePoints mirror) and patching the Converse API
payload via onPayload to add cachePoint blocks to the system prompt and
last user message — matching the same format pi-ai uses natively.

The fix is safe:
- Checks for existing cache points to avoid double-injection
- Respects cacheRetention: "none"
- Defaults to "short" retention (matching pi-ai default)
- Becomes a no-op once upstream pi-mono#2925 is fixed

Fixes #19279
Upstream: https://github.com/badlogic/pi-mono/issues/2925

* fix(amazon-bedrock): tighten app-profile cache injection

---------

Co-authored-by: Your Name <you@example.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
---
 CHANGELOG.md                                  |   1 +
 extensions/amazon-bedrock/index.test.ts       | 405 +++++++++++++++++-
 .../amazon-bedrock/register.sync.runtime.ts   | 242 ++++++++++-
 .../anthropic-family-cache-semantics.ts       |  21 +-
 ...tra-params.cache-retention-default.test.ts |  33 ++
 5 files changed, 688 insertions(+), 14 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a8543e358be..57836950455 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai
 - Discord: make auto-thread parent transcript inheritance opt-in via `channels.discord.thread.inheritParent`, keeping newly created Discord thread sessions isolated by default while preserving explicit inheritance for configured accounts. Fixes #69907. (#69986) Thanks @Blahdude.
 - Browser/Chrome MCP: reset cached existing-session control sessions when a `navigate_page` call times out, so one stuck navigation no longer poisons the browser profile until a gateway restart. (#69733) Thanks @ayeshakhalid192007-dev.
 - Browser/Chrome MCP: propagate click timeouts and abort signals to existing-session actions so a stuck click fails fast and reconnects instead of poisoning the browser tool until gateway restart. (#63524) Thanks @dongseok0.
+- Amazon Bedrock/prompt caching: resolve opaque application inference profile targets before injecting Bedrock cache points, require every routed target to support explicit cache points, and retry transient profile lookups instead of caching a false negative for the rest of the process. (#69953) Thanks @anirudhmarc and @vincentkoc.
 - Gateway/channel health: base stale-socket recovery on provider-proven transport activity instead of inbound app-event freshness, preventing quiet Slack, Discord, Telegram, Matrix, and local-style channels from being restarted solely because no user traffic arrived. (#69833) Thanks @bek91.
 - OpenCode Go: canonicalize stale bundled `opencode-go` base URLs from `/go` or `/go/v1` to `/zen/go` or `/zen/go/v1`, so older generated model metadata stops hitting the 404 HTML endpoint. (#69898)
 - CLI/channels: honor `channels.<id>.enabled=false` as a hard read-only presence opt-out, so env vars, manifest env vars, or stale persisted auth state no longer make disabled channel plugins appear in status, doctor, or setup-only discovery.
diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts
index d21f3b867f2..ab574b77aba 100644
--- a/extensions/amazon-bedrock/index.test.ts
+++ b/extensions/amazon-bedrock/index.test.ts
@@ -1,12 +1,45 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
-import { describe, expect, it } from "vitest";
+import { beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../../src/config/config.js";
 import { buildPluginApi } from "../../src/plugins/api-builder.js";
 import type { PluginRuntime } from "../../src/plugins/runtime/types.js";
 import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
 import amazonBedrockPlugin from "./index.js";
 
+type InferenceProfileResult =
+  | { models?: Array<{ modelArn?: string }> }
+  | Error;
+
+const inferenceProfileResults: InferenceProfileResult[] = [];
+const bedrockClientConfigs: Array<Record<string, unknown>> = [];
+const sendGetInferenceProfile = vi.fn(async () => {
+  const scripted = inferenceProfileResults.shift() ?? { models: [] }; // drained queue → no models
+  if (scripted instanceof Error) {
+    throw scripted;
+  }
+  return scripted;
+});
+
+vi.mock("@aws-sdk/client-bedrock", () => {
+  class GetInferenceProfileCommand {
+    constructor(readonly input: { inferenceProfileIdentifier: string }) {}
+  }
+
+  class BedrockClient {
+    constructor(config: Record<string, unknown> = {}) {
+      bedrockClientConfigs.push(config);
+    }
+
+    send = sendGetInferenceProfile;
+  }
+
+  return {
+    BedrockClient,
+    GetInferenceProfileCommand,
+  };
+});
+
 type RegisteredProviderPlugin = Awaited<ReturnType<typeof registerSingleProviderPlugin>>;
 
 /** Register the amazon-bedrock plugin with an optional pluginConfig override. */
@@ -58,6 +91,22 @@ const ANTHROPIC_MODEL_DESCRIPTOR = {
   id: ANTHROPIC_MODEL,
 } as never;
 
+const APP_INFERENCE_PROFILE_ARN =
+  "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/my-claude-profile";
+const APP_INFERENCE_PROFILE_DESCRIPTOR = {
+  api: "openai-completions",
+  provider: "amazon-bedrock",
+  id: APP_INFERENCE_PROFILE_ARN,
+} as never;
+
+/** Build a Bedrock model descriptor for any profile ARN, cast to `never` like the fixed ones. */
+function makeAppInferenceProfileDescriptor(modelId: string): never {
+  // Same three fields as APP_INFERENCE_PROFILE_DESCRIPTOR; only `id` varies.
+  const api = "openai-completions";
+  const provider = "amazon-bedrock";
+  return { api, provider, id: modelId } as never;
+}
+
 /**
  * Call wrapStreamFn and then invoke the returned stream function, capturing
  * the payload via the onPayload hook that streamWithPayloadPatch installs.
@@ -92,6 +141,12 @@ function callWrappedStream(
 }
 
 describe("amazon-bedrock provider plugin", () => {
+  beforeEach(() => {
+    inferenceProfileResults.length = 0;
+    bedrockClientConfigs.length = 0;
+    sendGetInferenceProfile.mockClear();
+  });
+
   it("marks Claude 4.6 Bedrock models as adaptive by default", async () => {
     const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
 
@@ -302,4 +357,352 @@ describe("amazon-bedrock provider plugin", () => {
       expect(result).toMatchObject({ cacheRetention: "none" });
     });
   });
+
+  describe("application inference profile cache point injection", () => {
+    /**
+     * Run the wrapped stream function for `modelId`, then pass `payload` through the
+     * onPayload hook found on its result (if any) and return that same payload object.
+     */
+    async function callWrappedStreamWithPayload(
+      provider: RegisteredProviderPlugin,
+      modelId: string,
+      modelDescriptor: never,
+      options: Record<string, unknown>,
+      payload: Record<string, unknown>,
+    ): Promise<Record<string, unknown>> {
+      type PayloadHook = (p: Record<string, unknown>, model: unknown) => Promise<unknown>;
+
+      const wrapArgs = { provider: "amazon-bedrock", modelId, streamFn: spyStreamFn } as never;
+      const wrapped = provider.wrapStreamFn?.(wrapArgs);
+
+      // The stream result is read as a plain record so a forwarded onPayload can be found.
+      const streamResult = wrapped?.(
+        modelDescriptor,
+        { messages: [] } as never,
+        options,
+      ) as unknown as Record<string, unknown>;
+
+      // When no hook is present the payload is returned exactly as it was passed in.
+      if (typeof streamResult?.onPayload === "function") {
+        await (streamResult.onPayload as PayloadHook)(payload, modelDescriptor);
+      }
+      return payload;
+    }
+
+    it("injects cache points for application inference profile ARNs", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(2);
+      expect(system[1]).toEqual({ cachePoint: { type: "default" } });
+
+      const messages = payload.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>;
+      const lastUserContent = messages[0].content;
+      expect(lastUserContent).toHaveLength(2);
+      expect(lastUserContent[1]).toEqual({ cachePoint: { type: "default" } });
+    });
+
+    it("uses long TTL when cacheRetention is 'long'", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "long" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system[1]).toEqual({ cachePoint: { type: "default", ttl: "1h" } });
+    });
+
+    it("does not inject cache points when cacheRetention is 'none'", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "none" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(1);
+    });
+
+    it("does not double-inject cache points if already present", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }, { cachePoint: { type: "default" } }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }, { cachePoint: { type: "default" } }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(2);
+
+      const messages = payload.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>;
+      expect(messages[0].content).toHaveLength(2);
+    });
+
+    it("does not inject cache points for regular Anthropic model IDs (pi-ai handles them)", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      // Regular model IDs contain "claude" so pi-ai handles caching natively.
+      // wrapStreamFn should not install an onPayload hook for these.
+      const wrapped = provider.wrapStreamFn?.({
+        provider: "amazon-bedrock",
+        modelId: ANTHROPIC_MODEL,
+        streamFn: spyStreamFn,
+      } as never);
+
+      const result = wrapped?.(ANTHROPIC_MODEL_DESCRIPTOR, { messages: [] } as never, {
+        cacheRetention: "short",
+      }) as unknown as Record<string, unknown>;
+
+      // For regular Anthropic models, no onPayload should be installed for cache injection.
+      if (typeof result?.onPayload === "function") {
+        (result.onPayload as (p: Record<string, unknown>) => void)(payload);
+      }
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(1);
+    });
+
+    it("does not inject cache points for older Claude models not in pi-ai's cache list", async () => {
+      const provider = await registerWithConfig(undefined);
+      const oldClaudeModel = "anthropic.claude-3-opus-20240229-v1:0";
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      // Claude 3 Opus is not in pi-ai's supportsPromptCaching list, but it's
+      // also not an application inference profile — we should not inject.
+      const wrapped = provider.wrapStreamFn?.({
+        provider: "amazon-bedrock",
+        modelId: oldClaudeModel,
+        streamFn: spyStreamFn,
+      } as never);
+
+      const result = wrapped?.({ id: oldClaudeModel } as never, { messages: [] } as never, {
+        cacheRetention: "short",
+      }) as unknown as Record<string, unknown>;
+
+      if (typeof result?.onPayload === "function") {
+        (result.onPayload as (p: Record<string, unknown>) => void)(payload);
+      }
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(1);
+    });
+
+    it("defaults to 'short' cache retention when not explicitly set", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        {},
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(2);
+      // Default is "short" which means no ttl field
+      expect(system[1]).toEqual({ cachePoint: { type: "default" } });
+    });
+
+    it("injects cache point only on last USER message", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "First question" }] },
+          { role: "assistant", content: [{ text: "Answer" }] },
+          { role: "user", content: [{ text: "Follow-up" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const messages = payload.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>;
+      // First user message should NOT have a cache point
+      expect(messages[0].content).toHaveLength(1);
+      // Assistant message untouched
+      expect(messages[1].content).toHaveLength(1);
+      // Last user message should have a cache point
+      expect(messages[2].content).toHaveLength(2);
+      expect(messages[2].content[1]).toEqual({ cachePoint: { type: "default" } });
+    });
+
+    it("injects cache points for opaque application inference profile ARNs after profile lookup", async () => {
+      const modelId =
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da";
+      inferenceProfileResults.push({
+        models: [
+          {
+            modelArn:
+              "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0",
+          },
+        ],
+      });
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello" }] }],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system[1]).toEqual({ cachePoint: { type: "default" } });
+      expect(sendGetInferenceProfile).toHaveBeenCalledTimes(1);
+      expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]);
+    });
+
+    it("does not inject cache points when any resolved profile target is not cacheable", async () => {
+      const modelId =
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db";
+      inferenceProfileResults.push({
+        models: [
+          {
+            modelArn:
+              "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0",
+          },
+          {
+            modelArn:
+              "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-opus-20240229-v1:0",
+          },
+        ],
+      });
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello" }] }],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      expect(payload.system).toEqual([{ text: "You are helpful." }]);
+      expect(payload.messages).toEqual([{ role: "user", content: [{ text: "Hello" }] }]);
+    });
+
+    it("retries opaque profile lookup after a transient failure instead of caching the fallback", async () => {
+      const modelId =
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459dc";
+      inferenceProfileResults.push(
+        new Error("throttled"),
+        {
+          models: [
+            {
+              modelArn:
+                "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0",
+            },
+          ],
+        },
+      );
+      const provider = await registerWithConfig(undefined);
+      const firstPayload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello" }] }],
+      };
+      const secondPayload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello again" }] }],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        firstPayload,
+      );
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        secondPayload,
+      );
+
+      expect(firstPayload.system).toEqual([{ text: "You are helpful." }]);
+      expect(secondPayload.system).toEqual([
+        { text: "You are helpful." },
+        { cachePoint: { type: "default" } },
+      ]);
+      expect(sendGetInferenceProfile).toHaveBeenCalledTimes(2);
+    });
+  });
 });
diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts
index 31adb1dba3c..c930807f6f9 100644
--- a/extensions/amazon-bedrock/register.sync.runtime.ts
+++ b/extensions/amazon-bedrock/register.sync.runtime.ts
@@ -62,6 +62,180 @@ function createGuardrailWrapStreamFn(
   };
 }
 
+/**
+ * Local copy of the `supportsPromptCaching` matcher shipped with pi-ai's Bedrock provider.
+ * Returns true only for Claude model IDs that carry one of the cache-capable markers.
+ * Keep this in sync with node_modules/@mariozechner/pi-ai/dist/providers/amazon-bedrock.js.
+ */
+function matchesPiAiPromptCachingModelId(modelId: string): boolean {
+  const normalized = modelId.toLowerCase();
+  // Non-Claude IDs never qualify, whatever else they contain.
+  if (!normalized.includes("claude")) {
+    return false;
+  }
+  // Substrings that mark a Claude generation as cache-capable.
+  const cacheableMarkers = [
+    // Claude 4.x
+    "-4-",
+    "-4.",
+    // Claude 3.7 Sonnet
+    "claude-3-7-sonnet",
+    // Claude 3.5 Haiku
+    "claude-3-5-haiku",
+  ];
+  return cacheableMarkers.some((marker) => normalized.includes(marker));
+}
+
+function piAiWouldInjectCachePoints(modelId: string): boolean {
+  return matchesPiAiPromptCachingModelId(modelId); // local mirror of pi-ai's own matcher
+}
+
+/**
+ * Matches Bedrock application inference profile ARNs (aws, aws-cn, and
+ * aws-us-gov partitions; case-insensitive) — the only IDs where pi-ai's
+ * model-name checks fail because the ARN is opaque. System-defined profiles
+ * (us., eu., global.) and base model IDs carry the model name, so pi-ai handles them.
+ */
+const BEDROCK_APP_INFERENCE_PROFILE_RE = /^arn:aws(-cn|-us-gov)?:bedrock:.*:application-inference-profile\//i;
+
+function isBedrockAppInferenceProfile(modelId: string): boolean {
+  return BEDROCK_APP_INFERENCE_PROFILE_RE.test(modelId);
+}
+
+/**
+ * Decides whether OpenClaw itself must add Bedrock cache points for `modelId`.
+ *
+ * pi-ai's internal `supportsPromptCaching` looks for specific Claude model name
+ * patterns in `model.id`, which fails for application inference profile ARNs
+ * (opaque IDs that may not contain the model name). In that case cache points
+ * are injected via onPayload instead.
+ *
+ * True only when all of the following hold:
+ * - the ID is an application inference profile ARN (regular Claude model IDs and
+ *   system-defined profiles such as us.anthropic.claude-* are left to pi-ai);
+ * - pi-ai would not already inject cache points for it;
+ * - OpenClaw's `isAnthropicBedrockModel` heuristic identifies it as Anthropic.
+ */
+function needsCachePointInjection(modelId: string): boolean {
+  return (
+    // Only application inference profile ARNs are candidates.
+    isBedrockAppInferenceProfile(modelId) &&
+    // Skip IDs pi-ai already recognizes, so cache points are not handled twice.
+    !piAiWouldInjectCachePoints(modelId) &&
+    // Finally require OpenClaw's ARN heuristic to identify an Anthropic model.
+    isAnthropicBedrockModel(modelId)
+  );
+}
+
+/**
+ * Pull the region segment out of a Bedrock ARN; `undefined` when it is missing or empty.
+ * e.g. "arn:aws:bedrock:us-east-1:123:application-inference-profile/abc" → "us-east-1"
+ */
+function extractRegionFromArn(arn: string): string | undefined {
+  // ARN layout is arn:partition:service:region:account:resource — region is the 4th field.
+  const [, , , regionSegment] = arn.split(":");
+  return regionSegment ? regionSegment : undefined;
+}
+
+/**
+ * Report whether a resolved foundation model ARN supports prompt caching, by
+ * applying the same matcher that mirrors pi-ai's check for direct model IDs.
+ */
+function resolvedModelSupportsCaching(modelArn: string): boolean {
+  return matchesPiAiPromptCachingModelId(modelArn);
+}
+
+/** Memoized eligibility per profile ARN; only successful lookups are stored. */
+const appProfileCacheEligibleCache = new Map<string, boolean>();
+
+/**
+ * Resolve the models behind an application inference profile via GetInferenceProfile.
+ * Returns true only when the profile lists at least one model and every one of them
+ * supports prompt caching. The region is taken from the profile ARN (falling back to
+ * `fallbackRegion`) so a differing OpenClaw config region cannot cause a mismatch.
+ * Successful results are memoized per ARN; failed lookups are retried next time.
+ */
+async function resolveAppProfileCacheEligible(
+  modelId: string,
+  fallbackRegion: string | undefined,
+): Promise<boolean> {
+  const cached = appProfileCacheEligibleCache.get(modelId);
+  if (cached !== undefined) {
+    return cached;
+  }
+  try {
+    const { BedrockClient, GetInferenceProfileCommand } = await import("@aws-sdk/client-bedrock");
+    const region = extractRegionFromArn(modelId) ?? fallbackRegion;
+    const client = new BedrockClient(region ? { region } : {});
+    const command = new GetInferenceProfileCommand({ inferenceProfileIdentifier: modelId });
+    const resp = await client.send(command);
+    const models = resp.models ?? [];
+    const eligible =
+      models.length > 0 &&
+      models.every((m: { modelArn?: string }) =>
+        resolvedModelSupportsCaching(m.modelArn ?? ""),
+      );
+    appProfileCacheEligibleCache.set(modelId, eligible);
+    return eligible;
+  } catch {
+    // Transient failures (throttling, network, IAM) must not be memoized: answer
+    // with the ARN heuristic for this request and let the next request retry.
+    return isAnthropicBedrockModel(modelId);
+  }
+}
+
+type BedrockCachePoint = { cachePoint: { type: "default"; ttl?: string } };
+type BedrockContentBlock = Record<string, unknown>;
+type BedrockMessage = { role?: string; content?: BedrockContentBlock[] };
+
+function hasCachePoint(blocks: BedrockContentBlock[] | undefined): boolean {
+  return (blocks ?? []).some((block) => block.cachePoint != null); // null/undefined = absent
+}
+
+/** Build a Converse `cachePoint` block; only "long" retention sets an explicit 1h TTL. */
+function makeCachePoint(cacheRetention: string | undefined): BedrockCachePoint {
+  if (cacheRetention === "long") {
+    return { cachePoint: { type: "default", ttl: "1h" } };
+  }
+  // Every other retention value gets the default cache point with no `ttl` field.
+  return { cachePoint: { type: "default" } };
+}
+
+/**
+ * Add Bedrock Converse cache points to `payload` in place when pi-ai skipped them
+ * because it did not recognize the model ID (application inference profiles).
+ * No-op when `cacheRetention` is unset or "none", or a target already has one.
+ */
+function injectBedrockCachePoints(
+  payload: Record<string, unknown>,
+  cacheRetention: string | undefined,
+): void {
+  if (!cacheRetention || cacheRetention === "none") {
+    return;
+  }
+
+  // Build a fresh block per insertion so the two targets never share one object.
+  const system = payload.system as BedrockContentBlock[] | undefined;
+  if (Array.isArray(system) && system.length > 0 && !hasCachePoint(system)) {
+    system.push(makeCachePoint(cacheRetention));
+  }
+
+  // Bedrock Converse uses lowercase roles ("user" / "assistant").
+  const messages = payload.messages as BedrockMessage[] | undefined;
+  if (!Array.isArray(messages)) {
+    return;
+  }
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg?.role === "user" && Array.isArray(msg.content)) {
+      if (!hasCachePoint(msg.content)) {
+        msg.content.push(makeCachePoint(cacheRetention));
+      }
+      break;
+    }
+  }
+}
+
 export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
   // Keep registration-local constants inside the function so partial module
   // initialization during test bootstrap cannot trip TDZ reads.
@@ -81,8 +255,17 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
 
   api.registerMemoryEmbeddingProvider(bedrockMemoryEmbeddingProviderAdapter);
 
-  const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) =>
-    isAnthropicBedrockModel(modelId) ? streamFn : createBedrockNoCacheWrapper(streamFn);
+  const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) => {
+    // Anthropic models keep the caller's stream function untouched. Opaque app
+    // inference profiles do too: forcing cacheRetention: "none" now would be
+    // premature, since GetInferenceProfile may still resolve them as Claude.
+    const keepCachingEnabled =
+      isAnthropicBedrockModel(modelId) || isBedrockAppInferenceProfile(modelId);
+    if (keepCachingEnabled) {
+      return streamFn;
+    }
+    return createBedrockNoCacheWrapper(streamFn);
+  };
 
   const cacheWrapStreamFn =
     guardrail?.guardrailIdentifier && guardrail?.guardrailVersion
@@ -161,23 +344,62 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
       // Apply cache + guardrail wrapping.
       const wrapped = cacheWrapStreamFn({ modelId, streamFn });
       const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl);
+      const mayNeedCacheInjection =
+        isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId);
 
-      if (!region) {
+      // For known Anthropic models (heuristic match), enable injection immediately.
+      // For opaque profile IDs, we'll resolve via GetInferenceProfile on first call.
+      const heuristicMatch = needsCachePointInjection(modelId);
+
+      if (!region && !mayNeedCacheInjection) {
         return wrapped;
       }
 
-      // Wrap to inject the region into every stream call so pi-ai's Bedrock
-      // client connects to the right region for inference profile IDs.
       const underlying = wrapped ?? streamFn;
       if (!underlying) {
         return wrapped;
       }
       return (streamModel, context, options) => {
-        // pi-ai's bedrock provider reads `options.region` at runtime but the
-        // StreamFn type does not declare it. Merge via Object.assign to avoid
-        // an unsafe type assertion.
-        const merged = Object.assign({}, options, { region });
-        return underlying(streamModel, context, merged);
+        const merged = Object.assign({}, options, region ? { region } : {});
+
+        if (!mayNeedCacheInjection) {
+          return underlying(streamModel, context, merged);
+        }
+
+        // Use the cacheRetention from options if explicitly set.
+        // When undefined, default to "short" to match pi-ai's internal default.
+        // Note: if the user set cacheRetention: "none" but the opaque ARN wasn't
+        // recognized by resolveAnthropicCacheRetentionFamily, the value may have
+        // been dropped upstream. This is a known limitation — the proper fix is
+        // to also teach resolveAnthropicCacheRetentionFamily about opaque profiles
+        // (tracked separately). In practice, users with app inference profiles
+        // want caching enabled, so defaulting to "short" is the safer behavior.
+        const cacheRetention = typeof merged.cacheRetention === "string"
+          ? merged.cacheRetention
+          : "short";
+
+        if (heuristicMatch) {
+          // Fast path: ARN heuristic already identified this as Claude.
+          return streamWithPayloadPatch(underlying, streamModel, context, merged, (payload) => {
+            injectBedrockCachePoints(payload, cacheRetention);
+          });
+        }
+
+        // Slow path: opaque profile ID — resolve underlying model via API (cached).
+        // pi-ai's onPayload supports async, so we await the resolution inline.
+        const originalOnPayload = merged.onPayload as
+          | ((payload: unknown, model: unknown) => unknown)
+          | undefined;
+        return underlying(streamModel, context, {
+          ...merged,
+          onPayload: async (payload: unknown, payloadModel: unknown) => {
+            const eligible = await resolveAppProfileCacheEligible(modelId, region);
+            if (eligible && payload && typeof payload === "object") {
+              injectBedrockCachePoints(payload as Record<string, unknown>, cacheRetention);
+            }
+            return originalOnPayload?.(payload, payloadModel);
+          },
+        });
       };
     },
     matchesContextOverflowError: ({ errorMessage }) =>
diff --git a/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts b/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts
index 686babfef04..117562ee897 100644
--- a/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts
+++ b/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts
@@ -79,10 +79,25 @@ export function resolveAnthropicCacheRetentionFamily(params: {
   if (
     normalizedProvider === "amazon-bedrock" &&
     params.hasExplicitCacheConfig &&
-    typeof params.modelId === "string" &&
-    isAnthropicBedrockModel(params.modelId)
+    typeof params.modelId === "string"
   ) {
-    return "anthropic-bedrock";
+    if (isAnthropicBedrockModel(params.modelId)) {
+      return "anthropic-bedrock";
+    }
+    // Application inference profiles with opaque IDs (e.g. z27qyso459da) can't
+    // be identified as Claude from the ARN alone. When the user explicitly sets
+    // cacheRetention, honor it — the extension's GetInferenceProfile resolution
+    // handles the actual model detection at runtime.
+    if (
+      BEDROCK_APP_INFERENCE_PROFILE_ARN_RE.test(
+        normalizeLowercaseStringOrEmpty(params.modelId),
+      ) &&
+      normalizeLowercaseStringOrEmpty(params.modelId).includes(
+        ":application-inference-profile/",
+      )
+    ) {
+      return "anthropic-bedrock";
+    }
   }
   if (
     normalizedProvider !== "amazon-bedrock" &&
diff --git a/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts b/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts
index 610239cca2e..b7b212c69b0 100644
--- a/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts
+++ b/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts
@@ -281,6 +281,39 @@ describe("cacheRetention default behavior", () => {
       ),
     ).toBe("none");
   });
+
+  it("passes through explicit cacheRetention for opaque Bedrock app inference profile ARNs", () => {
+    expect(
+      resolveCacheRetention(
+        { cacheRetention: "long" },
+        "amazon-bedrock",
+        "openai-completions",
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da",
+      ),
+    ).toBe("long");
+  });
+
+  it("passes through explicit 'none' for opaque Bedrock app inference profile ARNs", () => {
+    expect(
+      resolveCacheRetention(
+        { cacheRetention: "none" },
+        "amazon-bedrock",
+        "openai-completions",
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da",
+      ),
+    ).toBe("none");
+  });
+
+  it("does not default cacheRetention for opaque Bedrock app inference profile ARNs", () => {
+    expect(
+      resolveCacheRetention(
+        undefined,
+        "amazon-bedrock",
+        "openai-completions",
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da",
+      ),
+    ).toBeUndefined();
+  });
 });
 
 describe("anthropic-family cache semantics", () => {