diff --git a/CHANGELOG.md b/CHANGELOG.md
index a8543e358be..57836950455 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai
 - Discord: make auto-thread parent transcript inheritance opt-in via `channels.discord.thread.inheritParent`, keeping newly created Discord thread sessions isolated by default while preserving explicit inheritance for configured accounts. Fixes #69907. (#69986) Thanks @Blahdude.
 - Browser/Chrome MCP: reset cached existing-session control sessions when a `navigate_page` call times out, so one stuck navigation no longer poisons the browser profile until a gateway restart. (#69733) Thanks @ayeshakhalid192007-dev.
 - Browser/Chrome MCP: propagate click timeouts and abort signals to existing-session actions so a stuck click fails fast and reconnects instead of poisoning the browser tool until gateway restart. (#63524) Thanks @dongseok0.
+- Amazon Bedrock/prompt caching: resolve opaque application inference profile targets before injecting Bedrock cache points, require every routed target to support explicit cache points, and retry transient profile lookups instead of caching a false negative for the rest of the process. (#69953) Thanks @anirudhmarc and @vincentkoc.
 - Gateway/channel health: base stale-socket recovery on provider-proven transport activity instead of inbound app-event freshness, preventing quiet Slack, Discord, Telegram, Matrix, and local-style channels from being restarted solely because no user traffic arrived. (#69833) Thanks @bek91.
 - OpenCode Go: canonicalize stale bundled `opencode-go` base URLs from `/go` or `/go/v1` to `/zen/go` or `/zen/go/v1`, so older generated model metadata stops hitting the 404 HTML endpoint. (#69898)
 - CLI/channels: honor `channels.<id>.enabled=false` as a hard read-only presence opt-out, so env vars, manifest env vars, or stale persisted auth state no longer make disabled channel plugins appear in status, doctor, or setup-only discovery.
diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts
index d21f3b867f2..ab574b77aba 100644
--- a/extensions/amazon-bedrock/index.test.ts
+++ b/extensions/amazon-bedrock/index.test.ts
@@ -1,12 +1,45 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
-import { describe, expect, it } from "vitest";
+import { beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../../src/config/config.js";
 import { buildPluginApi } from "../../src/plugins/api-builder.js";
 import type { PluginRuntime } from "../../src/plugins/runtime/types.js";
 import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
 import amazonBedrockPlugin from "./index.js";
 
+// One queued GetInferenceProfile outcome: a response body, or an Error to reject with.
+type InferenceProfileResult = { models?: Array<{ modelArn?: string }> } | Error;
+
+const inferenceProfileResults: InferenceProfileResult[] = [];
+const bedrockClientConfigs: Array<Record<string, unknown>> = [];
+// Replays queued outcomes in FIFO order; an exhausted queue yields an empty profile.
+const sendGetInferenceProfile = vi.fn(async () => {
+  const outcome = inferenceProfileResults.shift() ?? { models: [] };
+  if (outcome instanceof Error) {
+    throw outcome;
+  }
+  return outcome;
+});
+
+vi.mock("@aws-sdk/client-bedrock", () => {
+  // Minimal SDK stand-ins. Both classes reach the module-level recorders only when
+  // an instance is constructed, never while this factory itself is running.
+  class BedrockClient {
+    send = sendGetInferenceProfile;
+
+    constructor(config: Record<string, unknown> = {}) {
+      bedrockClientConfigs.push(config);
+    }
+  }
+
+  class GetInferenceProfileCommand {
+    constructor(readonly input: { inferenceProfileIdentifier: string }) {}
+  }
+
+  const sdkStub = { BedrockClient, GetInferenceProfileCommand };
+  return sdkStub;
+});
+
 type RegisteredProviderPlugin = Awaited<ReturnType<typeof registerSingleProviderPlugin>>;
 
 /** Register the amazon-bedrock plugin with an optional pluginConfig override. */
@@ -58,6 +91,22 @@ const ANTHROPIC_MODEL_DESCRIPTOR = {
   id: ANTHROPIC_MODEL,
 } as never;
 
+const APP_INFERENCE_PROFILE_ARN =
+  "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/my-claude-profile";
+
+/** Build a never-typed model descriptor for the given Bedrock model or profile ID. */
+function makeAppInferenceProfileDescriptor(modelId: string): never {
+  const descriptor = {
+    api: "openai-completions",
+    provider: "amazon-bedrock",
+    id: modelId,
+  };
+  return descriptor as never;
+}
+
+// Descriptor for the profile whose name contains "claude".
+const APP_INFERENCE_PROFILE_DESCRIPTOR = makeAppInferenceProfileDescriptor(APP_INFERENCE_PROFILE_ARN);
+
 /**
  * Call wrapStreamFn and then invoke the returned stream function, capturing
  * the payload via the onPayload hook that streamWithPayloadPatch installs.
@@ -92,6 +141,12 @@ function callWrappedStream(
 }
 
 describe("amazon-bedrock provider plugin", () => {
+  beforeEach(() => {
+    sendGetInferenceProfile.mockClear();
+    inferenceProfileResults.splice(0);
+    bedrockClientConfigs.splice(0);
+  });
+
   it("marks Claude 4.6 Bedrock models as adaptive by default", async () => {
     const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
 
@@ -302,4 +357,352 @@ describe("amazon-bedrock provider plugin", () => {
       expect(result).toMatchObject({ cacheRetention: "none" });
     });
   });
+
+  describe("application inference profile cache point injection", () => {
+    /**
+     * Wrap the spy stream for `modelId`, call it, then await the returned `onPayload`
+     * hook (if any) with `payload`. Returns that same payload object, mutated in place.
+     */
+    async function callWrappedStreamWithPayload(
+      provider: RegisteredProviderPlugin,
+      modelId: string,
+      modelDescriptor: never,
+      options: Record<string, unknown>,
+      payload: Record<string, unknown>,
+    ): Promise<Record<string, unknown>> {
+      const wrapped = provider.wrapStreamFn?.({
+        provider: "amazon-bedrock",
+        modelId,
+        streamFn: spyStreamFn,
+      } as never);
+
+      const result = wrapped?.(modelDescriptor, { messages: [] } as never, options) as unknown as Record<
+        string,
+        unknown
+      >;
+
+      if (typeof result?.onPayload === "function") {
+        await (
+          result.onPayload as (p: Record<string, unknown>, model: unknown) => Promise<unknown>
+        )(payload, modelDescriptor);
+      }
+      return payload;
+    }
+
+    it("injects cache points for application inference profile ARNs", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(2);
+      expect(system[1]).toEqual({ cachePoint: { type: "default" } });
+
+      const messages = payload.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>;
+      const lastUserContent = messages[0].content;
+      expect(lastUserContent).toHaveLength(2);
+      expect(lastUserContent[1]).toEqual({ cachePoint: { type: "default" } });
+    });
+
+    it("uses long TTL when cacheRetention is 'long'", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "long" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system[1]).toEqual({ cachePoint: { type: "default", ttl: "1h" } });
+    });
+
+    it("does not inject cache points when cacheRetention is 'none'", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "none" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(1);
+    });
+
+    it("does not double-inject cache points if already present", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }, { cachePoint: { type: "default" } }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }, { cachePoint: { type: "default" } }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(2);
+
+      const messages = payload.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>;
+      expect(messages[0].content).toHaveLength(2);
+    });
+
+    it("does not inject cache points for regular Anthropic model IDs (pi-ai handles them)", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      // A regular model ID names the Claude model itself, so pi-ai handles caching
+      // natively and wrapStreamFn is expected not to install a cache-injecting hook.
+      const wrapped = provider.wrapStreamFn?.({
+        provider: "amazon-bedrock",
+        modelId: ANTHROPIC_MODEL,
+        streamFn: spyStreamFn,
+      } as never);
+
+      const result = wrapped?.(ANTHROPIC_MODEL_DESCRIPTOR, { messages: [] } as never, {
+        cacheRetention: "short",
+      }) as unknown as Record<string, unknown>;
+
+      // If a hook is present anyway, run it so an unwanted injection would show up below.
+      if (typeof result?.onPayload === "function") {
+        (result.onPayload as (p: Record<string, unknown>) => void)(payload);
+      }
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(1);
+    });
+
+    it("does not inject cache points for older Claude models not in pi-ai's cache list", async () => {
+      const provider = await registerWithConfig(undefined);
+      const oldClaudeModel = "anthropic.claude-3-opus-20240229-v1:0";
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      // Claude 3 Opus is outside pi-ai's supportsPromptCaching list, and a plain model ID
+      // is not an application inference profile ARN, so nothing may inject cache points.
+      const wrapped = provider.wrapStreamFn?.({
+        provider: "amazon-bedrock",
+        modelId: oldClaudeModel,
+        streamFn: spyStreamFn,
+      } as never);
+
+      const result = wrapped?.({ id: oldClaudeModel } as never, { messages: [] } as never, {
+        cacheRetention: "short",
+      }) as unknown as Record<string, unknown>;
+
+      if (typeof result?.onPayload === "function") {
+        (result.onPayload as (p: Record<string, unknown>) => void)(payload);
+      }
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(1);
+    });
+
+    it("defaults to 'short' cache retention when not explicitly set", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "Hello" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        {},
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system).toHaveLength(2);
+      // The default retention is "short", whose cache point carries no ttl field.
+      expect(system[1]).toEqual({ cachePoint: { type: "default" } });
+    });
+
+    it("injects cache point only on last USER message", async () => {
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [
+          { role: "user", content: [{ text: "First question" }] },
+          { role: "assistant", content: [{ text: "Answer" }] },
+          { role: "user", content: [{ text: "Follow-up" }] },
+        ],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        APP_INFERENCE_PROFILE_ARN,
+        APP_INFERENCE_PROFILE_DESCRIPTOR,
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const messages = payload.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>;
+      // The earlier user turn must stay free of cache points.
+      expect(messages[0].content).toHaveLength(1);
+      // The assistant turn is never modified.
+      expect(messages[1].content).toHaveLength(1);
+      // Only the final user turn receives the cache point.
+      expect(messages[2].content).toHaveLength(2);
+      expect(messages[2].content[1]).toEqual({ cachePoint: { type: "default" } });
+    });
+
+    it("injects cache points for opaque application inference profile ARNs after profile lookup", async () => {
+      const modelId =
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da";
+      inferenceProfileResults.push({
+        models: [
+          {
+            modelArn:
+              "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0",
+          },
+        ],
+      });
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello" }] }],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      const system = payload.system as Array<Record<string, unknown>>;
+      expect(system[1]).toEqual({ cachePoint: { type: "default" } });
+      expect(sendGetInferenceProfile).toHaveBeenCalledTimes(1);
+      expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]);
+    });
+
+    it("does not inject cache points when any resolved profile target is not cacheable", async () => {
+      const modelId =
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db";
+      inferenceProfileResults.push({
+        models: [
+          {
+            modelArn:
+              "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0",
+          },
+          {
+            modelArn:
+              "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-opus-20240229-v1:0",
+          },
+        ],
+      });
+      const provider = await registerWithConfig(undefined);
+      const payload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello" }] }],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        payload,
+      );
+
+      expect(payload.system).toEqual([{ text: "You are helpful." }]);
+      expect(payload.messages).toEqual([{ role: "user", content: [{ text: "Hello" }] }]);
+    });
+
+    it("retries opaque profile lookup after a transient failure instead of caching the fallback", async () => {
+      const modelId =
+        "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459dc";
+      inferenceProfileResults.push(
+        new Error("throttled"),
+        {
+          models: [
+            {
+              modelArn:
+                "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6-20250514-v1:0",
+            },
+          ],
+        },
+      );
+      const provider = await registerWithConfig(undefined);
+      const firstPayload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello" }] }],
+      };
+      const secondPayload: Record<string, unknown> = {
+        system: [{ text: "You are helpful." }],
+        messages: [{ role: "user", content: [{ text: "Hello again" }] }],
+      };
+
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        firstPayload,
+      );
+      await callWrappedStreamWithPayload(
+        provider,
+        modelId,
+        makeAppInferenceProfileDescriptor(modelId),
+        { cacheRetention: "short" },
+        secondPayload,
+      );
+
+      expect(firstPayload.system).toEqual([{ text: "You are helpful." }]);
+      expect(secondPayload.system).toEqual([
+        { text: "You are helpful." },
+        { cachePoint: { type: "default" } },
+      ]);
+      expect(sendGetInferenceProfile).toHaveBeenCalledTimes(2);
+    });
+  });
 });
diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts
index 31adb1dba3c..c930807f6f9 100644
--- a/extensions/amazon-bedrock/register.sync.runtime.ts
+++ b/extensions/amazon-bedrock/register.sync.runtime.ts
@@ -62,6 +62,180 @@ function createGuardrailWrapStreamFn(
   };
 }
 
+/**
+ * Case-insensitive mirror of pi-ai's shipped Bedrock `supportsPromptCaching` matcher.
+ * Keep this in sync with node_modules/@mariozechner/pi-ai/dist/providers/amazon-bedrock.js.
+ */
+function matchesPiAiPromptCachingModelId(modelId: string): boolean {
+  const id = modelId.toLowerCase();
+  if (!id.includes("claude")) {
+    return false;
+  }
+  // Claude 4.x — any "-4-" or "-4." anywhere in the ID counts (the check is not anchored).
+  if (id.includes("-4-") || id.includes("-4.")) {
+    return true;
+  }
+  // Claude 3.7 Sonnet.
+  if (id.includes("claude-3-7-sonnet")) {
+    return true;
+  }
+  // Claude 3.5 Haiku; every other Claude ID falls through to false.
+  if (id.includes("claude-3-5-haiku")) {
+    return true;
+  }
+  return false;
+}
+
+function piAiWouldInjectCachePoints(modelId: string): boolean {
+  return matchesPiAiPromptCachingModelId(modelId);
+}
+
+/**
+ * Matches Bedrock application inference profile ARNs (aws, aws-cn, and aws-us-gov
+ * partitions; case-insensitive). Their profile names can be opaque, so pi-ai's
+ * model-name-based checks may miss them. System-defined profiles (us., eu., global.)
+ * and base model IDs contain the model name and are handled by pi-ai natively.
+ */
+const BEDROCK_APP_INFERENCE_PROFILE_RE = /^arn:aws(-cn|-us-gov)?:bedrock:.*:application-inference-profile\//i;
+
+function isBedrockAppInferenceProfile(modelId: string): boolean {
+  return BEDROCK_APP_INFERENCE_PROFILE_RE.test(modelId);
+}
+
+/**
+ * Decide, from the model ID alone, whether OpenClaw has to add Bedrock cache
+ * points itself via onPayload because pi-ai will not.
+ *
+ * pi-ai's internal `supportsPromptCaching` looks for specific Claude model name
+ * patterns in `model.id`, which an application inference profile ARN may not
+ * contain. The answer is true only when all three conditions hold, checked in
+ * this order with short-circuiting:
+ *   1. the ID is an application inference profile ARN,
+ *   2. pi-ai's matcher would not already inject cache points for it, and
+ *   3. `isAnthropicBedrockModel` identifies it as an Anthropic model.
+ *
+ * Regular Claude model IDs and system-defined profiles always yield false.
+ */
+function needsCachePointInjection(modelId: string): boolean {
+  return (
+    // Only application inference profile ARNs are in scope.
+    isBedrockAppInferenceProfile(modelId) &&
+    // Skip IDs pi-ai already handles on its own.
+    !piAiWouldInjectCachePoints(modelId) &&
+    // The ARN heuristic must still recognize an Anthropic model.
+    isAnthropicBedrockModel(modelId)
+  );
+}
+
+/**
+ * Pull the region segment out of an ARN, or return undefined when the segment is
+ * missing or empty (e.g. "arn:aws:bedrock:us-east-1:123:..." → "us-east-1").
+ */
+function extractRegionFromArn(arn: string): string | undefined {
+  // Segments are arn:partition:service:region:account:resource — region is the 4th.
+  const [, , , region] = arn.split(":");
+  return region ? region : undefined;
+}
+
+/**
+ * Report whether a foundation model ARN returned by GetInferenceProfile passes the
+ * pi-ai prompt-caching matcher; an empty ARN never matches.
+ */
+function resolvedModelSupportsCaching(modelArn: string): boolean {
+  return matchesPiAiPromptCachingModelId(modelArn);
+}
+
+/**
+ * Look up an application inference profile via GetInferenceProfile and report
+ * whether every model it routes to supports prompt caching (an empty model list
+ * is not eligible). Successful lookups are memoized per profile ARN; failures are
+ * not, so they retry on the next request. NOTE(review): a lookup that fails
+ * permanently (e.g. missing IAM permission) is therefore repeated every request.
+ * The client region comes from the ARN itself, falling back to `fallbackRegion`.
+ */
+const appProfileCacheEligibleCache = new Map<string, boolean>();
+
+async function resolveAppProfileCacheEligible(
+  modelId: string,
+  fallbackRegion: string | undefined,
+): Promise<boolean> {
+  if (appProfileCacheEligibleCache.has(modelId)) {
+    return appProfileCacheEligibleCache.get(modelId)!;
+  }
+  try {
+    const { BedrockClient, GetInferenceProfileCommand } = await import("@aws-sdk/client-bedrock");
+    const region = extractRegionFromArn(modelId) ?? fallbackRegion;
+    const client = new BedrockClient(region ? { region } : {});
+    const resp = await client.send(
+      new GetInferenceProfileCommand({ inferenceProfileIdentifier: modelId }),
+    );
+    const models = resp.models ?? [];
+    const eligible =
+      models.length > 0 &&
+      models.every((m: { modelArn?: string }) =>
+      resolvedModelSupportsCaching(m.modelArn ?? ""),
+    );
+    appProfileCacheEligibleCache.set(modelId, eligible);
+    return eligible;
+  } catch {
+    // Any failure (throttling, network, IAM, SDK import) is deliberately not cached:
+    // fall back to the ARN heuristic now and let the next request retry the lookup.
+    return isAnthropicBedrockModel(modelId);
+  }
+}
+
+type BedrockCachePoint = { cachePoint: { type: "default"; ttl?: string } };
+type BedrockContentBlock = Record<string, unknown>;
+type BedrockMessage = { role?: string; content?: BedrockContentBlock[] };
+
+/** True when any block in the list already carries a non-null `cachePoint` entry. */
+function hasCachePoint(blocks: BedrockContentBlock[] | undefined): boolean {
+  return (blocks ?? []).some((block) => block.cachePoint != null);
+}
+
+/** Build a cache point block; only "long" retention adds the explicit "1h" ttl. */
+function makeCachePoint(cacheRetention: string | undefined): BedrockCachePoint {
+  if (cacheRetention === "long") {
+    return { cachePoint: { type: "default", ttl: "1h" } };
+  }
+  return { cachePoint: { type: "default" } };
+}
+
+/**
+ * Append Bedrock Converse cache points to `payload` in place, for requests where
+ * pi-ai skipped them because it did not recognize the model ID (application
+ * inference profiles). No-op when `cacheRetention` is undefined, empty, or "none".
+ */
+function injectBedrockCachePoints(
+  payload: Record<string, unknown>,
+  cacheRetention: string | undefined,
+): void {
+  if (!cacheRetention || cacheRetention === "none") {
+    return;
+  }
+  // Every insertion builds its own cache point so the two arrays never alias one object.
+  const system = payload.system as BedrockContentBlock[] | undefined;
+  if (Array.isArray(system) && system.length > 0 && !hasCachePoint(system)) {
+    system.push(makeCachePoint(cacheRetention));
+  }
+
+  // Last user turn (Converse roles are lowercase); malformed entries are skipped.
+  const messages = payload.messages as BedrockMessage[] | undefined;
+  if (!Array.isArray(messages)) {
+    return;
+  }
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg?.role !== "user" || !Array.isArray(msg.content)) {
+      continue;
+    }
+    if (!hasCachePoint(msg.content)) {
+      msg.content.push(makeCachePoint(cacheRetention));
+    }
+    return;
+  }
+}
+
 export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
   // Keep registration-local constants inside the function so partial module
   // initialization during test bootstrap cannot trip TDZ reads.
@@ -81,8 +255,17 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
 
   api.registerMemoryEmbeddingProvider(bedrockMemoryEmbeddingProviderAdapter);
 
-  const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) =>
-    isAnthropicBedrockModel(modelId) ? streamFn : createBedrockNoCacheWrapper(streamFn);
+  const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) => {
+    if (isAnthropicBedrockModel(modelId)) {
+      return streamFn;
+    }
+    // Application inference profile ARNs also skip the no-cache wrapper: the profile
+    // may still resolve to a cacheable Claude model via GetInferenceProfile at call time.
+    if (isBedrockAppInferenceProfile(modelId)) {
+      return streamFn;
+    }
+    return createBedrockNoCacheWrapper(streamFn);
+  };
 
   const cacheWrapStreamFn =
     guardrail?.guardrailIdentifier && guardrail?.guardrailVersion
@@ -161,23 +344,62 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
       // Apply cache + guardrail wrapping.
       const wrapped = cacheWrapStreamFn({ modelId, streamFn });
       const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl);
+      const mayNeedCacheInjection =
+        isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId);
 
-      if (!region) {
+      // For known Anthropic models (heuristic match), enable injection immediately.
+      // For opaque profile IDs, we'll resolve via GetInferenceProfile on first call.
+      const heuristicMatch = needsCachePointInjection(modelId);
+
+      if (!region && !mayNeedCacheInjection) {
         return wrapped;
       }
 
-      // Wrap to inject the region into every stream call so pi-ai's Bedrock
-      // client connects to the right region for inference profile IDs.
       const underlying = wrapped ?? streamFn;
       if (!underlying) {
         return wrapped;
       }
       return (streamModel, context, options) => {
-        // pi-ai's bedrock provider reads `options.region` at runtime but the
-        // StreamFn type does not declare it. Merge via Object.assign to avoid
-        // an unsafe type assertion.
-        const merged = Object.assign({}, options, { region });
-        return underlying(streamModel, context, merged);
+        const merged = Object.assign({}, options, region ? { region } : {});
+
+        if (!mayNeedCacheInjection) {
+          return underlying(streamModel, context, merged);
+        }
+
+        // Use the cacheRetention from options if explicitly set.
+        // When undefined, default to "short" to match pi-ai's internal default.
+        // Explicit values — including "none" — are expected to arrive here intact:
+        // resolveAnthropicCacheRetentionFamily treats application inference profile
+        // ARNs as "anthropic-bedrock" whenever the user configured cacheRetention
+        // explicitly, so an opt-out is not dropped upstream. With no explicit
+        // setting the value is undefined; users with app inference profiles
+        // generally want caching enabled, so "short" is the default.
+        const cacheRetention = typeof merged.cacheRetention === "string"
+          ? merged.cacheRetention
+          : "short";
+
+        if (heuristicMatch) {
+          // Fast path: ARN heuristic already identified this as Claude.
+          return streamWithPayloadPatch(underlying, streamModel, context, merged, (payload) => {
+            injectBedrockCachePoints(payload, cacheRetention);
+          });
+        }
+
+        // Slow path: opaque profile ID — resolve underlying model via API (cached).
+        // pi-ai's onPayload supports async, so we await the resolution inline.
+        const originalOnPayload = merged.onPayload as
+          | ((payload: unknown, model: unknown) => unknown)
+          | undefined;
+        return underlying(streamModel, context, {
+          ...merged,
+          onPayload: async (payload: unknown, payloadModel: unknown) => {
+            const eligible = await resolveAppProfileCacheEligible(modelId, region);
+            if (eligible && payload && typeof payload === "object") {
+              injectBedrockCachePoints(payload as Record<string, unknown>, cacheRetention);
+            }
+            return originalOnPayload?.(payload, payloadModel);
+          },
+        });
       };
     },
     matchesContextOverflowError: ({ errorMessage }) =>
diff --git a/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts b/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts
index 686babfef04..117562ee897 100644
--- a/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts
+++ b/src/agents/pi-embedded-runner/anthropic-family-cache-semantics.ts
@@ -79,10 +79,25 @@ export function resolveAnthropicCacheRetentionFamily(params: {
   if (
     normalizedProvider === "amazon-bedrock" &&
     params.hasExplicitCacheConfig &&
-    typeof params.modelId === "string" &&
-    isAnthropicBedrockModel(params.modelId)
+    typeof params.modelId === "string"
   ) {
-    return "anthropic-bedrock";
+    if (isAnthropicBedrockModel(params.modelId)) {
+      return "anthropic-bedrock";
+    }
+    // Application inference profiles with opaque IDs (e.g. z27qyso459da) can't
+    // be identified as Claude from the ARN alone. When the user explicitly sets
+    // cacheRetention, honor it — the extension's GetInferenceProfile resolution
+    // handles the actual model detection at runtime.
+    if (
+      BEDROCK_APP_INFERENCE_PROFILE_ARN_RE.test(
+        normalizeLowercaseStringOrEmpty(params.modelId),
+      ) &&
+      normalizeLowercaseStringOrEmpty(params.modelId).includes(
+        ":application-inference-profile/",
+      )
+    ) {
+      return "anthropic-bedrock";
+    }
   }
   if (
     normalizedProvider !== "amazon-bedrock" &&
diff --git a/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts b/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts
index 610239cca2e..b7b212c69b0 100644
--- a/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts
+++ b/src/agents/pi-embedded-runner/extra-params.cache-retention-default.test.ts
@@ -281,6 +281,39 @@ describe("cacheRetention default behavior", () => {
       ),
     ).toBe("none");
   });
+
+  it("passes through explicit cacheRetention for opaque Bedrock app inference profile ARNs", () => {
+    // An explicit "long" must survive even though the ARN never names a Claude model.
+    const resolved = resolveCacheRetention(
+      { cacheRetention: "long" },
+      "amazon-bedrock",
+      "openai-completions",
+      "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da",
+    );
+    expect(resolved).toBe("long");
+  });
+
+  it("passes through explicit 'none' for opaque Bedrock app inference profile ARNs", () => {
+    // An explicit opt-out must not be dropped just because the profile ID is opaque.
+    const resolved = resolveCacheRetention(
+      { cacheRetention: "none" },
+      "amazon-bedrock",
+      "openai-completions",
+      "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da",
+    );
+    expect(resolved).toBe("none");
+  });
+
+  it("does not default cacheRetention for opaque Bedrock app inference profile ARNs", () => {
+    // Without explicit config, no retention is resolved for an opaque profile ARN.
+    const resolved = resolveCacheRetention(
+      undefined,
+      "amazon-bedrock",
+      "openai-completions",
+      "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459da",
+    );
+    expect(resolved).toBeUndefined();
+  });
 });
 
 describe("anthropic-family cache semantics", () => {