From 2a15a3bb53f911c5ea77a8708a600fcfcc65b7dc Mon Sep 17 00:00:00 2001
From: wirjo <daniel@wirjo.com>
Date: Thu, 23 Apr 2026 08:53:41 +1000
Subject: [PATCH] fix(amazon-bedrock): add known model context windows to
 discovery (#65952)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(amazon-bedrock): add known model context windows to discovery

Bedrock's ListFoundationModels API does not expose token limits. Discovery
was hardcoding contextWindow: 32000 for every model, causing Claude (1M),
Nova (300K), and other models to hit premature 'Context limit exceeded'
errors and unnecessary session resets.

Adds a lookup table of known context windows for Bedrock models:
- Anthropic Claude: 200K-1M
- Amazon Nova: 128K-1M
- Meta Llama: 128K
- Mistral: 32K-128K
- DeepSeek: 128K
- Cohere: 128K
- AI21 Jamba: 256K

Inference profile prefixes (us., eu., ap., global.) are stripped before
lookup, so us.anthropic.claude-opus-4-6-v1 correctly resolves to 1M.

Also raises the default fallback from 32K to 128K for unknown models —
most modern models have at least 128K context.

Single file change, no type system modifications.

Complementary to #65030 (provenance flag for warning on unknown models).

Fixes #64919
Related: #64250

* add KNOWN_MAX_TOKENS map and expand model coverage

- Add KNOWN_MAX_TOKENS lookup table with Bedrock-optimized values that
  balance response quality against quota burndown (5x rate for Claude 3.7+)
- Add missing models to KNOWN_CONTEXT_WINDOWS: Opus 4.7 (1M), Opus 4.1/4.5,
  Sonnet 4, Claude 3/3.5 Haiku, DeepSeek V3/V3.2, Google Gemma 3
- Refactor prefix-stripping into shared resolveKnownValue() helper
- Fix: use !== undefined instead of truthy check for table lookups
- Wire resolveKnownMaxTokens into toModelDefinition and resolveInferenceProfiles

Quota burndown context: Bedrock reserves input_tokens + max_tokens from
TPM at request start. For Claude 3.7+, output burns at 5x. The values
in KNOWN_MAX_TOKENS are intentionally conservative (8-16K for Claude)
to maximize concurrent throughput while still allowing useful responses.
Thinking budget is added separately by the runtime.

* remove KNOWN_MAX_TOKENS — maxTokens should be handled upstream

Remove the KNOWN_MAX_TOKENS map. Hardcoding maxTokens values in
discovery is the wrong layer to solve this — any explicit value
still gets reserved against Bedrock's TPM quota at request start.

The correct fix is upstream in pi's Bedrock provider: omit maxTokens
from inferenceConfig when not explicitly set, letting the model use
its internal default. This avoids quota waste entirely.

See: badlogic/pi-mono#3399 and badlogic/pi-mono#3400

Keep the expanded KNOWN_CONTEXT_WINDOWS (context windows ARE the
right thing to set in discovery — they affect compaction thresholds
and session management, not API-level quota reservation).

* docs: clarify why hardcoded context windows are needed

Bedrock's ListFoundationModels and GetFoundationModel APIs return no
token limit information — there is no Bedrock API to discover context
windows or max output tokens programmatically. Note that this table
should become a fallback if AWS adds token metadata in the future.

* fix: add au and apac to inference profile prefix regex

Add missing geo prefixes discovered by querying inference profiles
across multiple regions:
- au. (Australia/NZ, used in ap-southeast-2/4/6)
- apac. (Asia-Pacific, used for older models in ap-northeast-1)

Both resolveKnownContextWindow and resolveBaseModelId now handle
all known prefixes: us, eu, ap, apac, au, jp, global.

* test: port au. prefix test from #65449 by @alickgithub2, add apac. coverage

Port the Australia/NZ inference profile test from PR #65449
(credit: @alickgithub2) and extend it to also cover the apac.
prefix discovered in ap-northeast-1.

* expand model coverage: Llama 4, MiniMax, NVIDIA, Mistral 3, GLM, Qwen

Cross-referenced KNOWN_CONTEXT_WINDOWS against live
list-foundation-models API. Added missing models:
- Llama 4 Maverick (1M) and Scout (512K)
- MiniMax M2/M2.1/M2.5 (1M)
- NVIDIA Nemotron Super/Nano variants (128K)
- Mistral Large 3 675B (128K)
- GLM 4.7/4.7-flash/5 (128K)
- Qwen3 Coder/32B/VL (128-256K)

Removed deprecated deepseek.v3-v1:0 and claude-opus-4-20250514
(not in active foundation models list).

* raise default context window from 128K to 200K

200K matches the floor for all current Claude models (the most
popular on Bedrock). Every other active model with a lower actual
limit is already in the explicit table. This ensures new Claude
models get a correct default without requiring a table update.

* test: update discovery test expectations for known context window values

* test: fix remaining contextWindow expectation (default 200K)

* fix(amazon-bedrock): keep conservative context fallback

* docs(changelog): note Bedrock context window fix

* fix(amazon-bedrock): normalize known context fallback

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
---
 CHANGELOG.md                                |   1 +
 extensions/amazon-bedrock/discovery.test.ts | 165 +++++++++++++++++++-
 extensions/amazon-bedrock/discovery.ts      | 123 ++++++++++++++-
 3 files changed, 281 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3b7c4ba9e76..c29eaea4249 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Providers/Amazon Bedrock: use known context-window metadata for discovered models while keeping the unknown-model fallback conservative, so compaction and overflow handling improve for newer Bedrock models without overstating unlisted model limits. Thanks @wirjo.
 - Providers/Amazon Bedrock Mantle: refresh IAM-backed bearer tokens at runtime instead of baking discovery-time tokens into provider config, so long-lived Mantle sessions keep working after the initial token ages out. Thanks @wirjo.
 - Codex harness: rotate the shared app-server websocket client when the configured bearer token changes, so auth-token refreshes reconnect with the new `Authorization` header instead of reusing a stale socket. (#70328) Thanks @Lucenx9.
 - Telegram/sandbox: keep Telegram bot DMs on per-account sender session keys even when `session.dmScope=main`, so sandbox/tool policy can distinguish Telegram-originated direct chats from the agent main session.
diff --git a/extensions/amazon-bedrock/discovery.test.ts b/extensions/amazon-bedrock/discovery.test.ts
index 7c994c8a2cb..5d03e0d273b 100644
--- a/extensions/amazon-bedrock/discovery.test.ts
+++ b/extensions/amazon-bedrock/discovery.test.ts
@@ -87,7 +87,7 @@ describe("bedrock discovery", () => {
       name: "Claude 3.7 Sonnet",
       reasoning: false,
       input: ["text", "image"],
-      contextWindow: 32000,
+      contextWindow: 200000,
       maxTokens: 4096,
     });
   });
@@ -104,7 +104,11 @@ describe("bedrock discovery", () => {
   });
 
   it("uses configured defaults for context and max tokens", async () => {
-    mockSingleActiveSummary();
+    mockSingleActiveSummary({
+      modelId: "example.unknown-text-v1:0",
+      modelName: "Example Unknown Text",
+      providerName: "example",
+    });
 
     const models = await discoverBedrockModels({
       region: "us-east-1",
@@ -114,6 +118,68 @@ describe("bedrock discovery", () => {
     expect(models[0]).toMatchObject({ contextWindow: 64000, maxTokens: 8192 });
   });
 
+  it("keeps the conservative fallback for unknown inference profiles", async () => {
+    sendMock
+      .mockResolvedValueOnce({
+        modelSummaries: [],
+      })
+      .mockResolvedValueOnce({
+        inferenceProfileSummaries: [
+          {
+            inferenceProfileId: "jp.example.unknown-text-v1:0",
+            inferenceProfileName: "JP Example Unknown Text",
+            status: "ACTIVE",
+            type: "SYSTEM_DEFINED",
+            models: [
+              {
+                modelArn: "arn:aws:bedrock:ap-northeast-1::foundation-model/example.unknown-text-v1:0",
+              },
+            ],
+          },
+        ],
+      });
+
+    const models = await discoverBedrockModels({ region: "ap-northeast-1", clientFactory });
+
+    expect(models).toHaveLength(1);
+    expect(models[0]).toMatchObject({
+      id: "jp.example.unknown-text-v1:0",
+      contextWindow: 32000,
+      maxTokens: 4096,
+      input: ["text"],
+    });
+  });
+
+  it("normalizes region-prefixed versioned model ids when resolving context windows", async () => {
+    sendMock
+      .mockResolvedValueOnce({
+        modelSummaries: [],
+      })
+      .mockResolvedValueOnce({
+        inferenceProfileSummaries: [
+          {
+            inferenceProfileId: "jp.anthropic.claude-sonnet-4-6-v1:0",
+            inferenceProfileName: "JP Claude Sonnet 4.6",
+            status: "ACTIVE",
+            type: "SYSTEM_DEFINED",
+            models: [
+              {
+                modelArn:
+                  "arn:aws:bedrock:ap-northeast-1::foundation-model/anthropic.claude-sonnet-4-6-v1:0",
+              },
+            ],
+          },
+        ],
+      });
+
+    const models = await discoverBedrockModels({ region: "ap-northeast-1", clientFactory });
+
+    expect(models[0]).toMatchObject({
+      id: "jp.anthropic.claude-sonnet-4-6-v1:0",
+      contextWindow: 1_000_000,
+    });
+  });
+
   it("caches results when refreshInterval is enabled", async () => {
     mockSingleActiveSummary();
 
@@ -252,7 +318,7 @@ describe("bedrock discovery", () => {
     expect(usProfile).toMatchObject({
       name: "US Anthropic Claude Sonnet 4.6",
       input: ["text", "image"],
-      contextWindow: 32000,
+      contextWindow: 1000000,
       maxTokens: 4096,
     });
     expect(euProfile).toMatchObject({ input: ["text", "image"] });
@@ -356,11 +422,43 @@ describe("bedrock discovery", () => {
     expect(profile).toMatchObject({
       id: "us.my-prod-profile",
       input: ["text", "image"],
-      contextWindow: 32000,
+      contextWindow: 1000000,
       maxTokens: 4096,
     });
   });
 
+  it("uses the resolved base model id for application-profile context fallback", async () => {
+    sendMock
+      .mockResolvedValueOnce({
+        modelSummaries: [],
+      })
+      .mockResolvedValueOnce({
+        inferenceProfileSummaries: [
+          {
+            inferenceProfileId: "us.my-prod-profile",
+            inferenceProfileName: "Prod Claude Profile",
+            status: "ACTIVE",
+            type: "APPLICATION",
+            models: [
+              {
+                modelArn:
+                  "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4-6-v1:0",
+              },
+            ],
+          },
+        ],
+      });
+
+    const models = await discoverBedrockModels({ region: "us-east-1", clientFactory });
+
+    expect(models[0]).toMatchObject({
+      id: "us.my-prod-profile",
+      contextWindow: 1_000_000,
+      maxTokens: 4096,
+      input: ["text"],
+    });
+  });
+
   it("merges implicit Bedrock models into explicit provider overrides", () => {
     expect(
       mergeImplicitBedrockProvider({
@@ -433,4 +531,63 @@ describe("bedrock discovery", () => {
     expect(legacyEnabled?.baseUrl).toBe("https://bedrock-runtime.us-west-2.amazonaws.com");
     expect(sendMock).toHaveBeenCalledTimes(4);
   });
+
+  // Ported from #65449 by @alickgithub2 — extended to also cover apac. prefix
+  it("resolves au. and apac. prefixes for regional inference profiles", async () => {
+    sendMock
+      .mockResolvedValueOnce({
+        modelSummaries: [
+          {
+            modelId: "anthropic.claude-sonnet-4-6",
+            modelName: "Claude Sonnet 4.6",
+            providerName: "anthropic",
+            inputModalities: ["TEXT", "IMAGE"],
+            outputModalities: ["TEXT"],
+            responseStreamingSupported: true,
+            modelLifecycle: { status: "ACTIVE" },
+          },
+        ],
+      })
+      .mockResolvedValueOnce({
+        inferenceProfileSummaries: [
+          {
+            inferenceProfileId: "au.anthropic.claude-sonnet-4-6",
+            inferenceProfileName: "AU Anthropic Claude Sonnet 4.6",
+            inferenceProfileArn:
+              "arn:aws:bedrock:ap-southeast-2::inference-profile/au.anthropic.claude-sonnet-4-6",
+            status: "ACTIVE",
+            type: "SYSTEM_DEFINED",
+            models: [], // no ARNs — forces the prefix-regex fallback
+          },
+          {
+            inferenceProfileId: "apac.anthropic.claude-sonnet-4-6",
+            inferenceProfileName: "APAC Anthropic Claude Sonnet 4.6",
+            inferenceProfileArn:
+              "arn:aws:bedrock:ap-northeast-1::inference-profile/apac.anthropic.claude-sonnet-4-6",
+            status: "ACTIVE",
+            type: "SYSTEM_DEFINED",
+            models: [],
+          },
+        ],
+      });
+
+    const models = await discoverBedrockModels({ region: "ap-southeast-2", clientFactory });
+
+    // Foundation model + 2 regional inference profiles
+    expect(models).toHaveLength(3);
+
+    const auProfile = models.find((m) => m.id === "au.anthropic.claude-sonnet-4-6");
+    expect(auProfile).toMatchObject({
+      id: "au.anthropic.claude-sonnet-4-6",
+      name: "AU Anthropic Claude Sonnet 4.6",
+      input: ["text", "image"],
+    });
+
+    const apacProfile = models.find((m) => m.id === "apac.anthropic.claude-sonnet-4-6");
+    expect(apacProfile).toMatchObject({
+      id: "apac.anthropic.claude-sonnet-4-6",
+      name: "APAC Anthropic Claude Sonnet 4.6",
+      input: ["text", "image"],
+    });
+  });
 });
diff --git a/extensions/amazon-bedrock/discovery.ts b/extensions/amazon-bedrock/discovery.ts
index b7a9eaa756f..a8b020ab747 100644
--- a/extensions/amazon-bedrock/discovery.ts
+++ b/extensions/amazon-bedrock/discovery.ts
@@ -21,8 +21,121 @@ import {
 const log = createSubsystemLogger("bedrock-discovery");
 
 const DEFAULT_REFRESH_INTERVAL_SECONDS = 3600;
-const DEFAULT_CONTEXT_WINDOW = 32000;
+const DEFAULT_CONTEXT_WINDOW = 32_000;
 const DEFAULT_MAX_TOKENS = 4096;
+
+// ---------------------------------------------------------------------------
+// Known model context windows (Bedrock API does not expose token limits)
+// ---------------------------------------------------------------------------
+
+/**
+ * Bedrock's ListFoundationModels and GetFoundationModel APIs return no token
+ * limit information — only model ID, name, modalities, and lifecycle status.
+ * There is currently no Bedrock API to discover context windows or max output
+ * tokens programmatically.
+ *
+ * This map provides correct context window values for known models so that
+ * session management, compaction thresholds, and context overflow detection
+ * work correctly. If AWS adds token metadata to the API in the future, this
+ * table should become a fallback rather than the primary source.
+ *
+ * Inference profile prefixes (us., eu., ap., global.) are stripped before lookup.
+ *
+ * Sources: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
+ *          https://platform.claude.com/docs/en/about-claude/models
+ */
+const KNOWN_CONTEXT_WINDOWS: Record<string, number> = {
+  // Anthropic Claude
+  "anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
+  "anthropic.claude-opus-4-7": 1_000_000,
+  "anthropic.claude-opus-4-6-v1": 1_000_000,
+  "anthropic.claude-opus-4-6-v1:0": 1_000_000,
+  "anthropic.claude-sonnet-4-6": 1_000_000,
+  "anthropic.claude-sonnet-4-6-v1:0": 1_000_000,
+  "anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000,
+  "anthropic.claude-sonnet-4-20250514-v1:0": 200_000,
+  "anthropic.claude-opus-4-5-20251101-v1:0": 200_000,
+  "anthropic.claude-opus-4-1-20250805-v1:0": 200_000,
+  "anthropic.claude-haiku-4-5-20251001-v1:0": 200_000,
+  "anthropic.claude-3-5-haiku-20241022-v1:0": 200_000,
+  "anthropic.claude-3-haiku-20240307-v1:0": 200_000,
+  // Amazon Nova
+  "amazon.nova-premier-v1:0": 1_000_000,
+  "amazon.nova-pro-v1:0": 300_000,
+  "amazon.nova-lite-v1:0": 300_000,
+  "amazon.nova-micro-v1:0": 128_000,
+  "amazon.nova-2-lite-v1:0": 300_000,
+  // MiniMax
+  "minimax.minimax-m2.5": 1_000_000,
+  "minimax.minimax-m2.1": 1_000_000,
+  "minimax.minimax-m2": 1_000_000,
+  // Meta Llama 4
+  "meta.llama4-maverick-17b-instruct-v1:0": 1_000_000,
+  "meta.llama4-scout-17b-instruct-v1:0": 512_000,
+  // Meta Llama 3
+  "meta.llama3-3-70b-instruct-v1:0": 128_000,
+  "meta.llama3-2-90b-instruct-v1:0": 128_000,
+  "meta.llama3-2-11b-instruct-v1:0": 128_000,
+  "meta.llama3-2-3b-instruct-v1:0": 128_000,
+  "meta.llama3-2-1b-instruct-v1:0": 128_000,
+  "meta.llama3-1-405b-instruct-v1:0": 128_000,
+  "meta.llama3-1-70b-instruct-v1:0": 128_000,
+  "meta.llama3-1-8b-instruct-v1:0": 128_000,
+  // NVIDIA Nemotron
+  "nvidia.nemotron-super-3-120b": 256_000,
+  "nvidia.nemotron-nano-3-30b": 128_000,
+  "nvidia.nemotron-nano-12b-v2": 128_000,
+  "nvidia.nemotron-nano-9b-v2": 128_000,
+  // Mistral
+  "mistral.mistral-large-3-675b-instruct": 128_000,
+  "mistral.mistral-large-2407-v1:0": 128_000,
+  "mistral.mistral-small-2402-v1:0": 32_000,
+  // DeepSeek
+  "deepseek.r1-v1:0": 128_000,
+  "deepseek.v3.2": 128_000,
+  // Cohere
+  "cohere.command-r-plus-v1:0": 128_000,
+  "cohere.command-r-v1:0": 128_000,
+  // AI21
+  "ai21.jamba-1-5-large-v1:0": 256_000,
+  "ai21.jamba-1-5-mini-v1:0": 256_000,
+  // Google Gemma
+  "google.gemma-3-27b-it": 128_000,
+  "google.gemma-3-12b-it": 128_000,
+  "google.gemma-3-4b-it": 128_000,
+  // GLM
+  "zai.glm-5": 128_000,
+  "zai.glm-4.7": 128_000,
+  "zai.glm-4.7-flash": 128_000,
+  // Qwen
+  "qwen.qwen3-coder-next": 256_000,
+  "qwen.qwen3-coder-30b-a3b-v1:0": 256_000,
+  "qwen.qwen3-32b-v1:0": 128_000,
+  "qwen.qwen3-vl-235b-a22b": 128_000,
+};
+
+/**
+ * Resolve the real context window for a Bedrock model ID.
+ * Strips inference profile prefixes (us., eu., ap., global.) before lookup.
+ */
+function resolveKnownContextWindow(modelId: string): number | undefined {
+  const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
+  const candidates = [modelId, stripped];
+  for (const candidate of candidates) {
+    if (KNOWN_CONTEXT_WINDOWS[candidate] !== undefined) {
+      return KNOWN_CONTEXT_WINDOWS[candidate];
+    }
+    const withoutVersionSuffix = candidate.replace(/:0$/, "");
+    if (
+      withoutVersionSuffix !== candidate &&
+      KNOWN_CONTEXT_WINDOWS[withoutVersionSuffix] !== undefined
+    ) {
+      return KNOWN_CONTEXT_WINDOWS[withoutVersionSuffix];
+    }
+  }
+  return undefined;
+}
+
 const DEFAULT_COST = {
   input: 0,
   output: 0,
@@ -163,7 +276,7 @@ function toModelDefinition(
     reasoning: inferReasoningSupport(summary),
     input: mapInputModalities(summary),
     cost: DEFAULT_COST,
-    contextWindow: defaults.contextWindow,
+    contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow,
     maxTokens: defaults.maxTokens,
   };
 }
@@ -192,7 +305,7 @@ function resolveBaseModelId(profile: InferenceProfileSummary): string | undefine
   }
   if (profile.type === "SYSTEM_DEFINED") {
     const id = profile.inferenceProfileId ?? "";
-    const prefixMatch = /^(?:us|eu|ap|jp|global)\.(.+)$/i.exec(id);
+    const prefixMatch = /^(?:us|eu|ap|apac|au|jp|global)\.(.+)$/i.exec(id);
     if (prefixMatch) {
       return prefixMatch[1];
     }
@@ -282,7 +395,9 @@ function resolveInferenceProfiles(
       reasoning: baseModel?.reasoning ?? false,
       input: baseModel?.input ?? ["text"],
       cost: baseModel?.cost ?? DEFAULT_COST,
-      contextWindow: baseModel?.contextWindow ?? defaults.contextWindow,
+      contextWindow: baseModel?.contextWindow
+        ?? resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "")
+        ?? defaults.contextWindow,
       maxTokens: baseModel?.maxTokens ?? defaults.maxTokens,
     });
   }