diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b7c4ba9e76..c29eaea4249 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Providers/Amazon Bedrock: use known context-window metadata for discovered models while keeping the unknown-model fallback conservative, so compaction and overflow handling improve for newer Bedrock models without overstating unlisted model limits. Thanks @wirjo. - Providers/Amazon Bedrock Mantle: refresh IAM-backed bearer tokens at runtime instead of baking discovery-time tokens into provider config, so long-lived Mantle sessions keep working after the initial token ages out. Thanks @wirjo. - Codex harness: rotate the shared app-server websocket client when the configured bearer token changes, so auth-token refreshes reconnect with the new `Authorization` header instead of reusing a stale socket. (#70328) Thanks @Lucenx9. - Telegram/sandbox: keep Telegram bot DMs on per-account sender session keys even when `session.dmScope=main`, so sandbox/tool policy can distinguish Telegram-originated direct chats from the agent main session. 
diff --git a/extensions/amazon-bedrock/discovery.test.ts b/extensions/amazon-bedrock/discovery.test.ts index 7c994c8a2cb..5d03e0d273b 100644 --- a/extensions/amazon-bedrock/discovery.test.ts +++ b/extensions/amazon-bedrock/discovery.test.ts @@ -87,7 +87,7 @@ describe("bedrock discovery", () => { name: "Claude 3.7 Sonnet", reasoning: false, input: ["text", "image"], - contextWindow: 32000, + contextWindow: 200000, maxTokens: 4096, }); }); @@ -104,7 +104,11 @@ describe("bedrock discovery", () => { }); it("uses configured defaults for context and max tokens", async () => { - mockSingleActiveSummary(); + mockSingleActiveSummary({ + modelId: "example.unknown-text-v1:0", + modelName: "Example Unknown Text", + providerName: "example", + }); const models = await discoverBedrockModels({ region: "us-east-1", @@ -114,6 +118,68 @@ describe("bedrock discovery", () => { expect(models[0]).toMatchObject({ contextWindow: 64000, maxTokens: 8192 }); }); + it("keeps the conservative fallback for unknown inference profiles", async () => { + sendMock + .mockResolvedValueOnce({ + modelSummaries: [], + }) + .mockResolvedValueOnce({ + inferenceProfileSummaries: [ + { + inferenceProfileId: "jp.example.unknown-text-v1:0", + inferenceProfileName: "JP Example Unknown Text", + status: "ACTIVE", + type: "SYSTEM_DEFINED", + models: [ + { + modelArn: "arn:aws:bedrock:ap-northeast-1::foundation-model/example.unknown-text-v1:0", + }, + ], + }, + ], + }); + + const models = await discoverBedrockModels({ region: "ap-northeast-1", clientFactory }); + + expect(models).toHaveLength(1); + expect(models[0]).toMatchObject({ + id: "jp.example.unknown-text-v1:0", + contextWindow: 32000, + maxTokens: 4096, + input: ["text"], + }); + }); + + it("normalizes region-prefixed versioned model ids when resolving context windows", async () => { + sendMock + .mockResolvedValueOnce({ + modelSummaries: [], + }) + .mockResolvedValueOnce({ + inferenceProfileSummaries: [ + { + inferenceProfileId: 
"jp.anthropic.claude-sonnet-4-6-v1:0", + inferenceProfileName: "JP Claude Sonnet 4.6", + status: "ACTIVE", + type: "SYSTEM_DEFINED", + models: [ + { + modelArn: + "arn:aws:bedrock:ap-northeast-1::foundation-model/anthropic.claude-sonnet-4-6-v1:0", + }, + ], + }, + ], + }); + + const models = await discoverBedrockModels({ region: "ap-northeast-1", clientFactory }); + + expect(models[0]).toMatchObject({ + id: "jp.anthropic.claude-sonnet-4-6-v1:0", + contextWindow: 1_000_000, + }); + }); + it("caches results when refreshInterval is enabled", async () => { mockSingleActiveSummary(); @@ -252,7 +318,7 @@ describe("bedrock discovery", () => { expect(usProfile).toMatchObject({ name: "US Anthropic Claude Sonnet 4.6", input: ["text", "image"], - contextWindow: 32000, + contextWindow: 1000000, maxTokens: 4096, }); expect(euProfile).toMatchObject({ input: ["text", "image"] }); @@ -356,11 +422,43 @@ describe("bedrock discovery", () => { expect(profile).toMatchObject({ id: "us.my-prod-profile", input: ["text", "image"], - contextWindow: 32000, + contextWindow: 1000000, maxTokens: 4096, }); }); + it("uses the resolved base model id for application-profile context fallback", async () => { + sendMock + .mockResolvedValueOnce({ + modelSummaries: [], + }) + .mockResolvedValueOnce({ + inferenceProfileSummaries: [ + { + inferenceProfileId: "us.my-prod-profile", + inferenceProfileName: "Prod Claude Profile", + status: "ACTIVE", + type: "APPLICATION", + models: [ + { + modelArn: + "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4-6-v1:0", + }, + ], + }, + ], + }); + + const models = await discoverBedrockModels({ region: "us-east-1", clientFactory }); + + expect(models[0]).toMatchObject({ + id: "us.my-prod-profile", + contextWindow: 1_000_000, + maxTokens: 4096, + input: ["text"], + }); + }); + it("merges implicit Bedrock models into explicit provider overrides", () => { expect( mergeImplicitBedrockProvider({ @@ -433,4 +531,63 @@ describe("bedrock discovery", () => { 
expect(legacyEnabled?.baseUrl).toBe("https://bedrock-runtime.us-west-2.amazonaws.com"); expect(sendMock).toHaveBeenCalledTimes(4); }); + + // Ported from #65449 by @alickgithub2 — extended to also cover apac. prefix + it("resolves au. and apac. prefixes for regional inference profiles", async () => { + sendMock + .mockResolvedValueOnce({ + modelSummaries: [ + { + modelId: "anthropic.claude-sonnet-4-6", + modelName: "Claude Sonnet 4.6", + providerName: "anthropic", + inputModalities: ["TEXT", "IMAGE"], + outputModalities: ["TEXT"], + responseStreamingSupported: true, + modelLifecycle: { status: "ACTIVE" }, + }, + ], + }) + .mockResolvedValueOnce({ + inferenceProfileSummaries: [ + { + inferenceProfileId: "au.anthropic.claude-sonnet-4-6", + inferenceProfileName: "AU Anthropic Claude Sonnet 4.6", + inferenceProfileArn: + "arn:aws:bedrock:ap-southeast-2::inference-profile/au.anthropic.claude-sonnet-4-6", + status: "ACTIVE", + type: "SYSTEM_DEFINED", + models: [], // no ARNs — forces the prefix-regex fallback + }, + { + inferenceProfileId: "apac.anthropic.claude-sonnet-4-6", + inferenceProfileName: "APAC Anthropic Claude Sonnet 4.6", + inferenceProfileArn: + "arn:aws:bedrock:ap-northeast-1::inference-profile/apac.anthropic.claude-sonnet-4-6", + status: "ACTIVE", + type: "SYSTEM_DEFINED", + models: [], + }, + ], + }); + + const models = await discoverBedrockModels({ region: "ap-southeast-2", clientFactory }); + + // Foundation model + 2 regional inference profiles + expect(models).toHaveLength(3); + + const auProfile = models.find((m) => m.id === "au.anthropic.claude-sonnet-4-6"); + expect(auProfile).toMatchObject({ + id: "au.anthropic.claude-sonnet-4-6", + name: "AU Anthropic Claude Sonnet 4.6", + input: ["text", "image"], + }); + + const apacProfile = models.find((m) => m.id === "apac.anthropic.claude-sonnet-4-6"); + expect(apacProfile).toMatchObject({ + id: "apac.anthropic.claude-sonnet-4-6", + name: "APAC Anthropic Claude Sonnet 4.6", + input: ["text", "image"], + }); 
+ }); }); diff --git a/extensions/amazon-bedrock/discovery.ts b/extensions/amazon-bedrock/discovery.ts index b7a9eaa756f..a8b020ab747 100644 --- a/extensions/amazon-bedrock/discovery.ts +++ b/extensions/amazon-bedrock/discovery.ts @@ -21,8 +21,121 @@ import { const log = createSubsystemLogger("bedrock-discovery"); const DEFAULT_REFRESH_INTERVAL_SECONDS = 3600; -const DEFAULT_CONTEXT_WINDOW = 32000; +const DEFAULT_CONTEXT_WINDOW = 32_000; const DEFAULT_MAX_TOKENS = 4096; + +// --------------------------------------------------------------------------- +// Known model context windows (Bedrock API does not expose token limits) +// --------------------------------------------------------------------------- + +/** + * Bedrock's ListFoundationModels and GetFoundationModel APIs return no token + * limit information — only model ID, name, modalities, and lifecycle status. + * There is currently no Bedrock API to discover context windows or max output + * tokens programmatically. + * + * This map provides correct context window values for known models so that + * session management, compaction thresholds, and context overflow detection + * work correctly. If AWS adds token metadata to the API in the future, this + * table should become a fallback rather than the primary source. + * + * Inference profile prefixes (us., eu., ap., apac., au., jp., global.) are stripped before lookup. 
+ * + * Sources: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html + * https://platform.claude.com/docs/en/about-claude/models + */ +const KNOWN_CONTEXT_WINDOWS: Record<string, number> = { + // Anthropic Claude + "anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000, + "anthropic.claude-opus-4-7": 1_000_000, + "anthropic.claude-opus-4-6-v1": 1_000_000, + "anthropic.claude-opus-4-6-v1:0": 1_000_000, + "anthropic.claude-sonnet-4-6": 1_000_000, + "anthropic.claude-sonnet-4-6-v1:0": 1_000_000, + "anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000, + "anthropic.claude-sonnet-4-20250514-v1:0": 200_000, + "anthropic.claude-opus-4-5-20251101-v1:0": 200_000, + "anthropic.claude-opus-4-1-20250805-v1:0": 200_000, + "anthropic.claude-haiku-4-5-20251001-v1:0": 200_000, + "anthropic.claude-3-5-haiku-20241022-v1:0": 200_000, + "anthropic.claude-3-haiku-20240307-v1:0": 200_000, + // Amazon Nova + "amazon.nova-premier-v1:0": 1_000_000, + "amazon.nova-pro-v1:0": 300_000, + "amazon.nova-lite-v1:0": 300_000, + "amazon.nova-micro-v1:0": 128_000, + "amazon.nova-2-lite-v1:0": 300_000, + // MiniMax + "minimax.minimax-m2.5": 1_000_000, + "minimax.minimax-m2.1": 1_000_000, + "minimax.minimax-m2": 1_000_000, + // Meta Llama 4 + "meta.llama4-maverick-17b-instruct-v1:0": 1_000_000, + "meta.llama4-scout-17b-instruct-v1:0": 512_000, + // Meta Llama 3 + "meta.llama3-3-70b-instruct-v1:0": 128_000, + "meta.llama3-2-90b-instruct-v1:0": 128_000, + "meta.llama3-2-11b-instruct-v1:0": 128_000, + "meta.llama3-2-3b-instruct-v1:0": 128_000, + "meta.llama3-2-1b-instruct-v1:0": 128_000, + "meta.llama3-1-405b-instruct-v1:0": 128_000, + "meta.llama3-1-70b-instruct-v1:0": 128_000, + "meta.llama3-1-8b-instruct-v1:0": 128_000, + // NVIDIA Nemotron + "nvidia.nemotron-super-3-120b": 256_000, + "nvidia.nemotron-nano-3-30b": 128_000, + "nvidia.nemotron-nano-12b-v2": 128_000, + "nvidia.nemotron-nano-9b-v2": 128_000, + // Mistral + "mistral.mistral-large-3-675b-instruct": 128_000, + 
"mistral.mistral-large-2407-v1:0": 128_000, + "mistral.mistral-small-2402-v1:0": 32_000, + // DeepSeek + "deepseek.r1-v1:0": 128_000, + "deepseek.v3.2": 128_000, + // Cohere + "cohere.command-r-plus-v1:0": 128_000, + "cohere.command-r-v1:0": 128_000, + // AI21 + "ai21.jamba-1-5-large-v1:0": 256_000, + "ai21.jamba-1-5-mini-v1:0": 256_000, + // Google Gemma + "google.gemma-3-27b-it": 128_000, + "google.gemma-3-12b-it": 128_000, + "google.gemma-3-4b-it": 128_000, + // GLM + "zai.glm-5": 128_000, + "zai.glm-4.7": 128_000, + "zai.glm-4.7-flash": 128_000, + // Qwen + "qwen.qwen3-coder-next": 256_000, + "qwen.qwen3-coder-30b-a3b-v1:0": 256_000, + "qwen.qwen3-32b-v1:0": 128_000, + "qwen.qwen3-vl-235b-a22b": 128_000, +}; + +/** + * Resolve the known context window for a Bedrock model ID, or undefined when unlisted. + * Strips region prefixes (us., eu., ap., apac., au., jp., global.) case-insensitively first. + */ +function resolveKnownContextWindow(modelId: string): number | undefined { + const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./i, ""); + const candidates = [modelId, stripped]; + for (const candidate of candidates) { + if (KNOWN_CONTEXT_WINDOWS[candidate] !== undefined) { + return KNOWN_CONTEXT_WINDOWS[candidate]; + } + const withoutVersionSuffix = candidate.replace(/:0$/, ""); + if ( + withoutVersionSuffix !== candidate && + KNOWN_CONTEXT_WINDOWS[withoutVersionSuffix] !== undefined + ) { + return KNOWN_CONTEXT_WINDOWS[withoutVersionSuffix]; + } + } + return undefined; +} + const DEFAULT_COST = { input: 0, output: 0, @@ -163,7 +276,7 @@ function toModelDefinition( reasoning: inferReasoningSupport(summary), input: mapInputModalities(summary), cost: DEFAULT_COST, - contextWindow: defaults.contextWindow, + contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow, maxTokens: defaults.maxTokens, }; } @@ -192,7 +305,7 @@ function resolveBaseModelId(profile: InferenceProfileSummary): string | undefine } if (profile.type === "SYSTEM_DEFINED") { const id = profile.inferenceProfileId ?? 
""; - const prefixMatch = /^(?:us|eu|ap|jp|global)\.(.+)$/i.exec(id); + const prefixMatch = /^(?:us|eu|ap|apac|au|jp|global)\.(.+)$/i.exec(id); if (prefixMatch) { return prefixMatch[1]; } @@ -282,7 +395,9 @@ function resolveInferenceProfiles( reasoning: baseModel?.reasoning ?? false, input: baseModel?.input ?? ["text"], cost: baseModel?.cost ?? DEFAULT_COST, - contextWindow: baseModel?.contextWindow ?? defaults.contextWindow, + contextWindow: baseModel?.contextWindow + ?? resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") + ?? defaults.contextWindow, maxTokens: baseModel?.maxTokens ?? defaults.maxTokens, }); }