mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-30 15:00:26 +00:00
* refactor: extract agent core package Introduce packages/agent-core as the OpenClaw-owned home for reusable agent loop, harness, session, prompt, and runtime dependency contracts. * refactor: extract shared llm runtime Move provider model registries, stream wrappers, OAuth helpers, and LLM utilities into src/llm with plugin-sdk barrels instead of depending on the old embedded runtime layout. * refactor: remove pi runtime internals Rename remaining Pi-shaped agent surfaces to OpenClaw agent runtime names, delete obsolete Pi docs and package graph checks, and add the third-party notice for incorporated code. * refactor: tighten agent session runtime Make agent-core/runtime dependencies explicit, consolidate compaction and session transcript helpers, and move model/session helpers behind OpenClaw-owned contracts. * refactor: remove static model and pi auth paths Drop static model catalogs and Pi auth bridges, move model/provider facts to manifest-owned runtime contracts, and harden internal embedded-agent utilities. * refactor: remove legacy provider compat paths * docs: remove agent parity notes * fix: skip provider wildcard metadata parsing * refactor: share session extension sdk loading * refactor: inline acpx proxy error formatter * refactor: fold edit recovery into edit tool * fix: accept extension batch separator * test: align startup provider plugin expectations * fix: restore provider-scoped release discovery * test: align static asset packaging expectations * fix: run static provider catalogs during scoped discovery * fix: add provider entry catalogs for scoped live discovery * fix: load lightweight provider catalog entries * fix: refresh provider-scoped plugin metadata * fix: keep provider catalog entries on release live path * fix: keep static manifest models in release live checks * fix: harden release model discovery * fix: reduce OpenAI live cache probe reasoning * fix: disable OpenAI cache probe reasoning * ci: extend OpenAI gateway live timeout * fix: extend live gateway model budget * fix: stabilize release validation regressions * fix: honor provider aliases in model rows * fix: stabilize release validation lanes * fix: stabilize release memory qa * ci: stabilize release validation lanes * ci: prefer ipv4 for live docker node calls * fix: restore shared tool-call stream wrapper * ci: remove legacy pi test shard alias * fix: clean up embedded agent test drift * fix: stabilize runtime alias status * fix: clean up embedded agent ci drift * fix: restore release ci invariants * fix: clean up post-rebase runtime drift * fix: restore release ci checks * fix: restore release ci after rebase * fix: remove stale pi runtime path * test: align compaction runtime expectations * test: update plugin prerelease expectations * fix: handle claude live tool approvals * fix: stabilize release validation gates * fix: finish agent runtime import * test: finish post-rebase agent runtime mocks * fix: keep codex compaction native * fix: stabilize codex app-server hook tests * test: isolate codex diagnostic active run * test: remove codex diagnostic completion race # Conflicts: # extensions/codex/src/app-server/run-attempt.test.ts * ci: fix full release manifest performance run id * refactor: narrow llm plugin sdk boundary * chore: drop generated google boundary stamps * fix: repair rebase fallout * fix: clean up rebased runtime references * fix: decode codex jwt payloads as base64url * fix: preserve shipped pi runtime alias * fix: add scoped sdk virtual modules * fix: decode llm codex oauth jwt as base64url * fix: avoid stale vertex adc negative cache * fix: harden tool arg decoding and codeql path * fix: keep vertex adc negative checks live * refactor: consolidate codex jwt and edit helpers * fix: await codex oauth node runtime imports * fix: preserve sdk tool and notice contracts * fix: preserve shipped compat config boundaries * fix: align codex oauth callback host * fix: terminate agent-core loop streams on failure * fix: keep codex oauth callback alive during fallback * ci: include session tools in critical codeql scans * fix: keep Cloudflare Anthropic provider auth header * docs: redirect legacy pi runtime pages * fix: honor bundled web provider compat discovery * fix: protect session output spill files * fix: keep legacy agent dir env blocked * fix: contain auto-discovered skill symlinks * fix: harden agent core sdk proxy surfaces * fix: restore approval reaction sdk compat * fix: keep live docker runs bounded * fix: keep codex oauth redirect host aligned * fix: resolve post-rebase agent runtime drift * fix: redact anthropic oauth parse failures * fix: preserve responses strict tool shaping * fix: repair agent runtime rebase cleanup * docs: redirect retired parity pages * fix: bound auto-discovered resources to roots * fix: repair post-rebase agent test drift * fix: preserve bundled provider allowlist migration * fix: preserve manifest-owned provider aliases * fix: declare photon image dependency * fix: keep provider headers out of proxy body * fix: preserve shipped env aliases * fix: refresh control ui i18n generated state * fix: quote read fallback paths * fix: preview edits through configured backend * test: satisfy core test typecheck * fix: preserve ZAI usage auth fallback * test: repair codex diagnostic test * fix: repair agent runtime rebase drift * test: finish embedded runner import rename * fix: repair agent runtime rebase integrations * test: align compaction oauth fallback expectations * fix: allow sdk-auth session models * fix: update doctor tool schema import * fix: preserve bedrock plugin region * fix: stream harmony-like prose immediately * ci: include session runtime in codeql shards * fix: repair latest rebase integrations * fix: honor explicit codex websocket transport * fix: keep openai-compatible credentials provider-scoped * fix: refresh sdk api baseline after rebase * fix: route cli runtime aliases through openclaw harness * test: rename stale harness mock expectation * test: rename embedded agent overflow calls * test: clean embedded auth test wording * test: use openclaw stream types in deepinfra cache test * fix: refresh sdk api baseline on latest main * fix: honor bundled discovery compat allowlists * fix: refresh sdk api baseline after latest rebase * fix: remove stale rebase imports * test: rename stale model catalog mock * test: mock renamed doctor runtime modules * fix: map canonical kimi env auth * fix: use internal model registry in bench script * fix: migrate deepinfra provider catalog entry * fix: enforce builtin tool suppression * fix: route compaction auth and proxy payloads safely * refactor: prune unused llm registry leftovers * test: update codex hooks session import * test: fix model picker ci coverage * test: align model picker auth mock types
613 lines
21 KiB
TypeScript
613 lines
21 KiB
TypeScript
import {
|
|
type BedrockClient,
|
|
type ListFoundationModelsCommandOutput,
|
|
type ListInferenceProfilesCommandOutput,
|
|
} from "@aws-sdk/client-bedrock";
|
|
import { createSubsystemLogger } from "openclaw/plugin-sdk/core";
|
|
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
|
import type {
|
|
BedrockDiscoveryConfig,
|
|
ModelDefinitionConfig,
|
|
ModelProviderConfig,
|
|
} from "openclaw/plugin-sdk/provider-model-shared";
|
|
import {
|
|
normalizeLowercaseStringOrEmpty,
|
|
normalizeOptionalLowercaseString,
|
|
} from "openclaw/plugin-sdk/string-coerce-runtime";
|
|
import { refreshAwsSharedConfigCacheForBedrock } from "./aws-credential-refresh.js";
|
|
import { resolveBedrockConfigApiKey } from "./discovery-shared.js";
|
|
|
|
const log = createSubsystemLogger("bedrock-discovery");
|
|
|
|
const DEFAULT_REFRESH_INTERVAL_SECONDS = 3600;
|
|
const DEFAULT_CONTEXT_WINDOW = 32_000;
|
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Known model context windows (Bedrock API does not expose token limits)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
|
|
* Bedrock's ListFoundationModels and GetFoundationModel APIs return no token
|
|
* limit information — only model ID, name, modalities, and lifecycle status.
|
|
* There is currently no Bedrock API to discover context windows or max output
|
|
* tokens programmatically.
|
|
*
|
|
* This map provides correct context window values for known models so that
|
|
* session management, compaction thresholds, and context overflow detection
|
|
* work correctly. If AWS adds token metadata to the API in the future, this
|
|
* table should become a fallback rather than the primary source.
|
|
*
|
|
* Inference profile prefixes (us., eu., ap., global.) are stripped before lookup.
|
|
*
|
|
* Sources: https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
|
|
* https://platform.claude.com/docs/en/about-claude/models
|
|
*/
|
|
const KNOWN_CONTEXT_WINDOWS: Record<string, number> = {
|
|
// Anthropic Claude
|
|
"anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
|
|
"anthropic.claude-opus-4-7": 1_000_000,
|
|
"anthropic.claude-opus-4-6-v1": 1_000_000,
|
|
"anthropic.claude-opus-4-6-v1:0": 1_000_000,
|
|
"anthropic.claude-sonnet-4-6": 1_000_000,
|
|
"anthropic.claude-sonnet-4-6-v1:0": 1_000_000,
|
|
"anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000,
|
|
"anthropic.claude-sonnet-4-20250514-v1:0": 200_000,
|
|
"anthropic.claude-opus-4-5-20251101-v1:0": 200_000,
|
|
"anthropic.claude-opus-4-1-20250805-v1:0": 200_000,
|
|
"anthropic.claude-haiku-4-5-20251001-v1:0": 200_000,
|
|
"anthropic.claude-3-5-haiku-20241022-v1:0": 200_000,
|
|
"anthropic.claude-3-haiku-20240307-v1:0": 200_000,
|
|
// Amazon Nova
|
|
"amazon.nova-premier-v1:0": 1_000_000,
|
|
"amazon.nova-pro-v1:0": 300_000,
|
|
"amazon.nova-lite-v1:0": 300_000,
|
|
"amazon.nova-micro-v1:0": 128_000,
|
|
"amazon.nova-2-lite-v1:0": 300_000,
|
|
// MiniMax
|
|
"minimax.minimax-m2.5": 1_000_000,
|
|
"minimax.minimax-m2.1": 1_000_000,
|
|
"minimax.minimax-m2": 1_000_000,
|
|
// Meta Llama 4
|
|
"meta.llama4-maverick-17b-instruct-v1:0": 1_000_000,
|
|
"meta.llama4-scout-17b-instruct-v1:0": 512_000,
|
|
// Meta Llama 3
|
|
"meta.llama3-3-70b-instruct-v1:0": 128_000,
|
|
"meta.llama3-2-90b-instruct-v1:0": 128_000,
|
|
"meta.llama3-2-11b-instruct-v1:0": 128_000,
|
|
"meta.llama3-2-3b-instruct-v1:0": 128_000,
|
|
"meta.llama3-2-1b-instruct-v1:0": 128_000,
|
|
"meta.llama3-1-405b-instruct-v1:0": 128_000,
|
|
"meta.llama3-1-70b-instruct-v1:0": 128_000,
|
|
"meta.llama3-1-8b-instruct-v1:0": 128_000,
|
|
// NVIDIA Nemotron
|
|
"nvidia.nemotron-super-3-120b": 256_000,
|
|
"nvidia.nemotron-nano-3-30b": 128_000,
|
|
"nvidia.nemotron-nano-12b-v2": 128_000,
|
|
"nvidia.nemotron-nano-9b-v2": 128_000,
|
|
// Mistral
|
|
"mistral.mistral-large-3-675b-instruct": 128_000,
|
|
"mistral.mistral-large-2407-v1:0": 128_000,
|
|
"mistral.mistral-small-2402-v1:0": 32_000,
|
|
// DeepSeek
|
|
"deepseek.r1-v1:0": 128_000,
|
|
"deepseek.v3.2": 128_000,
|
|
// Cohere
|
|
"cohere.command-r-plus-v1:0": 128_000,
|
|
"cohere.command-r-v1:0": 128_000,
|
|
// AI21
|
|
"ai21.jamba-1-5-large-v1:0": 256_000,
|
|
"ai21.jamba-1-5-mini-v1:0": 256_000,
|
|
// Google Gemma
|
|
"google.gemma-3-27b-it": 128_000,
|
|
"google.gemma-3-12b-it": 128_000,
|
|
"google.gemma-3-4b-it": 128_000,
|
|
// GLM
|
|
"zai.glm-5": 128_000,
|
|
"zai.glm-4.7": 128_000,
|
|
"zai.glm-4.7-flash": 128_000,
|
|
// Qwen
|
|
"qwen.qwen3-coder-next": 256_000,
|
|
"qwen.qwen3-coder-30b-a3b-v1:0": 256_000,
|
|
"qwen.qwen3-32b-v1:0": 128_000,
|
|
"qwen.qwen3-vl-235b-a22b": 128_000,
|
|
};
|
|
|
|
/**
|
|
* Resolve the real context window for a Bedrock model ID.
|
|
* Strips inference profile prefixes (us., eu., ap., global.) before lookup.
|
|
*/
|
|
function resolveKnownContextWindow(modelId: string): number | undefined {
|
|
const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
|
|
const candidates = [modelId, stripped];
|
|
for (const candidate of candidates) {
|
|
if (KNOWN_CONTEXT_WINDOWS[candidate] !== undefined) {
|
|
return KNOWN_CONTEXT_WINDOWS[candidate];
|
|
}
|
|
const withoutVersionSuffix = candidate.replace(/:0$/, "");
|
|
if (
|
|
withoutVersionSuffix !== candidate &&
|
|
KNOWN_CONTEXT_WINDOWS[withoutVersionSuffix] !== undefined
|
|
) {
|
|
return KNOWN_CONTEXT_WINDOWS[withoutVersionSuffix];
|
|
}
|
|
}
|
|
return undefined;
|
|
}
|
|
|
|
const DEFAULT_COST = {
|
|
input: 0,
|
|
output: 0,
|
|
cacheRead: 0,
|
|
cacheWrite: 0,
|
|
};
|
|
|
|
type BedrockModelSummary = NonNullable<ListFoundationModelsCommandOutput["modelSummaries"]>[number];
|
|
|
|
type InferenceProfileSummary = NonNullable<
|
|
ListInferenceProfilesCommandOutput["inferenceProfileSummaries"]
|
|
>[number];
|
|
|
|
type BedrockDiscoverySdk = {
|
|
createClient(region: string): BedrockClient;
|
|
createListFoundationModelsCommand(): unknown;
|
|
createListInferenceProfilesCommand(input: { nextToken?: string }): unknown;
|
|
};
|
|
|
|
async function loadBedrockDiscoverySdk(): Promise<BedrockDiscoverySdk> {
|
|
const { BedrockClient, ListFoundationModelsCommand, ListInferenceProfilesCommand } =
|
|
await import("@aws-sdk/client-bedrock");
|
|
return {
|
|
createClient: (region) => new BedrockClient({ region }),
|
|
createListFoundationModelsCommand: () => new ListFoundationModelsCommand({}),
|
|
createListInferenceProfilesCommand: (input) => new ListInferenceProfilesCommand(input),
|
|
};
|
|
}
|
|
|
|
function createInjectedClientDiscoverySdk(): BedrockDiscoverySdk {
|
|
class ListFoundationModelsCommand {
|
|
constructor(readonly input: Record<string, unknown> = {}) {}
|
|
}
|
|
class ListInferenceProfilesCommand {
|
|
constructor(readonly input: Record<string, unknown> = {}) {}
|
|
}
|
|
return {
|
|
createClient() {
|
|
throw new Error("clientFactory is required for injected Bedrock discovery commands");
|
|
},
|
|
createListFoundationModelsCommand: () => new ListFoundationModelsCommand({}),
|
|
createListInferenceProfilesCommand: (input) => new ListInferenceProfilesCommand(input),
|
|
};
|
|
}
|
|
|
|
type BedrockDiscoveryCacheEntry = {
|
|
expiresAt: number;
|
|
value?: ModelDefinitionConfig[];
|
|
inFlight?: Promise<ModelDefinitionConfig[]>;
|
|
};
|
|
|
|
const discoveryCache = new Map<string, BedrockDiscoveryCacheEntry>();
|
|
let hasLoggedBedrockError = false;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helper utilities
|
|
// ---------------------------------------------------------------------------
|
|
|
|
function normalizeProviderFilter(filter?: string[]): string[] {
|
|
if (!filter || filter.length === 0) {
|
|
return [];
|
|
}
|
|
const normalized = new Set(
|
|
filter
|
|
.map((entry) => normalizeOptionalLowercaseString(entry))
|
|
.filter((entry): entry is string => Boolean(entry)),
|
|
);
|
|
return Array.from(normalized).toSorted();
|
|
}
|
|
|
|
function buildCacheKey(params: {
|
|
region: string;
|
|
providerFilter: string[];
|
|
refreshIntervalSeconds: number;
|
|
defaultContextWindow: number;
|
|
defaultMaxTokens: number;
|
|
}): string {
|
|
return JSON.stringify(params);
|
|
}
|
|
|
|
function includesTextModalities(modalities?: Array<string>): boolean {
|
|
return (modalities ?? []).some((entry) => normalizeOptionalLowercaseString(entry) === "text");
|
|
}
|
|
|
|
function isActive(summary: BedrockModelSummary): boolean {
|
|
const status = summary.modelLifecycle?.status;
|
|
return typeof status === "string" ? status.toUpperCase() === "ACTIVE" : false;
|
|
}
|
|
|
|
function mapInputModalities(summary: BedrockModelSummary): Array<"text" | "image"> {
|
|
const inputs = summary.inputModalities ?? [];
|
|
const mapped = new Set<"text" | "image">();
|
|
for (const modality of inputs) {
|
|
const lower = normalizeOptionalLowercaseString(modality);
|
|
if (lower === "text") {
|
|
mapped.add("text");
|
|
}
|
|
if (lower === "image") {
|
|
mapped.add("image");
|
|
}
|
|
}
|
|
if (mapped.size === 0) {
|
|
mapped.add("text");
|
|
}
|
|
return Array.from(mapped);
|
|
}
|
|
|
|
function inferReasoningSupport(summary: BedrockModelSummary): boolean {
|
|
const haystack = normalizeLowercaseStringOrEmpty(
|
|
`${summary.modelId ?? ""} ${summary.modelName ?? ""}`,
|
|
);
|
|
return haystack.includes("reasoning") || haystack.includes("thinking");
|
|
}
|
|
|
|
function resolveDefaultContextWindow(config?: BedrockDiscoveryConfig): number {
|
|
const value = Math.floor(config?.defaultContextWindow ?? DEFAULT_CONTEXT_WINDOW);
|
|
return value > 0 ? value : DEFAULT_CONTEXT_WINDOW;
|
|
}
|
|
|
|
function resolveDefaultMaxTokens(config?: BedrockDiscoveryConfig): number {
|
|
const value = Math.floor(config?.defaultMaxTokens ?? DEFAULT_MAX_TOKENS);
|
|
return value > 0 ? value : DEFAULT_MAX_TOKENS;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Foundation model helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
function matchesProviderFilter(summary: BedrockModelSummary, filter: string[]): boolean {
|
|
if (filter.length === 0) {
|
|
return true;
|
|
}
|
|
const providerName =
|
|
summary.providerName ??
|
|
(typeof summary.modelId === "string" ? summary.modelId.split(".")[0] : undefined);
|
|
const normalized = normalizeOptionalLowercaseString(providerName);
|
|
if (!normalized) {
|
|
return false;
|
|
}
|
|
return filter.includes(normalized);
|
|
}
|
|
|
|
function shouldIncludeSummary(summary: BedrockModelSummary, filter: string[]): boolean {
|
|
if (!summary.modelId?.trim()) {
|
|
return false;
|
|
}
|
|
if (!matchesProviderFilter(summary, filter)) {
|
|
return false;
|
|
}
|
|
if (summary.responseStreamingSupported !== true) {
|
|
return false;
|
|
}
|
|
if (!includesTextModalities(summary.outputModalities)) {
|
|
return false;
|
|
}
|
|
if (!isActive(summary)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
function toModelDefinition(
|
|
summary: BedrockModelSummary,
|
|
defaults: { contextWindow: number; maxTokens: number },
|
|
): ModelDefinitionConfig {
|
|
const id = summary.modelId?.trim() ?? "";
|
|
return {
|
|
id,
|
|
name: summary.modelName?.trim() || id,
|
|
reasoning: inferReasoningSupport(summary),
|
|
input: mapInputModalities(summary),
|
|
cost: DEFAULT_COST,
|
|
contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow,
|
|
maxTokens: defaults.maxTokens,
|
|
};
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Inference profile helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
|
|
* Resolve the base foundation model ID from an inference profile.
|
|
*
|
|
* System-defined profiles use a region prefix:
|
|
* "us.anthropic.claude-sonnet-4-6" → "anthropic.claude-sonnet-4-6"
|
|
*
|
|
* Application profiles carry the model ARN in their models[] array:
|
|
* models[0].modelArn = "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6"
|
|
* → "anthropic.claude-sonnet-4-6"
|
|
*/
|
|
function resolveBaseModelId(profile: InferenceProfileSummary): string | undefined {
|
|
const firstArn = profile.models?.[0]?.modelArn;
|
|
if (firstArn) {
|
|
const arnMatch = /foundation-model\/(.+)$/.exec(firstArn);
|
|
if (arnMatch) {
|
|
return arnMatch[1];
|
|
}
|
|
}
|
|
if (profile.type === "SYSTEM_DEFINED") {
|
|
const id = profile.inferenceProfileId ?? "";
|
|
const prefixMatch = /^(?:us|eu|ap|apac|au|jp|global)\.(.+)$/i.exec(id);
|
|
if (prefixMatch) {
|
|
return prefixMatch[1];
|
|
}
|
|
}
|
|
return undefined;
|
|
}
|
|
|
|
/**
|
|
* Fetch raw inference profile summaries from the Bedrock control plane.
|
|
* Handles pagination. Best-effort: silently returns empty array if IAM lacks
|
|
* bedrock:ListInferenceProfiles permission.
|
|
*/
|
|
async function fetchInferenceProfileSummaries(
|
|
client: BedrockClient,
|
|
createListInferenceProfilesCommand: BedrockDiscoverySdk["createListInferenceProfilesCommand"],
|
|
): Promise<InferenceProfileSummary[]> {
|
|
try {
|
|
const profiles: InferenceProfileSummary[] = [];
|
|
let nextToken: string | undefined;
|
|
do {
|
|
const response: ListInferenceProfilesCommandOutput = await client.send(
|
|
createListInferenceProfilesCommand({ nextToken }) as never,
|
|
);
|
|
for (const summary of response.inferenceProfileSummaries ?? []) {
|
|
profiles.push(summary);
|
|
}
|
|
nextToken = response.nextToken;
|
|
} while (nextToken);
|
|
return profiles;
|
|
} catch (error) {
|
|
log.debug?.("Skipping inference profile discovery", {
|
|
error: formatErrorMessage(error),
|
|
});
|
|
return [];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convert raw inference profile summaries into model definitions.
|
|
*
|
|
* Each profile inherits capabilities (modalities, reasoning, context window,
|
|
* cost) from its underlying foundation model. This ensures that
|
|
* "us.anthropic.claude-sonnet-4-6" has the same capabilities as
|
|
* "anthropic.claude-sonnet-4-6" — including image input, reasoning support,
|
|
* and token limits.
|
|
*
|
|
* When the foundation model isn't found in the map (e.g. the model is only
|
|
* available via inference profiles in this region), safe defaults are used.
|
|
*/
|
|
function resolveInferenceProfiles(
|
|
profiles: InferenceProfileSummary[],
|
|
defaults: { contextWindow: number; maxTokens: number },
|
|
providerFilter: string[],
|
|
foundationModels: Map<string, ModelDefinitionConfig>,
|
|
): ModelDefinitionConfig[] {
|
|
const discovered: ModelDefinitionConfig[] = [];
|
|
for (const profile of profiles) {
|
|
if (!profile.inferenceProfileId?.trim()) {
|
|
continue;
|
|
}
|
|
if (profile.status !== "ACTIVE") {
|
|
continue;
|
|
}
|
|
|
|
// Apply provider filter: check if any of the underlying models match.
|
|
if (providerFilter.length > 0) {
|
|
const models = profile.models ?? [];
|
|
const matchesFilter = models.some((m) => {
|
|
const provider = m.modelArn?.split("/")?.[1]?.split(".")?.[0];
|
|
return provider
|
|
? providerFilter.includes(normalizeOptionalLowercaseString(provider) ?? "")
|
|
: false;
|
|
});
|
|
if (!matchesFilter) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Look up the underlying foundation model to inherit its capabilities.
|
|
const baseModelId = resolveBaseModelId(profile);
|
|
const baseModel = baseModelId
|
|
? foundationModels.get(normalizeLowercaseStringOrEmpty(baseModelId))
|
|
: undefined;
|
|
|
|
discovered.push({
|
|
id: profile.inferenceProfileId,
|
|
name: profile.inferenceProfileName?.trim() || profile.inferenceProfileId,
|
|
reasoning: baseModel?.reasoning ?? false,
|
|
input: baseModel?.input ?? ["text"],
|
|
cost: baseModel?.cost ?? DEFAULT_COST,
|
|
contextWindow:
|
|
baseModel?.contextWindow ??
|
|
resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") ??
|
|
defaults.contextWindow,
|
|
maxTokens: baseModel?.maxTokens ?? defaults.maxTokens,
|
|
});
|
|
}
|
|
return discovered;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Public API
|
|
// ---------------------------------------------------------------------------
|
|
|
|
export function resetBedrockDiscoveryCacheForTest(): void {
|
|
discoveryCache.clear();
|
|
hasLoggedBedrockError = false;
|
|
}
|
|
|
|
export async function discoverBedrockModels(params: {
|
|
region: string;
|
|
config?: BedrockDiscoveryConfig;
|
|
now?: () => number;
|
|
clientFactory?: (region: string) => BedrockClient;
|
|
}): Promise<ModelDefinitionConfig[]> {
|
|
const refreshIntervalSeconds = Math.max(
|
|
0,
|
|
Math.floor(params.config?.refreshInterval ?? DEFAULT_REFRESH_INTERVAL_SECONDS),
|
|
);
|
|
const providerFilter = normalizeProviderFilter(params.config?.providerFilter);
|
|
const defaultContextWindow = resolveDefaultContextWindow(params.config);
|
|
const defaultMaxTokens = resolveDefaultMaxTokens(params.config);
|
|
const cacheKey = buildCacheKey({
|
|
region: params.region,
|
|
providerFilter,
|
|
refreshIntervalSeconds,
|
|
defaultContextWindow,
|
|
defaultMaxTokens,
|
|
});
|
|
const now = params.now?.() ?? Date.now();
|
|
|
|
if (refreshIntervalSeconds > 0) {
|
|
const cached = discoveryCache.get(cacheKey);
|
|
if (cached?.value && cached.expiresAt > now) {
|
|
return cached.value;
|
|
}
|
|
if (cached?.inFlight) {
|
|
return cached.inFlight;
|
|
}
|
|
}
|
|
|
|
const sdk = params.clientFactory
|
|
? createInjectedClientDiscoverySdk()
|
|
: await loadBedrockDiscoverySdk();
|
|
const clientFactory = params.clientFactory ?? ((region: string) => sdk.createClient(region));
|
|
if (!params.clientFactory) {
|
|
await refreshAwsSharedConfigCacheForBedrock();
|
|
}
|
|
const client = clientFactory(params.region);
|
|
|
|
const discoveryPromise = (async () => {
|
|
// Discover foundation models and inference profiles in parallel.
|
|
// Both API calls are independent, but we need the foundation model data
|
|
// to resolve inference profile capabilities — so we fetch in parallel,
|
|
// then build the lookup map before processing profiles.
|
|
const [rawFoundationResponse, profileSummaries] = await Promise.all([
|
|
client.send(sdk.createListFoundationModelsCommand() as never),
|
|
fetchInferenceProfileSummaries(client, (input) =>
|
|
sdk.createListInferenceProfilesCommand(input),
|
|
),
|
|
]);
|
|
const foundationResponse = rawFoundationResponse as ListFoundationModelsCommandOutput;
|
|
|
|
const discovered: ModelDefinitionConfig[] = [];
|
|
const seenIds = new Set<string>();
|
|
const foundationModels = new Map<string, ModelDefinitionConfig>();
|
|
|
|
// Foundation models first — build both the results list and the lookup map.
|
|
for (const summary of foundationResponse.modelSummaries ?? []) {
|
|
if (!shouldIncludeSummary(summary, providerFilter)) {
|
|
continue;
|
|
}
|
|
const def = toModelDefinition(summary, {
|
|
contextWindow: defaultContextWindow,
|
|
maxTokens: defaultMaxTokens,
|
|
});
|
|
discovered.push(def);
|
|
const normalizedId = normalizeLowercaseStringOrEmpty(def.id);
|
|
seenIds.add(normalizedId);
|
|
foundationModels.set(normalizedId, def);
|
|
}
|
|
|
|
// Merge inference profiles — inherit capabilities from foundation models.
|
|
const inferenceProfiles = resolveInferenceProfiles(
|
|
profileSummaries,
|
|
{ contextWindow: defaultContextWindow, maxTokens: defaultMaxTokens },
|
|
providerFilter,
|
|
foundationModels,
|
|
);
|
|
for (const profile of inferenceProfiles) {
|
|
const normalizedId = normalizeLowercaseStringOrEmpty(profile.id);
|
|
if (!seenIds.has(normalizedId)) {
|
|
discovered.push(profile);
|
|
seenIds.add(normalizedId);
|
|
}
|
|
}
|
|
|
|
// Sort: global cross-region profiles first (recommended for most users —
|
|
// better capacity, automatic failover, no data sovereignty constraints),
|
|
// then remaining profiles/models alphabetically.
|
|
return discovered.toSorted((a, b) => {
|
|
const aGlobal = a.id.startsWith("global.") ? 0 : 1;
|
|
const bGlobal = b.id.startsWith("global.") ? 0 : 1;
|
|
if (aGlobal !== bGlobal) {
|
|
return aGlobal - bGlobal;
|
|
}
|
|
return a.name.localeCompare(b.name);
|
|
});
|
|
})();
|
|
|
|
if (refreshIntervalSeconds > 0) {
|
|
discoveryCache.set(cacheKey, {
|
|
expiresAt: now + refreshIntervalSeconds * 1000,
|
|
inFlight: discoveryPromise,
|
|
});
|
|
}
|
|
|
|
try {
|
|
const value = await discoveryPromise;
|
|
if (refreshIntervalSeconds > 0) {
|
|
discoveryCache.set(cacheKey, {
|
|
expiresAt: now + refreshIntervalSeconds * 1000,
|
|
value,
|
|
});
|
|
}
|
|
return value;
|
|
} catch (error) {
|
|
if (refreshIntervalSeconds > 0) {
|
|
discoveryCache.delete(cacheKey);
|
|
}
|
|
if (!hasLoggedBedrockError) {
|
|
hasLoggedBedrockError = true;
|
|
log.warn("Failed to discover Bedrock models", {
|
|
error: formatErrorMessage(error),
|
|
});
|
|
}
|
|
return [];
|
|
}
|
|
}
|
|
|
|
export async function resolveImplicitBedrockProvider(params: {
|
|
pluginConfig?: { discovery?: BedrockDiscoveryConfig };
|
|
env?: NodeJS.ProcessEnv;
|
|
clientFactory?: (region: string) => BedrockClient;
|
|
}): Promise<ModelProviderConfig | null> {
|
|
const env = params.env ?? process.env;
|
|
const discoveryConfig = params.pluginConfig?.discovery;
|
|
const enabled = discoveryConfig?.enabled;
|
|
const hasAwsCreds = resolveBedrockConfigApiKey(env) !== undefined;
|
|
if (enabled === false) {
|
|
return null;
|
|
}
|
|
if (enabled !== true && !hasAwsCreds) {
|
|
return null;
|
|
}
|
|
|
|
const region = discoveryConfig?.region ?? env.AWS_REGION ?? env.AWS_DEFAULT_REGION ?? "us-east-1";
|
|
const models = await discoverBedrockModels({
|
|
region,
|
|
config: discoveryConfig,
|
|
clientFactory: params.clientFactory,
|
|
});
|
|
if (models.length === 0) {
|
|
return null;
|
|
}
|
|
|
|
return {
|
|
baseUrl: `https://bedrock-runtime.${region}.amazonaws.com`,
|
|
api: "bedrock-converse-stream",
|
|
auth: "aws-sdk",
|
|
models,
|
|
};
|
|
}
|