From 699b2320a85df12b5fc494554d6c72f9a55dcfee Mon Sep 17 00:00:00 2001 From: wirjo Date: Mon, 6 Apr 2026 10:19:56 +1000 Subject: [PATCH] feat(memory): add Bedrock embedding provider for memory search (#61547) * feat(memory): add Bedrock embedding provider for memory search Add Amazon Bedrock as a native embedding provider for memory search. Supports Titan Embed Text v1/v2 and Cohere Embed models via AWS SDK. - New embeddings-bedrock.ts: BedrockRuntimeClient + InvokeModel - Auth via AWS default credential chain (same as Bedrock inference) - Auto-selected in 'auto' mode when AWS credentials are detected - Titan V2: configurable dimensions (256/512/1024), normalization - Cohere: native batch support with search_query/search_document types - 16 new tests covering all model types, auth detection, edge cases Closes #26289 * fix(memory): harden bedrock embedding selection --------- Co-authored-by: Vincent Koc --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 8 +- docs/concepts/memory-search.md | 17 +- docs/providers/bedrock.md | 29 ++ docs/reference/memory-config.md | 96 ++++- package.json | 2 + .../src/host/embeddings-bedrock.test.ts | 377 +++++++++++++++++ .../src/host/embeddings-bedrock.ts | 397 ++++++++++++++++++ .../memory-host-sdk/src/host/embeddings.ts | 40 +- pnpm-lock.yaml | 142 +------ src/config/config.schema-regressions.test.ts | 14 + src/config/schema.base.generated.ts | 8 +- src/config/schema.help.ts | 4 +- .../host/embeddings-bedrock.test.ts | 377 +++++++++++++++++ .../host/embeddings-bedrock.ts | 397 ++++++++++++++++++ src/memory-host-sdk/host/embeddings.test.ts | 63 ++- src/memory-host-sdk/host/embeddings.ts | 40 +- 17 files changed, 1861 insertions(+), 151 deletions(-) create mode 100644 packages/memory-host-sdk/src/host/embeddings-bedrock.test.ts create mode 100644 packages/memory-host-sdk/src/host/embeddings-bedrock.ts create mode 100644 src/memory-host-sdk/host/embeddings-bedrock.test.ts create mode 100644 
src/memory-host-sdk/host/embeddings-bedrock.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 14a39c6ec27..79dd243a646 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai - Docs/IRC: replace public IRC hostname examples with `irc.example.com` and recommend private servers for bot coordination while listing common public networks for intentional use. - Memory/dreaming: write dreaming trail content to top-level `dreams.md` instead of daily memory notes, update `/dreaming` help text to point there, and keep `dreams.md` available for explicit reads without pulling it into default recall. Thanks @davemorin. - Memory/dreaming: add the Dream Diary surface in Dreams, simplify user-facing dreaming config to `enabled` plus optional `frequency`, treat phases as implementation detail in docs/UI, and keep the lobster animation visible above diary content. Thanks @vignesh07. +- Memory/search: add Amazon Bedrock embeddings for Titan, Cohere, Nova, and TwelveLabs models, with AWS credential-chain auto-detection for `provider: "auto"` and provider-specific dimension controls. Thanks @wirjo. 
### Fixes diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 1c5d1b52190..cf97f1f64a0 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -57a3b1cc7d573c3788a670d927eac947fb1685384804f5c3c926f702a27fe00b config-baseline.json -82163136ff466db3caa61290fd65a8b8dd9487fc61f3871c177f96fcecf9e29b config-baseline.core.json -ae67508350baf891b902348d55fada6c17e9c053adf53aaf3a8b92cd364ef3f1 config-baseline.channel.json -d972a11d0f86080a722bddfe48990dd1b8fa16eb8e157e83f49bd46a5941c512 config-baseline.plugin.json +73fbcd00d17685b462dfb11aff74baae99265ae5671db28893d8608456daa44e config-baseline.json +effaf240920c16fce2c78af52dec15aa9ceb049e34f703c568669cb6beef3f91 config-baseline.core.json +3c999707b167138de34f6255e3488b99e404c5132d3fc5879a1fa12d815c31f5 config-baseline.channel.json +031b237717ca108ea2cd314413db4c91edfdfea55f808179e3066331f41af134 config-baseline.plugin.json diff --git a/docs/concepts/memory-search.md b/docs/concepts/memory-search.md index 4b292da1aec..c769513a04e 100644 --- a/docs/concepts/memory-search.md +++ b/docs/concepts/memory-search.md @@ -35,14 +35,15 @@ node-llama-cpp). 
## Supported providers -| Provider | ID | Needs API key | Notes | -| -------- | --------- | ------------- | ----------------------------- | -| OpenAI | `openai` | Yes | Auto-detected, fast | -| Gemini | `gemini` | Yes | Supports image/audio indexing | -| Voyage | `voyage` | Yes | Auto-detected | -| Mistral | `mistral` | Yes | Auto-detected | -| Ollama | `ollama` | No | Local, must set explicitly | -| Local | `local` | No | GGUF model, ~0.6 GB download | +| Provider | ID | Needs API key | Notes | +| -------- | --------- | ------------- | ---------------------------------------------------- | +| OpenAI | `openai` | Yes | Auto-detected, fast | +| Gemini | `gemini` | Yes | Supports image/audio indexing | +| Voyage | `voyage` | Yes | Auto-detected | +| Mistral | `mistral` | Yes | Auto-detected | +| Bedrock | `bedrock` | No | Auto-detected when the AWS credential chain resolves | +| Ollama | `ollama` | No | Local, must set explicitly | +| Local | `local` | No | GGUF model, ~0.6 GB download | ## How search works diff --git a/docs/providers/bedrock.md b/docs/providers/bedrock.md index 5c6b7f1635d..caf29e47c65 100644 --- a/docs/providers/bedrock.md +++ b/docs/providers/bedrock.md @@ -271,3 +271,32 @@ grounding checks. The IAM principal used by the gateway must have the `bedrock:ApplyGuardrail` permission in addition to the standard invoke permissions. + +## Embeddings for memory search + +Bedrock can also serve as the embedding provider for +[memory search](/concepts/memory-search). This is configured separately from the +inference provider — set `agents.defaults.memorySearch.provider` to `"bedrock"`: + +```json5 +{ + agents: { + defaults: { + memorySearch: { + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", // default + }, + }, + }, +} +``` + +Bedrock embeddings use the same AWS SDK credential chain as inference (instance +roles, SSO, access keys, shared config, and web identity). No API key is +needed. 
When `provider` is `"auto"`, Bedrock is auto-detected if that +credential chain resolves successfully. + +Supported embedding models include Amazon Titan Embed (v1, v2), Amazon Nova +Embed, Cohere Embed (v3, v4), and TwelveLabs Marengo. See +[Memory configuration reference — Bedrock](/reference/memory-config#bedrock-embedding-config) +for the full model list and dimension options. diff --git a/docs/reference/memory-config.md b/docs/reference/memory-config.md index 9d25195f776..76cb60ef7fc 100644 --- a/docs/reference/memory-config.md +++ b/docs/reference/memory-config.md @@ -25,12 +25,12 @@ All memory search settings live under `agents.defaults.memorySearch` in ## Provider selection -| Key | Type | Default | Description | -| ---------- | --------- | ---------------- | -------------------------------------------------------------------------------- | -| `provider` | `string` | auto-detected | Embedding adapter ID: `openai`, `gemini`, `voyage`, `mistral`, `ollama`, `local` | -| `model` | `string` | provider default | Embedding model name | -| `fallback` | `string` | `"none"` | Fallback adapter ID when the primary fails | -| `enabled` | `boolean` | `true` | Enable or disable memory search | +| Key | Type | Default | Description | +| ---------- | --------- | ---------------- | ------------------------------------------------------------------------------------------- | +| `provider` | `string` | auto-detected | Embedding adapter ID: `openai`, `gemini`, `voyage`, `mistral`, `bedrock`, `ollama`, `local` | +| `model` | `string` | provider default | Embedding model name | +| `fallback` | `string` | `"none"` | Fallback adapter ID when the primary fails | +| `enabled` | `boolean` | `true` | Enable or disable memory search | ### Auto-detection order @@ -41,13 +41,14 @@ When `provider` is not set, OpenClaw selects the first available: 3. `gemini` -- if a Gemini key can be resolved. 4. `voyage` -- if a Voyage key can be resolved. 5. `mistral` -- if a Mistral key can be resolved. 
+6. `bedrock` -- if the AWS SDK credential chain resolves (instance role, access keys, profile, SSO, web identity, or shared config). `ollama` is supported but not auto-detected (set it explicitly). ### API key resolution -Remote embeddings require an API key. OpenClaw resolves from: -auth profiles, `models.providers.*.apiKey`, or environment variables. +Remote embeddings require an API key. Bedrock uses the AWS SDK default +credential chain instead (instance roles, SSO, access keys). | Provider | Env var | Config key | | -------- | ------------------------------ | --------------------------------- | @@ -55,6 +56,7 @@ auth profiles, `models.providers.*.apiKey`, or environment variables. | Gemini | `GEMINI_API_KEY` | `models.providers.google.apiKey` | | Voyage | `VOYAGE_API_KEY` | `models.providers.voyage.apiKey` | | Mistral | `MISTRAL_API_KEY` | `models.providers.mistral.apiKey` | +| Bedrock | AWS credential chain | No API key needed | | Ollama | `OLLAMA_API_KEY` (placeholder) | -- | Codex OAuth covers chat/completions only and does not satisfy embedding @@ -104,6 +106,84 @@ Changing model or `outputDimensionality` triggers an automatic full reindex. --- +## Bedrock embedding config + +Bedrock uses the AWS SDK default credential chain -- no API keys needed. 
+If OpenClaw runs on EC2 with a Bedrock-enabled instance role, just set the +provider and model: + +```json5 +{ + agents: { + defaults: { + memorySearch: { + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + }, + }, + }, +} +``` + +| Key | Type | Default | Description | +| ---------------------- | -------- | ------------------------------ | ------------------------------- | +| `model` | `string` | `amazon.titan-embed-text-v2:0` | Any Bedrock embedding model ID | +| `outputDimensionality` | `number` | model default | For Titan V2: 256, 512, or 1024 | + +### Supported models + +The following models are supported (with family detection and dimension +defaults): + +| Model ID | Provider | Default Dims | Configurable Dims | +| ------------------------------------------ | ---------- | ------------ | -------------------- | +| `amazon.titan-embed-text-v2:0` | Amazon | 1024 | 256, 512, 1024 | +| `amazon.titan-embed-text-v1` | Amazon | 1536 | -- | +| `amazon.titan-embed-g1-text-02` | Amazon | 1536 | -- | +| `amazon.titan-embed-image-v1` | Amazon | 1024 | -- | +| `amazon.nova-2-multimodal-embeddings-v1:0` | Amazon | 1024 | 256, 384, 1024, 3072 | +| `cohere.embed-english-v3` | Cohere | 1024 | -- | +| `cohere.embed-multilingual-v3` | Cohere | 1024 | -- | +| `cohere.embed-v4:0` | Cohere | 1536 | 256-1536 | +| `twelvelabs.marengo-embed-3-0-v1:0` | TwelveLabs | 512 | -- | +| `twelvelabs.marengo-embed-2-7-v1:0` | TwelveLabs | 1024 | -- | + +Throughput-suffixed variants (e.g., `amazon.titan-embed-text-v1:2:8k`) inherit +the base model's configuration. + +### Authentication + +Bedrock auth uses the standard AWS SDK credential resolution order: + +1. Environment variables (`AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`) +2. SSO token cache +3. Web identity token credentials +4. Shared credentials and config files +5. 
ECS or EC2 metadata credentials + +Region is resolved in order from a Bedrock runtime `baseUrl` (such as the `amazon-bedrock` +provider `baseUrl`), then `AWS_REGION`, then `AWS_DEFAULT_REGION`, defaulting to `us-east-1`. + +### IAM permissions + +The IAM role or user needs: + +```json +{ + "Effect": "Allow", + "Action": "bedrock:InvokeModel", + "Resource": "*" +} +``` + +For least-privilege, scope `InvokeModel` to the specific model: + +``` +arn:aws:bedrock:*::foundation-model/amazon.titan-embed-text-v2:0 +``` + +--- + ## Local embedding config | Key | Type | Default | Description | diff --git a/package.json b/package.json index d9ffc8a49e2..d5f517de360 100644 --- a/package.json +++ b/package.json @@ -1145,6 +1145,8 @@ "@agentclientprotocol/sdk": "0.18.0", "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock": "3.1023.0", + "@aws-sdk/client-bedrock-runtime": "3.1023.0", + "@aws-sdk/credential-provider-node": "3.972.29", "@clack/prompts": "^1.2.0", "@homebridge/ciao": "^1.3.6", "@line/bot-sdk": "^10.6.0", diff --git a/packages/memory-host-sdk/src/host/embeddings-bedrock.test.ts b/packages/memory-host-sdk/src/host/embeddings-bedrock.test.ts new file mode 100644 index 00000000000..71228daad5f --- /dev/null +++ b/packages/memory-host-sdk/src/host/embeddings-bedrock.test.ts @@ -0,0 +1,377 @@ +import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; + +const { defaultProviderMock, resolveCredentialsMock, sendMock } = vi.hoisted(() => ({ + defaultProviderMock: vi.fn(), + resolveCredentialsMock: vi.fn(), + sendMock: vi.fn(), +})); + +vi.mock("@aws-sdk/client-bedrock-runtime", () => { + class MockClient { + region: string; + constructor(config: { region: string }) { + this.region = config.region; + } + send = sendMock; + } + class MockCommand { + input: unknown; + constructor(input: unknown) { + this.input = input; + } + } + return { BedrockRuntimeClient: MockClient, InvokeModelCommand: MockCommand }; +}); + +vi.mock("@aws-sdk/credential-provider-node", () => ({ + defaultProvider: 
defaultProviderMock.mockImplementation(() => resolveCredentialsMock), +})); + +let createBedrockEmbeddingProvider: typeof import("./embeddings-bedrock.js").createBedrockEmbeddingProvider; +let resolveBedrockEmbeddingClient: typeof import("./embeddings-bedrock.js").resolveBedrockEmbeddingClient; +let normalizeBedrockEmbeddingModel: typeof import("./embeddings-bedrock.js").normalizeBedrockEmbeddingModel; +let hasAwsCredentials: typeof import("./embeddings-bedrock.js").hasAwsCredentials; + +beforeAll(async () => { + ({ + createBedrockEmbeddingProvider, + resolveBedrockEmbeddingClient, + normalizeBedrockEmbeddingModel, + hasAwsCredentials, + } = await import("./embeddings-bedrock.js")); +}); + +beforeEach(() => { + defaultProviderMock.mockImplementation(() => resolveCredentialsMock); +}); + +const enc = (body: unknown) => ({ body: new TextEncoder().encode(JSON.stringify(body)) }); +const reqBody = (i = 0): Record => + JSON.parse(sendMock.mock.calls[i][0].input.body); + +describe("bedrock embedding provider", () => { + const originalEnv = process.env; + afterEach(() => { + process.env = originalEnv; + vi.restoreAllMocks(); + defaultProviderMock.mockClear(); + resolveCredentialsMock.mockReset(); + sendMock.mockReset(); + }); + + // --- Normalization --- + + it("normalizes model names with prefixes", () => { + expect(normalizeBedrockEmbeddingModel("bedrock/amazon.titan-embed-text-v2:0")).toBe( + "amazon.titan-embed-text-v2:0", + ); + expect(normalizeBedrockEmbeddingModel("amazon-bedrock/cohere.embed-english-v3")).toBe( + "cohere.embed-english-v3", + ); + expect(normalizeBedrockEmbeddingModel("")).toBe("amazon.titan-embed-text-v2:0"); + }); + + // --- Client resolution --- + + it("resolves region from env", () => { + process.env = { ...originalEnv, AWS_REGION: "eu-west-1" }; + const c = resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(c.region).toBe("eu-west-1"); + 
expect(c.dimensions).toBe(1024); + }); + + it("defaults to us-east-1", () => { + process.env = { ...originalEnv }; + delete process.env.AWS_REGION; + delete process.env.AWS_DEFAULT_REGION; + expect( + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }).region, + ).toBe("us-east-1"); + }); + + it("extracts region from baseUrl", () => { + process.env = { ...originalEnv }; + delete process.env.AWS_REGION; + const c = resolveBedrockEmbeddingClient({ + config: { + models: { + providers: { + "amazon-bedrock": { baseUrl: "https://bedrock-runtime.ap-southeast-2.amazonaws.com" }, + }, + }, + } as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(c.region).toBe("ap-southeast-2"); + }); + + it("validates dimensions", () => { + expect(() => + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + outputDimensionality: 768, + }), + ).toThrow("Invalid dimensions 768"); + }); + + it("accepts valid dimensions", () => { + expect( + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + outputDimensionality: 256, + }).dimensions, + ).toBe(256); + }); + + it("resolves throughput-suffixed variants", () => { + expect( + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v1:2:8k", + fallback: "none", + }).dimensions, + ).toBe(1536); + }); + + // --- Credential detection --- + + it("detects access keys", async () => { + await expect( + hasAwsCredentials({ + AWS_ACCESS_KEY_ID: "A", + AWS_SECRET_ACCESS_KEY: "s", + } as NodeJS.ProcessEnv), + ).resolves.toBe(true); + }); + it("detects profile", async () => { + await expect(hasAwsCredentials({ AWS_PROFILE: "default" } as NodeJS.ProcessEnv)).resolves.toBe( + true, + ); 
+ }); + it("detects ECS task role", async () => { + await expect( + hasAwsCredentials({ AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: "/v2" } as NodeJS.ProcessEnv), + ).resolves.toBe(true); + }); + it("detects EKS IRSA", async () => { + await expect( + hasAwsCredentials({ + AWS_WEB_IDENTITY_TOKEN_FILE: "/var/run/secrets/token", + AWS_ROLE_ARN: "arn:aws:iam::123:role/x", + } as NodeJS.ProcessEnv), + ).resolves.toBe(true); + }); + it("detects credentials via the AWS SDK default provider chain", async () => { + resolveCredentialsMock.mockResolvedValue({ accessKeyId: "AKIAEXAMPLE" }); + await expect(hasAwsCredentials({} as NodeJS.ProcessEnv)).resolves.toBe(true); + expect(defaultProviderMock).toHaveBeenCalledWith({ timeout: 1000, maxRetries: 0 }); + }); + it("returns false with no creds", async () => { + resolveCredentialsMock.mockRejectedValue(new Error("no aws credentials")); + await expect(hasAwsCredentials({} as NodeJS.ProcessEnv)).resolves.toBe(false); + }); + + // --- Titan V2 --- + + it("embeds with Titan V2", async () => { + sendMock.mockResolvedValue(enc({ embedding: [0.1, 0.2, 0.3] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(await provider.embedQuery("test")).toHaveLength(3); + expect(reqBody()).toMatchObject({ inputText: "test", normalize: true, dimensions: 1024 }); + }); + + it("returns empty for blank text", async () => { + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(await provider.embedQuery(" ")).toEqual([]); + expect(sendMock).not.toHaveBeenCalled(); + }); + + it("batches Titan V2 concurrently", async () => { + sendMock + .mockResolvedValueOnce(enc({ embedding: [0.1] })) + .mockResolvedValueOnce(enc({ embedding: [0.2] })); + const { provider } = await 
createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(sendMock).toHaveBeenCalledTimes(2); + }); + + // --- Titan V1 --- + + it("sends only inputText for Titan V1", async () => { + sendMock.mockResolvedValue(enc({ embedding: [0.5] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v1", + fallback: "none", + }); + await provider.embedQuery("hi"); + expect(reqBody()).toEqual({ inputText: "hi" }); + }); + + it("handles Titan G1 text variant", async () => { + sendMock.mockResolvedValue(enc({ embedding: [0.1] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-g1-text-02", + fallback: "none", + }); + await provider.embedQuery("hi"); + expect(reqBody()).toEqual({ inputText: "hi" }); + }); + + // --- Cohere V3 --- + + it("embeds Cohere V3 batch in single call", async () => { + sendMock.mockResolvedValue(enc({ embeddings: [[0.1], [0.2]] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-english-v3", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(sendMock).toHaveBeenCalledTimes(1); + expect(reqBody()).toMatchObject({ texts: ["a", "b"], input_type: "search_document" }); + }); + + it("uses search_query for Cohere embedQuery", async () => { + sendMock.mockResolvedValue(enc({ embeddings: [[0.1]] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-english-v3", + fallback: "none", + }); + await provider.embedQuery("q"); + expect(reqBody().input_type).toBe("search_query"); + }); + + // --- Cohere V4 --- + + it("embeds 
Cohere V4 with embedding_types + output_dimension", async () => { + sendMock.mockResolvedValue(enc({ embeddings: { float: [[0.1], [0.2]] } })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-v4:0", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(reqBody()).toMatchObject({ embedding_types: ["float"], output_dimension: 1536 }); + }); + + it("validates Cohere V4 dimensions", () => { + expect(() => + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-v4:0", + fallback: "none", + outputDimensionality: 2048, + }), + ).toThrow("Invalid dimensions 2048"); + }); + + // --- Nova --- + + it("embeds Nova with SINGLE_EMBEDDING format", async () => { + sendMock.mockResolvedValue( + enc({ embeddings: [{ embeddingType: "TEXT", embedding: [0.1, 0.2] }] }), + ); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.nova-2-multimodal-embeddings-v1:0", + fallback: "none", + }); + expect(await provider.embedQuery("hi")).toHaveLength(2); + expect(reqBody().taskType).toBe("SINGLE_EMBEDDING"); + }); + + it("validates Nova dimensions", () => { + expect(() => + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.nova-2-multimodal-embeddings-v1:0", + fallback: "none", + outputDimensionality: 512, + }), + ).toThrow("Invalid dimensions 512"); + }); + + it("batches Nova concurrently", async () => { + sendMock + .mockResolvedValueOnce(enc({ embeddings: [{ embeddingType: "TEXT", embedding: [0.1] }] })) + .mockResolvedValueOnce(enc({ embeddings: [{ embeddingType: "TEXT", embedding: [0.2] }] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.nova-2-multimodal-embeddings-v1:0", + fallback: "none", + }); + expect(await 
provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(sendMock).toHaveBeenCalledTimes(2); + }); + + // --- TwelveLabs --- + + it("embeds TwelveLabs Marengo", async () => { + sendMock.mockResolvedValue(enc({ data: [{ embedding: [0.1, 0.2] }] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "twelvelabs.marengo-embed-3-0-v1:0", + fallback: "none", + }); + expect(await provider.embedQuery("hi")).toHaveLength(2); + expect(reqBody()).toEqual({ inputType: "text", text: { inputText: "hi" } }); + }); + + it("embeds TwelveLabs object-style responses", async () => { + sendMock.mockResolvedValue(enc({ data: { embedding: [0.3, 0.4] } })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "twelvelabs.marengo-embed-2-7-v1:0", + fallback: "none", + }); + expect(await provider.embedQuery("hi")).toEqual([0.6, 0.8]); + }); +}); diff --git a/packages/memory-host-sdk/src/host/embeddings-bedrock.ts b/packages/memory-host-sdk/src/host/embeddings-bedrock.ts new file mode 100644 index 00000000000..c536cf22ca5 --- /dev/null +++ b/packages/memory-host-sdk/src/host/embeddings-bedrock.ts @@ -0,0 +1,397 @@ +import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js"; +import { debugEmbeddingsLog } from "./embeddings-debug.js"; +import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js"; + +// --------------------------------------------------------------------------- +// Types & constants +// --------------------------------------------------------------------------- + +export type BedrockEmbeddingClient = { + region: string; + model: string; + dimensions?: number; +}; + +export const DEFAULT_BEDROCK_EMBEDDING_MODEL = "amazon.titan-embed-text-v2:0"; + +/** Request/response format family — each has a different API shape. 
*/ +type Family = "titan-v1" | "titan-v2" | "cohere-v3" | "cohere-v4" | "nova" | "twelvelabs"; + +interface ModelSpec { + maxTokens: number; + dims: number; + validDims?: number[]; + family: Family; +} + +// --------------------------------------------------------------------------- +// Model catalog +// --------------------------------------------------------------------------- + +const MODELS: Record = { + "amazon.titan-embed-text-v2:0": { + maxTokens: 8192, + dims: 1024, + validDims: [256, 512, 1024], + family: "titan-v2", + }, + "amazon.titan-embed-text-v1": { maxTokens: 8000, dims: 1536, family: "titan-v1" }, + "amazon.titan-embed-g1-text-02": { maxTokens: 8000, dims: 1536, family: "titan-v1" }, + "amazon.titan-embed-image-v1": { maxTokens: 128, dims: 1024, family: "titan-v1" }, + "cohere.embed-english-v3": { maxTokens: 512, dims: 1024, family: "cohere-v3" }, + "cohere.embed-multilingual-v3": { maxTokens: 512, dims: 1024, family: "cohere-v3" }, + "cohere.embed-v4:0": { + maxTokens: 128000, + dims: 1536, + validDims: [256, 384, 512, 768, 1024, 1536], + family: "cohere-v4", + }, + "amazon.nova-2-multimodal-embeddings-v1:0": { + maxTokens: 8192, + dims: 1024, + validDims: [256, 384, 1024, 3072], + family: "nova", + }, + "twelvelabs.marengo-embed-2-7-v1:0": { maxTokens: 512, dims: 1024, family: "twelvelabs" }, + "twelvelabs.marengo-embed-3-0-v1:0": { maxTokens: 512, dims: 512, family: "twelvelabs" }, +}; + +/** Resolve spec, stripping throughput suffixes like `:2:8k` or `:0:512`. */ +function resolveSpec(modelId: string): ModelSpec | undefined { + if (MODELS[modelId]) { + return MODELS[modelId]; + } + const parts = modelId.split(":"); + for (let i = parts.length - 1; i >= 1; i--) { + const spec = MODELS[parts.slice(0, i).join(":")]; + if (spec) { + return spec; + } + } + return undefined; +} + +/** Infer family from model ID prefix when not in catalog. 
*/ +function inferFamily(modelId: string): Family { + const id = modelId.toLowerCase(); + if (id.startsWith("amazon.titan-embed-text-v2")) { + return "titan-v2"; + } + if (id.startsWith("amazon.titan-embed")) { + return "titan-v1"; + } + if (id.startsWith("amazon.nova")) { + return "nova"; + } + if (id.startsWith("cohere.embed-v4")) { + return "cohere-v4"; + } + if (id.startsWith("cohere.embed")) { + return "cohere-v3"; + } + if (id.startsWith("twelvelabs.")) { + return "twelvelabs"; + } + return "titan-v1"; // safest default — simplest request format +} + +// --------------------------------------------------------------------------- +// AWS SDK lazy loader +// --------------------------------------------------------------------------- + +type SdkClient = import("@aws-sdk/client-bedrock-runtime").BedrockRuntimeClient; +type SdkCommand = import("@aws-sdk/client-bedrock-runtime").InvokeModelCommand; + +interface AwsSdk { + BedrockRuntimeClient: new (config: { region: string }) => SdkClient; + InvokeModelCommand: new (input: { + modelId: string; + body: string; + contentType: string; + accept: string; + }) => SdkCommand; +} + +interface AwsCredentialProviderSdk { + defaultProvider: (init?: { timeout?: number; maxRetries?: number }) => () => Promise<{ + accessKeyId?: string; + }>; +} + +let sdkCache: AwsSdk | null = null; +let credentialProviderSdkCache: AwsCredentialProviderSdk | null | undefined; + +async function loadSdk(): Promise { + if (sdkCache) { + return sdkCache; + } + try { + sdkCache = (await import("@aws-sdk/client-bedrock-runtime")) as unknown as AwsSdk; + return sdkCache; + } catch { + throw new Error( + "No API key found for provider bedrock: @aws-sdk/client-bedrock-runtime is not installed. 
" + + "Install it with: npm install @aws-sdk/client-bedrock-runtime", + ); + } +} + +async function loadCredentialProviderSdk(): Promise { + if (credentialProviderSdkCache !== undefined) { + return credentialProviderSdkCache; + } + try { + credentialProviderSdkCache = + (await import("@aws-sdk/credential-provider-node")) as unknown as AwsCredentialProviderSdk; + } catch { + credentialProviderSdkCache = null; + } + return credentialProviderSdkCache; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const MODEL_PREFIX_RE = /^(?:bedrock|amazon-bedrock|aws)\//; +const REGION_RE = /bedrock-runtime\.([a-z0-9-]+)\./; + +export function normalizeBedrockEmbeddingModel(model: string): string { + const trimmed = model.trim(); + return trimmed ? trimmed.replace(MODEL_PREFIX_RE, "") : DEFAULT_BEDROCK_EMBEDDING_MODEL; +} + +function regionFromUrl(url: string | undefined): string | undefined { + return url?.trim() ? REGION_RE.exec(url)?.[1] : undefined; +} + +// --------------------------------------------------------------------------- +// Request builders +// --------------------------------------------------------------------------- + +function buildBody(family: Family, text: string, dims?: number): string { + switch (family) { + case "titan-v2": { + const b: Record = { inputText: text }; + if (dims != null) { + b.dimensions = dims; + b.normalize = true; + } + return JSON.stringify(b); + } + case "titan-v1": + return JSON.stringify({ inputText: text }); + case "nova": + return JSON.stringify({ + taskType: "SINGLE_EMBEDDING", + singleEmbeddingParams: { + embeddingPurpose: "GENERIC_INDEX", + embeddingDimension: dims ?? 
1024, + text: { truncationMode: "END", value: text }, + }, + }); + case "twelvelabs": + return JSON.stringify({ inputType: "text", text: { inputText: text } }); + default: + return JSON.stringify({ inputText: text }); + } +} + +function buildCohereBody( + family: Family, + texts: string[], + inputType: "search_query" | "search_document", + dims?: number, +): string { + const body: Record = { texts, input_type: inputType, truncate: "END" }; + if (family === "cohere-v4") { + body.embedding_types = ["float"]; + if (dims != null) { + body.output_dimension = dims; + } + } + return JSON.stringify(body); +} + +// --------------------------------------------------------------------------- +// Response parsers +// --------------------------------------------------------------------------- + +function parseSingle(family: Family, raw: string): number[] { + const data = JSON.parse(raw); + switch (family) { + case "nova": + return data.embeddings?.[0]?.embedding ?? []; + case "twelvelabs": { + if (Array.isArray(data.data)) { + return data.data[0]?.embedding ?? []; + } + if (Array.isArray(data.data?.embedding)) { + return data.data.embedding; + } + return data.embedding ?? []; + } + default: + return data.embedding ?? []; + } +} + +function parseCohereBatch(family: Family, raw: string): number[][] { + const data = JSON.parse(raw); + const embeddings = data.embeddings; + if (!embeddings) { + return []; + } + if (family === "cohere-v4" && !Array.isArray(embeddings)) { + return embeddings.float ?? 
[]; + } + return embeddings; +} + +// --------------------------------------------------------------------------- +// Provider +// --------------------------------------------------------------------------- + +export async function createBedrockEmbeddingProvider( + options: EmbeddingProviderOptions, +): Promise<{ provider: EmbeddingProvider; client: BedrockEmbeddingClient }> { + const client = resolveBedrockEmbeddingClient(options); + const { BedrockRuntimeClient, InvokeModelCommand } = await loadSdk(); + const sdk = new BedrockRuntimeClient({ region: client.region }); + const spec = resolveSpec(client.model); + const family = spec?.family ?? inferFamily(client.model); + + debugEmbeddingsLog("memory embeddings: bedrock client", { + region: client.region, + model: client.model, + dimensions: client.dimensions, + family, + }); + + const invoke = async (body: string): Promise => { + const res = await sdk.send( + new InvokeModelCommand({ + modelId: client.model, + body, + contentType: "application/json", + accept: "application/json", + }), + ); + return new TextDecoder().decode(res.body); + }; + + const isCohere = family === "cohere-v3" || family === "cohere-v4"; + + const embedSingle = async (text: string): Promise => { + const raw = await invoke(buildBody(family, text, client.dimensions)); + return sanitizeAndNormalizeEmbedding(parseSingle(family, raw)); + }; + + const embedCohere = async ( + texts: string[], + inputType: "search_query" | "search_document", + ): Promise => { + const raw = await invoke(buildCohereBody(family, texts, inputType, client.dimensions)); + return parseCohereBatch(family, raw).map((e) => sanitizeAndNormalizeEmbedding(e)); + }; + + const embedQuery = async (text: string): Promise => { + if (!text.trim()) { + return []; + } + if (isCohere) { + return (await embedCohere([text], "search_query"))[0] ?? 
[]; + } + return embedSingle(text); + }; + + const embedBatch = async (texts: string[]): Promise => { + if (texts.length === 0) { + return []; + } + if (isCohere) { + return embedCohere(texts, "search_document"); + } + return Promise.all(texts.map((t) => (t.trim() ? embedSingle(t) : Promise.resolve([])))); + }; + + return { + provider: { + id: "bedrock", + model: client.model, + maxInputTokens: spec?.maxTokens, + embedQuery, + embedBatch, + }, + client, + }; +} + +// --------------------------------------------------------------------------- +// Client resolution +// --------------------------------------------------------------------------- + +export function resolveBedrockEmbeddingClient( + options: EmbeddingProviderOptions, +): BedrockEmbeddingClient { + const model = normalizeBedrockEmbeddingModel(options.model); + const spec = resolveSpec(model); + const providerConfig = options.config.models?.providers?.["amazon-bedrock"]; + + const region = + regionFromUrl(options.remote?.baseUrl) ?? + regionFromUrl(providerConfig?.baseUrl) ?? + process.env.AWS_REGION ?? + process.env.AWS_DEFAULT_REGION ?? + "us-east-1"; + + let dimensions: number | undefined; + if (options.outputDimensionality != null) { + if (spec?.validDims && !spec.validDims.includes(options.outputDimensionality)) { + throw new Error( + `Invalid dimensions ${options.outputDimensionality} for ${model}. 
Valid values: ${spec.validDims.join(", ")}`, + ); + } + dimensions = options.outputDimensionality; + } else { + dimensions = spec?.dims; + } + + return { region, model, dimensions }; +} + +// --------------------------------------------------------------------------- +// Credential detection +// --------------------------------------------------------------------------- + +const CREDENTIAL_ENV_VARS = [ + "AWS_PROFILE", + "AWS_BEARER_TOKEN_BEDROCK", + "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", + "AWS_CONTAINER_CREDENTIALS_FULL_URI", + "AWS_EC2_METADATA_SERVICE_ENDPOINT", + "AWS_WEB_IDENTITY_TOKEN_FILE", + "AWS_ROLE_ARN", +] as const; + +export async function hasAwsCredentials(env: NodeJS.ProcessEnv = process.env): Promise { + if (env.AWS_ACCESS_KEY_ID?.trim() && env.AWS_SECRET_ACCESS_KEY?.trim()) { + return true; + } + if (CREDENTIAL_ENV_VARS.some((k) => env[k]?.trim())) { + return true; + } + const credentialProviderSdk = await loadCredentialProviderSdk(); + if (!credentialProviderSdk) { + return false; + } + try { + const credentials = await credentialProviderSdk.defaultProvider({ + timeout: 1000, + maxRetries: 0, + })(); + return typeof credentials.accessKeyId === "string" && credentials.accessKeyId.trim().length > 0; + } catch { + return false; + } +} diff --git a/packages/memory-host-sdk/src/host/embeddings.ts b/packages/memory-host-sdk/src/host/embeddings.ts index 8998b9a9968..5e26f54ddde 100644 --- a/packages/memory-host-sdk/src/host/embeddings.ts +++ b/packages/memory-host-sdk/src/host/embeddings.ts @@ -6,6 +6,11 @@ import { formatErrorMessage } from "../../../../src/infra/errors.js"; import { resolveUserPath } from "../../../../src/utils.js"; import type { EmbeddingInput } from "./embedding-inputs.js"; import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js"; +import { + createBedrockEmbeddingProvider, + hasAwsCredentials, + type BedrockEmbeddingClient, +} from "./embeddings-bedrock.js"; import { createGeminiEmbeddingProvider, type 
GeminiEmbeddingClient, @@ -25,6 +30,7 @@ export type { MistralEmbeddingClient } from "./embeddings-mistral.js"; export type { OpenAiEmbeddingClient } from "./embeddings-openai.js"; export type { VoyageEmbeddingClient } from "./embeddings-voyage.js"; export type { OllamaEmbeddingClient } from "./embeddings-ollama.js"; +export type { BedrockEmbeddingClient } from "./embeddings-bedrock.js"; export type EmbeddingProvider = { id: string; @@ -35,13 +41,21 @@ export type EmbeddingProvider = { embedBatchInputs?: (inputs: EmbeddingInput[]) => Promise; }; -export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama"; +export type EmbeddingProviderId = + | "openai" + | "local" + | "gemini" + | "voyage" + | "mistral" + | "ollama" + | "bedrock"; export type EmbeddingProviderRequest = EmbeddingProviderId | "auto"; export type EmbeddingProviderFallback = EmbeddingProviderId | "none"; // Remote providers considered for auto-selection when provider === "auto". // Ollama is intentionally excluded here so that "auto" mode does not // implicitly assume a local Ollama instance is available. +// Bedrock is included when AWS credentials are detected. const REMOTE_EMBEDDING_PROVIDER_IDS = ["openai", "gemini", "voyage", "mistral"] as const; export type EmbeddingProviderResult = { @@ -55,6 +69,7 @@ export type EmbeddingProviderResult = { voyage?: VoyageEmbeddingClient; mistral?: MistralEmbeddingClient; ollama?: OllamaEmbeddingClient; + bedrock?: BedrockEmbeddingClient; }; export type EmbeddingProviderOptions = { @@ -72,7 +87,7 @@ export type EmbeddingProviderOptions = { modelPath?: string; modelCacheDir?: string; }; - /** Gemini embedding-2: output vector dimensions (768, 1536, or 3072). */ + /** Provider-specific output vector dimensions for supported embedding families. */ outputDimensionality?: number; /** Gemini: override the default task type sent with embedding requests. 
*/ taskType?: GeminiTaskType; @@ -192,6 +207,10 @@ export async function createEmbeddingProvider( const { provider, client } = await createMistralEmbeddingProvider(options); return { provider, mistral: client }; } + if (id === "bedrock") { + const { provider, client } = await createBedrockEmbeddingProvider(options); + return { provider, bedrock: client }; + } const { provider, client } = await createOpenAiEmbeddingProvider(options); return { provider, openAi: client }; }; @@ -229,6 +248,23 @@ export async function createEmbeddingProvider( } } + // Try bedrock if AWS credentials are available + if (await hasAwsCredentials()) { + try { + const result = await createProvider("bedrock"); + return { ...result, requestedProvider }; + } catch (err) { + const message = formatPrimaryError(err, "bedrock"); + if (isMissingApiKeyError(err)) { + missingKeyErrors.push(message); + } else { + const wrapped = new Error(message) as Error & { cause?: unknown }; + wrapped.cause = err; + throw wrapped; + } + } + } + // All providers failed due to missing API keys - return null provider for FTS-only mode const details = [...missingKeyErrors, localError].filter(Boolean) as string[]; const reason = details.length > 0 ? details.join("\n\n") : "No embeddings provider available."; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 906395050f4..886e2b8df65 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -39,6 +39,12 @@ importers: '@aws-sdk/client-bedrock': specifier: 3.1023.0 version: 3.1023.0 + '@aws-sdk/client-bedrock-runtime': + specifier: 3.1023.0 + version: 3.1023.0 + '@aws-sdk/credential-provider-node': + specifier: 3.972.29 + version: 3.972.29 '@clack/prompts': specifier: ^1.2.0 version: 1.2.0 @@ -757,56 +763,8 @@ importers: specifier: workspace:* version: link:../.. - packages/clawdbot: - dependencies: - openclaw: - specifier: workspace:* - version: link:../.. 
- packages/memory-host-sdk: {} - packages/moltbot: - dependencies: - openclaw: - specifier: workspace:* - version: link:../.. - - packages/plugin-package-contract: {} - - ui: - dependencies: - '@create-markdown/preview': - specifier: ^2.0.0 - version: 2.0.0(@create-markdown/core@2.0.0)(shiki@3.23.0) - '@noble/ed25519': - specifier: 3.0.1 - version: 3.0.1 - dompurify: - specifier: ^3.3.3 - version: 3.3.3 - lit: - specifier: ^3.3.2 - version: 3.3.2 - marked: - specifier: ^17.0.5 - version: 17.0.5 - devDependencies: - '@vitest/browser-playwright': - specifier: 4.1.2 - version: 4.1.2(playwright@1.59.1)(vite@8.0.3(@emnapi/core@1.8.1)(@emnapi/runtime@1.9.1)(@types/node@25.5.2)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3))(vitest@4.1.2) - jsdom: - specifier: ^29.0.1 - version: 29.0.1(@noble/hashes@2.0.1) - playwright: - specifier: ^1.59.1 - version: 1.59.1 - vite: - specifier: 8.0.3 - version: 8.0.3(@emnapi/core@1.8.1)(@emnapi/runtime@1.9.1)(@types/node@25.5.2)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3) - vitest: - specifier: 4.1.2 - version: 4.1.2(@opentelemetry/api@1.9.1)(@types/node@25.5.2)(@vitest/browser-playwright@4.1.2)(jsdom@29.0.1(@noble/hashes@2.0.1))(vite@8.0.3(@emnapi/core@1.8.1)(@emnapi/runtime@1.9.1)(@types/node@25.5.2)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) - packages: '@agentclientprotocol/sdk@0.18.0': @@ -860,8 +818,8 @@ packages: '@aws-crypto/util@5.2.0': resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} - '@aws-sdk/client-bedrock-runtime@3.1022.0': - resolution: {integrity: sha512-gT8+ebNzmLjk07dPTVn0f4ZdEDSFYsyCX3rAxX2QGGOasKeeQQEBW4PxYqHGM6lJrcuFSc/ScSVKTRDxGZlFiA==} + '@aws-sdk/client-bedrock-runtime@3.1023.0': + resolution: {integrity: sha512-C0He9qhrClUp6JEk3QjE0WScDN1GSZF8eruP0uoh5kXeQEJLxyfFDrR2TIYnHntlRs/sMwhO82Vu7yGGQM2pfQ==} engines: {node: '>=20.0.0'} '@aws-sdk/client-bedrock@3.1023.0': @@ -984,10 +942,6 @@ packages: resolution: {integrity: 
sha512-TKY6h9spUk3OLs5v1oAgW9mAeBE3LAGNBwJokLy96wwmd4W2v/tYlXseProyed9ValDj2u1jK/4Rg1T+1NXyJA==} engines: {node: '>=20.0.0'} - '@aws-sdk/token-providers@3.1022.0': - resolution: {integrity: sha512-rC0+QQh5uo9Y0wtrvsVuGWi8njtf6h6FB94h5NClUoiNTuQiRG/+AjXiqhv1x/m8TnLrgYCHiFzykOdOb5Ea9w==} - engines: {node: '>=20.0.0'} - '@aws-sdk/token-providers@3.1023.0': resolution: {integrity: sha512-g/t814ec7g+MbazONIdQzb0c8FalVnSKCLc665GLG4QdrviKXHzag7HQmf5wBhCDsUDNAIi77fLeElaZSkylTA==} engines: {node: '>=20.0.0'} @@ -1127,25 +1081,6 @@ packages: resolution: {integrity: sha512-ooWCrlZP11i8GImSjTHYHLkvFDP48nS4+204nGb1RiX/WXYHmJA2III9/e2DWVabCESdW7hBAEzHRqUn9OUVvQ==} engines: {node: '>=0.1.90'} - '@create-markdown/core@2.0.0': - resolution: {integrity: sha512-xOmhoiDSa82EzjXp3aViQdB+xfCP4E2jEKxJiKJ702sup3p/CTCtL8fZBKQ3BvzASQRpq/xKCRXZZwRrg1DmZQ==} - engines: {node: '>=20.0.0'} - - '@create-markdown/preview@2.0.0': - resolution: {integrity: sha512-3WTGCrCOVBy9wH2X82Oa2ZHJ+eiEqu8AlucckenWVtFSbzRKzkxgci0BRw7IDvsOsTUEMxS9Ltc9/hSUHkdidA==} - engines: {node: '>=20.0.0'} - peerDependencies: - '@create-markdown/core': '>=2.0.0' - mermaid: '>=10.0.0' - shiki: '>=1.0.0' - peerDependenciesMeta: - '@create-markdown/core': - optional: true - mermaid: - optional: true - shiki: - optional: true - '@csstools/color-helpers@6.0.2': resolution: {integrity: sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==} engines: {node: '>=20.19.0'} @@ -2181,9 +2116,6 @@ packages: resolution: {integrity: sha512-vs1Az2OOTBiP4q0pwjW5aF0xp9n4MxVrmkFBxc6EKZc6ddYx5gaZiAsZoq0uRRXWbi3AT/sBqn05eRPtn1JCPw==} engines: {node: '>= 20.19.0'} - '@noble/ed25519@3.0.1': - resolution: {integrity: sha512-t/T8LuK0ym8ALQudCCQCtrRdMSxBnRgHXw+wg+YsSlE6d+on7sX3flqlSJ2mOs9xEuchM36kj9SuX5MG7pXQMA==} - '@noble/hashes@2.0.1': resolution: {integrity: sha512-XlOlEbQcE9fmuXxrVTXCTlG2nlRXa9Rj3rr5Ue/+tX+nmkgbX720YHh0VR3hBF9xDvwnb8D2shVGOwNx+ulArw==} engines: {node: '>= 20.19.0'} @@ -4158,9 +4090,6 @@ 
packages: resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} engines: {node: '>= 4'} - dompurify@3.3.3: - resolution: {integrity: sha512-Oj6pzI2+RqBfFG+qOaOLbFXLQ90ARpcGG6UePL82bJLtdsa6CYJD7nmiU8MW9nQNOtCHV3lZ/Bzq1X0QYbBZCA==} - domutils@3.2.2: resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==} @@ -5088,11 +5017,6 @@ packages: engines: {node: '>= 18'} hasBin: true - marked@17.0.5: - resolution: {integrity: sha512-6hLvc0/JEbRjRgzI6wnT2P1XuM1/RrrDEX0kPt0N7jGm1133g6X7DlxFasUIx+72aKAr904GTxhSLDrd5DIlZg==} - engines: {node: '>= 20'} - hasBin: true - math-intrinsics@1.1.0: resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} engines: {node: '>= 0.4'} @@ -6730,7 +6654,7 @@ snapshots: '@smithy/util-utf8': 2.3.0 tslib: 2.8.1 - '@aws-sdk/client-bedrock-runtime@3.1022.0': + '@aws-sdk/client-bedrock-runtime@3.1023.0': dependencies: '@aws-crypto/sha256-browser': 5.2.0 '@aws-crypto/sha256-js': 5.2.0 @@ -6744,7 +6668,7 @@ snapshots: '@aws-sdk/middleware-user-agent': 3.972.28 '@aws-sdk/middleware-websocket': 3.972.14 '@aws-sdk/region-config-resolver': 3.972.10 - '@aws-sdk/token-providers': 3.1022.0 + '@aws-sdk/token-providers': 3.1023.0 '@aws-sdk/types': 3.973.6 '@aws-sdk/util-endpoints': 3.996.5 '@aws-sdk/util-user-agent-browser': 3.972.8 @@ -7219,18 +7143,6 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/token-providers@3.1022.0': - dependencies: - '@aws-sdk/core': 3.973.26 - '@aws-sdk/nested-clients': 3.996.18 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - '@aws-sdk/token-providers@3.1023.0': dependencies: '@aws-sdk/core': 3.973.26 @@ -7342,7 +7254,8 @@ snapshots: '@bcoe/v8-coverage@1.0.2': {} - 
'@blazediff/core@1.9.1': {} + '@blazediff/core@1.9.1': + optional: true '@borewit/text-codec@0.2.2': {} @@ -7411,14 +7324,6 @@ snapshots: '@colors/colors@1.5.0': optional: true - '@create-markdown/core@2.0.0': - optional: true - - '@create-markdown/preview@2.0.0(@create-markdown/core@2.0.0)(shiki@3.23.0)': - optionalDependencies: - '@create-markdown/core': 2.0.0 - shiki: 3.23.0 - '@csstools/color-helpers@6.0.2': {} '@csstools/css-calc@3.1.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0)': @@ -8261,7 +8166,7 @@ snapshots: '@mariozechner/pi-ai@0.65.0(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)': dependencies: '@anthropic-ai/sdk': 0.81.0(zod@4.3.6) - '@aws-sdk/client-bedrock-runtime': 3.1022.0 + '@aws-sdk/client-bedrock-runtime': 3.1023.0 '@google/genai': 1.48.0(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6)) '@mistralai/mistralai': 1.14.1 '@sinclair/typebox': 0.34.49 @@ -8515,8 +8420,6 @@ snapshots: dependencies: '@noble/hashes': 2.0.1 - '@noble/ed25519@3.0.1': {} - '@noble/hashes@2.0.1': {} '@node-llama-cpp/linux-arm64@3.18.1': @@ -8961,7 +8864,8 @@ snapshots: '@pinojs/redact@0.4.0': {} - '@polka/url@1.0.0-next.29': {} + '@polka/url@1.0.0-next.29': + optional: true '@protobufjs/aspromise@1.1.2': {} @@ -9875,6 +9779,7 @@ snapshots: - msw - utf-8-validate - vite + optional: true '@vitest/browser@4.1.2(vite@8.0.3(@emnapi/core@1.8.1)(@emnapi/runtime@1.9.1)(@types/node@25.5.2)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3))(vitest@4.1.2)': dependencies: @@ -9892,6 +9797,7 @@ snapshots: - msw - utf-8-validate - vite + optional: true '@vitest/coverage-v8@4.1.2(@vitest/browser@4.1.2(vite@8.0.3(@emnapi/core@1.8.1)(@emnapi/runtime@1.9.1)(@types/node@25.5.2)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3))(vitest@4.1.2))(vitest@4.1.2)': dependencies: @@ -10497,10 +10403,6 @@ snapshots: dependencies: domelementtype: 2.3.0 - dompurify@3.3.3: - optionalDependencies: - '@types/trusted-types': 2.0.7 
- domutils@3.2.2: dependencies: dom-serializer: 2.0.0 @@ -11578,8 +11480,6 @@ snapshots: marked@15.0.12: {} - marked@17.0.5: {} - math-intrinsics@1.1.0: {} matrix-events-sdk@0.0.1: {} @@ -11691,7 +11591,8 @@ snapshots: mri@1.2.0: optional: true - mrmime@2.0.1: {} + mrmime@2.0.1: + optional: true ms@2.1.3: {} @@ -12133,6 +12034,7 @@ snapshots: playwright-core: 1.59.1 optionalDependencies: fsevents: 2.3.2 + optional: true pngjs@6.0.0: {} @@ -12653,6 +12555,7 @@ snapshots: '@polka/url': 1.0.0-next.29 mrmime: 2.0.1 totalist: 3.0.1 + optional: true sisteransi@1.0.5: {} @@ -12870,7 +12773,8 @@ snapshots: '@tokenizer/token': 0.3.0 ieee754: 1.2.1 - totalist@3.0.1: {} + totalist@3.0.1: + optional: true tough-cookie@4.1.3: dependencies: diff --git a/src/config/config.schema-regressions.test.ts b/src/config/config.schema-regressions.test.ts index 0b3f556b5aa..c27da0d33a1 100644 --- a/src/config/config.schema-regressions.test.ts +++ b/src/config/config.schema-regressions.test.ts @@ -51,6 +51,20 @@ describe("config schema regressions", () => { expect(res.ok).toBe(true); }); + it('accepts memorySearch provider "bedrock"', () => { + const res = validateConfigObject({ + agents: { + defaults: { + memorySearch: { + provider: "bedrock", + }, + }, + }, + }); + + expect(res.ok).toBe(true); + }); + it("accepts memorySearch.qmd.extraCollections", () => { const res = validateConfigObject({ agents: { diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 44f02b5d3db..07988d72138 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -3366,7 +3366,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { type: "string", title: "Memory Search Provider", description: - 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". 
Keep your most reliable provider here and configure fallback for resilience.', + 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "bedrock", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.', }, remote: { type: "object", @@ -3521,7 +3521,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { maximum: 9007199254740991, title: "Memory Search Output Dimensionality", description: - "Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.", + "Provider-specific output vector size override for memory embeddings. Gemini embedding-2 supports 768, 1536, or 3072; Bedrock families such as Titan V2, Cohere V4, and Nova expose their own allowed sizes. Expect a full reindex when you change it because stored vector dimensions must stay consistent.", }, local: { type: "object", @@ -23958,7 +23958,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, "agents.defaults.memorySearch.provider": { label: "Memory Search Provider", - help: 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.', + help: 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "bedrock", "ollama", or "local". 
Keep your most reliable provider here and configure fallback for resilience.', tags: ["advanced"], }, "agents.defaults.memorySearch.remote.baseUrl": { @@ -24009,7 +24009,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, "agents.defaults.memorySearch.outputDimensionality": { label: "Memory Search Output Dimensionality", - help: "Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.", + help: "Provider-specific output vector size override for memory embeddings. Gemini embedding-2 supports 768, 1536, or 3072; Bedrock families such as Titan V2, Cohere V4, and Nova expose their own allowed sizes. Expect a full reindex when you change it because stored vector dimensions must stay consistent.", tags: ["advanced"], }, "agents.defaults.memorySearch.fallback": { diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index ca60c6c2b9a..0074844effb 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -867,11 +867,11 @@ export const FIELD_HELP: Record = { "agents.defaults.memorySearch.experimental.sessionMemory": "Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.", "agents.defaults.memorySearch.provider": - 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.', + 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "bedrock", "ollama", or "local". 
Keep your most reliable provider here and configure fallback for resilience.', "agents.defaults.memorySearch.model": "Embedding model override used by the selected memory provider when a non-default model is required. Set this only when you need explicit recall quality/cost tuning beyond provider defaults.", "agents.defaults.memorySearch.outputDimensionality": - "Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.", + "Provider-specific output vector size override for memory embeddings. Gemini embedding-2 supports 768, 1536, or 3072; Bedrock families such as Titan V2, Cohere V4, and Nova expose their own allowed sizes. Expect a full reindex when you change it because stored vector dimensions must stay consistent.", "agents.defaults.memorySearch.remote.baseUrl": "Overrides the embedding API endpoint, such as an OpenAI-compatible proxy or custom Gemini base URL. 
Use this only when routing through your own gateway or vendor endpoint; keep provider defaults otherwise.", "agents.defaults.memorySearch.remote.apiKey": diff --git a/src/memory-host-sdk/host/embeddings-bedrock.test.ts b/src/memory-host-sdk/host/embeddings-bedrock.test.ts new file mode 100644 index 00000000000..71228daad5f --- /dev/null +++ b/src/memory-host-sdk/host/embeddings-bedrock.test.ts @@ -0,0 +1,377 @@ +import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; + +const { defaultProviderMock, resolveCredentialsMock, sendMock } = vi.hoisted(() => ({ + defaultProviderMock: vi.fn(), + resolveCredentialsMock: vi.fn(), + sendMock: vi.fn(), +})); + +vi.mock("@aws-sdk/client-bedrock-runtime", () => { + class MockClient { + region: string; + constructor(config: { region: string }) { + this.region = config.region; + } + send = sendMock; + } + class MockCommand { + input: unknown; + constructor(input: unknown) { + this.input = input; + } + } + return { BedrockRuntimeClient: MockClient, InvokeModelCommand: MockCommand }; +}); + +vi.mock("@aws-sdk/credential-provider-node", () => ({ + defaultProvider: defaultProviderMock.mockImplementation(() => resolveCredentialsMock), +})); + +let createBedrockEmbeddingProvider: typeof import("./embeddings-bedrock.js").createBedrockEmbeddingProvider; +let resolveBedrockEmbeddingClient: typeof import("./embeddings-bedrock.js").resolveBedrockEmbeddingClient; +let normalizeBedrockEmbeddingModel: typeof import("./embeddings-bedrock.js").normalizeBedrockEmbeddingModel; +let hasAwsCredentials: typeof import("./embeddings-bedrock.js").hasAwsCredentials; + +beforeAll(async () => { + ({ + createBedrockEmbeddingProvider, + resolveBedrockEmbeddingClient, + normalizeBedrockEmbeddingModel, + hasAwsCredentials, + } = await import("./embeddings-bedrock.js")); +}); + +beforeEach(() => { + defaultProviderMock.mockImplementation(() => resolveCredentialsMock); +}); + +const enc = (body: unknown) => ({ body: new 
TextEncoder().encode(JSON.stringify(body)) }); +const reqBody = (i = 0): Record => + JSON.parse(sendMock.mock.calls[i][0].input.body); + +describe("bedrock embedding provider", () => { + const originalEnv = process.env; + afterEach(() => { + process.env = originalEnv; + vi.restoreAllMocks(); + defaultProviderMock.mockClear(); + resolveCredentialsMock.mockReset(); + sendMock.mockReset(); + }); + + // --- Normalization --- + + it("normalizes model names with prefixes", () => { + expect(normalizeBedrockEmbeddingModel("bedrock/amazon.titan-embed-text-v2:0")).toBe( + "amazon.titan-embed-text-v2:0", + ); + expect(normalizeBedrockEmbeddingModel("amazon-bedrock/cohere.embed-english-v3")).toBe( + "cohere.embed-english-v3", + ); + expect(normalizeBedrockEmbeddingModel("")).toBe("amazon.titan-embed-text-v2:0"); + }); + + // --- Client resolution --- + + it("resolves region from env", () => { + process.env = { ...originalEnv, AWS_REGION: "eu-west-1" }; + const c = resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(c.region).toBe("eu-west-1"); + expect(c.dimensions).toBe(1024); + }); + + it("defaults to us-east-1", () => { + process.env = { ...originalEnv }; + delete process.env.AWS_REGION; + delete process.env.AWS_DEFAULT_REGION; + expect( + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }).region, + ).toBe("us-east-1"); + }); + + it("extracts region from baseUrl", () => { + process.env = { ...originalEnv }; + delete process.env.AWS_REGION; + const c = resolveBedrockEmbeddingClient({ + config: { + models: { + providers: { + "amazon-bedrock": { baseUrl: "https://bedrock-runtime.ap-southeast-2.amazonaws.com" }, + }, + }, + } as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(c.region).toBe("ap-southeast-2"); + }); + + 
it("validates dimensions", () => { + expect(() => + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + outputDimensionality: 768, + }), + ).toThrow("Invalid dimensions 768"); + }); + + it("accepts valid dimensions", () => { + expect( + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + outputDimensionality: 256, + }).dimensions, + ).toBe(256); + }); + + it("resolves throughput-suffixed variants", () => { + expect( + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v1:2:8k", + fallback: "none", + }).dimensions, + ).toBe(1536); + }); + + // --- Credential detection --- + + it("detects access keys", async () => { + await expect( + hasAwsCredentials({ + AWS_ACCESS_KEY_ID: "A", + AWS_SECRET_ACCESS_KEY: "s", + } as NodeJS.ProcessEnv), + ).resolves.toBe(true); + }); + it("detects profile", async () => { + await expect(hasAwsCredentials({ AWS_PROFILE: "default" } as NodeJS.ProcessEnv)).resolves.toBe( + true, + ); + }); + it("detects ECS task role", async () => { + await expect( + hasAwsCredentials({ AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: "/v2" } as NodeJS.ProcessEnv), + ).resolves.toBe(true); + }); + it("detects EKS IRSA", async () => { + await expect( + hasAwsCredentials({ + AWS_WEB_IDENTITY_TOKEN_FILE: "/var/run/secrets/token", + AWS_ROLE_ARN: "arn:aws:iam::123:role/x", + } as NodeJS.ProcessEnv), + ).resolves.toBe(true); + }); + it("detects credentials via the AWS SDK default provider chain", async () => { + resolveCredentialsMock.mockResolvedValue({ accessKeyId: "AKIAEXAMPLE" }); + await expect(hasAwsCredentials({} as NodeJS.ProcessEnv)).resolves.toBe(true); + expect(defaultProviderMock).toHaveBeenCalledWith({ timeout: 1000, maxRetries: 0 }); + }); + it("returns false with no creds", async () => { + 
resolveCredentialsMock.mockRejectedValue(new Error("no aws credentials")); + await expect(hasAwsCredentials({} as NodeJS.ProcessEnv)).resolves.toBe(false); + }); + + // --- Titan V2 --- + + it("embeds with Titan V2", async () => { + sendMock.mockResolvedValue(enc({ embedding: [0.1, 0.2, 0.3] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(await provider.embedQuery("test")).toHaveLength(3); + expect(reqBody()).toMatchObject({ inputText: "test", normalize: true, dimensions: 1024 }); + }); + + it("returns empty for blank text", async () => { + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(await provider.embedQuery(" ")).toEqual([]); + expect(sendMock).not.toHaveBeenCalled(); + }); + + it("batches Titan V2 concurrently", async () => { + sendMock + .mockResolvedValueOnce(enc({ embedding: [0.1] })) + .mockResolvedValueOnce(enc({ embedding: [0.2] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v2:0", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(sendMock).toHaveBeenCalledTimes(2); + }); + + // --- Titan V1 --- + + it("sends only inputText for Titan V1", async () => { + sendMock.mockResolvedValue(enc({ embedding: [0.5] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-text-v1", + fallback: "none", + }); + await provider.embedQuery("hi"); + expect(reqBody()).toEqual({ inputText: "hi" }); + }); + + it("handles Titan G1 text variant", async () => { + sendMock.mockResolvedValue(enc({ embedding: [0.1] })); + const { provider } = await 
createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.titan-embed-g1-text-02", + fallback: "none", + }); + await provider.embedQuery("hi"); + expect(reqBody()).toEqual({ inputText: "hi" }); + }); + + // --- Cohere V3 --- + + it("embeds Cohere V3 batch in single call", async () => { + sendMock.mockResolvedValue(enc({ embeddings: [[0.1], [0.2]] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-english-v3", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(sendMock).toHaveBeenCalledTimes(1); + expect(reqBody()).toMatchObject({ texts: ["a", "b"], input_type: "search_document" }); + }); + + it("uses search_query for Cohere embedQuery", async () => { + sendMock.mockResolvedValue(enc({ embeddings: [[0.1]] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-english-v3", + fallback: "none", + }); + await provider.embedQuery("q"); + expect(reqBody().input_type).toBe("search_query"); + }); + + // --- Cohere V4 --- + + it("embeds Cohere V4 with embedding_types + output_dimension", async () => { + sendMock.mockResolvedValue(enc({ embeddings: { float: [[0.1], [0.2]] } })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-v4:0", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(reqBody()).toMatchObject({ embedding_types: ["float"], output_dimension: 1536 }); + }); + + it("validates Cohere V4 dimensions", () => { + expect(() => + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "cohere.embed-v4:0", + fallback: "none", + outputDimensionality: 2048, + }), + ).toThrow("Invalid dimensions 2048"); + }); + + // --- Nova --- + + it("embeds Nova with 
SINGLE_EMBEDDING format", async () => { + sendMock.mockResolvedValue( + enc({ embeddings: [{ embeddingType: "TEXT", embedding: [0.1, 0.2] }] }), + ); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.nova-2-multimodal-embeddings-v1:0", + fallback: "none", + }); + expect(await provider.embedQuery("hi")).toHaveLength(2); + expect(reqBody().taskType).toBe("SINGLE_EMBEDDING"); + }); + + it("validates Nova dimensions", () => { + expect(() => + resolveBedrockEmbeddingClient({ + config: {} as never, + provider: "bedrock", + model: "amazon.nova-2-multimodal-embeddings-v1:0", + fallback: "none", + outputDimensionality: 512, + }), + ).toThrow("Invalid dimensions 512"); + }); + + it("batches Nova concurrently", async () => { + sendMock + .mockResolvedValueOnce(enc({ embeddings: [{ embeddingType: "TEXT", embedding: [0.1] }] })) + .mockResolvedValueOnce(enc({ embeddings: [{ embeddingType: "TEXT", embedding: [0.2] }] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "amazon.nova-2-multimodal-embeddings-v1:0", + fallback: "none", + }); + expect(await provider.embedBatch(["a", "b"])).toHaveLength(2); + expect(sendMock).toHaveBeenCalledTimes(2); + }); + + // --- TwelveLabs --- + + it("embeds TwelveLabs Marengo", async () => { + sendMock.mockResolvedValue(enc({ data: [{ embedding: [0.1, 0.2] }] })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + provider: "bedrock", + model: "twelvelabs.marengo-embed-3-0-v1:0", + fallback: "none", + }); + expect(await provider.embedQuery("hi")).toHaveLength(2); + expect(reqBody()).toEqual({ inputType: "text", text: { inputText: "hi" } }); + }); + + it("embeds TwelveLabs object-style responses", async () => { + sendMock.mockResolvedValue(enc({ data: { embedding: [0.3, 0.4] } })); + const { provider } = await createBedrockEmbeddingProvider({ + config: {} as never, + 
provider: "bedrock", + model: "twelvelabs.marengo-embed-2-7-v1:0", + fallback: "none", + }); + expect(await provider.embedQuery("hi")).toEqual([0.6, 0.8]); + }); +}); diff --git a/src/memory-host-sdk/host/embeddings-bedrock.ts b/src/memory-host-sdk/host/embeddings-bedrock.ts new file mode 100644 index 00000000000..c536cf22ca5 --- /dev/null +++ b/src/memory-host-sdk/host/embeddings-bedrock.ts @@ -0,0 +1,397 @@ +import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js"; +import { debugEmbeddingsLog } from "./embeddings-debug.js"; +import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js"; + +// --------------------------------------------------------------------------- +// Types & constants +// --------------------------------------------------------------------------- + +export type BedrockEmbeddingClient = { + region: string; + model: string; + dimensions?: number; +}; + +export const DEFAULT_BEDROCK_EMBEDDING_MODEL = "amazon.titan-embed-text-v2:0"; + +/** Request/response format family — each has a different API shape. 
*/ +type Family = "titan-v1" | "titan-v2" | "cohere-v3" | "cohere-v4" | "nova" | "twelvelabs"; + +interface ModelSpec { + maxTokens: number; + dims: number; + validDims?: number[]; + family: Family; +} + +// --------------------------------------------------------------------------- +// Model catalog +// --------------------------------------------------------------------------- + +const MODELS: Record = { + "amazon.titan-embed-text-v2:0": { + maxTokens: 8192, + dims: 1024, + validDims: [256, 512, 1024], + family: "titan-v2", + }, + "amazon.titan-embed-text-v1": { maxTokens: 8000, dims: 1536, family: "titan-v1" }, + "amazon.titan-embed-g1-text-02": { maxTokens: 8000, dims: 1536, family: "titan-v1" }, + "amazon.titan-embed-image-v1": { maxTokens: 128, dims: 1024, family: "titan-v1" }, + "cohere.embed-english-v3": { maxTokens: 512, dims: 1024, family: "cohere-v3" }, + "cohere.embed-multilingual-v3": { maxTokens: 512, dims: 1024, family: "cohere-v3" }, + "cohere.embed-v4:0": { + maxTokens: 128000, + dims: 1536, + validDims: [256, 384, 512, 768, 1024, 1536], + family: "cohere-v4", + }, + "amazon.nova-2-multimodal-embeddings-v1:0": { + maxTokens: 8192, + dims: 1024, + validDims: [256, 384, 1024, 3072], + family: "nova", + }, + "twelvelabs.marengo-embed-2-7-v1:0": { maxTokens: 512, dims: 1024, family: "twelvelabs" }, + "twelvelabs.marengo-embed-3-0-v1:0": { maxTokens: 512, dims: 512, family: "twelvelabs" }, +}; + +/** Resolve spec, stripping throughput suffixes like `:2:8k` or `:0:512`. */ +function resolveSpec(modelId: string): ModelSpec | undefined { + if (MODELS[modelId]) { + return MODELS[modelId]; + } + const parts = modelId.split(":"); + for (let i = parts.length - 1; i >= 1; i--) { + const spec = MODELS[parts.slice(0, i).join(":")]; + if (spec) { + return spec; + } + } + return undefined; +} + +/** Infer family from model ID prefix when not in catalog. 
*/ +function inferFamily(modelId: string): Family { + const id = modelId.toLowerCase(); + if (id.startsWith("amazon.titan-embed-text-v2")) { + return "titan-v2"; + } + if (id.startsWith("amazon.titan-embed")) { + return "titan-v1"; + } + if (id.startsWith("amazon.nova")) { + return "nova"; + } + if (id.startsWith("cohere.embed-v4")) { + return "cohere-v4"; + } + if (id.startsWith("cohere.embed")) { + return "cohere-v3"; + } + if (id.startsWith("twelvelabs.")) { + return "twelvelabs"; + } + return "titan-v1"; // safest default — simplest request format +} + +// --------------------------------------------------------------------------- +// AWS SDK lazy loader +// --------------------------------------------------------------------------- + +type SdkClient = import("@aws-sdk/client-bedrock-runtime").BedrockRuntimeClient; +type SdkCommand = import("@aws-sdk/client-bedrock-runtime").InvokeModelCommand; + +interface AwsSdk { + BedrockRuntimeClient: new (config: { region: string }) => SdkClient; + InvokeModelCommand: new (input: { + modelId: string; + body: string; + contentType: string; + accept: string; + }) => SdkCommand; +} + +interface AwsCredentialProviderSdk { + defaultProvider: (init?: { timeout?: number; maxRetries?: number }) => () => Promise<{ + accessKeyId?: string; + }>; +} + +let sdkCache: AwsSdk | null = null; +let credentialProviderSdkCache: AwsCredentialProviderSdk | null | undefined; + +async function loadSdk(): Promise { + if (sdkCache) { + return sdkCache; + } + try { + sdkCache = (await import("@aws-sdk/client-bedrock-runtime")) as unknown as AwsSdk; + return sdkCache; + } catch { + throw new Error( + "No API key found for provider bedrock: @aws-sdk/client-bedrock-runtime is not installed. 
" + + "Install it with: npm install @aws-sdk/client-bedrock-runtime", + ); + } +} + +async function loadCredentialProviderSdk(): Promise { + if (credentialProviderSdkCache !== undefined) { + return credentialProviderSdkCache; + } + try { + credentialProviderSdkCache = + (await import("@aws-sdk/credential-provider-node")) as unknown as AwsCredentialProviderSdk; + } catch { + credentialProviderSdkCache = null; + } + return credentialProviderSdkCache; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const MODEL_PREFIX_RE = /^(?:bedrock|amazon-bedrock|aws)\//; +const REGION_RE = /bedrock-runtime\.([a-z0-9-]+)\./; + +export function normalizeBedrockEmbeddingModel(model: string): string { + const trimmed = model.trim(); + return trimmed ? trimmed.replace(MODEL_PREFIX_RE, "") : DEFAULT_BEDROCK_EMBEDDING_MODEL; +} + +function regionFromUrl(url: string | undefined): string | undefined { + return url?.trim() ? REGION_RE.exec(url)?.[1] : undefined; +} + +// --------------------------------------------------------------------------- +// Request builders +// --------------------------------------------------------------------------- + +function buildBody(family: Family, text: string, dims?: number): string { + switch (family) { + case "titan-v2": { + const b: Record = { inputText: text }; + if (dims != null) { + b.dimensions = dims; + b.normalize = true; + } + return JSON.stringify(b); + } + case "titan-v1": + return JSON.stringify({ inputText: text }); + case "nova": + return JSON.stringify({ + taskType: "SINGLE_EMBEDDING", + singleEmbeddingParams: { + embeddingPurpose: "GENERIC_INDEX", + embeddingDimension: dims ?? 
1024, + text: { truncationMode: "END", value: text }, + }, + }); + case "twelvelabs": + return JSON.stringify({ inputType: "text", text: { inputText: text } }); + default: + return JSON.stringify({ inputText: text }); + } +} + +function buildCohereBody( + family: Family, + texts: string[], + inputType: "search_query" | "search_document", + dims?: number, +): string { + const body: Record = { texts, input_type: inputType, truncate: "END" }; + if (family === "cohere-v4") { + body.embedding_types = ["float"]; + if (dims != null) { + body.output_dimension = dims; + } + } + return JSON.stringify(body); +} + +// --------------------------------------------------------------------------- +// Response parsers +// --------------------------------------------------------------------------- + +function parseSingle(family: Family, raw: string): number[] { + const data = JSON.parse(raw); + switch (family) { + case "nova": + return data.embeddings?.[0]?.embedding ?? []; + case "twelvelabs": { + if (Array.isArray(data.data)) { + return data.data[0]?.embedding ?? []; + } + if (Array.isArray(data.data?.embedding)) { + return data.data.embedding; + } + return data.embedding ?? []; + } + default: + return data.embedding ?? []; + } +} + +function parseCohereBatch(family: Family, raw: string): number[][] { + const data = JSON.parse(raw); + const embeddings = data.embeddings; + if (!embeddings) { + return []; + } + if (family === "cohere-v4" && !Array.isArray(embeddings)) { + return embeddings.float ?? 
[]; + } + return embeddings; +} + +// --------------------------------------------------------------------------- +// Provider +// --------------------------------------------------------------------------- + +export async function createBedrockEmbeddingProvider( + options: EmbeddingProviderOptions, +): Promise<{ provider: EmbeddingProvider; client: BedrockEmbeddingClient }> { + const client = resolveBedrockEmbeddingClient(options); + const { BedrockRuntimeClient, InvokeModelCommand } = await loadSdk(); + const sdk = new BedrockRuntimeClient({ region: client.region }); + const spec = resolveSpec(client.model); + const family = spec?.family ?? inferFamily(client.model); + + debugEmbeddingsLog("memory embeddings: bedrock client", { + region: client.region, + model: client.model, + dimensions: client.dimensions, + family, + }); + + const invoke = async (body: string): Promise => { + const res = await sdk.send( + new InvokeModelCommand({ + modelId: client.model, + body, + contentType: "application/json", + accept: "application/json", + }), + ); + return new TextDecoder().decode(res.body); + }; + + const isCohere = family === "cohere-v3" || family === "cohere-v4"; + + const embedSingle = async (text: string): Promise => { + const raw = await invoke(buildBody(family, text, client.dimensions)); + return sanitizeAndNormalizeEmbedding(parseSingle(family, raw)); + }; + + const embedCohere = async ( + texts: string[], + inputType: "search_query" | "search_document", + ): Promise => { + const raw = await invoke(buildCohereBody(family, texts, inputType, client.dimensions)); + return parseCohereBatch(family, raw).map((e) => sanitizeAndNormalizeEmbedding(e)); + }; + + const embedQuery = async (text: string): Promise => { + if (!text.trim()) { + return []; + } + if (isCohere) { + return (await embedCohere([text], "search_query"))[0] ?? 
[]; + } + return embedSingle(text); + }; + + const embedBatch = async (texts: string[]): Promise => { + if (texts.length === 0) { + return []; + } + if (isCohere) { + return embedCohere(texts, "search_document"); + } + return Promise.all(texts.map((t) => (t.trim() ? embedSingle(t) : Promise.resolve([])))); + }; + + return { + provider: { + id: "bedrock", + model: client.model, + maxInputTokens: spec?.maxTokens, + embedQuery, + embedBatch, + }, + client, + }; +} + +// --------------------------------------------------------------------------- +// Client resolution +// --------------------------------------------------------------------------- + +export function resolveBedrockEmbeddingClient( + options: EmbeddingProviderOptions, +): BedrockEmbeddingClient { + const model = normalizeBedrockEmbeddingModel(options.model); + const spec = resolveSpec(model); + const providerConfig = options.config.models?.providers?.["amazon-bedrock"]; + + const region = + regionFromUrl(options.remote?.baseUrl) ?? + regionFromUrl(providerConfig?.baseUrl) ?? + process.env.AWS_REGION ?? + process.env.AWS_DEFAULT_REGION ?? + "us-east-1"; + + let dimensions: number | undefined; + if (options.outputDimensionality != null) { + if (spec?.validDims && !spec.validDims.includes(options.outputDimensionality)) { + throw new Error( + `Invalid dimensions ${options.outputDimensionality} for ${model}. 
Valid values: ${spec.validDims.join(", ")}`, + ); + } + dimensions = options.outputDimensionality; + } else { + dimensions = spec?.dims; + } + + return { region, model, dimensions }; +} + +// --------------------------------------------------------------------------- +// Credential detection +// --------------------------------------------------------------------------- + +const CREDENTIAL_ENV_VARS = [ + "AWS_PROFILE", + "AWS_BEARER_TOKEN_BEDROCK", + "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", + "AWS_CONTAINER_CREDENTIALS_FULL_URI", + "AWS_EC2_METADATA_SERVICE_ENDPOINT", + "AWS_WEB_IDENTITY_TOKEN_FILE", + "AWS_ROLE_ARN", +] as const; + +export async function hasAwsCredentials(env: NodeJS.ProcessEnv = process.env): Promise { + if (env.AWS_ACCESS_KEY_ID?.trim() && env.AWS_SECRET_ACCESS_KEY?.trim()) { + return true; + } + if (CREDENTIAL_ENV_VARS.some((k) => env[k]?.trim())) { + return true; + } + const credentialProviderSdk = await loadCredentialProviderSdk(); + if (!credentialProviderSdk) { + return false; + } + try { + const credentials = await credentialProviderSdk.defaultProvider({ + timeout: 1000, + maxRetries: 0, + })(); + return typeof credentials.accessKeyId === "string" && credentials.accessKeyId.trim().length > 0; + } catch { + return false; + } +} diff --git a/src/memory-host-sdk/host/embeddings.test.ts b/src/memory-host-sdk/host/embeddings.test.ts index c4fb6545d30..04cf29403c6 100644 --- a/src/memory-host-sdk/host/embeddings.test.ts +++ b/src/memory-host-sdk/host/embeddings.test.ts @@ -6,10 +6,18 @@ import { createEmbeddingProvider, DEFAULT_LOCAL_MODEL } from "./embeddings.js"; import * as nodeLlamaModule from "./node-llama.js"; import { mockPublicPinnedHostname } from "./test-helpers/ssrf.js"; -const { createOllamaEmbeddingProviderMock } = vi.hoisted(() => ({ +const { + bedrockSendMock, + createOllamaEmbeddingProviderMock, + defaultProviderMock, + resolveCredentialsMock, +} = vi.hoisted(() => ({ + bedrockSendMock: vi.fn(), 
createOllamaEmbeddingProviderMock: vi.fn(async () => { throw new Error("Unexpected ollama provider in embeddings.test.ts"); }), + defaultProviderMock: vi.fn(), + resolveCredentialsMock: vi.fn(), })); vi.mock("../../infra/net/fetch-guard.js", () => ({ @@ -35,6 +43,23 @@ vi.mock("./embeddings-ollama.js", () => ({ createOllamaEmbeddingProvider: createOllamaEmbeddingProviderMock, })); +vi.mock("@aws-sdk/client-bedrock-runtime", () => { + class MockClient { + send = bedrockSendMock; + } + class MockCommand { + input: unknown; + constructor(input: unknown) { + this.input = input; + } + } + return { BedrockRuntimeClient: MockClient, InvokeModelCommand: MockCommand }; +}); + +vi.mock("@aws-sdk/credential-provider-node", () => ({ + defaultProvider: defaultProviderMock.mockImplementation(() => resolveCredentialsMock), +})); + const createFetchMock = () => vi.fn(async (_input?: unknown, _init?: unknown) => ({ ok: true, @@ -63,6 +88,7 @@ type ResolvedProviderAuth = Awaited { vi.spyOn(authModule, "resolveApiKeyForProvider"); vi.spyOn(nodeLlamaModule, "importNodeLlamaCpp"); + defaultProviderMock.mockImplementation(() => resolveCredentialsMock); }); beforeEach(() => { @@ -108,7 +134,7 @@ function createLocalProvider(options?: { fallback?: "none" | "openai" }) { function expectAutoSelectedProvider( result: Awaited>, - expectedId: "openai" | "gemini" | "mistral", + expectedId: "openai" | "gemini" | "mistral" | "bedrock", ) { expect(result.requestedProvider).toBe("auto"); const provider = requireProvider(result); @@ -434,6 +460,39 @@ describe("embedding provider auto selection", () => { expect(url, testCase.name).toBe(testCase.expectedUrl); } }); + + it("selects Bedrock in auto mode when the AWS credential chain resolves", async () => { + bedrockSendMock.mockResolvedValue({ + body: new TextEncoder().encode(JSON.stringify({ embedding: [1, 2, 3] })), + }); + resolveCredentialsMock.mockResolvedValue({ accessKeyId: "AKIAEXAMPLE" }); + 
vi.mocked(authModule.resolveApiKeyForProvider).mockImplementation(async ({ provider }) => { + throw new Error(`No API key found for provider "${provider}".`); + }); + + const result = await createAutoProvider(); + const provider = expectAutoSelectedProvider(result, "bedrock"); + await provider.embedQuery("hello"); + + expect(bedrockSendMock).toHaveBeenCalledTimes(1); + }); + + it("rethrows non-auth Bedrock setup errors in auto mode", async () => { + resolveCredentialsMock.mockResolvedValue({ accessKeyId: "AKIAEXAMPLE" }); + vi.mocked(authModule.resolveApiKeyForProvider).mockImplementation(async ({ provider }) => { + throw new Error(`No API key found for provider "${provider}".`); + }); + + await expect( + createEmbeddingProvider({ + config: {} as never, + provider: "auto", + model: "", + fallback: "none", + outputDimensionality: 768, + }), + ).rejects.toThrow("Invalid dimensions 768"); + }); }); describe("embedding provider local fallback", () => { diff --git a/src/memory-host-sdk/host/embeddings.ts b/src/memory-host-sdk/host/embeddings.ts index ee18eb34c7f..d355d0a5de2 100644 --- a/src/memory-host-sdk/host/embeddings.ts +++ b/src/memory-host-sdk/host/embeddings.ts @@ -6,6 +6,11 @@ import { formatErrorMessage } from "../../infra/errors.js"; import { resolveUserPath } from "../../utils.js"; import type { EmbeddingInput } from "./embedding-inputs.js"; import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js"; +import { + createBedrockEmbeddingProvider, + hasAwsCredentials, + type BedrockEmbeddingClient, +} from "./embeddings-bedrock.js"; import { createGeminiEmbeddingProvider, type GeminiEmbeddingClient, @@ -25,6 +30,7 @@ export type { MistralEmbeddingClient } from "./embeddings-mistral.js"; export type { OpenAiEmbeddingClient } from "./embeddings-openai.js"; export type { VoyageEmbeddingClient } from "./embeddings-voyage.js"; export type { OllamaEmbeddingClient } from "./embeddings-ollama.js"; +export type { BedrockEmbeddingClient } from 
"./embeddings-bedrock.js"; export type EmbeddingProvider = { id: string; @@ -35,13 +41,21 @@ export type EmbeddingProvider = { embedBatchInputs?: (inputs: EmbeddingInput[]) => Promise; }; -export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama"; +export type EmbeddingProviderId = + | "openai" + | "local" + | "gemini" + | "voyage" + | "mistral" + | "ollama" + | "bedrock"; export type EmbeddingProviderRequest = EmbeddingProviderId | "auto"; export type EmbeddingProviderFallback = EmbeddingProviderId | "none"; // Remote providers considered for auto-selection when provider === "auto". // Ollama is intentionally excluded here so that "auto" mode does not // implicitly assume a local Ollama instance is available. +// Bedrock is included when AWS credentials are detected. const REMOTE_EMBEDDING_PROVIDER_IDS = ["openai", "gemini", "voyage", "mistral"] as const; export type EmbeddingProviderResult = { @@ -55,6 +69,7 @@ export type EmbeddingProviderResult = { voyage?: VoyageEmbeddingClient; mistral?: MistralEmbeddingClient; ollama?: OllamaEmbeddingClient; + bedrock?: BedrockEmbeddingClient; }; export type EmbeddingProviderOptions = { @@ -72,7 +87,7 @@ export type EmbeddingProviderOptions = { modelPath?: string; modelCacheDir?: string; }; - /** Gemini embedding-2: output vector dimensions (768, 1536, or 3072). */ + /** Provider-specific output vector dimensions for supported embedding families. */ outputDimensionality?: number; /** Gemini: override the default task type sent with embedding requests. 
*/ taskType?: GeminiTaskType; @@ -192,6 +207,10 @@ export async function createEmbeddingProvider( const { provider, client } = await createMistralEmbeddingProvider(options); return { provider, mistral: client }; } + if (id === "bedrock") { + const { provider, client } = await createBedrockEmbeddingProvider(options); + return { provider, bedrock: client }; + } const { provider, client } = await createOpenAiEmbeddingProvider(options); return { provider, openAi: client }; }; @@ -229,6 +248,23 @@ export async function createEmbeddingProvider( } } + // Try bedrock if AWS credentials are available + if (await hasAwsCredentials()) { + try { + const result = await createProvider("bedrock"); + return { ...result, requestedProvider }; + } catch (err) { + const message = formatPrimaryError(err, "bedrock"); + if (isMissingApiKeyError(err)) { + missingKeyErrors.push(message); + } else { + const wrapped = new Error(message) as Error & { cause?: unknown }; + wrapped.cause = err; + throw wrapped; + } + } + } + // All providers failed due to missing API keys - return null provider for FTS-only mode const details = [...missingKeyErrors, localError].filter(Boolean) as string[]; const reason = details.length > 0 ? details.join("\n\n") : "No embeddings provider available.";