From d43cc470c627fa4eb98e776daa18992450a86fa2 Mon Sep 17 00:00:00 2001 From: nv-kasikritc Date: Tue, 7 Apr 2026 21:47:29 +0700 Subject: [PATCH] refactor(nvidia-endpoints): updated language & default models (#59866) * fix(nvidia-endpoints): updated language & default models * fix(nvidia-endpoints): updated link for api key * fix(nvidia-endpoints): removed unused const * fix(nvidia-endpoints): edited max tokens * fix(nvidia-endpoints): fixed typo --------- Co-authored-by: Devin Robison --- docs/providers/nvidia.md | 21 ++++++------- extensions/nvidia/provider-catalog.test.ts | 7 +++-- extensions/nvidia/provider-catalog.ts | 34 +++++++++++++--------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/docs/providers/nvidia.md b/docs/providers/nvidia.md index eb0183f12bc..dae9663b8ee 100644 --- a/docs/providers/nvidia.md +++ b/docs/providers/nvidia.md @@ -1,14 +1,14 @@ --- summary: "Use NVIDIA's OpenAI-compatible API in OpenClaw" read_when: - - You want to use NVIDIA models in OpenClaw + - You want to use open models in OpenClaw for free - You need NVIDIA_API_KEY setup title: "NVIDIA" --- # NVIDIA -NVIDIA provides an OpenAI-compatible API at `https://integrate.api.nvidia.com/v1` for Nemotron and NeMo models. Authenticate with an API key from [NVIDIA NGC](https://catalog.ngc.nvidia.com/). +NVIDIA provides an OpenAI-compatible API at `https://integrate.api.nvidia.com/v1` for open models for free. Authenticate with an API key from [build.nvidia.com](https://build.nvidia.com/settings/api-keys). ## CLI setup @@ -17,7 +17,7 @@ Export the key once, then run onboarding and set an NVIDIA model: ```bash export NVIDIA_API_KEY="nvapi-..." openclaw onboard --auth-choice skip -openclaw models set nvidia/nvidia/llama-3.1-nemotron-70b-instruct +openclaw models set nvidia/nvidia/nemotron-3-super-120b-a12b ``` If you still pass `--token`, remember it lands in shell history and `ps` output; prefer the env var when possible. 
@@ -37,7 +37,7 @@ If you still pass `--token`, remember it lands in shell history and `ps` output; }, agents: { defaults: { - model: { primary: "nvidia/nvidia/llama-3.1-nemotron-70b-instruct" }, + model: { primary: "nvidia/nvidia/nemotron-3-super-120b-a12b" }, }, }, } @@ -45,14 +45,15 @@ If you still pass `--token`, remember it lands in shell history and `ps` output; ## Model IDs -| Model ref | Name | Context | Max output | | ---------------------------------------------------- | ---------------------------------------- | ------- | ---------- | -| `nvidia/nvidia/llama-3.1-nemotron-70b-instruct` | NVIDIA Llama 3.1 Nemotron 70B Instruct | 131,072 | 4,096 | -| `nvidia/meta/llama-3.3-70b-instruct` | Meta Llama 3.3 70B Instruct | 131,072 | 4,096 | -| `nvidia/nvidia/mistral-nemo-minitron-8b-8k-instruct` | NVIDIA Mistral NeMo Minitron 8B Instruct | 8,192 | 2,048 | +| Model ref | Name | Context | Max output | +| ------------------------------------------ | ---------------------------- | ------- | ---------- | +| `nvidia/nvidia/nemotron-3-super-120b-a12b` | NVIDIA Nemotron 3 Super 120B | 262,144 | 8,192 | +| `nvidia/moonshotai/kimi-k2.5` | Kimi K2.5 | 262,144 | 8,192 | +| `nvidia/minimaxai/minimax-m2.5` | MiniMax M2.5 | 196,608 | 8,192 | +| `nvidia/z-ai/glm5` | GLM-5 | 202,752 | 8,192 | ## Notes -- OpenAI-compatible `/v1` endpoint; use an API key from NVIDIA NGC. +- OpenAI-compatible `/v1` endpoint; use an API key from [build.nvidia.com](https://build.nvidia.com/). - Provider auto-enables when `NVIDIA_API_KEY` is set. - The bundled catalog is static; costs default to `0` in source. 
diff --git a/extensions/nvidia/provider-catalog.test.ts b/extensions/nvidia/provider-catalog.test.ts index 17ba2774603..de162374b01 100644 --- a/extensions/nvidia/provider-catalog.test.ts +++ b/extensions/nvidia/provider-catalog.test.ts @@ -8,9 +8,10 @@ describe("nvidia provider catalog", () => { expect(provider.baseUrl).toBe("https://integrate.api.nvidia.com/v1"); expect(provider.api).toBe("openai-completions"); expect(provider.models.map((model) => model.id)).toEqual([ - "nvidia/llama-3.1-nemotron-70b-instruct", - "meta/llama-3.3-70b-instruct", - "nvidia/mistral-nemo-minitron-8b-8k-instruct", + "nvidia/nemotron-3-super-120b-a12b", + "moonshotai/kimi-k2.5", + "minimaxai/minimax-m2.5", + "z-ai/glm5", ]); }); }); diff --git a/extensions/nvidia/provider-catalog.ts b/extensions/nvidia/provider-catalog.ts index 75f88bad8f0..ec6cdc64546 100644 --- a/extensions/nvidia/provider-catalog.ts +++ b/extensions/nvidia/provider-catalog.ts @@ -1,9 +1,8 @@ import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared"; const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"; -const NVIDIA_DEFAULT_MODEL_ID = "nvidia/llama-3.1-nemotron-70b-instruct"; -const NVIDIA_DEFAULT_CONTEXT_WINDOW = 131072; -const NVIDIA_DEFAULT_MAX_TOKENS = 4096; +const NVIDIA_DEFAULT_MODEL_ID = "nvidia/nemotron-3-super-120b-a12b"; +const NVIDIA_DEFAULT_MAX_TOKENS = 8192; const NVIDIA_DEFAULT_COST = { input: 0, output: 0, @@ -18,30 +17,39 @@ export function buildNvidiaProvider(): ModelProviderConfig { models: [ { id: NVIDIA_DEFAULT_MODEL_ID, - name: "NVIDIA Llama 3.1 Nemotron 70B Instruct", + name: "NVIDIA Nemotron 3 Super 120B", reasoning: false, input: ["text"], cost: NVIDIA_DEFAULT_COST, - contextWindow: NVIDIA_DEFAULT_CONTEXT_WINDOW, + contextWindow: 262144, maxTokens: NVIDIA_DEFAULT_MAX_TOKENS, }, { - id: "meta/llama-3.3-70b-instruct", - name: "Meta Llama 3.3 70B Instruct", + id: "moonshotai/kimi-k2.5", + name: "Kimi K2.5", reasoning: false, input: ["text"], cost: 
NVIDIA_DEFAULT_COST, - contextWindow: 131072, - maxTokens: 4096, + contextWindow: 262144, + maxTokens: NVIDIA_DEFAULT_MAX_TOKENS, }, { - id: "nvidia/mistral-nemo-minitron-8b-8k-instruct", - name: "NVIDIA Mistral NeMo Minitron 8B Instruct", + id: "minimaxai/minimax-m2.5", + name: "MiniMax M2.5", reasoning: false, input: ["text"], cost: NVIDIA_DEFAULT_COST, - contextWindow: 8192, - maxTokens: 2048, + contextWindow: 196608, + maxTokens: NVIDIA_DEFAULT_MAX_TOKENS, + }, + { + id: "z-ai/glm5", + name: "GLM-5", + reasoning: false, + input: ["text"], + cost: NVIDIA_DEFAULT_COST, + contextWindow: 202752, + maxTokens: NVIDIA_DEFAULT_MAX_TOKENS, }, ], };