diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f091e0518b..990068f96b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -205,6 +205,7 @@ Docs: https://docs.openclaw.ai - Memory/flush default prompt: ban timestamped variant filenames during default memory flush runs so durable notes stay in the canonical daily `memory/YYYY-MM-DD.md` file. (#34951) thanks @zerone0x. - Agents/reply delivery timing: flush embedded Pi block replies before waiting on compaction retries so already-generated assistant replies reach channels before compaction wait completes. (#35489) thanks @Sid-Qin. - Agents/gateway config guidance: stop exposing `config.schema` through the agent `gateway` tool, remove prompt/docs guidance that told agents to call it, and keep agents on `config.get` plus `config.patch`/`config.apply` for config changes. (#7382) thanks @kakuteki. +- Provider/KiloCode: Keep duplicate models after malformed discovery rows, and strip legacy `reasoning_effort` when proxy reasoning injection is skipped. (#32352) Thanks @pandemicsyn and @vincentkoc. - Agents/failover: classify periodic provider limit exhaustion text (for example `Weekly/Monthly Limit Exhausted`) as `rate_limit` while keeping explicit `402 Payment Required` variants in billing, so failover continues without misclassifying billing-wrapped quota errors. (#33813) thanks @zhouhe-xydt. - Mattermost/interactive button callbacks: allow external callback base URLs and stop requiring loopback-origin requests so button clicks work when Mattermost reaches the gateway over Tailscale, LAN, or a reverse proxy. (#37543) thanks @mukhtharcm. - Gateway/chat.send route inheritance: keep explicit external delivery for channel-scoped sessions while preventing shared-main and other channel-agnostic webchat sessions from inheriting stale external routes, so Control UI replies stay on webchat without breaking selected channel-target sessions. (#34669) Thanks @vincentkoc. 
diff --git a/docs/providers/kilocode.md b/docs/providers/kilocode.md index 009f4d83812..15f8e4c2b7c 100644 --- a/docs/providers/kilocode.md +++ b/docs/providers/kilocode.md @@ -35,30 +35,39 @@ export KILOCODE_API_KEY="" # pragma: allowlist secret env: { KILOCODE_API_KEY: "" }, // pragma: allowlist secret agents: { defaults: { - model: { primary: "kilocode/anthropic/claude-opus-4.6" }, + model: { primary: "kilocode/kilo/auto" }, }, }, } ``` -## Surfaced model refs +## Default model -The built-in Kilo Gateway catalog currently surfaces these model refs: +The default model is `kilocode/kilo/auto`, a smart routing model that automatically selects +the best underlying model based on the task: -- `kilocode/anthropic/claude-opus-4.6` (default) -- `kilocode/z-ai/glm-5:free` -- `kilocode/minimax/minimax-m2.5:free` -- `kilocode/anthropic/claude-sonnet-4.5` -- `kilocode/openai/gpt-5.2` -- `kilocode/google/gemini-3-pro-preview` -- `kilocode/google/gemini-3-flash-preview` -- `kilocode/x-ai/grok-code-fast-1` -- `kilocode/moonshotai/kimi-k2.5` +- Planning, debugging, and orchestration tasks route to Claude Opus +- Code writing and exploration tasks route to Claude Sonnet + +## Available models + +OpenClaw dynamically discovers available models from the Kilo Gateway at startup. Use +`/models kilocode` to see the full list of models available with your account. + +Any model available on the gateway can be used with the `kilocode/` prefix: + +``` +kilocode/kilo/auto (default - smart routing) +kilocode/anthropic/claude-sonnet-4 +kilocode/openai/gpt-5.2 +kilocode/google/gemini-3-pro-preview +...and many more +``` ## Notes -- Model refs are `kilocode//` (e.g., `kilocode/anthropic/claude-opus-4.6`). -- Default model: `kilocode/anthropic/claude-opus-4.6` +- Model refs are `kilocode/` (e.g., `kilocode/anthropic/claude-sonnet-4`). 
+- Default model: `kilocode/kilo/auto` - Base URL: `https://api.kilo.ai/api/gateway/` - For more model/provider options, see [/concepts/model-providers](/concepts/model-providers). - Kilo Gateway uses a Bearer token with your API key under the hood. diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts index ffd6ec2daa7..120f75d3665 100644 --- a/src/agents/auth-profiles/usage.test.ts +++ b/src/agents/auth-profiles/usage.test.ts @@ -26,6 +26,7 @@ function makeStore(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore "anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-test" }, "openai:default": { type: "api_key", provider: "openai", key: "sk-test-2" }, "openrouter:default": { type: "api_key", provider: "openrouter", key: "sk-or-test" }, + "kilocode:default": { type: "api_key", provider: "kilocode", key: "sk-kc-test" }, }, usageStats, }; @@ -120,6 +121,17 @@ describe("isProfileInCooldown", () => { }); expect(isProfileInCooldown(store, "openrouter:default")).toBe(false); }); + + it("returns false for Kilocode even when cooldown fields exist", () => { + const store = makeStore({ + "kilocode:default": { + cooldownUntil: Date.now() + 60_000, + disabledUntil: Date.now() + 60_000, + disabledReason: "billing", + }, + }); + expect(isProfileInCooldown(store, "kilocode:default")).toBe(false); + }); }); describe("resolveProfilesUnavailableReason", () => { diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 733a96e13c4..c28b51e3e57 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -20,7 +20,8 @@ const FAILURE_REASON_ORDER = new Map( ); function isAuthCooldownBypassedForProvider(provider: string | undefined): boolean { - return normalizeProviderId(provider ?? "") === "openrouter"; + const normalized = normalizeProviderId(provider ?? 
""); + return normalized === "openrouter" || normalized === "kilocode"; } export function resolveProfileUnusableUntil( diff --git a/src/agents/kilocode-models.test.ts b/src/agents/kilocode-models.test.ts new file mode 100644 index 00000000000..d13ee888439 --- /dev/null +++ b/src/agents/kilocode-models.test.ts @@ -0,0 +1,280 @@ +import { describe, expect, it, vi } from "vitest"; +import { discoverKilocodeModels, KILOCODE_MODELS_URL } from "./kilocode-models.js"; + +// discoverKilocodeModels checks for VITEST env and returns static catalog, +// so we need to temporarily unset it to test the fetch path. + +function makeGatewayModel(overrides: Record<string, unknown> = {}) { + return { + id: "anthropic/claude-sonnet-4", + name: "Anthropic: Claude Sonnet 4", + created: 1700000000, + description: "A model", + context_length: 200000, + architecture: { + input_modalities: ["text", "image"], + output_modalities: ["text"], + tokenizer: "Claude", + }, + top_provider: { + is_moderated: false, + max_completion_tokens: 8192, + }, + pricing: { + prompt: "0.000003", + completion: "0.000015", + input_cache_read: "0.0000003", + input_cache_write: "0.00000375", + }, + supported_parameters: ["max_tokens", "temperature", "tools", "reasoning"], + ...overrides, + }; +} + +function makeAutoModel(overrides: Record<string, unknown> = {}) { + return makeGatewayModel({ + id: "kilo/auto", + name: "Kilo: Auto", + context_length: 1000000, + architecture: { + input_modalities: ["text", "image"], + output_modalities: ["text"], + tokenizer: "Other", + }, + top_provider: { + is_moderated: false, + max_completion_tokens: 128000, + }, + pricing: { + prompt: "0.000005", + completion: "0.000025", + }, + supported_parameters: ["max_tokens", "temperature", "tools", "reasoning", "include_reasoning"], + ...overrides, + }); +} + +describe("discoverKilocodeModels", () => { + it("returns static catalog in test environment", async () => { + // Default vitest env — should return static catalog without fetching + const models = await 
discoverKilocodeModels(); + expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "kilo/auto")).toBe(true); + }); + + it("static catalog has correct defaults for kilo/auto", async () => { + const models = await discoverKilocodeModels(); + const auto = models.find((m) => m.id === "kilo/auto"); + expect(auto).toBeDefined(); + expect(auto?.name).toBe("Kilo Auto"); + expect(auto?.reasoning).toBe(true); + expect(auto?.input).toEqual(["text", "image"]); + expect(auto?.contextWindow).toBe(1000000); + expect(auto?.maxTokens).toBe(128000); + expect(auto?.cost).toEqual({ input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }); + }); +}); + +describe("discoverKilocodeModels (fetch path)", () => { + it("parses gateway models with correct pricing conversion", async () => { + // Temporarily unset test env flags to exercise the fetch path + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + data: [makeAutoModel(), makeGatewayModel()], + }), + }); + vi.stubGlobal("fetch", mockFetch); + + try { + const models = await discoverKilocodeModels(); + + // Should have fetched from the gateway URL + expect(mockFetch).toHaveBeenCalledWith( + KILOCODE_MODELS_URL, + expect.objectContaining({ + headers: { Accept: "application/json" }, + }), + ); + + // Should have both models + expect(models.length).toBe(2); + + // Verify the sonnet model pricing (per-token * 1_000_000 = per-1M-token) + const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4"); + expect(sonnet).toBeDefined(); + expect(sonnet?.cost.input).toBeCloseTo(3.0); // 0.000003 * 1_000_000 + expect(sonnet?.cost.output).toBeCloseTo(15.0); // 0.000015 * 1_000_000 + expect(sonnet?.cost.cacheRead).toBeCloseTo(0.3); // 0.0000003 * 1_000_000 + expect(sonnet?.cost.cacheWrite).toBeCloseTo(3.75); // 0.00000375 * 1_000_000 + + 
// Verify modality + expect(sonnet?.input).toEqual(["text", "image"]); + + // Verify reasoning detection + expect(sonnet?.reasoning).toBe(true); + + // Verify context/tokens + expect(sonnet?.contextWindow).toBe(200000); + expect(sonnet?.maxTokens).toBe(8192); + } finally { + process.env.NODE_ENV = origNodeEnv; + if (origVitest !== undefined) { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } + }); + + it("falls back to static catalog on network error", async () => { + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + const mockFetch = vi.fn().mockRejectedValue(new Error("network error")); + vi.stubGlobal("fetch", mockFetch); + + try { + const models = await discoverKilocodeModels(); + expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "kilo/auto")).toBe(true); + } finally { + process.env.NODE_ENV = origNodeEnv; + if (origVitest !== undefined) { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } + }); + + it("falls back to static catalog on HTTP error", async () => { + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + const mockFetch = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + }); + vi.stubGlobal("fetch", mockFetch); + + try { + const models = await discoverKilocodeModels(); + expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "kilo/auto")).toBe(true); + } finally { + process.env.NODE_ENV = origNodeEnv; + if (origVitest !== undefined) { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } + }); + + it("ensures kilo/auto is present even when API doesn't return it", async () => { + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + const mockFetch = 
vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + data: [makeGatewayModel()], // no kilo/auto + }), + }); + vi.stubGlobal("fetch", mockFetch); + + try { + const models = await discoverKilocodeModels(); + expect(models.some((m) => m.id === "kilo/auto")).toBe(true); + expect(models.some((m) => m.id === "anthropic/claude-sonnet-4")).toBe(true); + } finally { + process.env.NODE_ENV = origNodeEnv; + if (origVitest !== undefined) { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } + }); + + it("detects text-only models without image modality", async () => { + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + const textOnlyModel = makeGatewayModel({ + id: "some/text-model", + architecture: { + input_modalities: ["text"], + output_modalities: ["text"], + }, + supported_parameters: ["max_tokens", "temperature"], + }); + + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [textOnlyModel] }), + }); + vi.stubGlobal("fetch", mockFetch); + + try { + const models = await discoverKilocodeModels(); + const textModel = models.find((m) => m.id === "some/text-model"); + expect(textModel?.input).toEqual(["text"]); + expect(textModel?.reasoning).toBe(false); + } finally { + process.env.NODE_ENV = origNodeEnv; + if (origVitest !== undefined) { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } + }); + + it("keeps a later valid duplicate when an earlier entry is malformed", async () => { + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + const malformedAutoModel = makeAutoModel({ + name: "Broken Kilo Auto", + pricing: undefined, + }); + + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + data: [malformedAutoModel, makeAutoModel(), 
makeGatewayModel()], + }), + }); + vi.stubGlobal("fetch", mockFetch); + + try { + const models = await discoverKilocodeModels(); + const auto = models.find((m) => m.id === "kilo/auto"); + expect(auto).toBeDefined(); + expect(auto?.name).toBe("Kilo: Auto"); + expect(auto?.cost.input).toBeCloseTo(5.0); + expect(models.some((m) => m.id === "anthropic/claude-sonnet-4")).toBe(true); + } finally { + process.env.NODE_ENV = origNodeEnv; + if (origVitest !== undefined) { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } + }); +}); diff --git a/src/agents/kilocode-models.ts b/src/agents/kilocode-models.ts new file mode 100644 index 00000000000..5b3c48ffa27 --- /dev/null +++ b/src/agents/kilocode-models.ts @@ -0,0 +1,190 @@ +import type { ModelDefinitionConfig } from "../config/types.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; +import { + KILOCODE_BASE_URL, + KILOCODE_DEFAULT_CONTEXT_WINDOW, + KILOCODE_DEFAULT_COST, + KILOCODE_DEFAULT_MAX_TOKENS, + KILOCODE_MODEL_CATALOG, +} from "../providers/kilocode-shared.js"; + +const log = createSubsystemLogger("kilocode-models"); + +export const KILOCODE_MODELS_URL = `${KILOCODE_BASE_URL}models`; + +const DISCOVERY_TIMEOUT_MS = 5000; + +// --------------------------------------------------------------------------- +// Gateway response types (OpenRouter-compatible schema) +// --------------------------------------------------------------------------- + +interface GatewayModelPricing { + prompt: string; + completion: string; + image?: string; + request?: string; + input_cache_read?: string; + input_cache_write?: string; + web_search?: string; + internal_reasoning?: string; +} + +interface GatewayModelEntry { + id: string; + name: string; + context_length: number; + architecture?: { + input_modalities?: string[]; + output_modalities?: string[]; + }; + top_provider?: { + max_completion_tokens?: number | null; + }; + pricing: GatewayModelPricing; + supported_parameters?: string[]; +} + +interface 
GatewayModelsResponse { + data: GatewayModelEntry[]; +} + +// --------------------------------------------------------------------------- +// Pricing conversion +// --------------------------------------------------------------------------- + +/** + * Convert per-token price (as returned by the gateway) to per-1M-token price + * (as stored in OpenClaw's ModelDefinitionConfig.cost). + * + * Gateway/OpenRouter prices are per-token strings like "0.000005". + * OpenClaw costs are per-1M-token numbers like 5.0. + */ +function toPricePerMillion(perToken: string | undefined): number { + if (!perToken) { + return 0; + } + const num = Number(perToken); + if (!Number.isFinite(num) || num < 0) { + return 0; + } + return num * 1_000_000; +} + +// --------------------------------------------------------------------------- +// Model parsing +// --------------------------------------------------------------------------- + +function parseModality(entry: GatewayModelEntry): Array<"text" | "image"> { + const modalities = entry.architecture?.input_modalities; + if (!Array.isArray(modalities)) { + return ["text"]; + } + const hasImage = modalities.some((m) => typeof m === "string" && m.toLowerCase() === "image"); + return hasImage ? 
["text", "image"] : ["text"]; +} + +function parseReasoning(entry: GatewayModelEntry): boolean { + const params = entry.supported_parameters; + if (!Array.isArray(params)) { + return false; + } + return params.includes("reasoning") || params.includes("include_reasoning"); +} + +function toModelDefinition(entry: GatewayModelEntry): ModelDefinitionConfig { + return { + id: entry.id, + name: entry.name || entry.id, + reasoning: parseReasoning(entry), + input: parseModality(entry), + cost: { + input: toPricePerMillion(entry.pricing.prompt), + output: toPricePerMillion(entry.pricing.completion), + cacheRead: toPricePerMillion(entry.pricing.input_cache_read), + cacheWrite: toPricePerMillion(entry.pricing.input_cache_write), + }, + contextWindow: entry.context_length || KILOCODE_DEFAULT_CONTEXT_WINDOW, + maxTokens: entry.top_provider?.max_completion_tokens ?? KILOCODE_DEFAULT_MAX_TOKENS, + }; +} + +// --------------------------------------------------------------------------- +// Static fallback +// --------------------------------------------------------------------------- + +function buildStaticCatalog(): ModelDefinitionConfig[] { + return KILOCODE_MODEL_CATALOG.map((model) => ({ + id: model.id, + name: model.name, + reasoning: model.reasoning, + input: model.input, + cost: KILOCODE_DEFAULT_COST, + contextWindow: model.contextWindow ?? KILOCODE_DEFAULT_CONTEXT_WINDOW, + maxTokens: model.maxTokens ?? KILOCODE_DEFAULT_MAX_TOKENS, + })); +} + +// --------------------------------------------------------------------------- +// Discovery +// --------------------------------------------------------------------------- + +/** + * Discover models from the Kilo Gateway API with fallback to static catalog. + * The /api/gateway/models endpoint is public and doesn't require authentication. 
+ */ +export async function discoverKilocodeModels(): Promise<ModelDefinitionConfig[]> { + // Skip API discovery in test environment + if (process.env.NODE_ENV === "test" || process.env.VITEST) { + return buildStaticCatalog(); + } + + try { + const response = await fetch(KILOCODE_MODELS_URL, { + headers: { Accept: "application/json" }, + signal: AbortSignal.timeout(DISCOVERY_TIMEOUT_MS), + }); + + if (!response.ok) { + log.warn(`Failed to discover models: HTTP ${response.status}, using static catalog`); + return buildStaticCatalog(); + } + + const data = (await response.json()) as GatewayModelsResponse; + if (!Array.isArray(data.data) || data.data.length === 0) { + log.warn("No models found from gateway API, using static catalog"); + return buildStaticCatalog(); + } + + const models: ModelDefinitionConfig[] = []; + const discoveredIds = new Set<string>(); + + for (const entry of data.data) { + if (!entry || typeof entry !== "object") { + continue; + } + const id = typeof entry.id === "string" ? entry.id.trim() : ""; + if (!id || discoveredIds.has(id)) { + continue; + } + try { + models.push(toModelDefinition(entry)); + discoveredIds.add(id); + } catch (e) { + log.warn(`Skipping malformed model entry "${id}": ${String(e)}`); + } + } + + // Ensure the static fallback models are always present + const staticModels = buildStaticCatalog(); + for (const staticModel of staticModels) { + if (!discoveredIds.has(staticModel.id)) { + models.unshift(staticModel); + } + } + + return models.length > 0 ? 
models : buildStaticCatalog(); + } catch (error) { + log.warn(`Discovery failed: ${String(error)}, using static catalog`); + return buildStaticCatalog(); + } +} diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts index 5eec49f49b8..b891af4ed2d 100644 --- a/src/agents/model-catalog.test.ts +++ b/src/agents/model-catalog.test.ts @@ -238,9 +238,9 @@ describe("loadModelCatalog", () => { it("does not duplicate opted-in configured models already present in ModelRegistry", async () => { mockPiDiscoveryModels([ { - id: "anthropic/claude-opus-4.6", + id: "kilo/auto", provider: "kilocode", - name: "Claude Opus 4.6", + name: "Kilo Auto", }, ]); @@ -253,8 +253,8 @@ describe("loadModelCatalog", () => { api: "openai-completions", models: [ { - id: "anthropic/claude-opus-4.6", - name: "Configured Claude Opus 4.6", + id: "kilo/auto", + name: "Configured Kilo Auto", reasoning: true, input: ["text", "image"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, @@ -269,9 +269,9 @@ describe("loadModelCatalog", () => { }); const matches = result.filter( - (entry) => entry.provider === "kilocode" && entry.id === "anthropic/claude-opus-4.6", + (entry) => entry.provider === "kilocode" && entry.id === "kilo/auto", ); expect(matches).toHaveLength(1); - expect(matches[0]?.name).toBe("Claude Opus 4.6"); + expect(matches[0]?.name).toBe("Kilo Auto"); }); }); diff --git a/src/agents/models-config.providers.kilocode.test.ts b/src/agents/models-config.providers.kilocode.test.ts index 05cfb1b468c..2cbb3b2609f 100644 --- a/src/agents/models-config.providers.kilocode.test.ts +++ b/src/agents/models-config.providers.kilocode.test.ts @@ -5,17 +5,7 @@ import { describe, expect, it } from "vitest"; import { captureEnv } from "../test-utils/env.js"; import { buildKilocodeProvider, resolveImplicitProviders } from "./models-config.providers.js"; -const KILOCODE_MODEL_IDS = [ - "anthropic/claude-opus-4.6", - "z-ai/glm-5:free", - "minimax/minimax-m2.5:free", - 
"anthropic/claude-sonnet-4.5", - "openai/gpt-5.2", - "google/gemini-3-pro-preview", - "google/gemini-3-flash-preview", - "x-ai/grok-code-fast-1", - "moonshotai/kimi-k2.5", -]; +const KILOCODE_MODEL_IDS = ["kilo/auto"]; describe("Kilo Gateway implicit provider", () => { it("should include kilocode when KILOCODE_API_KEY is configured", async () => { @@ -56,14 +46,15 @@ describe("Kilo Gateway implicit provider", () => { it("should include the default kilocode model", () => { const provider = buildKilocodeProvider(); const modelIds = provider.models.map((m) => m.id); - expect(modelIds).toContain("anthropic/claude-opus-4.6"); + expect(modelIds).toContain("kilo/auto"); }); - it("should include the full surfaced model catalog", () => { + it("should include the static fallback catalog", () => { const provider = buildKilocodeProvider(); const modelIds = provider.models.map((m) => m.id); for (const modelId of KILOCODE_MODEL_IDS) { expect(modelIds).toContain(modelId); } + expect(provider.models).toHaveLength(KILOCODE_MODEL_IDS.length); }); }); diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts index 5c4907bc279..1c7ad06699c 100644 --- a/src/agents/models-config.providers.ts +++ b/src/agents/models-config.providers.ts @@ -40,6 +40,7 @@ import { HUGGINGFACE_MODEL_CATALOG, buildHuggingfaceModelDefinition, } from "./huggingface-models.js"; +import { discoverKilocodeModels } from "./kilocode-models.js"; import { resolveAwsSdkEnvVarName, resolveEnvApiKey } from "./model-auth.js"; import { OLLAMA_NATIVE_BASE_URL } from "./ollama-stream.js"; import { @@ -920,6 +921,23 @@ export function buildKilocodeProvider(): ProviderConfig { }; } +/** + * Build the Kilocode provider with dynamic model discovery from the gateway + * API. Falls back to the static catalog on failure. + * + * Used by {@link resolveImplicitProviders} (async context). The sync + * {@link buildKilocodeProvider} is kept for the onboarding config path + * which cannot await. 
+ */ +async function buildKilocodeProviderWithDiscovery(): Promise<ProviderConfig> { + const models = await discoverKilocodeModels(); + return { + baseUrl: KILOCODE_BASE_URL, + api: "openai-completions", + models, + }; +} + export async function resolveImplicitProviders(params: { agentDir: string; explicitProviders?: Record<string, ProviderConfig> | null; @@ -1133,7 +1151,7 @@ export async function resolveImplicitProviders(params: { resolveEnvApiKeyVarName("kilocode") ?? resolveApiKeyFromProfiles({ provider: "kilocode", store: authStore }); if (kilocodeKey) { - providers.kilocode = { ...buildKilocodeProvider(), apiKey: kilocodeKey }; + providers.kilocode = { ...(await buildKilocodeProviderWithDiscovery()), apiKey: kilocodeKey }; } return providers; diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 574d3069741..f34e1514635 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -321,7 +321,7 @@ describe("applyExtraParamsToAgent", () => { it("does not inject reasoning.effort for x-ai/grok models on OpenRouter (#32039)", () => { const payloads: Record<string, unknown>[] = []; const baseStreamFn: StreamFn = (_model, _context, options) => { - const payload: Record<string, unknown> = {}; + const payload: Record<string, unknown> = { reasoning_effort: "medium" }; options?.onPayload?.(payload); payloads.push(payload); return {} as ReturnType<StreamFn>; diff --git a/src/agents/pi-embedded-runner/extra-params.kilocode.test.ts b/src/agents/pi-embedded-runner/extra-params.kilocode.test.ts new file mode 100644 index 00000000000..509cdb5edf4 --- /dev/null +++ b/src/agents/pi-embedded-runner/extra-params.kilocode.test.ts @@ -0,0 +1,182 @@ +import type { StreamFn } from "@mariozechner/pi-agent-core"; +import type { Context, Model } from "@mariozechner/pi-ai"; +import { createAssistantMessageEventStream } from "@mariozechner/pi-ai"; +import { afterEach, describe, expect, it } from "vitest"; +import { captureEnv } from "../../test-utils/env.js"; 
+import { applyExtraParamsToAgent } from "./extra-params.js"; + +type CapturedCall = { + headers?: Record; + payload?: Record; +}; + +function applyAndCapture(params: { + provider: string; + modelId: string; + callerHeaders?: Record; +}): CapturedCall { + const captured: CapturedCall = {}; + + const baseStreamFn: StreamFn = (_model, _context, options) => { + captured.headers = options?.headers; + options?.onPayload?.({}); + return createAssistantMessageEventStream(); + }; + const agent = { streamFn: baseStreamFn }; + + applyExtraParamsToAgent(agent, undefined, params.provider, params.modelId); + + const model = { + api: "openai-completions", + provider: params.provider, + id: params.modelId, + } as Model<"openai-completions">; + const context: Context = { messages: [] }; + + void agent.streamFn?.(model, context, { + headers: params.callerHeaders, + }); + + return captured; +} + +describe("extra-params: Kilocode wrapper", () => { + const envSnapshot = captureEnv(["KILOCODE_FEATURE"]); + + afterEach(() => { + envSnapshot.restore(); + }); + + it("injects X-KILOCODE-FEATURE header with default value", () => { + delete process.env.KILOCODE_FEATURE; + + const { headers } = applyAndCapture({ + provider: "kilocode", + modelId: "anthropic/claude-sonnet-4", + }); + + expect(headers?.["X-KILOCODE-FEATURE"]).toBe("openclaw"); + }); + + it("reads X-KILOCODE-FEATURE from KILOCODE_FEATURE env var", () => { + process.env.KILOCODE_FEATURE = "custom-feature"; + + const { headers } = applyAndCapture({ + provider: "kilocode", + modelId: "anthropic/claude-sonnet-4", + }); + + expect(headers?.["X-KILOCODE-FEATURE"]).toBe("custom-feature"); + }); + + it("cannot be overridden by caller headers", () => { + delete process.env.KILOCODE_FEATURE; + + const { headers } = applyAndCapture({ + provider: "kilocode", + modelId: "anthropic/claude-sonnet-4", + callerHeaders: { "X-KILOCODE-FEATURE": "should-be-overwritten" }, + }); + + expect(headers?.["X-KILOCODE-FEATURE"]).toBe("openclaw"); + }); + + 
it("does not inject header for non-kilocode providers", () => {
+    const { headers } = applyAndCapture({
+      provider: "openrouter",
+      modelId: "anthropic/claude-sonnet-4",
+    });
+
+    expect(headers?.["X-KILOCODE-FEATURE"]).toBeUndefined();
+  });
+});
+
+describe("extra-params: Kilocode kilo/auto reasoning", () => {
+  it("does not inject reasoning.effort for kilo/auto", () => {
+    let capturedPayload: Record<string, unknown> | undefined;
+
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload: Record<string, unknown> = { reasoning_effort: "high" };
+      options?.onPayload?.(payload);
+      capturedPayload = payload;
+      return createAssistantMessageEventStream();
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    // Pass thinking level explicitly (6th parameter) to trigger reasoning injection
+    applyExtraParamsToAgent(agent, undefined, "kilocode", "kilo/auto", undefined, "high");
+
+    const model = {
+      api: "openai-completions",
+      provider: "kilocode",
+      id: "kilo/auto",
+    } as Model<"openai-completions">;
+    const context: Context = { messages: [] };
+
+    void agent.streamFn?.(model, context, {});
+
+    // kilo/auto should not have reasoning injected
+    expect(capturedPayload?.reasoning).toBeUndefined();
+    expect(capturedPayload).not.toHaveProperty("reasoning_effort");
+  });
+
+  it("injects reasoning.effort for non-auto kilocode models", () => {
+    let capturedPayload: Record<string, unknown> | undefined;
+
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload: Record<string, unknown> = {};
+      options?.onPayload?.(payload);
+      capturedPayload = payload;
+      return createAssistantMessageEventStream();
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(
+      agent,
+      undefined,
+      "kilocode",
+      "anthropic/claude-sonnet-4",
+      undefined,
+      "high",
+    );
+
+    const model = {
+      api: "openai-completions",
+      provider: "kilocode",
+      id: "anthropic/claude-sonnet-4",
+    } as Model<"openai-completions">;
+    const context: Context = { messages: [] };
+
+    void agent.streamFn?.(model, context, {});
+
+    // Non-auto models should have reasoning injected
+    expect(capturedPayload?.reasoning).toEqual({ effort: "high" });
+  });
+
+  it("does not inject reasoning.effort for x-ai models", () => {
+    let capturedPayload: Record<string, unknown> | undefined;
+
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload: Record<string, unknown> = { reasoning_effort: "high" };
+      options?.onPayload?.(payload);
+      capturedPayload = payload;
+      return createAssistantMessageEventStream();
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(agent, undefined, "kilocode", "x-ai/grok-3", undefined, "high");
+
+    const model = {
+      api: "openai-completions",
+      provider: "kilocode",
+      id: "x-ai/grok-3",
+    } as Model<"openai-completions">;
+    const context: Context = { messages: [] };
+
+    void agent.streamFn?.(model, context, {});
+
+    // x-ai models reject reasoning.effort — should be skipped
+    expect(capturedPayload?.reasoning).toBeUndefined();
+    expect(capturedPayload).not.toHaveProperty("reasoning_effort");
+  });
+});
diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts
index 9f8380184f3..78dffcd9cbe 100644
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@@ -9,6 +9,15 @@ const OPENROUTER_APP_HEADERS: Record<string, string> = {
   "HTTP-Referer": "https://openclaw.ai",
   "X-Title": "OpenClaw",
 };
+const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
+const KILOCODE_FEATURE_DEFAULT = "openclaw";
+const KILOCODE_FEATURE_ENV_VAR = "KILOCODE_FEATURE";
+
+function resolveKilocodeAppHeaders(): Record<string, string> {
+  const feature = process.env[KILOCODE_FEATURE_ENV_VAR]?.trim() || KILOCODE_FEATURE_DEFAULT;
+  return { [KILOCODE_FEATURE_HEADER]: feature };
+}
+
 const ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
 const ANTHROPIC_1M_MODEL_PREFIXES = ["claude-opus-4", "claude-sonnet-4"] as const;
 // NOTE: We only force `store=true` for *direct* OpenAI Responses.
@@ -846,6 +855,45 @@ function createKimiCodingAnthropicToolSchemaWrapper(baseStreamFn: StreamFn | und
  * Create a streamFn wrapper that adds OpenRouter app attribution headers
  * and injects reasoning.effort based on the configured thinking level.
  */
+function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkLevel): void {
+  if (!payload || typeof payload !== "object") {
+    return;
+  }
+
+  const payloadObj = payload as Record<string, unknown>;
+
+  // pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
+  // OpenRouter-compatible proxy gateways expect the nested reasoning.effort
+  // shape instead, and some models reject the flat field outright.
+  delete payloadObj.reasoning_effort;
+
+  // When thinking is "off", or provider/model guards disable injection,
+  // leave reasoning unset after normalizing away the legacy flat field.
+  if (!thinkingLevel || thinkingLevel === "off") {
+    return;
+  }
+
+  const existingReasoning = payloadObj.reasoning;
+
+  // OpenRouter treats reasoning.effort and reasoning.max_tokens as
+  // alternative controls. If max_tokens is already present, do not inject
+  // effort and do not overwrite caller-supplied reasoning.
+  if (
+    existingReasoning &&
+    typeof existingReasoning === "object" &&
+    !Array.isArray(existingReasoning)
+  ) {
+    const reasoningObj = existingReasoning as Record<string, unknown>;
+    if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
+      reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
+    }
+  } else if (!existingReasoning) {
+    payloadObj.reasoning = {
+      effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
+    };
+  }
+}
+
 function createOpenRouterWrapper(
   baseStreamFn: StreamFn | undefined,
   thinkingLevel?: ThinkLevel,
@@ -860,42 +908,7 @@ function createOpenRouterWrapper(
         ...options?.headers,
       },
       onPayload: (payload) => {
-        if (thinkingLevel && payload && typeof payload === "object") {
-          const payloadObj = payload as Record<string, unknown>;
-
-          // pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
-          // OpenRouter expects the nested reasoning.effort format instead, and
-          // rejects payloads containing both fields. Remove the flat field so
-          // only the nested one is sent.
-          delete payloadObj.reasoning_effort;
-
-          // When thinking is "off", do not inject reasoning at all.
-          // Some models (e.g. deepseek/deepseek-r1) require reasoning and reject
-          // { effort: "none" } with "Reasoning is mandatory for this endpoint and
-          // cannot be disabled." Omitting the field lets each model use its own
-          // default reasoning behavior.
-          if (thinkingLevel !== "off") {
-            const existingReasoning = payloadObj.reasoning;
-
-            // OpenRouter treats reasoning.effort and reasoning.max_tokens as
-            // alternative controls. If max_tokens is already present, do not
-            // inject effort and do not overwrite caller-supplied reasoning.
-            if (
-              existingReasoning &&
-              typeof existingReasoning === "object" &&
-              !Array.isArray(existingReasoning)
-            ) {
-              const reasoningObj = existingReasoning as Record<string, unknown>;
-              if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
-                reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
-              }
-            } else if (!existingReasoning) {
-              payloadObj.reasoning = {
-                effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
-              };
-            }
-          }
-        }
+        normalizeProxyReasoningPayload(payload, thinkingLevel);
         onPayload?.(payload);
       },
     });
@@ -903,14 +916,41 @@
 }
 
 /**
- * Models on OpenRouter that do not support the `reasoning.effort` parameter.
- * Injecting it causes "Invalid arguments passed to the model" errors.
+ * Models on OpenRouter-style proxy providers that reject `reasoning.effort`.
  */
-function isOpenRouterReasoningUnsupported(modelId: string): boolean {
+function isProxyReasoningUnsupported(modelId: string): boolean {
   const id = modelId.toLowerCase();
   return id.startsWith("x-ai/");
 }
+
+/**
+ * Create a streamFn wrapper that adds the Kilocode feature attribution header
+ * and injects reasoning.effort based on the configured thinking level.
+ *
+ * The Kilocode provider gateway manages provider-specific quirks (e.g. cache
+ * control) server-side, so we only handle header injection and reasoning here.
+ */
+function createKilocodeWrapper(
+  baseStreamFn: StreamFn | undefined,
+  thinkingLevel?: ThinkLevel,
+): StreamFn {
+  const underlying = baseStreamFn ?? streamSimple;
+  return (model, context, options) => {
+    const onPayload = options?.onPayload;
+    return underlying(model, context, {
+      ...options,
+      headers: {
+        ...options?.headers,
+        ...resolveKilocodeAppHeaders(),
+      },
+      onPayload: (payload) => {
+        normalizeProxyReasoningPayload(payload, thinkingLevel);
+        onPayload?.(payload);
+      },
+    });
+  };
+}
+
 function isGemini31Model(modelId: string): boolean {
   const normalized = modelId.toLowerCase();
   return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash");
@@ -1118,12 +1158,22 @@ export function applyExtraParamsToAgent(
     // and reject payloads containing it with "Invalid arguments passed to the
     // model." Skip reasoning injection for these models.
     // See: openclaw/openclaw#32039
-    const skipReasoningInjection = modelId === "auto" || isOpenRouterReasoningUnsupported(modelId);
+    const skipReasoningInjection = modelId === "auto" || isProxyReasoningUnsupported(modelId);
     const openRouterThinkingLevel = skipReasoningInjection ? undefined : thinkingLevel;
     agent.streamFn = createOpenRouterWrapper(agent.streamFn, openRouterThinkingLevel);
     agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
   }
 
+  if (provider === "kilocode") {
+    log.debug(`applying Kilocode feature header for ${provider}/${modelId}`);
+    // kilo/auto is a dynamic routing model — skip reasoning injection
+    // (same rationale as OpenRouter "auto"). See: openclaw/openclaw#24851
+    // Also skip for models known to reject reasoning.effort (e.g. x-ai/*).
+    const kilocodeThinkingLevel =
+      modelId === "kilo/auto" || isProxyReasoningUnsupported(modelId) ? undefined : thinkingLevel;
+    agent.streamFn = createKilocodeWrapper(agent.streamFn, kilocodeThinkingLevel);
+  }
+
   if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) {
     log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`);
     agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn);
diff --git a/src/commands/configure.gateway-auth.prompt-auth-config.test.ts b/src/commands/configure.gateway-auth.prompt-auth-config.test.ts
index b6a117f9505..b27e52fcf7c 100644
--- a/src/commands/configure.gateway-auth.prompt-auth-config.test.ts
+++ b/src/commands/configure.gateway-auth.prompt-auth-config.test.ts
@@ -56,8 +56,8 @@ function createKilocodeProvider() {
     baseUrl: "https://api.kilo.ai/api/gateway/",
     api: "openai-completions",
     models: [
-      { id: "anthropic/claude-opus-4.6", name: "Claude Opus 4.6" },
-      { id: "minimax/minimax-m2.5:free", name: "MiniMax M2.5 (Free)" },
+      { id: "kilo/auto", name: "Kilo Auto" },
+      { id: "anthropic/claude-sonnet-4", name: "Claude Sonnet 4" },
     ],
   };
 }
@@ -67,7 +67,7 @@ function createApplyAuthChoiceConfig(includeMinimaxProvider = false) {
     config: {
       agents: {
         defaults: {
-          model: { primary: "kilocode/anthropic/claude-opus-4.6" },
+          model: { primary: "kilocode/kilo/auto" },
         },
       },
       models: {
@@ -92,7 +92,7 @@ async function runPromptAuthConfigWithAllowlist(includeMinimaxProvider = false)
   mocks.promptAuthChoiceGrouped.mockResolvedValue("kilocode-api-key");
   mocks.applyAuthChoice.mockResolvedValue(createApplyAuthChoiceConfig(includeMinimaxProvider));
   mocks.promptModelAllowlist.mockResolvedValue({
-    models: ["kilocode/anthropic/claude-opus-4.6"],
+    models: ["kilocode/kilo/auto"],
   });
 
   return promptAuthConfig({}, makeRuntime(), noopPrompter);
@@ -102,19 +102,17 @@ describe("promptAuthConfig", () => {
   it("keeps Kilo provider models while applying allowlist defaults", async () => {
     const result = await runPromptAuthConfigWithAllowlist();
     expect(result.models?.providers?.kilocode?.models?.map((model) => model.id)).toEqual([
-      "anthropic/claude-opus-4.6",
-      "minimax/minimax-m2.5:free",
-    ]);
-    expect(Object.keys(result.agents?.defaults?.models ?? {})).toEqual([
-      "kilocode/anthropic/claude-opus-4.6",
+      "kilo/auto",
+      "anthropic/claude-sonnet-4",
     ]);
+    expect(Object.keys(result.agents?.defaults?.models ?? {})).toEqual(["kilocode/kilo/auto"]);
   });
 
   it("does not mutate provider model catalogs when allowlist is set", async () => {
     const result = await runPromptAuthConfigWithAllowlist(true);
     expect(result.models?.providers?.kilocode?.models?.map((model) => model.id)).toEqual([
-      "anthropic/claude-opus-4.6",
-      "minimax/minimax-m2.5:free",
+      "kilo/auto",
+      "anthropic/claude-sonnet-4",
     ]);
     expect(result.models?.providers?.minimax?.models?.map((model) => model.id)).toEqual([
       "MiniMax-M2.5",
diff --git a/src/commands/onboard-auth.config-core.kilocode.test.ts b/src/commands/onboard-auth.config-core.kilocode.test.ts
index 38dc802492f..4f1ed796520 100644
--- a/src/commands/onboard-auth.config-core.kilocode.test.ts
+++ b/src/commands/onboard-auth.config-core.kilocode.test.ts
@@ -21,17 +21,7 @@ import {
 } from "./onboard-auth.models.js";
 
 const emptyCfg: OpenClawConfig = {};
-const KILOCODE_MODEL_IDS = [
-  "anthropic/claude-opus-4.6",
-  "z-ai/glm-5:free",
-  "minimax/minimax-m2.5:free",
-  "anthropic/claude-sonnet-4.5",
-  "openai/gpt-5.2",
-  "google/gemini-3-pro-preview",
-  "google/gemini-3-flash-preview",
-  "x-ai/grok-code-fast-1",
-  "moonshotai/kimi-k2.5",
-];
+const KILOCODE_MODEL_IDS = ["kilo/auto"];
 
 describe("Kilo Gateway provider config", () => {
   describe("constants", () => {
@@ -40,11 +30,11 @@
     });
 
     it("KILOCODE_DEFAULT_MODEL_REF includes provider prefix", () => {
-      expect(KILOCODE_DEFAULT_MODEL_REF).toBe("kilocode/anthropic/claude-opus-4.6");
+      expect(KILOCODE_DEFAULT_MODEL_REF).toBe("kilocode/kilo/auto");
     });
 
-    it("KILOCODE_DEFAULT_MODEL_ID is anthropic/claude-opus-4.6", () => {
-      expect(KILOCODE_DEFAULT_MODEL_ID).toBe("anthropic/claude-opus-4.6");
+    it("KILOCODE_DEFAULT_MODEL_ID is kilo/auto", () => {
+      expect(KILOCODE_DEFAULT_MODEL_ID).toBe("kilo/auto");
     });
   });
 
@@ -52,7 +42,7 @@
     it("returns correct model shape", () => {
       const model = buildKilocodeModelDefinition();
       expect(model.id).toBe(KILOCODE_DEFAULT_MODEL_ID);
-      expect(model.name).toBe("Claude Opus 4.6");
+      expect(model.name).toBe("Kilo Auto");
       expect(model.reasoning).toBe(true);
       expect(model.input).toEqual(["text", "image"]);
       expect(model.contextWindow).toBe(KILOCODE_DEFAULT_CONTEXT_WINDOW);
diff --git a/src/providers/kilocode-shared.ts b/src/providers/kilocode-shared.ts
index 760488fe01e..a06ba873e54 100644
--- a/src/providers/kilocode-shared.ts
+++ b/src/providers/kilocode-shared.ts
@@ -1,7 +1,7 @@
 export const KILOCODE_BASE_URL = "https://api.kilo.ai/api/gateway/";
-export const KILOCODE_DEFAULT_MODEL_ID = "anthropic/claude-opus-4.6";
+export const KILOCODE_DEFAULT_MODEL_ID = "kilo/auto";
 export const KILOCODE_DEFAULT_MODEL_REF = `kilocode/${KILOCODE_DEFAULT_MODEL_ID}`;
-export const KILOCODE_DEFAULT_MODEL_NAME = "Claude Opus 4.6";
+export const KILOCODE_DEFAULT_MODEL_NAME = "Kilo Auto";
 export type KilocodeModelCatalogEntry = {
   id: string;
   name: string;
@@ -10,6 +10,12 @@ export type KilocodeModelCatalogEntry = {
   contextWindow?: number;
   maxTokens?: number;
 };
+/**
+ * Static fallback catalog — used by the sync onboarding path and as a
+ * fallback when dynamic model discovery from the gateway API fails.
+ * The full model list is fetched dynamically by {@link discoverKilocodeModels}
+ * in `src/agents/kilocode-models.ts`.
+ */
 export const KILOCODE_MODEL_CATALOG: KilocodeModelCatalogEntry[] = [
   {
     id: KILOCODE_DEFAULT_MODEL_ID,
@@ -19,70 +25,6 @@ export const KILOCODE_MODEL_CATALOG: KilocodeModelCatalogEntry[] = [
     contextWindow: 1000000,
     maxTokens: 128000,
   },
-  {
-    id: "z-ai/glm-5:free",
-    name: "GLM-5 (Free)",
-    reasoning: true,
-    input: ["text"],
-    contextWindow: 202800,
-    maxTokens: 131072,
-  },
-  {
-    id: "minimax/minimax-m2.5:free",
-    name: "MiniMax M2.5 (Free)",
-    reasoning: true,
-    input: ["text"],
-    contextWindow: 204800,
-    maxTokens: 131072,
-  },
-  {
-    id: "anthropic/claude-sonnet-4.5",
-    name: "Claude Sonnet 4.5",
-    reasoning: true,
-    input: ["text", "image"],
-    contextWindow: 1000000,
-    maxTokens: 64000,
-  },
-  {
-    id: "openai/gpt-5.2",
-    name: "GPT-5.2",
-    reasoning: true,
-    input: ["text", "image"],
-    contextWindow: 400000,
-    maxTokens: 128000,
-  },
-  {
-    id: "google/gemini-3-pro-preview",
-    name: "Gemini 3 Pro Preview",
-    reasoning: true,
-    input: ["text", "image"],
-    contextWindow: 1048576,
-    maxTokens: 65536,
-  },
-  {
-    id: "google/gemini-3-flash-preview",
-    name: "Gemini 3 Flash Preview",
-    reasoning: true,
-    input: ["text", "image"],
-    contextWindow: 1048576,
-    maxTokens: 65535,
-  },
-  {
-    id: "x-ai/grok-code-fast-1",
-    name: "Grok Code Fast 1",
-    reasoning: true,
-    input: ["text"],
-    contextWindow: 256000,
-    maxTokens: 10000,
-  },
-  {
-    id: "moonshotai/kimi-k2.5",
-    name: "Kimi K2.5",
-    reasoning: true,
-    input: ["text", "image"],
-    contextWindow: 262144,
-    maxTokens: 65535,
-  },
 ];
 export const KILOCODE_DEFAULT_CONTEXT_WINDOW = 1000000;
 export const KILOCODE_DEFAULT_MAX_TOKENS = 128000;