From b938e6398b54409ee70d208cf7f90fba3e2b33c1 Mon Sep 17 00:00:00 2001 From: Sliverp <38134380+sliverp@users.noreply.github.com> Date: Tue, 21 Apr 2026 10:02:57 +0800 Subject: [PATCH] feat: add tiered model pricing support (#67605) Adds tiered model pricing support for cost tracking, keeps configured pricing ahead of cached catalog values, and includes latest Moonshot Kimi K2.6/K2.5 cost estimates. Thanks @sliverp. --- CHANGELOG.md | 1 + docs/providers/moonshot.md | 10 +- extensions/moonshot/provider-catalog.test.ts | 12 + extensions/moonshot/provider-catalog.ts | 16 +- .../reply/agent-runner-usage-line.ts | 14 +- src/commands/channels/status.ts | 2 +- src/config/defaults.ts | 1 + src/config/schema.base.generated.ts | 45 +++ src/config/types.models.ts | 12 + src/config/zod-schema.core.ts | 13 + src/gateway/model-pricing-cache-state.ts | 11 + src/gateway/model-pricing-cache.test.ts | 332 +++++++++++++++- src/gateway/model-pricing-cache.ts | 212 +++++++++- .../server-methods/channels.status.test.ts | 29 ++ src/gateway/server-methods/channels.ts | 200 ++++++---- src/infra/session-cost-usage.ts | 14 +- src/utils/usage-format.test.ts | 365 ++++++++++++++++++ src/utils/usage-format.ts | 180 ++++++++- 18 files changed, 1351 insertions(+), 118 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f72e07dfe8..8b44a4b49e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Changes +- Models/costs: support tiered model pricing from cached catalogs and configured models, and include bundled Moonshot Kimi K2.6/K2.5 cost estimates for token-usage reports. (#67605) Thanks @sliverp. - Plugins/tests: reuse plugin loader alias and Jiti config resolution across repeated same-context loads, reducing import-heavy test overhead. (#69316) Thanks @amknight. - Cron: split runtime execution state into `jobs-state.json` so `jobs.json` stays stable for git-tracked job definitions. (#63105) Thanks @Feelw00. 
- Agents/compaction: send opt-in start and completion notices during context compaction. (#67830) Thanks @feniix. diff --git a/docs/providers/moonshot.md b/docs/providers/moonshot.md index 28b597b61e9..b46e1e4550d 100644 --- a/docs/providers/moonshot.md +++ b/docs/providers/moonshot.md @@ -31,6 +31,12 @@ Moonshot and Kimi Coding are **separate providers**. Keys are not interchangeabl [//]: # "moonshot-kimi-k2-ids:end" +Bundled cost estimates for current Moonshot-hosted K2 models use Moonshot's +published pay-as-you-go rates: Kimi K2.6 is $0.16/MTok cache hit, +$0.95/MTok input, and $4.00/MTok output; Kimi K2.5 is $0.10/MTok cache hit, +$0.60/MTok input, and $3.00/MTok output. Other legacy catalog entries keep +zero-cost placeholders unless you override them in config. + ## Getting started Choose your provider and follow the setup steps. @@ -108,7 +114,7 @@ Choose your provider and follow the setup steps. name: "Kimi K2.6", reasoning: false, input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + cost: { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 }, contextWindow: 262144, maxTokens: 262144, }, @@ -117,7 +123,7 @@ Choose your provider and follow the setup steps. 
name: "Kimi K2.5", reasoning: false, input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + cost: { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 }, contextWindow: 262144, maxTokens: 262144, }, diff --git a/extensions/moonshot/provider-catalog.test.ts b/extensions/moonshot/provider-catalog.test.ts index 5671df98f37..f4a65b4d042 100644 --- a/extensions/moonshot/provider-catalog.test.ts +++ b/extensions/moonshot/provider-catalog.test.ts @@ -19,6 +19,18 @@ describe("moonshot provider catalog", () => { "kimi-k2-thinking-turbo", "kimi-k2-turbo", ]); + expect(provider.models.find((model) => model.id === "kimi-k2.6")?.cost).toEqual({ + input: 0.95, + output: 4, + cacheRead: 0.16, + cacheWrite: 0, + }); + expect(provider.models.find((model) => model.id === "kimi-k2.5")?.cost).toEqual({ + input: 0.6, + output: 3, + cacheRead: 0.1, + cacheWrite: 0, + }); }); it("opts native Moonshot baseUrls into streaming usage only inside the extension", () => { diff --git a/extensions/moonshot/provider-catalog.ts b/extensions/moonshot/provider-catalog.ts index 94548496766..b3cbd94ac00 100644 --- a/extensions/moonshot/provider-catalog.ts +++ b/extensions/moonshot/provider-catalog.ts @@ -15,6 +15,18 @@ const MOONSHOT_DEFAULT_COST = { cacheRead: 0, cacheWrite: 0, }; +const MOONSHOT_K2_6_COST = { + input: 0.95, + output: 4, + cacheRead: 0.16, + cacheWrite: 0, +}; +const MOONSHOT_K2_5_COST = { + input: 0.6, + output: 3, + cacheRead: 0.1, + cacheWrite: 0, +}; const MOONSHOT_MODEL_CATALOG = [ { @@ -22,7 +34,7 @@ const MOONSHOT_MODEL_CATALOG = [ name: "Kimi K2.6", reasoning: false, input: ["text", "image"], - cost: MOONSHOT_DEFAULT_COST, + cost: MOONSHOT_K2_6_COST, contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW, maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS, }, @@ -31,7 +43,7 @@ const MOONSHOT_MODEL_CATALOG = [ name: "Kimi K2.5", reasoning: false, input: ["text", "image"], - cost: MOONSHOT_DEFAULT_COST, + cost: MOONSHOT_K2_5_COST, contextWindow: 
MOONSHOT_DEFAULT_CONTEXT_WINDOW, maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS, }, diff --git a/src/auto-reply/reply/agent-runner-usage-line.ts b/src/auto-reply/reply/agent-runner-usage-line.ts index 013c90c57dc..ceef4dfa6d7 100644 --- a/src/auto-reply/reply/agent-runner-usage-line.ts +++ b/src/auto-reply/reply/agent-runner-usage-line.ts @@ -1,4 +1,9 @@ -import { estimateUsageCost, formatTokenCount, formatUsd } from "../../utils/usage-format.js"; +import { + estimateUsageCost, + formatTokenCount, + formatUsd, + type ModelCostConfig, +} from "../../utils/usage-format.js"; import type { ReplyPayload } from "../types.js"; export const formatResponseUsageLine = (params: { @@ -9,12 +14,7 @@ export const formatResponseUsageLine = (params: { cacheWrite?: number; }; showCost: boolean; - costConfig?: { - input: number; - output: number; - cacheRead: number; - cacheWrite: number; - }; + costConfig?: ModelCostConfig; }): string | null => { const usage = params.usage; if (!usage) { diff --git a/src/commands/channels/status.ts b/src/commands/channels/status.ts index dae73b3e0b0..0ac22f18eea 100644 --- a/src/commands/channels/status.ts +++ b/src/commands/channels/status.ts @@ -148,7 +148,7 @@ export async function channelsStatusCommand( opts: ChannelsStatusOptions, runtime: RuntimeEnv = defaultRuntime, ) { - const timeoutMs = Number(opts.timeout ?? 10_000); + const timeoutMs = Number(opts.timeout ?? (opts.probe ? 30_000 : 10_000)); const statusLabel = opts.probe ? "Checking channel status (probe)…" : "Checking channel status…"; const shouldLogStatus = opts.json !== true && !process.stderr.isTTY; if (shouldLogStatus) { diff --git a/src/config/defaults.ts b/src/config/defaults.ts index 7dbda01e857..9f9812f35ba 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -62,6 +62,7 @@ function resolveModelCost( cacheRead: typeof raw?.cacheRead === "number" ? raw.cacheRead : DEFAULT_MODEL_COST.cacheRead, cacheWrite: typeof raw?.cacheWrite === "number" ? 
raw.cacheWrite : DEFAULT_MODEL_COST.cacheWrite, + ...(raw?.tieredPricing ? { tieredPricing: raw.tieredPricing } : {}), }; } diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 70d36406a17..26244483fb7 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -2767,6 +2767,51 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { cacheWrite: { type: "number", }, + tieredPricing: { + type: "array", + items: { + type: "object", + properties: { + input: { + type: "number", + }, + output: { + type: "number", + }, + cacheRead: { + type: "number", + }, + cacheWrite: { + type: "number", + }, + range: { + anyOf: [ + { + type: "array", + items: [ + { + type: "number", + }, + { + type: "number", + }, + ], + }, + { + type: "array", + items: [ + { + type: "number", + }, + ], + }, + ], + }, + }, + required: ["input", "output", "cacheRead", "cacheWrite", "range"], + additionalProperties: false, + }, + }, }, additionalProperties: false, }, diff --git a/src/config/types.models.ts b/src/config/types.models.ts index f4a7507e44d..6c63fc9db6d 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -61,6 +61,18 @@ export type ModelDefinitionConfig = { output: number; cacheRead: number; cacheWrite: number; + /** Optional tiered pricing. When present, cost calculation uses + * per-tier rates instead of the flat rates above. Prices are + * USD / million tokens; ranges are half-open `[start, end)` on the + * input-token axis. */ + tieredPricing?: Array<{ + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + /** Bounded tier: `[start, end)`. Open-ended top tier: `[start]` (normalized to `[start, Infinity]` at load time). 
*/ + range: [number, number] | [number]; + }>; }; contextWindow: number; /** diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index d284e0f7af1..cd9bbdabb92 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -316,6 +316,19 @@ export const ModelDefinitionSchema = z output: z.number().optional(), cacheRead: z.number().optional(), cacheWrite: z.number().optional(), + tieredPricing: z + .array( + z + .object({ + input: z.number(), + output: z.number(), + cacheRead: z.number(), + cacheWrite: z.number(), + range: z.union([z.tuple([z.number(), z.number()]), z.tuple([z.number()])]), + }) + .strict(), + ) + .optional(), }) .strict() .optional(), diff --git a/src/gateway/model-pricing-cache-state.ts b/src/gateway/model-pricing-cache-state.ts index ab1ac52776f..521f8a61081 100644 --- a/src/gateway/model-pricing-cache-state.ts +++ b/src/gateway/model-pricing-cache-state.ts @@ -2,11 +2,22 @@ import { normalizeModelRef } from "../agents/model-selection.js"; import { normalizeProviderId } from "../agents/provider-id.js"; import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; +export type CachedPricingTier = { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + /** [startTokens, endTokens) — half-open interval on the input token axis. */ + range: [number, number]; +}; + export type CachedModelPricing = { input: number; output: number; cacheRead: number; cacheWrite: number; + /** Optional tiered pricing tiers sourced from LiteLLM or local config. 
*/ + tieredPricing?: CachedPricingTier[]; }; let cachedPricing = new Map(); diff --git a/src/gateway/model-pricing-cache.test.ts b/src/gateway/model-pricing-cache.test.ts index 6743728eb8e..5e8dfbeb7fd 100644 --- a/src/gateway/model-pricing-cache.test.ts +++ b/src/gateway/model-pricing-cache.test.ts @@ -134,9 +134,10 @@ describe("model-pricing-cache", () => { }, } as unknown as OpenClawConfig; - const fetchImpl = withFetchPreconnect( - async () => - new Response( + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + if (url.includes("openrouter.ai")) { + return new Response( JSON.stringify({ data: [ { @@ -169,8 +170,14 @@ describe("model-pricing-cache", () => { status: 200, headers: { "Content-Type": "application/json" }, }, - ), - ); + ); + } + // LiteLLM — return empty object (no tiered pricing for these models) + return new Response(JSON.stringify({}), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + }); await refreshGatewayModelPricingCache({ config, fetchImpl }); @@ -210,9 +217,10 @@ describe("model-pricing-cache", () => { }, } as unknown as OpenClawConfig; - const fetchImpl = withFetchPreconnect( - async () => - new Response( + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? 
input.href : input.url; + if (url.includes("openrouter.ai")) { + return new Response( JSON.stringify({ data: [ { @@ -228,8 +236,13 @@ describe("model-pricing-cache", () => { status: 200, headers: { "Content-Type": "application/json" }, }, - ), - ); + ); + } + return new Response(JSON.stringify({}), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + }); await expect(refreshGatewayModelPricingCache({ config, fetchImpl })).resolves.toBeUndefined(); expect( @@ -241,4 +254,303 @@ describe("model-pricing-cache", () => { cacheWrite: 0, }); }); + + it("loads tiered pricing from LiteLLM and merges with OpenRouter flat pricing", async () => { + const config = { + agents: { + defaults: { + model: { primary: "volcengine/doubao-seed-2-0-pro" }, + }, + }, + } as unknown as OpenClawConfig; + + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + if (url.includes("openrouter.ai")) { + // OpenRouter does not have this model + return new Response(JSON.stringify({ data: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } + // LiteLLM catalog + return new Response( + JSON.stringify({ + "volcengine/doubao-seed-2-0-pro": { + input_cost_per_token: 4.6e-7, + output_cost_per_token: 2.3e-6, + litellm_provider: "volcengine", + tiered_pricing: [ + { + input_cost_per_token: 4.6e-7, + output_cost_per_token: 2.3e-6, + range: [0, 32000], + }, + { + input_cost_per_token: 7e-7, + output_cost_per_token: 3.5e-6, + range: [32000, 128000], + }, + { + input_cost_per_token: 1.4e-6, + output_cost_per_token: 7e-6, + range: [128000, 256000], + }, + ], + }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + }); + + await refreshGatewayModelPricingCache({ config, fetchImpl }); + + const pricing = getCachedGatewayModelPricing({ + provider: "volcengine", + model: "doubao-seed-2-0-pro", + }); + + 
expect(pricing).toBeDefined(); + expect(pricing!.input).toBeCloseTo(0.46); + expect(pricing!.output).toBeCloseTo(2.3); + expect(pricing!.tieredPricing).toHaveLength(3); + expect(pricing!.tieredPricing![0]).toEqual({ + input: expect.closeTo(0.46), + output: expect.closeTo(2.3), + cacheRead: 0, + cacheWrite: 0, + range: [0, 32000], + }); + expect(pricing!.tieredPricing![2].range).toEqual([128000, 256000]); + }); + + it("normalizes LiteLLM open-ended range [start] to [start, Infinity]", async () => { + const config = { + agents: { + defaults: { + model: { primary: "volcengine/doubao-open" }, + }, + }, + } as unknown as OpenClawConfig; + + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + if (url.includes("openrouter.ai")) { + return new Response(JSON.stringify({ data: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } + return new Response( + JSON.stringify({ + "volcengine/doubao-open": { + input_cost_per_token: 4.6e-7, + output_cost_per_token: 2.3e-6, + litellm_provider: "volcengine", + tiered_pricing: [ + { + input_cost_per_token: 4.6e-7, + output_cost_per_token: 2.3e-6, + range: [0, 32000], + }, + { + input_cost_per_token: 7e-7, + output_cost_per_token: 3.5e-6, + range: [32000], + }, + ], + }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + }); + + await refreshGatewayModelPricingCache({ config, fetchImpl }); + + const pricing = getCachedGatewayModelPricing({ + provider: "volcengine", + model: "doubao-open", + }); + + expect(pricing).toBeDefined(); + expect(pricing!.tieredPricing).toHaveLength(2); + expect(pricing!.tieredPricing![0].range).toEqual([0, 32000]); + expect(pricing!.tieredPricing![1].range).toEqual([32000, Infinity]); + }); + + it("merges OpenRouter flat pricing with LiteLLM tiered pricing", async () => { + const config = { + agents: { + defaults: { + model: 
{ primary: "dashscope/qwen-plus" }, + }, + }, + } as unknown as OpenClawConfig; + + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + if (url.includes("openrouter.ai")) { + return new Response( + JSON.stringify({ + data: [ + { + id: "dashscope/qwen-plus", + pricing: { + prompt: "0.0000004", + completion: "0.0000024", + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + } + return new Response( + JSON.stringify({ + "dashscope/qwen-plus": { + input_cost_per_token: 4e-7, + output_cost_per_token: 2.4e-6, + litellm_provider: "dashscope", + tiered_pricing: [ + { + input_cost_per_token: 4e-7, + output_cost_per_token: 2.4e-6, + range: [0, 256000], + }, + { + input_cost_per_token: 5e-7, + output_cost_per_token: 3e-6, + range: [256000, 1000000], + }, + ], + }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + }); + + await refreshGatewayModelPricingCache({ config, fetchImpl }); + + const pricing = getCachedGatewayModelPricing({ + provider: "dashscope", + model: "qwen-plus", + }); + + expect(pricing).toBeDefined(); + // OpenRouter base flat pricing is used + expect(pricing!.input).toBeCloseTo(0.4); + expect(pricing!.output).toBeCloseTo(2.4); + // LiteLLM tiered pricing is merged in + expect(pricing!.tieredPricing).toHaveLength(2); + expect(pricing!.tieredPricing![1].range).toEqual([256000, 1000000]); + }); + + it("falls back gracefully when LiteLLM fetch fails", async () => { + const config = { + agents: { + defaults: { + model: { primary: "anthropic/claude-opus-4-6" }, + }, + }, + } as unknown as OpenClawConfig; + + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? 
input.href : input.url; + if (url.includes("openrouter.ai")) { + return new Response( + JSON.stringify({ + data: [ + { + id: "anthropic/claude-opus-4.6", + pricing: { + prompt: "0.000005", + completion: "0.000025", + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + } + // LiteLLM fails + return new Response("Internal Server Error", { status: 500 }); + }); + + await refreshGatewayModelPricingCache({ config, fetchImpl }); + + // OpenRouter pricing still works + expect( + getCachedGatewayModelPricing({ provider: "anthropic", model: "claude-opus-4-6" }), + ).toEqual({ + input: 5, + output: 25, + cacheRead: 0, + cacheWrite: 0, + }); + }); + + it("treats oversized LiteLLM catalog responses as source failures", async () => { + const config = { + agents: { + defaults: { + model: { primary: "moonshot/kimi-k2.6" }, + }, + }, + } as unknown as OpenClawConfig; + + const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? 
input.href : input.url; + if (url.includes("openrouter.ai")) { + return new Response( + JSON.stringify({ + data: [ + { + id: "moonshotai/kimi-k2.6", + pricing: { + prompt: "0.00000095", + completion: "0.000004", + input_cache_read: "0.00000016", + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + } + return new Response("{}", { + status: 200, + headers: { + "Content-Type": "application/json", + "Content-Length": "6000000", + }, + }); + }); + + await refreshGatewayModelPricingCache({ config, fetchImpl }); + + expect(getCachedGatewayModelPricing({ provider: "moonshot", model: "kimi-k2.6" })).toEqual({ + input: 0.95, + output: 4, + cacheRead: 0.16, + cacheWrite: 0, + }); + }); }); diff --git a/src/gateway/model-pricing-cache.ts b/src/gateway/model-pricing-cache.ts index 20dfe7dfe90..5003328d160 100644 --- a/src/gateway/model-pricing-cache.ts +++ b/src/gateway/model-pricing-cache.ts @@ -19,6 +19,7 @@ import { getGatewayModelPricingCacheMeta as getGatewayModelPricingCacheMetaState, replaceGatewayModelPricingCache, type CachedModelPricing, + type CachedPricingTier, } from "./model-pricing-cache-state.js"; type OpenRouterPricingEntry = { @@ -36,8 +37,11 @@ type OpenRouterModelPayload = { export { getCachedGatewayModelPricing }; const OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models"; +const LITELLM_PRICING_URL = + "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"; const CACHE_TTL_MS = 24 * 60 * 60_000; const FETCH_TIMEOUT_MS = 15_000; +const MAX_PRICING_CATALOG_BYTES = 5 * 1024 * 1024; const PROVIDER_ALIAS_TO_OPENROUTER: Record = { "google-gemini-cli": "google", kimi: "moonshotai", @@ -98,7 +102,8 @@ function toPricePerMillion(value: number | null): number { if (value === null || value < 0 || !Number.isFinite(value)) { return 0; } - return value * 1_000_000; + const scaled = value * 1_000_000; + return Number.isFinite(scaled) ? 
scaled : 0; } function parseOpenRouterPricing(value: unknown): CachedModelPricing | null { @@ -119,6 +124,136 @@ function parseOpenRouterPricing(value: unknown): CachedModelPricing | null { }; } +async function readPricingJsonObject( + response: Response, + source: string, +): Promise> { + const contentLength = parseNumberString(response.headers.get("content-length")); + if (contentLength !== null && contentLength > MAX_PRICING_CATALOG_BYTES) { + throw new Error(`${source} pricing response too large: ${contentLength} bytes`); + } + const buffer = await response.arrayBuffer(); + if (buffer.byteLength > MAX_PRICING_CATALOG_BYTES) { + throw new Error(`${source} pricing response too large: ${buffer.byteLength} bytes`); + } + const payload = JSON.parse(Buffer.from(buffer).toString("utf8")) as unknown; + if (!payload || typeof payload !== "object" || Array.isArray(payload)) { + throw new Error(`${source} pricing response is not a JSON object`); + } + return payload as Record; +} + +// --------------------------------------------------------------------------- +// LiteLLM tiered-pricing parsing +// --------------------------------------------------------------------------- + +type LiteLLMModelEntry = Record; + +type LiteLLMTierRaw = { + input_cost_per_token?: unknown; + output_cost_per_token?: unknown; + cache_read_input_token_cost?: unknown; + range?: unknown; +}; + +function parseLiteLLMTieredPricing(tiers: unknown): CachedPricingTier[] | undefined { + if (!Array.isArray(tiers) || tiers.length === 0) { + return undefined; + } + const result: CachedPricingTier[] = []; + for (const raw of tiers) { + if (!raw || typeof raw !== "object") { + continue; + } + const tier = raw as LiteLLMTierRaw; + const inputPerToken = parseNumberString(tier.input_cost_per_token); + const outputPerToken = parseNumberString(tier.output_cost_per_token); + if (inputPerToken === null || outputPerToken === null) { + continue; + } + const range = tier.range; + if (!Array.isArray(range) || 
range.length < 1) { + continue; + } + const start = parseNumberString(range[0]); + if (start === null) { + continue; + } + // Allow open-ended ranges: [128000], [128000, -1], [128000, null] + const rawEnd = range.length >= 2 ? parseNumberString(range[1]) : null; + const end = rawEnd === null || rawEnd <= start ? Infinity : rawEnd; + if ( + !Number.isFinite(inputPerToken) || + !Number.isFinite(outputPerToken) || + inputPerToken < 0 || + outputPerToken < 0 + ) { + continue; + } + result.push({ + input: toPricePerMillion(inputPerToken), + output: toPricePerMillion(outputPerToken), + cacheRead: toPricePerMillion(parseNumberString(tier.cache_read_input_token_cost)), + cacheWrite: 0, + range: [start, end], + }); + } + return result.length > 0 ? result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined; +} + +function parseLiteLLMPricing(entry: LiteLLMModelEntry): CachedModelPricing | null { + const inputPerToken = parseNumberString(entry.input_cost_per_token); + const outputPerToken = parseNumberString(entry.output_cost_per_token); + if (inputPerToken === null || outputPerToken === null) { + return null; + } + const pricing: CachedModelPricing = { + input: toPricePerMillion(inputPerToken), + output: toPricePerMillion(outputPerToken), + cacheRead: toPricePerMillion(parseNumberString(entry.cache_read_input_token_cost)), + cacheWrite: 0, + }; + const tieredPricing = parseLiteLLMTieredPricing(entry.tiered_pricing); + if (tieredPricing) { + pricing.tieredPricing = tieredPricing; + } + return pricing; +} + +type LiteLLMPricingCatalog = Map; + +async function fetchLiteLLMPricingCatalog(fetchImpl: typeof fetch): Promise { + const response = await fetchImpl(LITELLM_PRICING_URL, { + headers: { Accept: "application/json" }, + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + }); + if (!response.ok) { + throw new Error(`LiteLLM pricing fetch failed: HTTP ${response.status}`); + } + const payload = await readPricingJsonObject(response, "LiteLLM"); + const catalog: 
LiteLLMPricingCatalog = new Map(); + for (const [key, value] of Object.entries(payload)) { + if (!value || typeof value !== "object") { + continue; + } + const entry = value as LiteLLMModelEntry; + const pricing = parseLiteLLMPricing(entry); + if (!pricing) { + continue; + } + catalog.set(key, pricing); + } + return catalog; +} + +function resolveLiteLLMPricingForRef(params: { + ref: ModelRef; + catalog: LiteLLMPricingCatalog; +}): CachedModelPricing | undefined { + // Only use provider-qualified key to avoid cross-provider pricing collisions. + return params.catalog.get(`${params.ref.provider}/${params.ref.model}`); +} + function canonicalizeOpenRouterProvider(provider: string): string { const normalized = normalizeModelRef(provider, "placeholder").provider; return PROVIDER_ALIAS_TO_OPENROUTER[normalized] ?? normalized; @@ -328,7 +463,7 @@ async function fetchOpenRouterPricingCatalog( if (!response.ok) { throw new Error(`OpenRouter /models failed: HTTP ${response.status}`); } - const payload = (await response.json()) as { data?: unknown }; + const payload = await readPricingJsonObject(response, "OpenRouter"); const entries = Array.isArray(payload.data) ? payload.data : []; const catalog = new Map(); for (const entry of entries) { @@ -393,7 +528,23 @@ export async function refreshGatewayModelPricingCache(params: { return; } - const catalogById = await fetchOpenRouterPricingCatalog(fetchImpl); + // Fetch both pricing catalogs in parallel. Each source is + // independently optional — a failure in one does not block the other. 
+ let openRouterFailed = false; + let litellmFailed = false; + const [catalogById, litellmCatalog] = await Promise.all([ + fetchOpenRouterPricingCatalog(fetchImpl).catch((error: unknown) => { + log.warn(`OpenRouter pricing fetch failed: ${String(error)}`); + openRouterFailed = true; + return new Map(); + }), + fetchLiteLLMPricingCatalog(fetchImpl).catch((error: unknown) => { + log.warn(`LiteLLM pricing fetch failed: ${String(error)}`); + litellmFailed = true; + return new Map() as LiteLLMPricingCatalog; + }), + ]); + const catalogByNormalizedId = new Map(); for (const entry of catalogById.values()) { const normalizedId = canonicalizeOpenRouterLookupId(entry.id); @@ -405,15 +556,62 @@ export async function refreshGatewayModelPricingCache(params: { const nextPricing = new Map(); for (const ref of refs) { - const pricing = resolveCatalogPricingForRef({ + // 1. Try OpenRouter first (existing behavior — flat pricing) + const openRouterPricing = resolveCatalogPricingForRef({ ref, catalogById, catalogByNormalizedId, }); - if (!pricing) { - continue; + + // 2. Try LiteLLM (may contain tiered pricing) + const litellmPricing = resolveLiteLLMPricingForRef({ + ref, + catalog: litellmCatalog, + }); + + // Merge strategy: OpenRouter provides the base flat pricing; + // LiteLLM enriches with tieredPricing when available. + // If only one source has data, use that one. + if (openRouterPricing && litellmPricing?.tieredPricing) { + // Both sources present and LiteLLM has tiers — merge. + nextPricing.set(modelKey(ref.provider, ref.model), { + ...openRouterPricing, + tieredPricing: litellmPricing.tieredPricing, + }); + } else if (openRouterPricing) { + // Prefer OpenRouter flat pricing when LiteLLM has no tiers to contribute. + nextPricing.set(modelKey(ref.provider, ref.model), openRouterPricing); + } else if (litellmPricing) { + // Only LiteLLM has data — use it as-is. 
+ nextPricing.set(modelKey(ref.provider, ref.model), litellmPricing); + } + } + + // When either upstream source failed, preserve previously-cached entries + // for any models that the refresh could not resolve. This prevents a + // single-source outage from silently dropping pricing for models that + // depended on the failed source. + if (openRouterFailed || litellmFailed) { + const existingMeta = getGatewayModelPricingCacheMetaState(); + if (nextPricing.size === 0 && existingMeta.size > 0) { + // Both sources failed — retain the entire existing cache. + log.warn("Both pricing sources returned empty data — retaining existing cache"); + scheduleRefresh({ config: params.config, fetchImpl }); + return; + } + // Partial failure — back-fill missing models from the existing cache. + for (const ref of refs) { + const key = modelKey(ref.provider, ref.model); + if (!nextPricing.has(key)) { + const existing = getCachedGatewayModelPricing({ + provider: ref.provider, + model: ref.model, + }); + if (existing) { + nextPricing.set(key, existing); + } + } } - nextPricing.set(modelKey(ref.provider, ref.model), pricing); } replaceGatewayModelPricingCache(nextPricing); diff --git a/src/gateway/server-methods/channels.status.test.ts b/src/gateway/server-methods/channels.status.test.ts index a7434043659..8235a55f7aa 100644 --- a/src/gateway/server-methods/channels.status.test.ts +++ b/src/gateway/server-methods/channels.status.test.ts @@ -133,4 +133,33 @@ describe("channelsHandlers channels.status", () => { undefined, ); }); + + it("caps probe timeout before passing it to channel plugins", async () => { + const autoEnabledConfig = { autoEnabled: true }; + const probeAccount = vi.fn(async () => ({ ok: true })); + mocks.applyPluginAutoEnable.mockReturnValue({ config: autoEnabledConfig, changes: [] }); + mocks.listChannelPlugins.mockReturnValue([ + { + id: "whatsapp", + config: { + listAccountIds: () => ["default"], + resolveAccount: () => ({}), + isEnabled: () => true, + isConfigured: 
async () => true, + }, + status: { + probeAccount, + }, + }, + ]); + + await channelsHandlers["channels.status"](createOptions({ probe: true, timeoutMs: 999_999 })); + + expect(probeAccount).toHaveBeenCalledWith( + expect.objectContaining({ + timeoutMs: 30_000, + cfg: autoEnabledConfig, + }), + ); + }); }); diff --git a/src/gateway/server-methods/channels.ts b/src/gateway/server-methods/channels.ts index 6ef56f1ae7d..b8daa362314 100644 --- a/src/gateway/server-methods/channels.ts +++ b/src/gateway/server-methods/channels.ts @@ -16,6 +16,7 @@ import { getChannelActivity } from "../../infra/channel-activity.js"; import { DEFAULT_ACCOUNT_ID } from "../../routing/session-key.js"; import { defaultRuntime } from "../../runtime.js"; import { normalizeOptionalString } from "../../shared/string-coerce.js"; +import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js"; import { ErrorCodes, errorShape, @@ -41,6 +42,17 @@ type ChannelStartPayload = { started: boolean; }; +const CHANNEL_STATUS_MAX_TIMEOUT_MS = 30_000; +const CHANNEL_STATUS_PROBE_CONCURRENCY = 5; + +function resolveChannelsStatusTimeoutMs(params: { probe: boolean; timeoutMsRaw: unknown }): number { + const fallback = params.probe ? CHANNEL_STATUS_MAX_TIMEOUT_MS : 10_000; + if (typeof params.timeoutMsRaw !== "number" || !Number.isFinite(params.timeoutMsRaw)) { + return fallback; + } + return Math.min(Math.max(1000, params.timeoutMsRaw), CHANNEL_STATUS_MAX_TIMEOUT_MS); +} + function resolveRuntimeAccountSnapshot(params: { runtime: ChannelRuntimeSnapshot; channelId: ChannelId; @@ -141,7 +153,7 @@ export const channelsHandlers: GatewayRequestHandlers = { } const probe = (params as { probe?: boolean }).probe === true; const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs; - const timeoutMs = typeof timeoutMsRaw === "number" ? 
Math.max(1000, timeoutMsRaw) : 10_000; + const timeoutMs = resolveChannelsStatusTimeoutMs({ probe, timeoutMsRaw }); const cfg = applyPluginAutoEnable({ config: loadConfig(), env: process.env, @@ -174,6 +186,70 @@ export const channelsHandlers: GatewayRequestHandlers = { typeof account !== "object" || (account as { enabled?: boolean }).enabled !== false; + const buildAccountSnapshot = async ( + channelId: ChannelId, + plugin: ChannelPlugin, + accountId: string, + defaultAccountId: string, + ) => { + const account = plugin.config.resolveAccount(cfg, accountId); + const enabled = isAccountEnabled(plugin, account); + let probeResult: unknown; + let lastProbeAt: number | null = null; + if (probe && enabled && plugin.status?.probeAccount) { + let configured = true; + if (plugin.config.isConfigured) { + configured = await plugin.config.isConfigured(account, cfg); + } + if (configured) { + probeResult = await plugin.status.probeAccount({ + account, + timeoutMs, + cfg, + }); + lastProbeAt = Date.now(); + } + } + let auditResult: unknown; + if (probe && enabled && plugin.status?.auditAccount) { + let configured = true; + if (plugin.config.isConfigured) { + configured = await plugin.config.isConfigured(account, cfg); + } + if (configured) { + auditResult = await plugin.status.auditAccount({ + account, + timeoutMs, + cfg, + probe: probeResult, + }); + } + } + const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId); + const snapshot = await buildChannelAccountSnapshot({ + plugin, + cfg, + accountId, + runtime: runtimeSnapshot, + probe: probeResult, + audit: auditResult, + }); + if (lastProbeAt) { + snapshot.lastProbeAt = lastProbeAt; + } + const activity = getChannelActivity({ + channel: channelId as never, + accountId, + }); + if (snapshot.lastInboundAt == null) { + snapshot.lastInboundAt = activity.inboundAt; + } + if (snapshot.lastOutboundAt == null) { + snapshot.lastOutboundAt = activity.outboundAt; + } + return { accountId: accountId, 
account, snapshot }; + }; + const buildChannelAccounts = async (channelId: ChannelId) => { const plugin = pluginMap.get(channelId); if (!plugin) { @@ -190,66 +266,20 @@ export const channelsHandlers: GatewayRequestHandlers = { cfg, accountIds, }); - const accounts: ChannelAccountSnapshot[] = []; const resolvedAccounts: Record = {}; - for (const accountId of accountIds) { - const account = plugin.config.resolveAccount(cfg, accountId); - const enabled = isAccountEnabled(plugin, account); - resolvedAccounts[accountId] = account; - let probeResult: unknown; - let lastProbeAt: number | null = null; - if (probe && enabled && plugin.status?.probeAccount) { - let configured = true; - if (plugin.config.isConfigured) { - configured = await plugin.config.isConfigured(account, cfg); - } - if (configured) { - probeResult = await plugin.status.probeAccount({ - account, - timeoutMs, - cfg, - }); - lastProbeAt = Date.now(); - } + const { results } = await runTasksWithConcurrency({ + tasks: accountIds.map( + (accountId) => async () => + await buildAccountSnapshot(channelId, plugin, accountId, defaultAccountId), + ), + limit: probe ? 
CHANNEL_STATUS_PROBE_CONCURRENCY : accountIds.length || 1, + }); + const accounts: ChannelAccountSnapshot[] = []; + for (const result of results) { + if (result) { + resolvedAccounts[result.accountId] = result.account; + accounts.push(result.snapshot); } - let auditResult: unknown; - if (probe && enabled && plugin.status?.auditAccount) { - let configured = true; - if (plugin.config.isConfigured) { - configured = await plugin.config.isConfigured(account, cfg); - } - if (configured) { - auditResult = await plugin.status.auditAccount({ - account, - timeoutMs, - cfg, - probe: probeResult, - }); - } - } - const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId); - const snapshot = await buildChannelAccountSnapshot({ - plugin, - cfg, - accountId, - runtime: runtimeSnapshot, - probe: probeResult, - audit: auditResult, - }); - if (lastProbeAt) { - snapshot.lastProbeAt = lastProbeAt; - } - const activity = getChannelActivity({ - channel: channelId as never, - accountId, - }); - if (snapshot.lastInboundAt == null) { - snapshot.lastInboundAt = activity.inboundAt; - } - if (snapshot.lastOutboundAt == null) { - snapshot.lastOutboundAt = activity.outboundAt; - } - accounts.push(snapshot); } const defaultAccount = accounts.find((entry) => entry.accountId === defaultAccountId) ?? accounts[0]; @@ -271,28 +301,36 @@ export const channelsHandlers: GatewayRequestHandlers = { const channelsMap = payload.channels as Record; const accountsMap = payload.channelAccounts as Record; const defaultAccountIdMap = payload.channelDefaultAccountId as Record; - for (const plugin of plugins) { - const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } = - await buildChannelAccounts(plugin.id); - const fallbackAccount = - resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId); - const summary = plugin.status?.buildChannelSummary - ? 
await plugin.status.buildChannelSummary({ - account: fallbackAccount, - cfg, - defaultAccountId, - snapshot: - defaultAccount ?? - ({ - accountId: defaultAccountId, - } as ChannelAccountSnapshot), - }) - : { - configured: defaultAccount?.configured ?? false, - }; - channelsMap[plugin.id] = summary; - accountsMap[plugin.id] = accounts; - defaultAccountIdMap[plugin.id] = defaultAccountId; + const { results: channelResults } = await runTasksWithConcurrency({ + tasks: plugins.map((plugin) => async () => { + const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } = + await buildChannelAccounts(plugin.id); + const fallbackAccount = + resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId); + const summary = plugin.status?.buildChannelSummary + ? await plugin.status.buildChannelSummary({ + account: fallbackAccount, + cfg, + defaultAccountId, + snapshot: + defaultAccount ?? + ({ + accountId: defaultAccountId, + } as ChannelAccountSnapshot), + }) + : { + configured: defaultAccount?.configured ?? false, + }; + return { pluginId: plugin.id, summary, accounts, defaultAccountId }; + }), + limit: probe ? 
CHANNEL_STATUS_PROBE_CONCURRENCY : plugins.length || 1, + }); + for (const result of channelResults) { + if (result) { + channelsMap[result.pluginId] = result.summary; + accountsMap[result.pluginId] = result.accounts; + defaultAccountIdMap[result.pluginId] = result.defaultAccountId; + } } respond(true, payload, undefined); diff --git a/src/infra/session-cost-usage.ts b/src/infra/session-cost-usage.ts index a7656dee8f3..8598cfc9e54 100644 --- a/src/infra/session-cost-usage.ts +++ b/src/infra/session-cost-usage.ts @@ -249,13 +249,23 @@ async function scanTranscriptFile(params: { continue; } - if (entry.usage && entry.costTotal === undefined) { + if (entry.usage) { const cost = resolveModelCostConfig({ provider: entry.provider, model: entry.model, config: params.config, }); - entry.costTotal = estimateUsageCost({ usage: entry.usage, cost }); + if (cost?.tieredPricing && cost.tieredPricing.length > 0) { + // When tiered pricing is configured, always recompute to override + // the flat-rate cost that the transport layer wrote into the transcript. + // Clear costBreakdown so downstream aggregation uses the recomputed total + // instead of the stale flat-rate breakdown from the transport layer. + entry.costTotal = estimateUsageCost({ usage: entry.usage, cost }); + entry.costBreakdown = undefined; + } else if (entry.costTotal === undefined) { + // Fill in missing cost estimates. 
+ entry.costTotal = estimateUsageCost({ usage: entry.usage, cost }); + } } params.onEntry(entry); diff --git a/src/utils/usage-format.test.ts b/src/utils/usage-format.test.ts index 1aca3c2c2b5..a54f2f32424 100644 --- a/src/utils/usage-format.test.ts +++ b/src/utils/usage-format.test.ts @@ -13,6 +13,7 @@ import { formatTokenCount, formatUsd, resolveModelCostConfig, + type PricingTier, } from "./usage-format.js"; describe("usage-format", () => { @@ -254,4 +255,368 @@ describe("usage-format", () => { cacheWrite: 0.8, }); }); + + // ----------------------------------------------------------------------- + // Tiered pricing tests + // ----------------------------------------------------------------------- + + it("uses flat pricing when tieredPricing is absent", () => { + const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0 }; + const total = estimateUsageCost({ + usage: { input: 1000, output: 500, cacheRead: 2000 }, + cost, + }); + expect(total).toBeCloseTo(0.003); + }); + + it("estimates cost with single-tier tiered pricing (equivalent to flat)", () => { + const tiers: PricingTier[] = [ + { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, range: [0, 1_000_000] }, + ]; + const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers }; + const total = estimateUsageCost({ + usage: { input: 1000, output: 500, cacheRead: 2000 }, + cost, + }); + // Same as flat: (1000*1 + 500*2 + 2000*0.5) / 1M = 3000/1M = 0.003 + expect(total).toBeCloseTo(0.003); + }); + + it("estimates cost with two tiers — input split across tiers", () => { + // Tier 1: [0, 32000) → input $0.30/M, output $1.50/M + // Tier 2: [32000, 128000) → input $0.50/M, output $2.50/M + const tiers: PricingTier[] = [ + { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] }, + { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] }, + ]; + const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers 
}; + + // 40000 input tokens, 10000 output tokens + // Tier 1 gets 32000/40000 = 80% of input → 32000 input tokens + // Tier 2 gets 8000/40000 = 20% of input → 8000 input tokens + // Input cost = (32000 * 0.3 + 8000 * 0.5) / 1M = (9600 + 4000) / 1M = 0.0136 + // Output cost = (10000 * 0.8 * 1.5 + 10000 * 0.2 * 2.5) / 1M = (12000 + 5000) / 1M = 0.017 + // Total = 0.0136 + 0.017 = 0.0306 + const total = estimateUsageCost({ + usage: { input: 40_000, output: 10_000 }, + cost, + }); + expect(total).toBeCloseTo(0.0306, 4); + }); + + it("estimates cost with three tiers — volcengine-style pricing", () => { + // Simulates volcengine/doubao pricing (per-million): + // Tier 1: [0, 32000) → in $0.46, out $2.30 + // Tier 2: [32000, 128000) → in $0.70, out $3.50 + // Tier 3: [128000, 256000) → in $1.40, out $7.00 + const tiers: PricingTier[] = [ + { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] }, + { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] }, + { input: 1.4, output: 7.0, cacheRead: 0, cacheWrite: 0, range: [128_000, 256_000] }, + ]; + const cost = { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers }; + + // 200000 input tokens, 5000 output tokens + // Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16 + // Tier 2: 96000 tokens, fraction = 96000/200000 = 0.48 + // Tier 3: 72000 tokens, fraction = 72000/200000 = 0.36 + // + // Input cost = (32000*0.46 + 96000*0.70 + 72000*1.40) / 1M + // = (14720 + 67200 + 100800) / 1M = 182720 / 1M = 0.18272 + // Output cost = 5000 * (0.16*2.3 + 0.48*3.5 + 0.36*7.0) / 1M + // = 5000 * (0.368 + 1.68 + 2.52) / 1M + // = 5000 * 4.568 / 1M = 22840 / 1M = 0.02284 + // Total = 0.18272 + 0.02284 = 0.20556 + const total = estimateUsageCost({ + usage: { input: 200_000, output: 5_000 }, + cost, + }); + expect(total).toBeCloseTo(0.20556, 4); + }); + + it("uses first tier rates for output when input is zero", () => { + const tiers: PricingTier[] = [ + { input: 
0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] }, + { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] }, + ]; + const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers }; + + const total = estimateUsageCost({ + usage: { input: 0, output: 10_000 }, + cost, + }); + // Falls back to first tier: 10000 * 1.5 / 1M = 0.015 + expect(total).toBeCloseTo(0.015, 6); + }); + + it("falls back to flat pricing when tieredPricing is empty array", () => { + const cost = { + input: 1, + output: 2, + cacheRead: 0.5, + cacheWrite: 0, + tieredPricing: [] as PricingTier[], + }; + const total = estimateUsageCost({ + usage: { input: 1000, output: 500, cacheRead: 2000 }, + cost, + }); + expect(total).toBeCloseTo(0.003); + }); + + it("bills overflow input tokens at last tier rate when input exceeds max range", () => { + // Tiers only cover up to 128000, but input is 200000 + // Tier 1: [0, 32000) → in $0.30/M, out $1.50/M + // Tier 2: [32000, 128000) → in $0.50/M, out $2.50/M + // Overflow: 72000 tokens billed at Tier 2 rates + const tiers: PricingTier[] = [ + { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] }, + { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] }, + ]; + const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers }; + + // 200000 input, 10000 output + // Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16 + // Tier 2: 96000 tokens, fraction = 96000/200000 = 0.48 + // Overflow (at Tier 2 rates): 72000 tokens, fraction = 72000/200000 = 0.36 + // + // Input cost = (32000*0.3 + 96000*0.5 + 72000*0.5) / 1M + // = (9600 + 48000 + 36000) / 1M = 93600/1M = 0.0936 + // Output cost = 10000 * (0.16*1.5 + 0.48*2.5 + 0.36*2.5) / 1M + // = 10000 * (0.24 + 1.2 + 0.9) / 1M + // = 10000 * 2.34 / 1M = 23400/1M = 0.0234 + // Total = 0.0936 + 0.0234 = 0.117 + const total = estimateUsageCost({ + usage: { input: 200_000, 
output: 10_000 }, + cost, + }); + expect(total).toBeCloseTo(0.117, 4); + }); + + it("bills overflow at last tier when only a single small-range tier exists (e.g. <30K)", () => { + // Only one tier covering [0, 30000), input is 100000 + const tiers: PricingTier[] = [ + { input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0, range: [0, 30_000] }, + ]; + const cost = { input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers }; + + // 100000 input, 5000 output, 2000 cacheRead + // Tier 1: 30000 tokens, fraction = 30000/100000 = 0.3 + // Overflow (at Tier 1 rates): 70000 tokens, fraction = 70000/100000 = 0.7 + // Fractions sum to 1.0 — all output/cache fully billed + // + // Input cost = (30000*1.0 + 70000*1.0) / 1M = 100000/1M = 0.1 + // Output cost = 5000 * (0.3*3.0 + 0.7*3.0) / 1M = 5000*3.0/1M = 0.015 + // CacheRead cost = 2000 * (0.3*0.5 + 0.7*0.5) / 1M = 2000*0.5/1M = 0.001 + // Total = 0.1 + 0.015 + 0.001 = 0.116 + const total = estimateUsageCost({ + usage: { input: 100_000, output: 5_000, cacheRead: 2_000 }, + cost, + }); + expect(total).toBeCloseTo(0.116, 4); + }); + + it("supports open-ended range [start] in tiered pricing (greater-than syntax)", () => { + // Tier 1: [0, 32000) → in $0.30/M, out $1.50/M + // Tier 2: [32000, Infinity) → in $0.50/M, out $2.50/M (open-ended) + const tiers: PricingTier[] = [ + { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] }, + { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, Infinity] }, + ]; + const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers }; + + // 200000 input, 10000 output + // Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16 + // Tier 2: 168000 tokens, fraction = 168000/200000 = 0.84 + // No overflow — Tier 2 absorbs everything beyond 32K + // + // Input cost = (32000*0.3 + 168000*0.5) / 1M = (9600 + 84000) / 1M = 0.0936 + // Output cost = 10000 * (0.16*1.5 + 0.84*2.5) / 1M = 10000 * (0.24 + 2.1) / 1M = 
0.0234 + // Total = 0.0936 + 0.0234 = 0.117 + const total = estimateUsageCost({ + usage: { input: 200_000, output: 10_000 }, + cost, + }); + expect(total).toBeCloseTo(0.117, 4); + }); + + it("uses declared tier ranges instead of sequential widths", () => { + const tiers: PricingTier[] = [ + { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, range: [100, 200] }, + { input: 2, output: 20, cacheRead: 0, cacheWrite: 0, range: [0, 100] }, + ]; + const cost = { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers }; + + const total = estimateUsageCost({ + usage: { input: 150, output: 60 }, + cost, + }); + + expect(total).toBeCloseTo(0.00125, 8); + }); + + it("bills malformed tier gaps at a fallback tier instead of dropping them", () => { + const tiers: PricingTier[] = [ + { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, range: [0, 50] }, + { input: 3, output: 30, cacheRead: 0, cacheWrite: 0, range: [100, 150] }, + ]; + const cost = { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers }; + + const total = estimateUsageCost({ + usage: { input: 150, output: 60 }, + cost, + }); + + expect(total).toBeCloseTo(0.00175, 8); + }); + + it("normalizes open-ended range from models.json ([start] and [start, -1])", async () => { + await fs.writeFile( + path.join(agentDir, "models.json"), + JSON.stringify( + { + providers: { + volcengine: { + models: [ + { + id: "doubao-open-ended", + cost: { + input: 0.46, + output: 2.3, + cacheRead: 0, + cacheWrite: 0, + tieredPricing: [ + { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] }, + { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000] }, + ], + }, + }, + { + id: "doubao-neg-one", + cost: { + input: 0.46, + output: 2.3, + cacheRead: 0, + cacheWrite: 0, + tieredPricing: [ + { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] }, + { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000, -1] }, + ], + }, + }, + ], + }, 
+ }, + }, + null, + 2, + ), + "utf8", + ); + + // [32000] should be normalized to [32000, Infinity] + const cost1 = resolveModelCostConfig({ + provider: "volcengine", + model: "doubao-open-ended", + }); + expect(cost1).toBeDefined(); + expect(cost1!.tieredPricing).toHaveLength(2); + expect(cost1!.tieredPricing![1].range).toEqual([32000, Infinity]); + + // [32000, -1] should also be normalized to [32000, Infinity] + const cost2 = resolveModelCostConfig({ + provider: "volcengine", + model: "doubao-neg-one", + }); + expect(cost2).toBeDefined(); + expect(cost2!.tieredPricing).toHaveLength(2); + expect(cost2!.tieredPricing![1].range).toEqual([32000, Infinity]); + }); + + it("resolves tiered pricing from models.json", async () => { + await fs.writeFile( + path.join(agentDir, "models.json"), + JSON.stringify( + { + providers: { + volcengine: { + models: [ + { + id: "doubao-seed-2-0-pro", + cost: { + input: 0.46, + output: 2.3, + cacheRead: 0, + cacheWrite: 0, + tieredPricing: [ + { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] }, + { + input: 0.7, + output: 3.5, + cacheRead: 0, + cacheWrite: 0, + range: [32000, 128000], + }, + ], + }, + }, + ], + }, + }, + }, + null, + 2, + ), + "utf8", + ); + + const cost = resolveModelCostConfig({ + provider: "volcengine", + model: "doubao-seed-2-0-pro", + }); + + expect(cost).toBeDefined(); + expect(cost!.tieredPricing).toHaveLength(2); + expect(cost!.tieredPricing![0].range).toEqual([0, 32000]); + expect(cost!.tieredPricing![1].input).toBe(0.7); + }); + + it("resolves tiered pricing from cached gateway (LiteLLM)", () => { + __setGatewayModelPricingForTest([ + { + provider: "volcengine", + model: "doubao-seed", + pricing: { + input: 0.46, + output: 2.3, + cacheRead: 0, + cacheWrite: 0, + tieredPricing: [ + { + input: 0.46, + output: 2.3, + cacheRead: 0, + cacheWrite: 0, + range: [0, 32000] as [number, number], + }, + { + input: 0.7, + output: 3.5, + cacheRead: 0, + cacheWrite: 0, + range: [32000, 128000] as 
[number, number],
+            },
+          ],
+        },
+      },
+    ]);
+
+    const cost = resolveModelCostConfig({
+      provider: "volcengine",
+      model: "doubao-seed",
+    });
+
+    expect(cost).toBeDefined();
+    expect(cost!.tieredPricing).toHaveLength(2);
+  });
 });
diff --git a/src/utils/usage-format.ts b/src/utils/usage-format.ts
index 423eb0f7669..634a0589825 100644
--- a/src/utils/usage-format.ts
+++ b/src/utils/usage-format.ts
@@ -8,11 +8,41 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { getCachedGatewayModelPricing } from "../gateway/model-pricing-cache.js";
 import { normalizeOptionalString } from "../shared/string-coerce.js";
 
+/**
+ * A single tier in a tiered-pricing schedule. Prices are expressed as
+ * USD per-million tokens, just like the flat `ModelCostConfig` fields.
+ *
+ * `range` is a half-open interval `[start, end)` expressed in *input*
+ * token counts. Tiers are sorted by `range[0]` before use; gaps and
+ * overflow past the last declared range bill at the last matched tier.
+ */
+export type PricingTier = {
+  input: number;
+  output: number;
+  cacheRead: number;
+  cacheWrite: number;
+  /** [startTokens, endTokens) — half-open interval on the input token axis. */
+  range: [number, number];
+};
+
+type RawPricingTier = {
+  input: number;
+  output: number;
+  cacheRead: number;
+  cacheWrite: number;
+  range: [number, number] | [number];
+};
+
 export type ModelCostConfig = {
   input: number;
   output: number;
   cacheRead: number;
   cacheWrite: number;
+  /** Optional tiered pricing tiers. When present, `estimateUsageCost`
+   * uses them instead of the flat rates above. The flat rates still
+   * serve as the "default / first-tier" fallback for callers that are
+   * unaware of tiered pricing. 
*/ + tieredPricing?: PricingTier[]; }; export type UsageTotals = { @@ -99,6 +129,47 @@ function shouldUseNormalizedCostLookup(params: { provider?: string; model?: stri return provider === "anthropic" || provider === "openrouter" || provider === "vercel-ai-gateway"; } +/** + * Normalize a raw tieredPricing array from models.json / config. + * Supports open-ended ranges such as `[128000]` or `[128000, -1]`, + * which are converted to `[128000, Infinity]`. + */ +function normalizeTieredPricing(raw: RawPricingTier[] | undefined): PricingTier[] | undefined { + if (!raw || raw.length === 0) { + return undefined; + } + const result: PricingTier[] = []; + for (const tier of raw) { + const range = tier.range; + if (!Array.isArray(range) || range.length < 1) { + continue; + } + const start = typeof range[0] === "number" ? range[0] : NaN; + if (!Number.isFinite(start)) { + continue; + } + const rawEnd = range.length >= 2 ? range[1] : null; + const end = + typeof rawEnd === "number" && Number.isFinite(rawEnd) && rawEnd > start ? rawEnd : Infinity; + if ( + !Number.isFinite(tier.input) || + !Number.isFinite(tier.output) || + !Number.isFinite(tier.cacheRead) || + !Number.isFinite(tier.cacheWrite) + ) { + continue; + } + result.push({ + input: tier.input, + output: tier.output, + cacheRead: tier.cacheRead, + cacheWrite: tier.cacheWrite, + range: [start, end], + }); + } + return result.length > 0 ? 
result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined; +} + function buildProviderCostIndex( providers: Record | undefined, options?: { allowPluginNormalization?: boolean }, @@ -113,7 +184,16 @@ function buildProviderCostIndex( const normalized = normalizeModelRef(normalizedProvider, model.id, { allowPluginNormalization: options?.allowPluginNormalization, }); - entries.set(modelKey(normalized.provider, normalized.model), model.cost); + const cost = { ...model.cost }; + const normalizedTiers = normalizeTieredPricing(cost.tieredPricing); + const costConfig: ModelCostConfig = { + input: cost.input, + output: cost.output, + cacheRead: cost.cacheRead, + cacheWrite: cost.cacheWrite, + ...(normalizedTiers ? { tieredPricing: normalizedTiers } : {}), + }; + entries.set(modelKey(normalized.provider, normalized.model), costConfig); } } return entries; @@ -233,6 +313,87 @@ export function resolveModelCostConfig(params: { const toNumber = (value: number | undefined): number => typeof value === "number" && Number.isFinite(value) ? value : 0; +/** + * Compute the cost for a single token dimension (input, output, cacheRead, + * or cacheWrite) across a set of sorted tiered-pricing tiers. + * + * The tiers define ranges on the **input** token axis. For each tier, + * the proportion of the total input that falls into that range determines + * the fraction of *all* token types billed at that tier's rates. + * + * For example, if the input is 40 000 tokens and the tiers are: + * [0, 32000) → $0.30/M input, $1.50/M output + * [32000, 128000) → $0.50/M input, $2.50/M output + * + * Then 80 % of every dimension is billed at the first tier and 20 % at the + * second tier. + * + * Prices are per-million; the caller divides by 1 000 000 after summing. 
+ */ +function computeTieredCost( + tiers: PricingTier[], + input: number, + output: number, + cacheRead: number, + cacheWrite: number, +): number { + const totalInputTokens = input; + const sortedTiers = tiers.toSorted((a, b) => a.range[0] - b.range[0]); + if (totalInputTokens <= 0) { + // If there are no input tokens the tier proportion is undefined; + // fall back to the first tier for any residual output/cache usage. + const tier = sortedTiers[0]; + if (!tier) { + return 0; + } + return output * tier.output + cacheRead * tier.cacheRead + cacheWrite * tier.cacheWrite; + } + + let total = 0; + let billedInput = 0; + let coveredUntil = 0; + let lastTier: PricingTier | undefined; + + for (const tier of sortedTiers) { + const [start, end] = tier.range; + const tierStart = Math.max(0, start, coveredUntil); + const tierEnd = Math.min(totalInputTokens, end); + const inputInTier = Math.max(0, tierEnd - tierStart); + if (end > coveredUntil) { + coveredUntil = end; + } + if (inputInTier <= 0) { + continue; + } + const fraction = inputInTier / totalInputTokens; + total += + inputInTier * tier.input + + output * fraction * tier.output + + cacheRead * fraction * tier.cacheRead + + cacheWrite * fraction * tier.cacheWrite; + billedInput += inputInTier; + lastTier = tier; + } + + // Bill any uncovered gaps or overflow at the highest matched tier's rate. + // This keeps malformed remote/user tier ranges from underestimating cost. + const unbilledInput = totalInputTokens - billedInput; + if (unbilledInput > 0) { + const fallbackTier = lastTier ?? 
sortedTiers[sortedTiers.length - 1]; + if (!fallbackTier) { + return total; + } + const fraction = unbilledInput / totalInputTokens; + total += + unbilledInput * fallbackTier.input + + output * fraction * fallbackTier.output + + cacheRead * fraction * fallbackTier.cacheRead + + cacheWrite * fraction * fallbackTier.cacheWrite; + } + + return total; +} + export function estimateUsageCost(params: { usage?: NormalizedUsage | UsageTotals | null; cost?: ModelCostConfig; @@ -246,11 +407,18 @@ export function estimateUsageCost(params: { const output = toNumber(usage.output); const cacheRead = toNumber(usage.cacheRead); const cacheWrite = toNumber(usage.cacheWrite); - const total = - input * cost.input + - output * cost.output + - cacheRead * cost.cacheRead + - cacheWrite * cost.cacheWrite; + + let total: number; + if (cost.tieredPricing && cost.tieredPricing.length > 0) { + total = computeTieredCost(cost.tieredPricing, input, output, cacheRead, cacheWrite); + } else { + total = + input * cost.input + + output * cost.output + + cacheRead * cost.cacheRead + + cacheWrite * cost.cacheWrite; + } + if (!Number.isFinite(total)) { return undefined; }