feat: add tiered model pricing support (#67605)

Adds tiered model pricing support for cost tracking, keeps configured pricing ahead of cached catalog values, and includes the latest Moonshot Kimi K2.6/K2.5 cost estimates.

Thanks @sliverp.
Sliverp
2026-04-21 10:02:57 +08:00
committed by GitHub
parent 8d747d20b8
commit b938e6398b
18 changed files with 1351 additions and 118 deletions

View File

@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Models/costs: support tiered model pricing from cached catalogs and configured models, and include bundled Moonshot Kimi K2.6/K2.5 cost estimates for token-usage reports. (#67605) Thanks @sliverp.
- Plugins/tests: reuse plugin loader alias and Jiti config resolution across repeated same-context loads, reducing import-heavy test overhead. (#69316) Thanks @amknight.
- Cron: split runtime execution state into `jobs-state.json` so `jobs.json` stays stable for git-tracked job definitions. (#63105) Thanks @Feelw00.
- Agents/compaction: send opt-in start and completion notices during context compaction. (#67830) Thanks @feniix.

View File

@@ -31,6 +31,12 @@ Moonshot and Kimi Coding are **separate providers**. Keys are not interchangeabl
[//]: # "moonshot-kimi-k2-ids:end"
Bundled cost estimates for current Moonshot-hosted K2 models use Moonshot's
published pay-as-you-go rates: Kimi K2.6 is $0.16/MTok cache hit,
$0.95/MTok input, and $4.00/MTok output; Kimi K2.5 is $0.10/MTok cache hit,
$0.60/MTok input, and $3.00/MTok output. Other legacy catalog entries keep
zero-cost placeholders unless you override them in config.
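As a rough sense check of how these per-MTok rates translate into a single-request estimate (flat, non-tiered pricing assumed; the token counts are made up): a Kimi K2.6 call with 10,000 fresh input tokens, 50,000 cache-hit tokens, and 2,000 output tokens comes to roughly (10,000 × 0.95 + 50,000 × 0.16 + 2,000 × 4.00) / 1,000,000 ≈ $0.026.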
## Getting started
Choose your provider and follow the setup steps.
@@ -108,7 +114,7 @@ Choose your provider and follow the setup steps.
name: "Kimi K2.6",
reasoning: false,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
cost: { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 },
contextWindow: 262144,
maxTokens: 262144,
},
@@ -117,7 +123,7 @@ Choose your provider and follow the setup steps.
name: "Kimi K2.5",
reasoning: false,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
cost: { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 },
contextWindow: 262144,
maxTokens: 262144,
},

View File

@@ -19,6 +19,18 @@ describe("moonshot provider catalog", () => {
"kimi-k2-thinking-turbo",
"kimi-k2-turbo",
]);
expect(provider.models.find((model) => model.id === "kimi-k2.6")?.cost).toEqual({
input: 0.95,
output: 4,
cacheRead: 0.16,
cacheWrite: 0,
});
expect(provider.models.find((model) => model.id === "kimi-k2.5")?.cost).toEqual({
input: 0.6,
output: 3,
cacheRead: 0.1,
cacheWrite: 0,
});
});
it("opts native Moonshot baseUrls into streaming usage only inside the extension", () => {

View File

@@ -15,6 +15,18 @@ const MOONSHOT_DEFAULT_COST = {
cacheRead: 0,
cacheWrite: 0,
};
const MOONSHOT_K2_6_COST = {
input: 0.95,
output: 4,
cacheRead: 0.16,
cacheWrite: 0,
};
const MOONSHOT_K2_5_COST = {
input: 0.6,
output: 3,
cacheRead: 0.1,
cacheWrite: 0,
};
const MOONSHOT_MODEL_CATALOG = [
{
@@ -22,7 +34,7 @@ const MOONSHOT_MODEL_CATALOG = [
name: "Kimi K2.6",
reasoning: false,
input: ["text", "image"],
cost: MOONSHOT_DEFAULT_COST,
cost: MOONSHOT_K2_6_COST,
contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW,
maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS,
},
@@ -31,7 +43,7 @@ const MOONSHOT_MODEL_CATALOG = [
name: "Kimi K2.5",
reasoning: false,
input: ["text", "image"],
cost: MOONSHOT_DEFAULT_COST,
cost: MOONSHOT_K2_5_COST,
contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW,
maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS,
},

View File

@@ -1,4 +1,9 @@
import { estimateUsageCost, formatTokenCount, formatUsd } from "../../utils/usage-format.js";
import {
estimateUsageCost,
formatTokenCount,
formatUsd,
type ModelCostConfig,
} from "../../utils/usage-format.js";
import type { ReplyPayload } from "../types.js";
export const formatResponseUsageLine = (params: {
@@ -9,12 +14,7 @@ export const formatResponseUsageLine = (params: {
cacheWrite?: number;
};
showCost: boolean;
costConfig?: {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
};
costConfig?: ModelCostConfig;
}): string | null => {
const usage = params.usage;
if (!usage) {

View File

@@ -148,7 +148,7 @@ export async function channelsStatusCommand(
opts: ChannelsStatusOptions,
runtime: RuntimeEnv = defaultRuntime,
) {
const timeoutMs = Number(opts.timeout ?? 10_000);
const timeoutMs = Number(opts.timeout ?? (opts.probe ? 30_000 : 10_000));
const statusLabel = opts.probe ? "Checking channel status (probe)…" : "Checking channel status…";
const shouldLogStatus = opts.json !== true && !process.stderr.isTTY;
if (shouldLogStatus) {

View File

@@ -62,6 +62,7 @@ function resolveModelCost(
cacheRead: typeof raw?.cacheRead === "number" ? raw.cacheRead : DEFAULT_MODEL_COST.cacheRead,
cacheWrite:
typeof raw?.cacheWrite === "number" ? raw.cacheWrite : DEFAULT_MODEL_COST.cacheWrite,
...(raw?.tieredPricing ? { tieredPricing: raw.tieredPricing } : {}),
};
}

View File

@@ -2767,6 +2767,51 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
cacheWrite: {
type: "number",
},
tieredPricing: {
type: "array",
items: {
type: "object",
properties: {
input: {
type: "number",
},
output: {
type: "number",
},
cacheRead: {
type: "number",
},
cacheWrite: {
type: "number",
},
range: {
anyOf: [
{
type: "array",
items: [
{
type: "number",
},
{
type: "number",
},
],
},
{
type: "array",
items: [
{
type: "number",
},
],
},
],
},
},
required: ["input", "output", "cacheRead", "cacheWrite", "range"],
additionalProperties: false,
},
},
},
additionalProperties: false,
},

View File

@@ -61,6 +61,18 @@ export type ModelDefinitionConfig = {
output: number;
cacheRead: number;
cacheWrite: number;
/** Optional tiered pricing. When present, cost calculation uses
* per-tier rates instead of the flat rates above. Prices are
* USD / million tokens; ranges are half-open `[start, end)` on the
* input-token axis. */
tieredPricing?: Array<{
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
/** Bounded tier: `[start, end)`. Open-ended top tier: `[start]` (normalized to `[start, Infinity]` at load time). */
range: [number, number] | [number];
}>;
};
contextWindow: number;
/**

View File

@@ -316,6 +316,19 @@ export const ModelDefinitionSchema = z
output: z.number().optional(),
cacheRead: z.number().optional(),
cacheWrite: z.number().optional(),
tieredPricing: z
.array(
z
.object({
input: z.number(),
output: z.number(),
cacheRead: z.number(),
cacheWrite: z.number(),
range: z.union([z.tuple([z.number(), z.number()]), z.tuple([z.number()])]),
})
.strict(),
)
.optional(),
})
.strict()
.optional(),
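
As a concrete illustration of the shape the schema and `ModelDefinitionConfig` additions above accept, a models.json override with tiered pricing could look roughly like this (provider, model id, and rates mirror the test fixtures in this commit and are illustrative only; a bare `[32000]` or `[32000, -1]` range would instead denote an open-ended top tier):

{
  "providers": {
    "volcengine": {
      "models": [
        {
          "id": "doubao-seed-2-0-pro",
          "cost": {
            "input": 0.46,
            "output": 2.3,
            "cacheRead": 0,
            "cacheWrite": 0,
            "tieredPricing": [
              { "input": 0.46, "output": 2.3, "cacheRead": 0, "cacheWrite": 0, "range": [0, 32000] },
              { "input": 0.7, "output": 3.5, "cacheRead": 0, "cacheWrite": 0, "range": [32000, 128000] }
            ]
          }
        }
      ]
    }
  }
}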

View File

@@ -2,11 +2,22 @@ import { normalizeModelRef } from "../agents/model-selection.js";
import { normalizeProviderId } from "../agents/provider-id.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
export type CachedPricingTier = {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
/** [startTokens, endTokens) — half-open interval on the input token axis. */
range: [number, number];
};
export type CachedModelPricing = {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
/** Optional tiered pricing tiers sourced from LiteLLM or local config. */
tieredPricing?: CachedPricingTier[];
};
let cachedPricing = new Map<string, CachedModelPricing>();

View File

@@ -134,9 +134,10 @@ describe("model-pricing-cache", () => {
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(
async () =>
new Response(
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
return new Response(
JSON.stringify({
data: [
{
@@ -169,8 +170,14 @@ describe("model-pricing-cache", () => {
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
);
}
// LiteLLM — return empty object (no tiered pricing for these models)
return new Response(JSON.stringify({}), {
status: 200,
headers: { "Content-Type": "application/json" },
});
});
await refreshGatewayModelPricingCache({ config, fetchImpl });
@@ -210,9 +217,10 @@ describe("model-pricing-cache", () => {
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(
async () =>
new Response(
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
return new Response(
JSON.stringify({
data: [
{
@@ -228,8 +236,13 @@ describe("model-pricing-cache", () => {
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
);
}
return new Response(JSON.stringify({}), {
status: 200,
headers: { "Content-Type": "application/json" },
});
});
await expect(refreshGatewayModelPricingCache({ config, fetchImpl })).resolves.toBeUndefined();
expect(
@@ -241,4 +254,303 @@ describe("model-pricing-cache", () => {
cacheWrite: 0,
});
});
it("loads tiered pricing from LiteLLM and merges with OpenRouter flat pricing", async () => {
const config = {
agents: {
defaults: {
model: { primary: "volcengine/doubao-seed-2-0-pro" },
},
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
// OpenRouter does not have this model
return new Response(JSON.stringify({ data: [] }), {
status: 200,
headers: { "Content-Type": "application/json" },
});
}
// LiteLLM catalog
return new Response(
JSON.stringify({
"volcengine/doubao-seed-2-0-pro": {
input_cost_per_token: 4.6e-7,
output_cost_per_token: 2.3e-6,
litellm_provider: "volcengine",
tiered_pricing: [
{
input_cost_per_token: 4.6e-7,
output_cost_per_token: 2.3e-6,
range: [0, 32000],
},
{
input_cost_per_token: 7e-7,
output_cost_per_token: 3.5e-6,
range: [32000, 128000],
},
{
input_cost_per_token: 1.4e-6,
output_cost_per_token: 7e-6,
range: [128000, 256000],
},
],
},
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
);
});
await refreshGatewayModelPricingCache({ config, fetchImpl });
const pricing = getCachedGatewayModelPricing({
provider: "volcengine",
model: "doubao-seed-2-0-pro",
});
expect(pricing).toBeDefined();
expect(pricing!.input).toBeCloseTo(0.46);
expect(pricing!.output).toBeCloseTo(2.3);
expect(pricing!.tieredPricing).toHaveLength(3);
expect(pricing!.tieredPricing![0]).toEqual({
input: expect.closeTo(0.46),
output: expect.closeTo(2.3),
cacheRead: 0,
cacheWrite: 0,
range: [0, 32000],
});
expect(pricing!.tieredPricing![2].range).toEqual([128000, 256000]);
});
it("normalizes LiteLLM open-ended range [start] to [start, Infinity]", async () => {
const config = {
agents: {
defaults: {
model: { primary: "volcengine/doubao-open" },
},
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
return new Response(JSON.stringify({ data: [] }), {
status: 200,
headers: { "Content-Type": "application/json" },
});
}
return new Response(
JSON.stringify({
"volcengine/doubao-open": {
input_cost_per_token: 4.6e-7,
output_cost_per_token: 2.3e-6,
litellm_provider: "volcengine",
tiered_pricing: [
{
input_cost_per_token: 4.6e-7,
output_cost_per_token: 2.3e-6,
range: [0, 32000],
},
{
input_cost_per_token: 7e-7,
output_cost_per_token: 3.5e-6,
range: [32000],
},
],
},
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
);
});
await refreshGatewayModelPricingCache({ config, fetchImpl });
const pricing = getCachedGatewayModelPricing({
provider: "volcengine",
model: "doubao-open",
});
expect(pricing).toBeDefined();
expect(pricing!.tieredPricing).toHaveLength(2);
expect(pricing!.tieredPricing![0].range).toEqual([0, 32000]);
expect(pricing!.tieredPricing![1].range).toEqual([32000, Infinity]);
});
it("merges OpenRouter flat pricing with LiteLLM tiered pricing", async () => {
const config = {
agents: {
defaults: {
model: { primary: "dashscope/qwen-plus" },
},
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
return new Response(
JSON.stringify({
data: [
{
id: "dashscope/qwen-plus",
pricing: {
prompt: "0.0000004",
completion: "0.0000024",
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
);
}
return new Response(
JSON.stringify({
"dashscope/qwen-plus": {
input_cost_per_token: 4e-7,
output_cost_per_token: 2.4e-6,
litellm_provider: "dashscope",
tiered_pricing: [
{
input_cost_per_token: 4e-7,
output_cost_per_token: 2.4e-6,
range: [0, 256000],
},
{
input_cost_per_token: 5e-7,
output_cost_per_token: 3e-6,
range: [256000, 1000000],
},
],
},
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
);
});
await refreshGatewayModelPricingCache({ config, fetchImpl });
const pricing = getCachedGatewayModelPricing({
provider: "dashscope",
model: "qwen-plus",
});
expect(pricing).toBeDefined();
// OpenRouter base flat pricing is used
expect(pricing!.input).toBeCloseTo(0.4);
expect(pricing!.output).toBeCloseTo(2.4);
// LiteLLM tiered pricing is merged in
expect(pricing!.tieredPricing).toHaveLength(2);
expect(pricing!.tieredPricing![1].range).toEqual([256000, 1000000]);
});
it("falls back gracefully when LiteLLM fetch fails", async () => {
const config = {
agents: {
defaults: {
model: { primary: "anthropic/claude-opus-4-6" },
},
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
return new Response(
JSON.stringify({
data: [
{
id: "anthropic/claude-opus-4.6",
pricing: {
prompt: "0.000005",
completion: "0.000025",
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
);
}
// LiteLLM fails
return new Response("Internal Server Error", { status: 500 });
});
await refreshGatewayModelPricingCache({ config, fetchImpl });
// OpenRouter pricing still works
expect(
getCachedGatewayModelPricing({ provider: "anthropic", model: "claude-opus-4-6" }),
).toEqual({
input: 5,
output: 25,
cacheRead: 0,
cacheWrite: 0,
});
});
it("treats oversized LiteLLM catalog responses as source failures", async () => {
const config = {
agents: {
defaults: {
model: { primary: "moonshot/kimi-k2.6" },
},
},
} as unknown as OpenClawConfig;
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("openrouter.ai")) {
return new Response(
JSON.stringify({
data: [
{
id: "moonshotai/kimi-k2.6",
pricing: {
prompt: "0.00000095",
completion: "0.000004",
input_cache_read: "0.00000016",
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
);
}
return new Response("{}", {
status: 200,
headers: {
"Content-Type": "application/json",
"Content-Length": "6000000",
},
});
});
await refreshGatewayModelPricingCache({ config, fetchImpl });
expect(getCachedGatewayModelPricing({ provider: "moonshot", model: "kimi-k2.6" })).toEqual({
input: 0.95,
output: 4,
cacheRead: 0.16,
cacheWrite: 0,
});
});
});

View File

@@ -19,6 +19,7 @@ import {
getGatewayModelPricingCacheMeta as getGatewayModelPricingCacheMetaState,
replaceGatewayModelPricingCache,
type CachedModelPricing,
type CachedPricingTier,
} from "./model-pricing-cache-state.js";
type OpenRouterPricingEntry = {
@@ -36,8 +37,11 @@ type OpenRouterModelPayload = {
export { getCachedGatewayModelPricing };
const OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models";
const LITELLM_PRICING_URL =
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
const CACHE_TTL_MS = 24 * 60 * 60_000;
const FETCH_TIMEOUT_MS = 15_000;
const MAX_PRICING_CATALOG_BYTES = 5 * 1024 * 1024;
const PROVIDER_ALIAS_TO_OPENROUTER: Record<string, string> = {
"google-gemini-cli": "google",
kimi: "moonshotai",
@@ -98,7 +102,8 @@ function toPricePerMillion(value: number | null): number {
if (value === null || value < 0 || !Number.isFinite(value)) {
return 0;
}
return value * 1_000_000;
const scaled = value * 1_000_000;
return Number.isFinite(scaled) ? scaled : 0;
}
function parseOpenRouterPricing(value: unknown): CachedModelPricing | null {
@@ -119,6 +124,136 @@ function parseOpenRouterPricing(value: unknown): CachedModelPricing | null {
};
}
async function readPricingJsonObject(
response: Response,
source: string,
): Promise<Record<string, unknown>> {
const contentLength = parseNumberString(response.headers.get("content-length"));
if (contentLength !== null && contentLength > MAX_PRICING_CATALOG_BYTES) {
throw new Error(`${source} pricing response too large: ${contentLength} bytes`);
}
const buffer = await response.arrayBuffer();
if (buffer.byteLength > MAX_PRICING_CATALOG_BYTES) {
throw new Error(`${source} pricing response too large: ${buffer.byteLength} bytes`);
}
const payload = JSON.parse(Buffer.from(buffer).toString("utf8")) as unknown;
if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
throw new Error(`${source} pricing response is not a JSON object`);
}
return payload as Record<string, unknown>;
}
// ---------------------------------------------------------------------------
// LiteLLM tiered-pricing parsing
// ---------------------------------------------------------------------------
type LiteLLMModelEntry = Record<string, unknown>;
type LiteLLMTierRaw = {
input_cost_per_token?: unknown;
output_cost_per_token?: unknown;
cache_read_input_token_cost?: unknown;
range?: unknown;
};
function parseLiteLLMTieredPricing(tiers: unknown): CachedPricingTier[] | undefined {
if (!Array.isArray(tiers) || tiers.length === 0) {
return undefined;
}
const result: CachedPricingTier[] = [];
for (const raw of tiers) {
if (!raw || typeof raw !== "object") {
continue;
}
const tier = raw as LiteLLMTierRaw;
const inputPerToken = parseNumberString(tier.input_cost_per_token);
const outputPerToken = parseNumberString(tier.output_cost_per_token);
if (inputPerToken === null || outputPerToken === null) {
continue;
}
const range = tier.range;
if (!Array.isArray(range) || range.length < 1) {
continue;
}
const start = parseNumberString(range[0]);
if (start === null) {
continue;
}
// Allow open-ended ranges: [128000], [128000, -1], [128000, null]
const rawEnd = range.length >= 2 ? parseNumberString(range[1]) : null;
const end = rawEnd === null || rawEnd <= start ? Infinity : rawEnd;
if (
!Number.isFinite(inputPerToken) ||
!Number.isFinite(outputPerToken) ||
inputPerToken < 0 ||
outputPerToken < 0
) {
continue;
}
result.push({
input: toPricePerMillion(inputPerToken),
output: toPricePerMillion(outputPerToken),
cacheRead: toPricePerMillion(parseNumberString(tier.cache_read_input_token_cost)),
cacheWrite: 0,
range: [start, end],
});
}
return result.length > 0 ? result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined;
}
function parseLiteLLMPricing(entry: LiteLLMModelEntry): CachedModelPricing | null {
const inputPerToken = parseNumberString(entry.input_cost_per_token);
const outputPerToken = parseNumberString(entry.output_cost_per_token);
if (inputPerToken === null || outputPerToken === null) {
return null;
}
const pricing: CachedModelPricing = {
input: toPricePerMillion(inputPerToken),
output: toPricePerMillion(outputPerToken),
cacheRead: toPricePerMillion(parseNumberString(entry.cache_read_input_token_cost)),
cacheWrite: 0,
};
const tieredPricing = parseLiteLLMTieredPricing(entry.tiered_pricing);
if (tieredPricing) {
pricing.tieredPricing = tieredPricing;
}
return pricing;
}
type LiteLLMPricingCatalog = Map<string, CachedModelPricing>;
async function fetchLiteLLMPricingCatalog(fetchImpl: typeof fetch): Promise<LiteLLMPricingCatalog> {
const response = await fetchImpl(LITELLM_PRICING_URL, {
headers: { Accept: "application/json" },
signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
});
if (!response.ok) {
throw new Error(`LiteLLM pricing fetch failed: HTTP ${response.status}`);
}
const payload = await readPricingJsonObject(response, "LiteLLM");
const catalog: LiteLLMPricingCatalog = new Map();
for (const [key, value] of Object.entries(payload)) {
if (!value || typeof value !== "object") {
continue;
}
const entry = value as LiteLLMModelEntry;
const pricing = parseLiteLLMPricing(entry);
if (!pricing) {
continue;
}
catalog.set(key, pricing);
}
return catalog;
}
function resolveLiteLLMPricingForRef(params: {
ref: ModelRef;
catalog: LiteLLMPricingCatalog;
}): CachedModelPricing | undefined {
// Only use provider-qualified key to avoid cross-provider pricing collisions.
return params.catalog.get(`${params.ref.provider}/${params.ref.model}`);
}
function canonicalizeOpenRouterProvider(provider: string): string {
const normalized = normalizeModelRef(provider, "placeholder").provider;
return PROVIDER_ALIAS_TO_OPENROUTER[normalized] ?? normalized;
@@ -328,7 +463,7 @@ async function fetchOpenRouterPricingCatalog(
if (!response.ok) {
throw new Error(`OpenRouter /models failed: HTTP ${response.status}`);
}
const payload = (await response.json()) as { data?: unknown };
const payload = await readPricingJsonObject(response, "OpenRouter");
const entries = Array.isArray(payload.data) ? payload.data : [];
const catalog = new Map<string, OpenRouterPricingEntry>();
for (const entry of entries) {
@@ -393,7 +528,23 @@ export async function refreshGatewayModelPricingCache(params: {
return;
}
const catalogById = await fetchOpenRouterPricingCatalog(fetchImpl);
// Fetch both pricing catalogs in parallel. Each source is
// independently optional — a failure in one does not block the other.
let openRouterFailed = false;
let litellmFailed = false;
const [catalogById, litellmCatalog] = await Promise.all([
fetchOpenRouterPricingCatalog(fetchImpl).catch((error: unknown) => {
log.warn(`OpenRouter pricing fetch failed: ${String(error)}`);
openRouterFailed = true;
return new Map<string, OpenRouterPricingEntry>();
}),
fetchLiteLLMPricingCatalog(fetchImpl).catch((error: unknown) => {
log.warn(`LiteLLM pricing fetch failed: ${String(error)}`);
litellmFailed = true;
return new Map<string, CachedModelPricing>() as LiteLLMPricingCatalog;
}),
]);
const catalogByNormalizedId = new Map<string, OpenRouterPricingEntry>();
for (const entry of catalogById.values()) {
const normalizedId = canonicalizeOpenRouterLookupId(entry.id);
@@ -405,15 +556,62 @@ export async function refreshGatewayModelPricingCache(params: {
const nextPricing = new Map<string, CachedModelPricing>();
for (const ref of refs) {
const pricing = resolveCatalogPricingForRef({
// 1. Try OpenRouter first (existing behavior — flat pricing)
const openRouterPricing = resolveCatalogPricingForRef({
ref,
catalogById,
catalogByNormalizedId,
});
if (!pricing) {
continue;
// 2. Try LiteLLM (may contain tiered pricing)
const litellmPricing = resolveLiteLLMPricingForRef({
ref,
catalog: litellmCatalog,
});
// Merge strategy: OpenRouter provides the base flat pricing;
// LiteLLM enriches with tieredPricing when available.
// If only one source has data, use that one.
if (openRouterPricing && litellmPricing?.tieredPricing) {
// Both sources present and LiteLLM has tiers — merge.
nextPricing.set(modelKey(ref.provider, ref.model), {
...openRouterPricing,
tieredPricing: litellmPricing.tieredPricing,
});
} else if (openRouterPricing) {
// Prefer OpenRouter flat pricing when LiteLLM has no tiers to contribute.
nextPricing.set(modelKey(ref.provider, ref.model), openRouterPricing);
} else if (litellmPricing) {
// Only LiteLLM has data — use it as-is.
nextPricing.set(modelKey(ref.provider, ref.model), litellmPricing);
}
}
// When either upstream source failed, preserve previously-cached entries
// for any models that the refresh could not resolve. This prevents a
// single-source outage from silently dropping pricing for models that
// depended on the failed source.
if (openRouterFailed || litellmFailed) {
const existingMeta = getGatewayModelPricingCacheMetaState();
if (nextPricing.size === 0 && existingMeta.size > 0) {
// Both sources failed — retain the entire existing cache.
log.warn("Both pricing sources returned empty data — retaining existing cache");
scheduleRefresh({ config: params.config, fetchImpl });
return;
}
// Partial failure — back-fill missing models from the existing cache.
for (const ref of refs) {
const key = modelKey(ref.provider, ref.model);
if (!nextPricing.has(key)) {
const existing = getCachedGatewayModelPricing({
provider: ref.provider,
model: ref.model,
});
if (existing) {
nextPricing.set(key, existing);
}
}
}
nextPricing.set(modelKey(ref.provider, ref.model), pricing);
}
replaceGatewayModelPricingCache(nextPricing);
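
To make the LiteLLM handling above concrete, here is a sketch (values taken from the tests in this commit) of how one catalog entry is parsed: per-token costs are scaled to USD per million tokens via toPricePerMillion, a missing cache_read_input_token_cost becomes 0, and an open-ended range of [32000] is normalized to [32000, Infinity]:

// Raw LiteLLM catalog entry (as fetched):
// "volcengine/doubao-open": {
//   "input_cost_per_token": 4.6e-7,
//   "output_cost_per_token": 2.3e-6,
//   "tiered_pricing": [
//     { "input_cost_per_token": 4.6e-7, "output_cost_per_token": 2.3e-6, "range": [0, 32000] },
//     { "input_cost_per_token": 7e-7, "output_cost_per_token": 3.5e-6, "range": [32000] }
//   ]
// }
// Resulting CachedModelPricing after parseLiteLLMPricing:
// {
//   input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0,
//   tieredPricing: [
//     { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
//     { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000, Infinity] },
//   ],
// }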

View File

@@ -133,4 +133,33 @@ describe("channelsHandlers channels.status", () => {
undefined,
);
});
it("caps probe timeout before passing it to channel plugins", async () => {
const autoEnabledConfig = { autoEnabled: true };
const probeAccount = vi.fn(async () => ({ ok: true }));
mocks.applyPluginAutoEnable.mockReturnValue({ config: autoEnabledConfig, changes: [] });
mocks.listChannelPlugins.mockReturnValue([
{
id: "whatsapp",
config: {
listAccountIds: () => ["default"],
resolveAccount: () => ({}),
isEnabled: () => true,
isConfigured: async () => true,
},
status: {
probeAccount,
},
},
]);
await channelsHandlers["channels.status"](createOptions({ probe: true, timeoutMs: 999_999 }));
expect(probeAccount).toHaveBeenCalledWith(
expect.objectContaining({
timeoutMs: 30_000,
cfg: autoEnabledConfig,
}),
);
});
});

View File

@@ -16,6 +16,7 @@ import { getChannelActivity } from "../../infra/channel-activity.js";
import { DEFAULT_ACCOUNT_ID } from "../../routing/session-key.js";
import { defaultRuntime } from "../../runtime.js";
import { normalizeOptionalString } from "../../shared/string-coerce.js";
import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js";
import {
ErrorCodes,
errorShape,
@@ -41,6 +42,17 @@ type ChannelStartPayload = {
started: boolean;
};
const CHANNEL_STATUS_MAX_TIMEOUT_MS = 30_000;
const CHANNEL_STATUS_PROBE_CONCURRENCY = 5;
function resolveChannelsStatusTimeoutMs(params: { probe: boolean; timeoutMsRaw: unknown }): number {
const fallback = params.probe ? CHANNEL_STATUS_MAX_TIMEOUT_MS : 10_000;
if (typeof params.timeoutMsRaw !== "number" || !Number.isFinite(params.timeoutMsRaw)) {
return fallback;
}
return Math.min(Math.max(1000, params.timeoutMsRaw), CHANNEL_STATUS_MAX_TIMEOUT_MS);
}
function resolveRuntimeAccountSnapshot(params: {
runtime: ChannelRuntimeSnapshot;
channelId: ChannelId;
@@ -141,7 +153,7 @@ export const channelsHandlers: GatewayRequestHandlers = {
}
const probe = (params as { probe?: boolean }).probe === true;
const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs;
const timeoutMs = typeof timeoutMsRaw === "number" ? Math.max(1000, timeoutMsRaw) : 10_000;
const timeoutMs = resolveChannelsStatusTimeoutMs({ probe, timeoutMsRaw });
const cfg = applyPluginAutoEnable({
config: loadConfig(),
env: process.env,
@@ -174,6 +186,70 @@ export const channelsHandlers: GatewayRequestHandlers = {
typeof account !== "object" ||
(account as { enabled?: boolean }).enabled !== false;
const buildAccountSnapshot = async (
channelId: ChannelId,
plugin: ChannelPlugin,
accountId: string,
defaultAccountId: string,
) => {
const account = plugin.config.resolveAccount(cfg, accountId);
const enabled = isAccountEnabled(plugin, account);
let probeResult: unknown;
let lastProbeAt: number | null = null;
if (probe && enabled && plugin.status?.probeAccount) {
let configured = true;
if (plugin.config.isConfigured) {
configured = await plugin.config.isConfigured(account, cfg);
}
if (configured) {
probeResult = await plugin.status.probeAccount({
account,
timeoutMs,
cfg,
});
lastProbeAt = Date.now();
}
}
let auditResult: unknown;
if (probe && enabled && plugin.status?.auditAccount) {
let configured = true;
if (plugin.config.isConfigured) {
configured = await plugin.config.isConfigured(account, cfg);
}
if (configured) {
auditResult = await plugin.status.auditAccount({
account,
timeoutMs,
cfg,
probe: probeResult,
});
}
}
const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId);
const snapshot = await buildChannelAccountSnapshot({
plugin,
cfg,
accountId,
runtime: runtimeSnapshot,
probe: probeResult,
audit: auditResult,
});
if (lastProbeAt) {
snapshot.lastProbeAt = lastProbeAt;
}
const activity = getChannelActivity({
channel: channelId as never,
accountId,
});
if (snapshot.lastInboundAt == null) {
snapshot.lastInboundAt = activity.inboundAt;
}
if (snapshot.lastOutboundAt == null) {
snapshot.lastOutboundAt = activity.outboundAt;
}
return { accountId: accountId, account, snapshot };
};
const buildChannelAccounts = async (channelId: ChannelId) => {
const plugin = pluginMap.get(channelId);
if (!plugin) {
@@ -190,66 +266,20 @@ export const channelsHandlers: GatewayRequestHandlers = {
cfg,
accountIds,
});
const accounts: ChannelAccountSnapshot[] = [];
const resolvedAccounts: Record<string, unknown> = {};
for (const accountId of accountIds) {
const account = plugin.config.resolveAccount(cfg, accountId);
const enabled = isAccountEnabled(plugin, account);
resolvedAccounts[accountId] = account;
let probeResult: unknown;
let lastProbeAt: number | null = null;
if (probe && enabled && plugin.status?.probeAccount) {
let configured = true;
if (plugin.config.isConfigured) {
configured = await plugin.config.isConfigured(account, cfg);
}
if (configured) {
probeResult = await plugin.status.probeAccount({
account,
timeoutMs,
cfg,
});
lastProbeAt = Date.now();
}
const { results } = await runTasksWithConcurrency({
tasks: accountIds.map(
(accountId) => async () =>
await buildAccountSnapshot(channelId, plugin, accountId, defaultAccountId),
),
limit: probe ? CHANNEL_STATUS_PROBE_CONCURRENCY : accountIds.length || 1,
});
const accounts: ChannelAccountSnapshot[] = [];
for (const result of results) {
if (result) {
resolvedAccounts[result.accountId] = result.account;
accounts.push(result.snapshot);
}
let auditResult: unknown;
if (probe && enabled && plugin.status?.auditAccount) {
let configured = true;
if (plugin.config.isConfigured) {
configured = await plugin.config.isConfigured(account, cfg);
}
if (configured) {
auditResult = await plugin.status.auditAccount({
account,
timeoutMs,
cfg,
probe: probeResult,
});
}
}
const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId);
const snapshot = await buildChannelAccountSnapshot({
plugin,
cfg,
accountId,
runtime: runtimeSnapshot,
probe: probeResult,
audit: auditResult,
});
if (lastProbeAt) {
snapshot.lastProbeAt = lastProbeAt;
}
const activity = getChannelActivity({
channel: channelId as never,
accountId,
});
if (snapshot.lastInboundAt == null) {
snapshot.lastInboundAt = activity.inboundAt;
}
if (snapshot.lastOutboundAt == null) {
snapshot.lastOutboundAt = activity.outboundAt;
}
accounts.push(snapshot);
}
const defaultAccount =
accounts.find((entry) => entry.accountId === defaultAccountId) ?? accounts[0];
@@ -271,28 +301,36 @@ export const channelsHandlers: GatewayRequestHandlers = {
const channelsMap = payload.channels as Record<string, unknown>;
const accountsMap = payload.channelAccounts as Record<string, unknown>;
const defaultAccountIdMap = payload.channelDefaultAccountId as Record<string, unknown>;
for (const plugin of plugins) {
const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } =
await buildChannelAccounts(plugin.id);
const fallbackAccount =
resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId);
const summary = plugin.status?.buildChannelSummary
? await plugin.status.buildChannelSummary({
account: fallbackAccount,
cfg,
defaultAccountId,
snapshot:
defaultAccount ??
({
accountId: defaultAccountId,
} as ChannelAccountSnapshot),
})
: {
configured: defaultAccount?.configured ?? false,
};
channelsMap[plugin.id] = summary;
accountsMap[plugin.id] = accounts;
defaultAccountIdMap[plugin.id] = defaultAccountId;
const { results: channelResults } = await runTasksWithConcurrency({
tasks: plugins.map((plugin) => async () => {
const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } =
await buildChannelAccounts(plugin.id);
const fallbackAccount =
resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId);
const summary = plugin.status?.buildChannelSummary
? await plugin.status.buildChannelSummary({
account: fallbackAccount,
cfg,
defaultAccountId,
snapshot:
defaultAccount ??
({
accountId: defaultAccountId,
} as ChannelAccountSnapshot),
})
: {
configured: defaultAccount?.configured ?? false,
};
return { pluginId: plugin.id, summary, accounts, defaultAccountId };
}),
limit: probe ? CHANNEL_STATUS_PROBE_CONCURRENCY : plugins.length || 1,
});
for (const result of channelResults) {
if (result) {
channelsMap[result.pluginId] = result.summary;
accountsMap[result.pluginId] = result.accounts;
defaultAccountIdMap[result.pluginId] = result.defaultAccountId;
}
}
respond(true, payload, undefined);
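
As a quick illustration of the new timeout clamp (consistent with the handler test included in this commit), resolveChannelsStatusTimeoutMs behaves roughly as follows:

resolveChannelsStatusTimeoutMs({ probe: true, timeoutMsRaw: 999_999 }); // => 30_000 (capped at CHANNEL_STATUS_MAX_TIMEOUT_MS)
resolveChannelsStatusTimeoutMs({ probe: true, timeoutMsRaw: undefined }); // => 30_000 (probe default)
resolveChannelsStatusTimeoutMs({ probe: false, timeoutMsRaw: 500 }); // => 1_000 (floored at one second)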

View File

@@ -249,13 +249,23 @@ async function scanTranscriptFile(params: {
continue;
}
if (entry.usage && entry.costTotal === undefined) {
if (entry.usage) {
const cost = resolveModelCostConfig({
provider: entry.provider,
model: entry.model,
config: params.config,
});
entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
if (cost?.tieredPricing && cost.tieredPricing.length > 0) {
// When tiered pricing is configured, always recompute to override
// the flat-rate cost that the transport layer wrote into the transcript.
// Clear costBreakdown so downstream aggregation uses the recomputed total
// instead of the stale flat-rate breakdown from the transport layer.
entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
entry.costBreakdown = undefined;
} else if (entry.costTotal === undefined) {
// Fill in missing cost estimates.
entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
}
}
params.onEntry(entry);

View File

@@ -13,6 +13,7 @@ import {
formatTokenCount,
formatUsd,
resolveModelCostConfig,
type PricingTier,
} from "./usage-format.js";
describe("usage-format", () => {
@@ -254,4 +255,368 @@ describe("usage-format", () => {
cacheWrite: 0.8,
});
});
// -----------------------------------------------------------------------
// Tiered pricing tests
// -----------------------------------------------------------------------
it("uses flat pricing when tieredPricing is absent", () => {
const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0 };
const total = estimateUsageCost({
usage: { input: 1000, output: 500, cacheRead: 2000 },
cost,
});
expect(total).toBeCloseTo(0.003);
});
it("estimates cost with single-tier tiered pricing (equivalent to flat)", () => {
const tiers: PricingTier[] = [
{ input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, range: [0, 1_000_000] },
];
const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers };
const total = estimateUsageCost({
usage: { input: 1000, output: 500, cacheRead: 2000 },
cost,
});
// Same as flat: (1000*1 + 500*2 + 2000*0.5) / 1M = 3000/1M = 0.003
expect(total).toBeCloseTo(0.003);
});
it("estimates cost with two tiers — input split across tiers", () => {
// Tier 1: [0, 32000) → input $0.30/M, output $1.50/M
// Tier 2: [32000, 128000) → input $0.50/M, output $2.50/M
const tiers: PricingTier[] = [
{ input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
{ input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
];
const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
// 40000 input tokens, 10000 output tokens
// Tier 1 gets 32000/40000 = 80% of input → 32000 input tokens
// Tier 2 gets 8000/40000 = 20% of input → 8000 input tokens
// Input cost = (32000 * 0.3 + 8000 * 0.5) / 1M = (9600 + 4000) / 1M = 0.0136
// Output cost = (10000 * 0.8 * 1.5 + 10000 * 0.2 * 2.5) / 1M = (12000 + 5000) / 1M = 0.017
// Total = 0.0136 + 0.017 = 0.0306
const total = estimateUsageCost({
usage: { input: 40_000, output: 10_000 },
cost,
});
expect(total).toBeCloseTo(0.0306, 4);
});
it("estimates cost with three tiers — volcengine-style pricing", () => {
// Simulates volcengine/doubao pricing (per-million):
// Tier 1: [0, 32000) → in $0.46, out $2.30
// Tier 2: [32000, 128000) → in $0.70, out $3.50
// Tier 3: [128000, 256000) → in $1.40, out $7.00
const tiers: PricingTier[] = [
{ input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
{ input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
{ input: 1.4, output: 7.0, cacheRead: 0, cacheWrite: 0, range: [128_000, 256_000] },
];
const cost = { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
// 200000 input tokens, 5000 output tokens
// Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16
// Tier 2: 96000 tokens, fraction = 96000/200000 = 0.48
// Tier 3: 72000 tokens, fraction = 72000/200000 = 0.36
//
// Input cost = (32000*0.46 + 96000*0.70 + 72000*1.40) / 1M
// = (14720 + 67200 + 100800) / 1M = 182720 / 1M = 0.18272
// Output cost = 5000 * (0.16*2.3 + 0.48*3.5 + 0.36*7.0) / 1M
// = 5000 * (0.368 + 1.68 + 2.52) / 1M
// = 5000 * 4.568 / 1M = 22840 / 1M = 0.02284
// Total = 0.18272 + 0.02284 = 0.20556
const total = estimateUsageCost({
usage: { input: 200_000, output: 5_000 },
cost,
});
expect(total).toBeCloseTo(0.20556, 4);
});
it("uses first tier rates for output when input is zero", () => {
const tiers: PricingTier[] = [
{ input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
{ input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
];
const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
const total = estimateUsageCost({
usage: { input: 0, output: 10_000 },
cost,
});
// Falls back to first tier: 10000 * 1.5 / 1M = 0.015
expect(total).toBeCloseTo(0.015, 6);
});
it("falls back to flat pricing when tieredPricing is empty array", () => {
const cost = {
input: 1,
output: 2,
cacheRead: 0.5,
cacheWrite: 0,
tieredPricing: [] as PricingTier[],
};
const total = estimateUsageCost({
usage: { input: 1000, output: 500, cacheRead: 2000 },
cost,
});
expect(total).toBeCloseTo(0.003);
});
it("bills overflow input tokens at last tier rate when input exceeds max range", () => {
// Tiers only cover up to 128000, but input is 200000
// Tier 1: [0, 32000) → in $0.30/M, out $1.50/M
// Tier 2: [32000, 128000) → in $0.50/M, out $2.50/M
// Overflow: 72000 tokens billed at Tier 2 rates
const tiers: PricingTier[] = [
{ input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
{ input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
];
const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
// 200000 input, 10000 output
// Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16
// Tier 2: 96000 tokens, fraction = 96000/200000 = 0.48
// Overflow (at Tier 2 rates): 72000 tokens, fraction = 72000/200000 = 0.36
//
// Input cost = (32000*0.3 + 96000*0.5 + 72000*0.5) / 1M
// = (9600 + 48000 + 36000) / 1M = 93600/1M = 0.0936
// Output cost = 10000 * (0.16*1.5 + 0.48*2.5 + 0.36*2.5) / 1M
// = 10000 * (0.24 + 1.2 + 0.9) / 1M
// = 10000 * 2.34 / 1M = 23400/1M = 0.0234
// Total = 0.0936 + 0.0234 = 0.117
const total = estimateUsageCost({
usage: { input: 200_000, output: 10_000 },
cost,
});
expect(total).toBeCloseTo(0.117, 4);
});
it("bills overflow at last tier when only a single small-range tier exists (e.g. <30K)", () => {
// Only one tier covering [0, 30000), input is 100000
const tiers: PricingTier[] = [
{ input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0, range: [0, 30_000] },
];
const cost = { input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers };
// 100000 input, 5000 output, 2000 cacheRead
// Tier 1: 30000 tokens, fraction = 30000/100000 = 0.3
// Overflow (at Tier 1 rates): 70000 tokens, fraction = 70000/100000 = 0.7
// Fractions sum to 1.0 — all output/cache fully billed
//
// Input cost = (30000*1.0 + 70000*1.0) / 1M = 100000/1M = 0.1
// Output cost = 5000 * (0.3*3.0 + 0.7*3.0) / 1M = 5000*3.0/1M = 0.015
// CacheRead cost = 2000 * (0.3*0.5 + 0.7*0.5) / 1M = 2000*0.5/1M = 0.001
// Total = 0.1 + 0.015 + 0.001 = 0.116
const total = estimateUsageCost({
usage: { input: 100_000, output: 5_000, cacheRead: 2_000 },
cost,
});
expect(total).toBeCloseTo(0.116, 4);
});
it("supports open-ended range [start] in tiered pricing (greater-than syntax)", () => {
// Tier 1: [0, 32000) → in $0.30/M, out $1.50/M
// Tier 2: [32000, Infinity) → in $0.50/M, out $2.50/M (open-ended)
const tiers: PricingTier[] = [
{ input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
{ input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, Infinity] },
];
const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
// 200000 input, 10000 output
// Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16
// Tier 2: 168000 tokens, fraction = 168000/200000 = 0.84
// No overflow — Tier 2 absorbs everything beyond 32K
//
// Input cost = (32000*0.3 + 168000*0.5) / 1M = (9600 + 84000) / 1M = 0.0936
// Output cost = 10000 * (0.16*1.5 + 0.84*2.5) / 1M = 10000 * (0.24 + 2.1) / 1M = 0.0234
// Total = 0.0936 + 0.0234 = 0.117
const total = estimateUsageCost({
usage: { input: 200_000, output: 10_000 },
cost,
});
expect(total).toBeCloseTo(0.117, 4);
});
it("uses declared tier ranges instead of sequential widths", () => {
const tiers: PricingTier[] = [
{ input: 1, output: 10, cacheRead: 0, cacheWrite: 0, range: [100, 200] },
{ input: 2, output: 20, cacheRead: 0, cacheWrite: 0, range: [0, 100] },
];
const cost = { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
const total = estimateUsageCost({
usage: { input: 150, output: 60 },
cost,
});
expect(total).toBeCloseTo(0.00125, 8);
});
it("bills malformed tier gaps at a fallback tier instead of dropping them", () => {
const tiers: PricingTier[] = [
{ input: 1, output: 10, cacheRead: 0, cacheWrite: 0, range: [0, 50] },
{ input: 3, output: 30, cacheRead: 0, cacheWrite: 0, range: [100, 150] },
];
const cost = { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
const total = estimateUsageCost({
usage: { input: 150, output: 60 },
cost,
});
expect(total).toBeCloseTo(0.00175, 8);
});
it("normalizes open-ended range from models.json ([start] and [start, -1])", async () => {
await fs.writeFile(
path.join(agentDir, "models.json"),
JSON.stringify(
{
providers: {
volcengine: {
models: [
{
id: "doubao-open-ended",
cost: {
input: 0.46,
output: 2.3,
cacheRead: 0,
cacheWrite: 0,
tieredPricing: [
{ input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
{ input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000] },
],
},
},
{
id: "doubao-neg-one",
cost: {
input: 0.46,
output: 2.3,
cacheRead: 0,
cacheWrite: 0,
tieredPricing: [
{ input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
{ input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000, -1] },
],
},
},
],
},
},
},
null,
2,
),
"utf8",
);
// [32000] should be normalized to [32000, Infinity]
const cost1 = resolveModelCostConfig({
provider: "volcengine",
model: "doubao-open-ended",
});
expect(cost1).toBeDefined();
expect(cost1!.tieredPricing).toHaveLength(2);
expect(cost1!.tieredPricing![1].range).toEqual([32000, Infinity]);
// [32000, -1] should also be normalized to [32000, Infinity]
const cost2 = resolveModelCostConfig({
provider: "volcengine",
model: "doubao-neg-one",
});
expect(cost2).toBeDefined();
expect(cost2!.tieredPricing).toHaveLength(2);
expect(cost2!.tieredPricing![1].range).toEqual([32000, Infinity]);
});
it("resolves tiered pricing from models.json", async () => {
await fs.writeFile(
path.join(agentDir, "models.json"),
JSON.stringify(
{
providers: {
volcengine: {
models: [
{
id: "doubao-seed-2-0-pro",
cost: {
input: 0.46,
output: 2.3,
cacheRead: 0,
cacheWrite: 0,
tieredPricing: [
{ input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
{
input: 0.7,
output: 3.5,
cacheRead: 0,
cacheWrite: 0,
range: [32000, 128000],
},
],
},
},
],
},
},
},
null,
2,
),
"utf8",
);
const cost = resolveModelCostConfig({
provider: "volcengine",
model: "doubao-seed-2-0-pro",
});
expect(cost).toBeDefined();
expect(cost!.tieredPricing).toHaveLength(2);
expect(cost!.tieredPricing![0].range).toEqual([0, 32000]);
expect(cost!.tieredPricing![1].input).toBe(0.7);
});
it("resolves tiered pricing from cached gateway (LiteLLM)", () => {
__setGatewayModelPricingForTest([
{
provider: "volcengine",
model: "doubao-seed",
pricing: {
input: 0.46,
output: 2.3,
cacheRead: 0,
cacheWrite: 0,
tieredPricing: [
{
input: 0.46,
output: 2.3,
cacheRead: 0,
cacheWrite: 0,
range: [0, 32000] as [number, number],
},
{
input: 0.7,
output: 3.5,
cacheRead: 0,
cacheWrite: 0,
range: [32000, 128000] as [number, number],
},
],
},
},
]);
const cost = resolveModelCostConfig({
provider: "volcengine",
model: "doubao-seed",
});
expect(cost).toBeDefined();
expect(cost!.tieredPricing).toHaveLength(2);
});
});

View File

@@ -8,11 +8,41 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
import { getCachedGatewayModelPricing } from "../gateway/model-pricing-cache.js";
import { normalizeOptionalString } from "../shared/string-coerce.js";
/**
* A single tier in a tiered-pricing schedule. Prices are expressed as
* USD per-million tokens, just like the flat `ModelCostConfig` fields.
*
* `range` is a half-open interval `[start, end)` expressed in *input*
* token counts. The tiers MUST be sorted in ascending `range[0]` order
* with no gaps.
*/
export type PricingTier = {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
/** [startTokens, endTokens) — half-open interval on the input token axis. */
range: [number, number];
};
type RawPricingTier = {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
range: [number, number] | [number];
};
export type ModelCostConfig = {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
/** Optional tiered pricing tiers. When present, `estimateUsageCost`
* uses them instead of the flat rates above. The flat rates still
* serve as the "default / first-tier" fallback for callers that are
* unaware of tiered pricing. */
tieredPricing?: PricingTier[];
};
export type UsageTotals = {
@@ -99,6 +129,47 @@ function shouldUseNormalizedCostLookup(params: { provider?: string; model?: stri
return provider === "anthropic" || provider === "openrouter" || provider === "vercel-ai-gateway";
}
/**
* Normalize a raw tieredPricing array from models.json / config.
* Supports open-ended ranges such as `[128000]` or `[128000, -1]`,
* which are converted to `[128000, Infinity]`.
*/
function normalizeTieredPricing(raw: RawPricingTier[] | undefined): PricingTier[] | undefined {
if (!raw || raw.length === 0) {
return undefined;
}
const result: PricingTier[] = [];
for (const tier of raw) {
const range = tier.range;
if (!Array.isArray(range) || range.length < 1) {
continue;
}
const start = typeof range[0] === "number" ? range[0] : NaN;
if (!Number.isFinite(start)) {
continue;
}
const rawEnd = range.length >= 2 ? range[1] : null;
const end =
typeof rawEnd === "number" && Number.isFinite(rawEnd) && rawEnd > start ? rawEnd : Infinity;
if (
!Number.isFinite(tier.input) ||
!Number.isFinite(tier.output) ||
!Number.isFinite(tier.cacheRead) ||
!Number.isFinite(tier.cacheWrite)
) {
continue;
}
result.push({
input: tier.input,
output: tier.output,
cacheRead: tier.cacheRead,
cacheWrite: tier.cacheWrite,
range: [start, end],
});
}
return result.length > 0 ? result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined;
}
function buildProviderCostIndex(
providers: Record<string, ModelProviderConfig> | undefined,
options?: { allowPluginNormalization?: boolean },
@@ -113,7 +184,16 @@ function buildProviderCostIndex(
const normalized = normalizeModelRef(normalizedProvider, model.id, {
allowPluginNormalization: options?.allowPluginNormalization,
});
entries.set(modelKey(normalized.provider, normalized.model), model.cost);
const cost = { ...model.cost };
const normalizedTiers = normalizeTieredPricing(cost.tieredPricing);
const costConfig: ModelCostConfig = {
input: cost.input,
output: cost.output,
cacheRead: cost.cacheRead,
cacheWrite: cost.cacheWrite,
...(normalizedTiers ? { tieredPricing: normalizedTiers } : {}),
};
entries.set(modelKey(normalized.provider, normalized.model), costConfig);
}
}
return entries;
@@ -233,6 +313,87 @@ export function resolveModelCostConfig(params: {
const toNumber = (value: number | undefined): number =>
typeof value === "number" && Number.isFinite(value) ? value : 0;
/**
* Compute the cost for a single token dimension (input, output, cacheRead,
* or cacheWrite) across a set of sorted tiered-pricing tiers.
*
* The tiers define ranges on the **input** token axis. For each tier,
* the proportion of the total input that falls into that range determines
* the fraction of *all* token types billed at that tier's rates.
*
* For example, if the input is 40,000 tokens and the tiers are:
* [0, 32000) → $0.30/M input, $1.50/M output
* [32000, 128000) → $0.50/M input, $2.50/M output
*
* Then 80% of every dimension is billed at the first tier and 20% at the
* second tier.
*
* Prices are per-million; the caller divides by 1,000,000 after summing.
*/
function computeTieredCost(
tiers: PricingTier[],
input: number,
output: number,
cacheRead: number,
cacheWrite: number,
): number {
const totalInputTokens = input;
const sortedTiers = tiers.toSorted((a, b) => a.range[0] - b.range[0]);
if (totalInputTokens <= 0) {
// If there are no input tokens the tier proportion is undefined;
// fall back to the first tier for any residual output/cache usage.
const tier = sortedTiers[0];
if (!tier) {
return 0;
}
return output * tier.output + cacheRead * tier.cacheRead + cacheWrite * tier.cacheWrite;
}
let total = 0;
let billedInput = 0;
let coveredUntil = 0;
let lastTier: PricingTier | undefined;
for (const tier of sortedTiers) {
const [start, end] = tier.range;
const tierStart = Math.max(0, start, coveredUntil);
const tierEnd = Math.min(totalInputTokens, end);
const inputInTier = Math.max(0, tierEnd - tierStart);
if (end > coveredUntil) {
coveredUntil = end;
}
if (inputInTier <= 0) {
continue;
}
const fraction = inputInTier / totalInputTokens;
total +=
inputInTier * tier.input +
output * fraction * tier.output +
cacheRead * fraction * tier.cacheRead +
cacheWrite * fraction * tier.cacheWrite;
billedInput += inputInTier;
lastTier = tier;
}
// Bill any uncovered gaps or overflow at the highest matched tier's rate.
// This keeps malformed remote/user tier ranges from underestimating cost.
const unbilledInput = totalInputTokens - billedInput;
if (unbilledInput > 0) {
const fallbackTier = lastTier ?? sortedTiers[sortedTiers.length - 1];
if (!fallbackTier) {
return total;
}
const fraction = unbilledInput / totalInputTokens;
total +=
unbilledInput * fallbackTier.input +
output * fraction * fallbackTier.output +
cacheRead * fraction * fallbackTier.cacheRead +
cacheWrite * fraction * fallbackTier.cacheWrite;
}
return total;
}
export function estimateUsageCost(params: {
usage?: NormalizedUsage | UsageTotals | null;
cost?: ModelCostConfig;
@@ -246,11 +407,18 @@ export function estimateUsageCost(params: {
const output = toNumber(usage.output);
const cacheRead = toNumber(usage.cacheRead);
const cacheWrite = toNumber(usage.cacheWrite);
const total =
input * cost.input +
output * cost.output +
cacheRead * cost.cacheRead +
cacheWrite * cost.cacheWrite;
let total: number;
if (cost.tieredPricing && cost.tieredPricing.length > 0) {
total = computeTieredCost(cost.tieredPricing, input, output, cacheRead, cacheWrite);
} else {
total =
input * cost.input +
output * cost.output +
cacheRead * cost.cacheRead +
cacheWrite * cost.cacheWrite;
}
if (!Number.isFinite(total)) {
return undefined;
}
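
Putting the new pieces together, a minimal usage sketch of the tiered path (rates, tiers, and the expected total are borrowed from the two-tier test above; the import path mirrors the test file):

import { estimateUsageCost, type ModelCostConfig } from "./usage-format.js";

const cost: ModelCostConfig = {
  input: 0.3,
  output: 1.5,
  cacheRead: 0,
  cacheWrite: 0,
  tieredPricing: [
    { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
    { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
  ],
};

// 32,000 of the 40,000 input tokens land in tier 1 (80%) and 8,000 in tier 2 (20%),
// so output tokens are billed 80/20 across the two tier rates as well:
// (32_000 * 0.3 + 8_000 * 0.5 + 10_000 * 0.8 * 1.5 + 10_000 * 0.2 * 2.5) / 1_000_000 ≈ 0.0306
estimateUsageCost({ usage: { input: 40_000, output: 10_000 }, cost }); // ≈ $0.0306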