mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:10:44 +00:00
feat: add tiered model pricing support (#67605)
Adds tiered model pricing support for cost tracking, keeps configured pricing ahead of cached catalog values, and includes the latest Moonshot Kimi K2.6/K2.5 cost estimates.

Thanks @sliverp.
This commit is contained in:
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- Models/costs: support tiered model pricing from cached catalogs and configured models, and include bundled Moonshot Kimi K2.6/K2.5 cost estimates for token-usage reports. (#67605) Thanks @sliverp.
|
||||
- Plugins/tests: reuse plugin loader alias and Jiti config resolution across repeated same-context loads, reducing import-heavy test overhead. (#69316) Thanks @amknight.
|
||||
- Cron: split runtime execution state into `jobs-state.json` so `jobs.json` stays stable for git-tracked job definitions. (#63105) Thanks @Feelw00.
|
||||
- Agents/compaction: send opt-in start and completion notices during context compaction. (#67830) Thanks @feniix.
|
||||
|
||||
@@ -31,6 +31,12 @@ Moonshot and Kimi Coding are **separate providers**. Keys are not interchangeabl
|
||||
|
||||
[//]: # "moonshot-kimi-k2-ids:end"
|
||||
|
||||
Bundled cost estimates for current Moonshot-hosted K2 models use Moonshot's
published pay-as-you-go rates, quoted per million tokens (MTok): Kimi K2.6 is
$0.16/MTok cache hit, $0.95/MTok input, and $4.00/MTok output; Kimi K2.5 is
$0.10/MTok cache hit, $0.60/MTok input, and $3.00/MTok output. Other legacy
catalog entries keep zero-cost placeholders unless you override them in config.
|
||||
|
||||
## Getting started
|
||||
|
||||
Choose your provider and follow the setup steps.
|
||||
@@ -108,7 +114,7 @@ Choose your provider and follow the setup steps.
|
||||
name: "Kimi K2.6",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
cost: { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 },
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
},
|
||||
@@ -117,7 +123,7 @@ Choose your provider and follow the setup steps.
|
||||
name: "Kimi K2.5",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
cost: { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 },
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
},
|
||||
|
||||
@@ -19,6 +19,18 @@ describe("moonshot provider catalog", () => {
|
||||
"kimi-k2-thinking-turbo",
|
||||
"kimi-k2-turbo",
|
||||
]);
|
||||
expect(provider.models.find((model) => model.id === "kimi-k2.6")?.cost).toEqual({
|
||||
input: 0.95,
|
||||
output: 4,
|
||||
cacheRead: 0.16,
|
||||
cacheWrite: 0,
|
||||
});
|
||||
expect(provider.models.find((model) => model.id === "kimi-k2.5")?.cost).toEqual({
|
||||
input: 0.6,
|
||||
output: 3,
|
||||
cacheRead: 0.1,
|
||||
cacheWrite: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it("opts native Moonshot baseUrls into streaming usage only inside the extension", () => {
|
||||
|
||||
@@ -15,6 +15,18 @@ const MOONSHOT_DEFAULT_COST = {
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
};
|
||||
/** Bundled cost estimate for Kimi K2.6 (input / output / cache-read / cache-write rates). */
const MOONSHOT_K2_6_COST = { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 };

/** Bundled cost estimate for Kimi K2.5 (input / output / cache-read / cache-write rates). */
const MOONSHOT_K2_5_COST = { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 };
|
||||
|
||||
const MOONSHOT_MODEL_CATALOG = [
|
||||
{
|
||||
@@ -22,7 +34,7 @@ const MOONSHOT_MODEL_CATALOG = [
|
||||
name: "Kimi K2.6",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: MOONSHOT_DEFAULT_COST,
|
||||
cost: MOONSHOT_K2_6_COST,
|
||||
contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
@@ -31,7 +43,7 @@ const MOONSHOT_MODEL_CATALOG = [
|
||||
name: "Kimi K2.5",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: MOONSHOT_DEFAULT_COST,
|
||||
cost: MOONSHOT_K2_5_COST,
|
||||
contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { estimateUsageCost, formatTokenCount, formatUsd } from "../../utils/usage-format.js";
|
||||
import {
|
||||
estimateUsageCost,
|
||||
formatTokenCount,
|
||||
formatUsd,
|
||||
type ModelCostConfig,
|
||||
} from "../../utils/usage-format.js";
|
||||
import type { ReplyPayload } from "../types.js";
|
||||
|
||||
export const formatResponseUsageLine = (params: {
|
||||
@@ -9,12 +14,7 @@ export const formatResponseUsageLine = (params: {
|
||||
cacheWrite?: number;
|
||||
};
|
||||
showCost: boolean;
|
||||
costConfig?: {
|
||||
input: number;
|
||||
output: number;
|
||||
cacheRead: number;
|
||||
cacheWrite: number;
|
||||
};
|
||||
costConfig?: ModelCostConfig;
|
||||
}): string | null => {
|
||||
const usage = params.usage;
|
||||
if (!usage) {
|
||||
|
||||
@@ -148,7 +148,7 @@ export async function channelsStatusCommand(
|
||||
opts: ChannelsStatusOptions,
|
||||
runtime: RuntimeEnv = defaultRuntime,
|
||||
) {
|
||||
const timeoutMs = Number(opts.timeout ?? 10_000);
|
||||
const timeoutMs = Number(opts.timeout ?? (opts.probe ? 30_000 : 10_000));
|
||||
const statusLabel = opts.probe ? "Checking channel status (probe)…" : "Checking channel status…";
|
||||
const shouldLogStatus = opts.json !== true && !process.stderr.isTTY;
|
||||
if (shouldLogStatus) {
|
||||
|
||||
@@ -62,6 +62,7 @@ function resolveModelCost(
|
||||
cacheRead: typeof raw?.cacheRead === "number" ? raw.cacheRead : DEFAULT_MODEL_COST.cacheRead,
|
||||
cacheWrite:
|
||||
typeof raw?.cacheWrite === "number" ? raw.cacheWrite : DEFAULT_MODEL_COST.cacheWrite,
|
||||
...(raw?.tieredPricing ? { tieredPricing: raw.tieredPricing } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -2767,6 +2767,51 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
cacheWrite: {
|
||||
type: "number",
|
||||
},
|
||||
tieredPricing: {
|
||||
type: "array",
|
||||
items: {
|
||||
type: "object",
|
||||
properties: {
|
||||
input: {
|
||||
type: "number",
|
||||
},
|
||||
output: {
|
||||
type: "number",
|
||||
},
|
||||
cacheRead: {
|
||||
type: "number",
|
||||
},
|
||||
cacheWrite: {
|
||||
type: "number",
|
||||
},
|
||||
range: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "array",
|
||||
items: [
|
||||
{
|
||||
type: "number",
|
||||
},
|
||||
{
|
||||
type: "number",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "array",
|
||||
items: [
|
||||
{
|
||||
type: "number",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
required: ["input", "output", "cacheRead", "cacheWrite", "range"],
|
||||
additionalProperties: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
|
||||
@@ -61,6 +61,18 @@ export type ModelDefinitionConfig = {
|
||||
output: number;
|
||||
cacheRead: number;
|
||||
cacheWrite: number;
|
||||
/** Optional tiered pricing. When present, cost calculation uses
|
||||
* per-tier rates instead of the flat rates above. Prices are
|
||||
* USD / million tokens; ranges are half-open `[start, end)` on the
|
||||
* input-token axis. */
|
||||
tieredPricing?: Array<{
|
||||
input: number;
|
||||
output: number;
|
||||
cacheRead: number;
|
||||
cacheWrite: number;
|
||||
/** Bounded tier: `[start, end)`. Open-ended top tier: `[start]` (normalized to `[start, Infinity]` at load time). */
|
||||
range: [number, number] | [number];
|
||||
}>;
|
||||
};
|
||||
contextWindow: number;
|
||||
/**
|
||||
|
||||
@@ -316,6 +316,19 @@ export const ModelDefinitionSchema = z
|
||||
output: z.number().optional(),
|
||||
cacheRead: z.number().optional(),
|
||||
cacheWrite: z.number().optional(),
|
||||
tieredPricing: z
|
||||
.array(
|
||||
z
|
||||
.object({
|
||||
input: z.number(),
|
||||
output: z.number(),
|
||||
cacheRead: z.number(),
|
||||
cacheWrite: z.number(),
|
||||
range: z.union([z.tuple([z.number(), z.number()]), z.tuple([z.number()])]),
|
||||
})
|
||||
.strict(),
|
||||
)
|
||||
.optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
|
||||
@@ -2,11 +2,22 @@ import { normalizeModelRef } from "../agents/model-selection.js";
|
||||
import { normalizeProviderId } from "../agents/provider-id.js";
|
||||
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
|
||||
|
||||
/**
 * A single tier of a tiered price table: the four rates that apply while the
 * input-token count falls inside `range`.
 */
export type CachedPricingTier = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  /** Half-open interval `[startTokens, endTokens)` measured on the input-token axis. */
  range: [number, number];
};

/**
 * Cached pricing for one model: flat rates plus, optionally, a tier table.
 */
export type CachedModelPricing = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  /** Tier table, when one is available (sourced from LiteLLM or local config). */
  tieredPricing?: CachedPricingTier[];
};
|
||||
|
||||
let cachedPricing = new Map<string, CachedModelPricing>();
|
||||
|
||||
@@ -134,9 +134,10 @@ describe("model-pricing-cache", () => {
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
const fetchImpl = withFetchPreconnect(
|
||||
async () =>
|
||||
new Response(
|
||||
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
|
||||
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
|
||||
if (url.includes("openrouter.ai")) {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
@@ -169,8 +170,14 @@ describe("model-pricing-cache", () => {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
);
|
||||
}
|
||||
// LiteLLM — return empty object (no tiered pricing for these models)
|
||||
return new Response(JSON.stringify({}), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
});
|
||||
});
|
||||
|
||||
await refreshGatewayModelPricingCache({ config, fetchImpl });
|
||||
|
||||
@@ -210,9 +217,10 @@ describe("model-pricing-cache", () => {
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
const fetchImpl = withFetchPreconnect(
|
||||
async () =>
|
||||
new Response(
|
||||
const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
|
||||
const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
|
||||
if (url.includes("openrouter.ai")) {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
@@ -228,8 +236,13 @@ describe("model-pricing-cache", () => {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
);
|
||||
}
|
||||
return new Response(JSON.stringify({}), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
});
|
||||
});
|
||||
|
||||
await expect(refreshGatewayModelPricingCache({ config, fetchImpl })).resolves.toBeUndefined();
|
||||
expect(
|
||||
@@ -241,4 +254,303 @@ describe("model-pricing-cache", () => {
|
||||
cacheWrite: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it("loads tiered pricing from LiteLLM and merges with OpenRouter flat pricing", async () => {
  const jsonOk = { status: 200, headers: { "Content-Type": "application/json" } };
  // LiteLLM catalog entry: flat per-token rates plus three bounded tiers.
  const litellmCatalog = {
    "volcengine/doubao-seed-2-0-pro": {
      input_cost_per_token: 4.6e-7,
      output_cost_per_token: 2.3e-6,
      litellm_provider: "volcengine",
      tiered_pricing: [
        { input_cost_per_token: 4.6e-7, output_cost_per_token: 2.3e-6, range: [0, 32000] },
        { input_cost_per_token: 7e-7, output_cost_per_token: 3.5e-6, range: [32000, 128000] },
        { input_cost_per_token: 1.4e-6, output_cost_per_token: 7e-6, range: [128000, 256000] },
      ],
    },
  };
  const config = {
    agents: { defaults: { model: { primary: "volcengine/doubao-seed-2-0-pro" } } },
  } as unknown as OpenClawConfig;

  const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    // OpenRouter does not have this model, so it answers with an empty list;
    // every other URL is served the LiteLLM catalog.
    const body = url.includes("openrouter.ai") ? { data: [] } : litellmCatalog;
    return new Response(JSON.stringify(body), jsonOk);
  });

  await refreshGatewayModelPricingCache({ config, fetchImpl });

  const pricing = getCachedGatewayModelPricing({
    provider: "volcengine",
    model: "doubao-seed-2-0-pro",
  });

  expect(pricing).toBeDefined();
  expect(pricing!.input).toBeCloseTo(0.46);
  expect(pricing!.output).toBeCloseTo(2.3);
  expect(pricing!.tieredPricing).toHaveLength(3);
  const tiers = pricing!.tieredPricing!;
  expect(tiers[0]).toEqual({
    input: expect.closeTo(0.46),
    output: expect.closeTo(2.3),
    cacheRead: 0,
    cacheWrite: 0,
    range: [0, 32000],
  });
  expect(tiers[2].range).toEqual([128000, 256000]);
});
|
||||
|
||||
it("normalizes LiteLLM open-ended range [start] to [start, Infinity]", async () => {
  const jsonOk = { status: 200, headers: { "Content-Type": "application/json" } };
  const litellmCatalog = {
    "volcengine/doubao-open": {
      input_cost_per_token: 4.6e-7,
      output_cost_per_token: 2.3e-6,
      litellm_provider: "volcengine",
      tiered_pricing: [
        { input_cost_per_token: 4.6e-7, output_cost_per_token: 2.3e-6, range: [0, 32000] },
        // Single-element range: the top tier has no upper bound.
        { input_cost_per_token: 7e-7, output_cost_per_token: 3.5e-6, range: [32000] },
      ],
    },
  };
  const config = {
    agents: { defaults: { model: { primary: "volcengine/doubao-open" } } },
  } as unknown as OpenClawConfig;

  const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    // OpenRouter answers with an empty list; any other URL gets the LiteLLM catalog.
    const body = url.includes("openrouter.ai") ? { data: [] } : litellmCatalog;
    return new Response(JSON.stringify(body), jsonOk);
  });

  await refreshGatewayModelPricingCache({ config, fetchImpl });

  const pricing = getCachedGatewayModelPricing({
    provider: "volcengine",
    model: "doubao-open",
  });

  expect(pricing).toBeDefined();
  expect(pricing!.tieredPricing).toHaveLength(2);
  const tiers = pricing!.tieredPricing!;
  expect(tiers[0].range).toEqual([0, 32000]);
  expect(tiers[1].range).toEqual([32000, Infinity]);
});
|
||||
|
||||
it("merges OpenRouter flat pricing with LiteLLM tiered pricing", async () => {
  const jsonOk = { status: 200, headers: { "Content-Type": "application/json" } };
  // OpenRouter lists the model with flat prompt/completion prices.
  const openRouterModels = {
    data: [
      {
        id: "dashscope/qwen-plus",
        pricing: {
          prompt: "0.0000004",
          completion: "0.0000024",
        },
      },
    ],
  };
  // LiteLLM lists the same model with two tiers.
  const litellmCatalog = {
    "dashscope/qwen-plus": {
      input_cost_per_token: 4e-7,
      output_cost_per_token: 2.4e-6,
      litellm_provider: "dashscope",
      tiered_pricing: [
        { input_cost_per_token: 4e-7, output_cost_per_token: 2.4e-6, range: [0, 256000] },
        { input_cost_per_token: 5e-7, output_cost_per_token: 3e-6, range: [256000, 1000000] },
      ],
    },
  };
  const config = {
    agents: { defaults: { model: { primary: "dashscope/qwen-plus" } } },
  } as unknown as OpenClawConfig;

  const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    const body = url.includes("openrouter.ai") ? openRouterModels : litellmCatalog;
    return new Response(JSON.stringify(body), jsonOk);
  });

  await refreshGatewayModelPricingCache({ config, fetchImpl });

  const pricing = getCachedGatewayModelPricing({
    provider: "dashscope",
    model: "qwen-plus",
  });

  expect(pricing).toBeDefined();
  // The flat rates come from OpenRouter.
  expect(pricing!.input).toBeCloseTo(0.4);
  expect(pricing!.output).toBeCloseTo(2.4);
  // The tier table comes from LiteLLM.
  expect(pricing!.tieredPricing).toHaveLength(2);
  const tiers = pricing!.tieredPricing!;
  expect(tiers[1].range).toEqual([256000, 1000000]);
});
|
||||
|
||||
it("falls back gracefully when LiteLLM fetch fails", async () => {
  const openRouterModels = {
    data: [
      {
        id: "anthropic/claude-opus-4.6",
        pricing: {
          prompt: "0.000005",
          completion: "0.000025",
        },
      },
    ],
  };
  const config = {
    agents: { defaults: { model: { primary: "anthropic/claude-opus-4-6" } } },
  } as unknown as OpenClawConfig;

  const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    if (!url.includes("openrouter.ai")) {
      // Any non-OpenRouter request (the LiteLLM catalog) fails with a server error.
      return new Response("Internal Server Error", { status: 500 });
    }
    return new Response(JSON.stringify(openRouterModels), {
      status: 200,
      headers: { "Content-Type": "application/json" },
    });
  });

  await refreshGatewayModelPricingCache({ config, fetchImpl });

  // The OpenRouter price is still cached despite the LiteLLM failure.
  const cached = getCachedGatewayModelPricing({
    provider: "anthropic",
    model: "claude-opus-4-6",
  });
  expect(cached).toEqual({
    input: 5,
    output: 25,
    cacheRead: 0,
    cacheWrite: 0,
  });
});
|
||||
|
||||
it("treats oversized LiteLLM catalog responses as source failures", async () => {
  const openRouterModels = {
    data: [
      {
        id: "moonshotai/kimi-k2.6",
        pricing: {
          prompt: "0.00000095",
          completion: "0.000004",
          input_cache_read: "0.00000016",
        },
      },
    ],
  };
  const config = {
    agents: { defaults: { model: { primary: "moonshot/kimi-k2.6" } } },
  } as unknown as OpenClawConfig;

  const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    if (!url.includes("openrouter.ai")) {
      // The non-OpenRouter response declares a 6,000,000-byte body via Content-Length.
      return new Response("{}", {
        status: 200,
        headers: {
          "Content-Type": "application/json",
          "Content-Length": "6000000",
        },
      });
    }
    return new Response(JSON.stringify(openRouterModels), {
      status: 200,
      headers: { "Content-Type": "application/json" },
    });
  });

  await refreshGatewayModelPricingCache({ config, fetchImpl });

  const cached = getCachedGatewayModelPricing({ provider: "moonshot", model: "kimi-k2.6" });
  expect(cached).toEqual({
    input: 0.95,
    output: 4,
    cacheRead: 0.16,
    cacheWrite: 0,
  });
});
|
||||
});
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
getGatewayModelPricingCacheMeta as getGatewayModelPricingCacheMetaState,
|
||||
replaceGatewayModelPricingCache,
|
||||
type CachedModelPricing,
|
||||
type CachedPricingTier,
|
||||
} from "./model-pricing-cache-state.js";
|
||||
|
||||
type OpenRouterPricingEntry = {
|
||||
@@ -36,8 +37,11 @@ type OpenRouterModelPayload = {
|
||||
export { getCachedGatewayModelPricing };
|
||||
|
||||
/** OpenRouter model-listing endpoint. */
const OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models";
/** Raw JSON catalog published in the LiteLLM repository (`main` branch). */
const LITELLM_PRICING_URL =
  "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
/** 24 hours, in milliseconds. NOTE(review): presumably the cache lifetime — usage is outside this excerpt. */
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
/** 15 seconds, in milliseconds. */
const FETCH_TIMEOUT_MS = 15_000;
/** 5 MiB, in bytes. */
const MAX_PRICING_CATALOG_BYTES = 5 * 1024 ** 2;
|
||||
const PROVIDER_ALIAS_TO_OPENROUTER: Record<string, string> = {
|
||||
"google-gemini-cli": "google",
|
||||
kimi: "moonshotai",
|
||||
@@ -98,7 +102,8 @@ function toPricePerMillion(value: number | null): number {
|
||||
if (value === null || value < 0 || !Number.isFinite(value)) {
|
||||
return 0;
|
||||
}
|
||||
return value * 1_000_000;
|
||||
const scaled = value * 1_000_000;
|
||||
return Number.isFinite(scaled) ? scaled : 0;
|
||||
}
|
||||
|
||||
function parseOpenRouterPricing(value: unknown): CachedModelPricing | null {
|
||||
@@ -119,6 +124,136 @@ function parseOpenRouterPricing(value: unknown): CachedModelPricing | null {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reads a pricing-catalog HTTP response and returns its body as a JSON object.
 *
 * The body must not exceed `MAX_PRICING_CATALOG_BYTES`. The declared
 * Content-Length is checked first; the body is then consumed chunk by chunk so
 * that a response with a missing or understated Content-Length is abandoned as
 * soon as the cap is crossed, instead of being buffered in full before the
 * size is looked at.
 *
 * @param response - Fetch response whose body holds the catalog.
 * @param source - Label used as the prefix of every error message.
 * @returns The parsed top-level JSON object.
 * @throws Error when the body is too large or the parsed payload is not a
 *   non-array object. A `JSON.parse` failure propagates unchanged.
 */
async function readPricingJsonObject(
  response: Response,
  source: string,
): Promise<Record<string, unknown>> {
  const declaredLength = parseNumberString(response.headers.get("content-length"));
  if (declaredLength !== null && declaredLength > MAX_PRICING_CATALOG_BYTES) {
    throw new Error(`${source} pricing response too large: ${declaredLength} bytes`);
  }

  const chunks: Uint8Array[] = [];
  const reader = response.body?.getReader();
  if (reader) {
    let received = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      received += value.byteLength;
      if (received > MAX_PRICING_CATALOG_BYTES) {
        // Stop the transfer; a failing cancel must not mask the size error.
        await reader.cancel().catch(() => undefined);
        throw new Error(
          `${source} pricing response too large: over ${MAX_PRICING_CATALOG_BYTES} bytes`,
        );
      }
      chunks.push(value);
    }
  } else {
    // No readable stream on this response object: fall back to buffering it.
    const buffer = await response.arrayBuffer();
    if (buffer.byteLength > MAX_PRICING_CATALOG_BYTES) {
      throw new Error(`${source} pricing response too large: ${buffer.byteLength} bytes`);
    }
    chunks.push(new Uint8Array(buffer));
  }

  const payload: unknown = JSON.parse(Buffer.concat(chunks).toString("utf8"));
  if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
    throw new Error(`${source} pricing response is not a JSON object`);
  }
  return payload as Record<string, unknown>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// LiteLLM tiered-pricing parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One catalog entry as found in the LiteLLM JSON; no field is trusted until parsed. */
type LiteLLMModelEntry = Record<string, unknown>;

/** The fields read from one element of an entry's `tiered_pricing` array; all optional and unvalidated. */
type LiteLLMTierRaw = Partial<
  Record<
    "input_cost_per_token" | "output_cost_per_token" | "cache_read_input_token_cost" | "range",
    unknown
  >
>;
|
||||
|
||||
/**
 * Parses a LiteLLM `tiered_pricing` value into cache tiers.
 *
 * Each usable tier has its per-token rates converted with `toPricePerMillion`
 * and its `range` normalized to a two-element `[start, end]` pair. An end that
 * is missing, unparseable, non-finite, or not greater than the start becomes
 * `Infinity` (open-ended tier).
 *
 * A tier is skipped when it is not an object, when its input or output rate is
 * unparseable, negative, or non-finite, or when its range is missing or its
 * start is unparseable or non-finite.
 *
 * @param tiers - Raw `tiered_pricing` value taken from a LiteLLM catalog entry.
 * @returns The surviving tiers sorted by ascending range start, or `undefined`
 *   when `tiers` is not a non-empty array or no tier survives validation.
 */
function parseLiteLLMTieredPricing(tiers: unknown): CachedPricingTier[] | undefined {
  if (!Array.isArray(tiers) || tiers.length === 0) {
    return undefined;
  }
  const result: CachedPricingTier[] = [];
  for (const raw of tiers) {
    if (!raw || typeof raw !== "object") {
      continue;
    }
    const tier = raw as LiteLLMTierRaw;

    const inputPerToken = parseNumberString(tier.input_cost_per_token);
    const outputPerToken = parseNumberString(tier.output_cost_per_token);
    if (inputPerToken === null || outputPerToken === null) {
      continue;
    }
    if (
      !Number.isFinite(inputPerToken) ||
      !Number.isFinite(outputPerToken) ||
      inputPerToken < 0 ||
      outputPerToken < 0
    ) {
      continue;
    }

    const range = tier.range;
    if (!Array.isArray(range) || range.length < 1) {
      continue;
    }
    // NOTE(review): parseNumberString is defined outside this block; the
    // finiteness guards assume it may return NaN/±Infinity, as the rate checks
    // above already do. A non-finite start would also break the sort below.
    const start = parseNumberString(range[0]);
    if (start === null || !Number.isFinite(start)) {
      continue;
    }
    // Allow open-ended ranges: [128000], [128000, -1], [128000, null]
    const rawEnd = range.length >= 2 ? parseNumberString(range[1]) : null;
    const end =
      rawEnd === null || !Number.isFinite(rawEnd) || rawEnd <= start ? Infinity : rawEnd;

    result.push({
      input: toPricePerMillion(inputPerToken),
      output: toPricePerMillion(outputPerToken),
      cacheRead: toPricePerMillion(parseNumberString(tier.cache_read_input_token_cost)),
      // No cache-write rate is read from LiteLLM tiers here.
      cacheWrite: 0,
      range: [start, end],
    });
  }
  return result.length > 0 ? result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined;
}
|
||||
|
||||
/**
 * Builds cached pricing from one LiteLLM catalog entry.
 *
 * @param entry - Raw catalog entry.
 * @returns Pricing converted with `toPricePerMillion`, with `tieredPricing`
 *   attached only when the entry's `tiered_pricing` parses to at least one
 *   tier; `null` when the input or output rate cannot be parsed.
 */
function parseLiteLLMPricing(entry: LiteLLMModelEntry): CachedModelPricing | null {
  const perTokenInput = parseNumberString(entry.input_cost_per_token);
  const perTokenOutput = parseNumberString(entry.output_cost_per_token);
  if (perTokenInput === null || perTokenOutput === null) {
    return null;
  }

  const perTokenCacheRead = parseNumberString(entry.cache_read_input_token_cost);
  const tiers = parseLiteLLMTieredPricing(entry.tiered_pricing);
  return {
    input: toPricePerMillion(perTokenInput),
    output: toPricePerMillion(perTokenOutput),
    cacheRead: toPricePerMillion(perTokenCacheRead),
    cacheWrite: 0,
    // Leave the key off entirely when there are no tiers.
    ...(tiers ? { tieredPricing: tiers } : {}),
  };
}
|
||||
|
||||
/** Parsed LiteLLM catalog, keyed by the catalog's own entry keys. */
type LiteLLMPricingCatalog = Map<string, CachedModelPricing>;

/**
 * Downloads the LiteLLM pricing JSON and parses every usable entry.
 *
 * @param fetchImpl - Fetch implementation used for the request.
 * @returns A map from catalog key to parsed pricing; entries that are not
 *   objects or that `parseLiteLLMPricing` rejects are left out.
 * @throws Error on a non-OK HTTP status; errors from `readPricingJsonObject`
 *   propagate.
 */
async function fetchLiteLLMPricingCatalog(fetchImpl: typeof fetch): Promise<LiteLLMPricingCatalog> {
  const response = await fetchImpl(LITELLM_PRICING_URL, {
    headers: { Accept: "application/json" },
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
  });
  if (!response.ok) {
    throw new Error(`LiteLLM pricing fetch failed: HTTP ${response.status}`);
  }

  const rawEntries = await readPricingJsonObject(response, "LiteLLM");
  const parsed: LiteLLMPricingCatalog = new Map();
  for (const [catalogKey, rawEntry] of Object.entries(rawEntries)) {
    if (rawEntry && typeof rawEntry === "object") {
      const pricing = parseLiteLLMPricing(rawEntry as LiteLLMModelEntry);
      if (pricing) {
        parsed.set(catalogKey, pricing);
      }
    }
  }
  return parsed;
}
|
||||
|
||||
/**
 * Looks up LiteLLM pricing for a model reference.
 *
 * @param params.ref - Model reference supplying `provider` and `model`.
 * @param params.catalog - Parsed LiteLLM catalog.
 * @returns The entry stored under `provider/model`, or `undefined` if absent.
 */
function resolveLiteLLMPricingForRef(params: {
  ref: ModelRef;
  catalog: LiteLLMPricingCatalog;
}): CachedModelPricing | undefined {
  const { ref, catalog } = params;
  // Only the provider-qualified key is tried, to avoid cross-provider pricing collisions.
  const qualifiedKey = `${ref.provider}/${ref.model}`;
  return catalog.get(qualifiedKey);
}
|
||||
|
||||
function canonicalizeOpenRouterProvider(provider: string): string {
|
||||
const normalized = normalizeModelRef(provider, "placeholder").provider;
|
||||
return PROVIDER_ALIAS_TO_OPENROUTER[normalized] ?? normalized;
|
||||
@@ -328,7 +463,7 @@ async function fetchOpenRouterPricingCatalog(
|
||||
if (!response.ok) {
|
||||
throw new Error(`OpenRouter /models failed: HTTP ${response.status}`);
|
||||
}
|
||||
const payload = (await response.json()) as { data?: unknown };
|
||||
const payload = await readPricingJsonObject(response, "OpenRouter");
|
||||
const entries = Array.isArray(payload.data) ? payload.data : [];
|
||||
const catalog = new Map<string, OpenRouterPricingEntry>();
|
||||
for (const entry of entries) {
|
||||
@@ -393,7 +528,23 @@ export async function refreshGatewayModelPricingCache(params: {
|
||||
return;
|
||||
}
|
||||
|
||||
const catalogById = await fetchOpenRouterPricingCatalog(fetchImpl);
|
||||
// Fetch both pricing catalogs in parallel. Each source is
|
||||
// independently optional — a failure in one does not block the other.
|
||||
let openRouterFailed = false;
|
||||
let litellmFailed = false;
|
||||
const [catalogById, litellmCatalog] = await Promise.all([
|
||||
fetchOpenRouterPricingCatalog(fetchImpl).catch((error: unknown) => {
|
||||
log.warn(`OpenRouter pricing fetch failed: ${String(error)}`);
|
||||
openRouterFailed = true;
|
||||
return new Map<string, OpenRouterPricingEntry>();
|
||||
}),
|
||||
fetchLiteLLMPricingCatalog(fetchImpl).catch((error: unknown) => {
|
||||
log.warn(`LiteLLM pricing fetch failed: ${String(error)}`);
|
||||
litellmFailed = true;
|
||||
return new Map<string, CachedModelPricing>() as LiteLLMPricingCatalog;
|
||||
}),
|
||||
]);
|
||||
|
||||
const catalogByNormalizedId = new Map<string, OpenRouterPricingEntry>();
|
||||
for (const entry of catalogById.values()) {
|
||||
const normalizedId = canonicalizeOpenRouterLookupId(entry.id);
|
||||
@@ -405,15 +556,62 @@ export async function refreshGatewayModelPricingCache(params: {
|
||||
|
||||
const nextPricing = new Map<string, CachedModelPricing>();
|
||||
for (const ref of refs) {
|
||||
const pricing = resolveCatalogPricingForRef({
|
||||
// 1. Try OpenRouter first (existing behavior — flat pricing)
|
||||
const openRouterPricing = resolveCatalogPricingForRef({
|
||||
ref,
|
||||
catalogById,
|
||||
catalogByNormalizedId,
|
||||
});
|
||||
if (!pricing) {
|
||||
continue;
|
||||
|
||||
// 2. Try LiteLLM (may contain tiered pricing)
|
||||
const litellmPricing = resolveLiteLLMPricingForRef({
|
||||
ref,
|
||||
catalog: litellmCatalog,
|
||||
});
|
||||
|
||||
// Merge strategy: OpenRouter provides the base flat pricing;
|
||||
// LiteLLM enriches with tieredPricing when available.
|
||||
// If only one source has data, use that one.
|
||||
if (openRouterPricing && litellmPricing?.tieredPricing) {
|
||||
// Both sources present and LiteLLM has tiers — merge.
|
||||
nextPricing.set(modelKey(ref.provider, ref.model), {
|
||||
...openRouterPricing,
|
||||
tieredPricing: litellmPricing.tieredPricing,
|
||||
});
|
||||
} else if (openRouterPricing) {
|
||||
// Prefer OpenRouter flat pricing when LiteLLM has no tiers to contribute.
|
||||
nextPricing.set(modelKey(ref.provider, ref.model), openRouterPricing);
|
||||
} else if (litellmPricing) {
|
||||
// Only LiteLLM has data — use it as-is.
|
||||
nextPricing.set(modelKey(ref.provider, ref.model), litellmPricing);
|
||||
}
|
||||
}
|
||||
|
||||
// When either upstream source failed, preserve previously-cached entries
|
||||
// for any models that the refresh could not resolve. This prevents a
|
||||
// single-source outage from silently dropping pricing for models that
|
||||
// depended on the failed source.
|
||||
if (openRouterFailed || litellmFailed) {
|
||||
const existingMeta = getGatewayModelPricingCacheMetaState();
|
||||
if (nextPricing.size === 0 && existingMeta.size > 0) {
|
||||
// Both sources failed — retain the entire existing cache.
|
||||
log.warn("Both pricing sources returned empty data — retaining existing cache");
|
||||
scheduleRefresh({ config: params.config, fetchImpl });
|
||||
return;
|
||||
}
|
||||
// Partial failure — back-fill missing models from the existing cache.
|
||||
for (const ref of refs) {
|
||||
const key = modelKey(ref.provider, ref.model);
|
||||
if (!nextPricing.has(key)) {
|
||||
const existing = getCachedGatewayModelPricing({
|
||||
provider: ref.provider,
|
||||
model: ref.model,
|
||||
});
|
||||
if (existing) {
|
||||
nextPricing.set(key, existing);
|
||||
}
|
||||
}
|
||||
}
|
||||
nextPricing.set(modelKey(ref.provider, ref.model), pricing);
|
||||
}
|
||||
|
||||
replaceGatewayModelPricingCache(nextPricing);
|
||||
|
||||
@@ -133,4 +133,33 @@ describe("channelsHandlers channels.status", () => {
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("caps probe timeout before passing it to channel plugins", async () => {
  const probeSpy = vi.fn(async () => ({ ok: true }));
  const autoEnabledConfig = { autoEnabled: true };
  mocks.applyPluginAutoEnable.mockReturnValue({ config: autoEnabledConfig, changes: [] });
  // A single enabled, configured channel whose probe is the spy above.
  mocks.listChannelPlugins.mockReturnValue([
    {
      id: "whatsapp",
      config: {
        listAccountIds: () => ["default"],
        resolveAccount: () => ({}),
        isEnabled: () => true,
        isConfigured: async () => true,
      },
      status: {
        probeAccount: probeSpy,
      },
    },
  ]);

  // Request a timeout far above the 30s the probe is expected to receive.
  const request = createOptions({ probe: true, timeoutMs: 999_999 });
  await channelsHandlers["channels.status"](request);

  const expectedProbeArgs = expect.objectContaining({
    timeoutMs: 30_000,
    cfg: autoEnabledConfig,
  });
  expect(probeSpy).toHaveBeenCalledWith(expectedProbeArgs);
});
|
||||
});
|
||||
|
||||
@@ -16,6 +16,7 @@ import { getChannelActivity } from "../../infra/channel-activity.js";
|
||||
import { DEFAULT_ACCOUNT_ID } from "../../routing/session-key.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { normalizeOptionalString } from "../../shared/string-coerce.js";
|
||||
import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js";
|
||||
import {
|
||||
ErrorCodes,
|
||||
errorShape,
|
||||
@@ -41,6 +42,17 @@ type ChannelStartPayload = {
|
||||
started: boolean;
|
||||
};
|
||||
|
||||
/** Ceiling for any channels.status timeout; also the default when probing. */
const CHANNEL_STATUS_MAX_TIMEOUT_MS = 30_000;
/** Concurrency limit handed to `runTasksWithConcurrency` while probing. */
const CHANNEL_STATUS_PROBE_CONCURRENCY = 5;

/**
 * Resolve the timeout (in ms) used for a channels.status request.
 *
 * A finite numeric `timeoutMsRaw` is clamped into
 * `[1000, CHANNEL_STATUS_MAX_TIMEOUT_MS]`. Anything else (missing, non-number,
 * NaN, ±Infinity) falls back to the maximum when probing and to 10s otherwise.
 */
function resolveChannelsStatusTimeoutMs(params: { probe: boolean; timeoutMsRaw: unknown }): number {
  const { probe, timeoutMsRaw } = params;
  if (typeof timeoutMsRaw === "number" && Number.isFinite(timeoutMsRaw)) {
    const atLeastOneSecond = Math.max(1000, timeoutMsRaw);
    return Math.min(atLeastOneSecond, CHANNEL_STATUS_MAX_TIMEOUT_MS);
  }
  return probe ? CHANNEL_STATUS_MAX_TIMEOUT_MS : 10_000;
}
|
||||
|
||||
function resolveRuntimeAccountSnapshot(params: {
|
||||
runtime: ChannelRuntimeSnapshot;
|
||||
channelId: ChannelId;
|
||||
@@ -141,7 +153,7 @@ export const channelsHandlers: GatewayRequestHandlers = {
|
||||
}
|
||||
const probe = (params as { probe?: boolean }).probe === true;
|
||||
const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs;
|
||||
const timeoutMs = typeof timeoutMsRaw === "number" ? Math.max(1000, timeoutMsRaw) : 10_000;
|
||||
const timeoutMs = resolveChannelsStatusTimeoutMs({ probe, timeoutMsRaw });
|
||||
const cfg = applyPluginAutoEnable({
|
||||
config: loadConfig(),
|
||||
env: process.env,
|
||||
@@ -174,6 +186,70 @@ export const channelsHandlers: GatewayRequestHandlers = {
|
||||
typeof account !== "object" ||
|
||||
(account as { enabled?: boolean }).enabled !== false;
|
||||
|
||||
// Build the status snapshot for one account of one channel plugin.
// Resolves the account, optionally probes and audits it (probe requests on
// enabled, configured accounts only), builds the snapshot and back-fills
// inbound/outbound timestamps from recorded channel activity.
// Returns the account id, the resolved account and the snapshot.
const buildAccountSnapshot = async (
  channelId: ChannelId,
  plugin: ChannelPlugin,
  accountId: string,
  defaultAccountId: string,
) => {
  const account = plugin.config.resolveAccount(cfg, accountId);
  const enabled = isAccountEnabled(plugin, account);
  // Probe and audit hooks only run for probe requests against enabled accounts.
  const inspectable = probe && enabled;

  let probeResult: unknown;
  let lastProbeAt: number | null = null;
  if (inspectable && plugin.status?.probeAccount) {
    // Plugins without an isConfigured hook are treated as configured.
    const configured = plugin.config.isConfigured
      ? await plugin.config.isConfigured(account, cfg)
      : true;
    if (configured) {
      probeResult = await plugin.status.probeAccount({ account, timeoutMs, cfg });
      lastProbeAt = Date.now();
    }
  }

  let auditResult: unknown;
  if (inspectable && plugin.status?.auditAccount) {
    const configured = plugin.config.isConfigured
      ? await plugin.config.isConfigured(account, cfg)
      : true;
    if (configured) {
      // The audit hook receives whatever the probe produced (possibly undefined).
      auditResult = await plugin.status.auditAccount({
        account,
        timeoutMs,
        cfg,
        probe: probeResult,
      });
    }
  }

  const snapshot = await buildChannelAccountSnapshot({
    plugin,
    cfg,
    accountId,
    runtime: resolveRuntimeSnapshot(channelId, accountId, defaultAccountId),
    probe: probeResult,
    audit: auditResult,
  });
  if (lastProbeAt) {
    snapshot.lastProbeAt = lastProbeAt;
  }

  // Only fill activity timestamps the snapshot builder left unset.
  const activity = getChannelActivity({ channel: channelId as never, accountId });
  if (snapshot.lastInboundAt == null) {
    snapshot.lastInboundAt = activity.inboundAt;
  }
  if (snapshot.lastOutboundAt == null) {
    snapshot.lastOutboundAt = activity.outboundAt;
  }

  return { accountId, account, snapshot };
};
|
||||
|
||||
const buildChannelAccounts = async (channelId: ChannelId) => {
|
||||
const plugin = pluginMap.get(channelId);
|
||||
if (!plugin) {
|
||||
@@ -190,66 +266,20 @@ export const channelsHandlers: GatewayRequestHandlers = {
|
||||
cfg,
|
||||
accountIds,
|
||||
});
|
||||
const accounts: ChannelAccountSnapshot[] = [];
|
||||
const resolvedAccounts: Record<string, unknown> = {};
|
||||
for (const accountId of accountIds) {
|
||||
const account = plugin.config.resolveAccount(cfg, accountId);
|
||||
const enabled = isAccountEnabled(plugin, account);
|
||||
resolvedAccounts[accountId] = account;
|
||||
let probeResult: unknown;
|
||||
let lastProbeAt: number | null = null;
|
||||
if (probe && enabled && plugin.status?.probeAccount) {
|
||||
let configured = true;
|
||||
if (plugin.config.isConfigured) {
|
||||
configured = await plugin.config.isConfigured(account, cfg);
|
||||
}
|
||||
if (configured) {
|
||||
probeResult = await plugin.status.probeAccount({
|
||||
account,
|
||||
timeoutMs,
|
||||
cfg,
|
||||
});
|
||||
lastProbeAt = Date.now();
|
||||
}
|
||||
const { results } = await runTasksWithConcurrency({
|
||||
tasks: accountIds.map(
|
||||
(accountId) => async () =>
|
||||
await buildAccountSnapshot(channelId, plugin, accountId, defaultAccountId),
|
||||
),
|
||||
limit: probe ? CHANNEL_STATUS_PROBE_CONCURRENCY : accountIds.length || 1,
|
||||
});
|
||||
const accounts: ChannelAccountSnapshot[] = [];
|
||||
for (const result of results) {
|
||||
if (result) {
|
||||
resolvedAccounts[result.accountId] = result.account;
|
||||
accounts.push(result.snapshot);
|
||||
}
|
||||
let auditResult: unknown;
|
||||
if (probe && enabled && plugin.status?.auditAccount) {
|
||||
let configured = true;
|
||||
if (plugin.config.isConfigured) {
|
||||
configured = await plugin.config.isConfigured(account, cfg);
|
||||
}
|
||||
if (configured) {
|
||||
auditResult = await plugin.status.auditAccount({
|
||||
account,
|
||||
timeoutMs,
|
||||
cfg,
|
||||
probe: probeResult,
|
||||
});
|
||||
}
|
||||
}
|
||||
const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId);
|
||||
const snapshot = await buildChannelAccountSnapshot({
|
||||
plugin,
|
||||
cfg,
|
||||
accountId,
|
||||
runtime: runtimeSnapshot,
|
||||
probe: probeResult,
|
||||
audit: auditResult,
|
||||
});
|
||||
if (lastProbeAt) {
|
||||
snapshot.lastProbeAt = lastProbeAt;
|
||||
}
|
||||
const activity = getChannelActivity({
|
||||
channel: channelId as never,
|
||||
accountId,
|
||||
});
|
||||
if (snapshot.lastInboundAt == null) {
|
||||
snapshot.lastInboundAt = activity.inboundAt;
|
||||
}
|
||||
if (snapshot.lastOutboundAt == null) {
|
||||
snapshot.lastOutboundAt = activity.outboundAt;
|
||||
}
|
||||
accounts.push(snapshot);
|
||||
}
|
||||
const defaultAccount =
|
||||
accounts.find((entry) => entry.accountId === defaultAccountId) ?? accounts[0];
|
||||
@@ -271,28 +301,36 @@ export const channelsHandlers: GatewayRequestHandlers = {
|
||||
const channelsMap = payload.channels as Record<string, unknown>;
|
||||
const accountsMap = payload.channelAccounts as Record<string, unknown>;
|
||||
const defaultAccountIdMap = payload.channelDefaultAccountId as Record<string, unknown>;
|
||||
for (const plugin of plugins) {
|
||||
const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } =
|
||||
await buildChannelAccounts(plugin.id);
|
||||
const fallbackAccount =
|
||||
resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId);
|
||||
const summary = plugin.status?.buildChannelSummary
|
||||
? await plugin.status.buildChannelSummary({
|
||||
account: fallbackAccount,
|
||||
cfg,
|
||||
defaultAccountId,
|
||||
snapshot:
|
||||
defaultAccount ??
|
||||
({
|
||||
accountId: defaultAccountId,
|
||||
} as ChannelAccountSnapshot),
|
||||
})
|
||||
: {
|
||||
configured: defaultAccount?.configured ?? false,
|
||||
};
|
||||
channelsMap[plugin.id] = summary;
|
||||
accountsMap[plugin.id] = accounts;
|
||||
defaultAccountIdMap[plugin.id] = defaultAccountId;
|
||||
const { results: channelResults } = await runTasksWithConcurrency({
|
||||
tasks: plugins.map((plugin) => async () => {
|
||||
const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } =
|
||||
await buildChannelAccounts(plugin.id);
|
||||
const fallbackAccount =
|
||||
resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId);
|
||||
const summary = plugin.status?.buildChannelSummary
|
||||
? await plugin.status.buildChannelSummary({
|
||||
account: fallbackAccount,
|
||||
cfg,
|
||||
defaultAccountId,
|
||||
snapshot:
|
||||
defaultAccount ??
|
||||
({
|
||||
accountId: defaultAccountId,
|
||||
} as ChannelAccountSnapshot),
|
||||
})
|
||||
: {
|
||||
configured: defaultAccount?.configured ?? false,
|
||||
};
|
||||
return { pluginId: plugin.id, summary, accounts, defaultAccountId };
|
||||
}),
|
||||
limit: probe ? CHANNEL_STATUS_PROBE_CONCURRENCY : plugins.length || 1,
|
||||
});
|
||||
for (const result of channelResults) {
|
||||
if (result) {
|
||||
channelsMap[result.pluginId] = result.summary;
|
||||
accountsMap[result.pluginId] = result.accounts;
|
||||
defaultAccountIdMap[result.pluginId] = result.defaultAccountId;
|
||||
}
|
||||
}
|
||||
|
||||
respond(true, payload, undefined);
|
||||
|
||||
@@ -249,13 +249,23 @@ async function scanTranscriptFile(params: {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (entry.usage && entry.costTotal === undefined) {
|
||||
if (entry.usage) {
|
||||
const cost = resolveModelCostConfig({
|
||||
provider: entry.provider,
|
||||
model: entry.model,
|
||||
config: params.config,
|
||||
});
|
||||
entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
|
||||
if (cost?.tieredPricing && cost.tieredPricing.length > 0) {
|
||||
// When tiered pricing is configured, always recompute to override
|
||||
// the flat-rate cost that the transport layer wrote into the transcript.
|
||||
// Clear costBreakdown so downstream aggregation uses the recomputed total
|
||||
// instead of the stale flat-rate breakdown from the transport layer.
|
||||
entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
|
||||
entry.costBreakdown = undefined;
|
||||
} else if (entry.costTotal === undefined) {
|
||||
// Fill in missing cost estimates.
|
||||
entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
|
||||
}
|
||||
}
|
||||
|
||||
params.onEntry(entry);
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
formatTokenCount,
|
||||
formatUsd,
|
||||
resolveModelCostConfig,
|
||||
type PricingTier,
|
||||
} from "./usage-format.js";
|
||||
|
||||
describe("usage-format", () => {
|
||||
@@ -254,4 +255,368 @@ describe("usage-format", () => {
|
||||
cacheWrite: 0.8,
|
||||
});
|
||||
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Tiered pricing tests
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
it("uses flat pricing when tieredPricing is absent", () => {
  const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0 };
  const total = estimateUsageCost({
    usage: { input: 1000, output: 500, cacheRead: 2000 },
    cost,
  });
  // (1000*1 + 500*2 + 2000*0.5) / 1M = 3000 / 1M = 0.003
  // Explicit precision: the default toBeCloseTo tolerance (0.005) is larger
  // than the expected value, so it would also accept 0.
  expect(total).toBeCloseTo(0.003, 6);
});
|
||||
|
||||
it("estimates cost with single-tier tiered pricing (equivalent to flat)", () => {
  const tiers: PricingTier[] = [
    { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, range: [0, 1_000_000] },
  ];
  const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers };
  const total = estimateUsageCost({
    usage: { input: 1000, output: 500, cacheRead: 2000 },
    cost,
  });
  // Same as flat: (1000*1 + 500*2 + 2000*0.5) / 1M = 3000/1M = 0.003
  // Explicit precision: the default toBeCloseTo tolerance (0.005) is larger
  // than the expected value, so it would also accept 0.
  expect(total).toBeCloseTo(0.003, 6);
});
|
||||
|
||||
it("estimates cost with two tiers — input split across tiers", () => {
  // Schedule (USD per million tokens), ranges on the input-token axis:
  //   [0, 32k)     input 0.30, output 1.50
  //   [32k, 128k)  input 0.50, output 2.50
  const schedule: PricingTier[] = [
    { range: [0, 32_000], input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0 },
    { range: [32_000, 128_000], input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 0.3,
    output: 1.5,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 40k input, 10k output.
  //   tier 1 holds 32k of the input (80%), tier 2 holds the other 8k (20%)
  //   input  = (32000*0.3 + 8000*0.5) / 1M           = 13600 / 1M = 0.0136
  //   output = (10000*0.8*1.5 + 10000*0.2*2.5) / 1M  = 17000 / 1M = 0.017
  //   total  = 0.0306
  const estimate = estimateUsageCost({
    usage: { input: 40_000, output: 10_000 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.0306, 4);
});
|
||||
|
||||
it("estimates cost with three tiers — volcengine-style pricing", () => {
  // Volcengine/doubao-like schedule (USD per million tokens):
  //   [0, 32k)      input 0.46, output 2.30
  //   [32k, 128k)   input 0.70, output 3.50
  //   [128k, 256k)  input 1.40, output 7.00
  const schedule: PricingTier[] = [
    { range: [0, 32_000], input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0 },
    { range: [32_000, 128_000], input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0 },
    { range: [128_000, 256_000], input: 1.4, output: 7.0, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 0.46,
    output: 2.3,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 200k input, 5k output.
  //   input share per tier: 32k (0.16), 96k (0.48), 72k (0.36)
  //   input  = (32000*0.46 + 96000*0.70 + 72000*1.40) / 1M
  //          = (14720 + 67200 + 100800) / 1M            = 0.18272
  //   output = 5000 * (0.16*2.3 + 0.48*3.5 + 0.36*7.0) / 1M
  //          = 5000 * 4.568 / 1M                         = 0.02284
  //   total  = 0.20556
  const estimate = estimateUsageCost({
    usage: { input: 200_000, output: 5_000 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.20556, 4);
});
|
||||
|
||||
it("uses first tier rates for output when input is zero", () => {
  const schedule: PricingTier[] = [
    { range: [0, 32_000], input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0 },
    { range: [32_000, 128_000], input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 0.3,
    output: 1.5,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // With no input tokens there is no tier split, so the output is billed
  // entirely at the first tier: 10000 * 1.5 / 1M = 0.015
  const estimate = estimateUsageCost({
    usage: { input: 0, output: 10_000 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.015, 6);
});
|
||||
|
||||
it("falls back to flat pricing when tieredPricing is empty array", () => {
  const cost = {
    input: 1,
    output: 2,
    cacheRead: 0.5,
    cacheWrite: 0,
    tieredPricing: [] as PricingTier[],
  };
  const total = estimateUsageCost({
    usage: { input: 1000, output: 500, cacheRead: 2000 },
    cost,
  });
  // Flat rates apply: (1000*1 + 500*2 + 2000*0.5) / 1M = 0.003
  // Explicit precision: the default toBeCloseTo tolerance (0.005) is larger
  // than the expected value, so a broken fallback returning 0 would still pass.
  expect(total).toBeCloseTo(0.003, 6);
});
|
||||
|
||||
it("bills overflow input tokens at last tier rate when input exceeds max range", () => {
  // The schedule stops at 128k while the request carries 200k input tokens:
  //   [0, 32k)     input 0.30, output 1.50
  //   [32k, 128k)  input 0.50, output 2.50
  //   beyond 128k  72k tokens, billed at the second tier's rates
  const schedule: PricingTier[] = [
    { range: [0, 32_000], input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0 },
    { range: [32_000, 128_000], input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 0.3,
    output: 1.5,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 200k input, 10k output.
  //   input shares: 32k (0.16), 96k (0.48), overflow 72k (0.36)
  //   input  = (32000*0.3 + 96000*0.5 + 72000*0.5) / 1M = 93600 / 1M = 0.0936
  //   output = 10000 * (0.16*1.5 + 0.48*2.5 + 0.36*2.5) / 1M
  //          = 10000 * 2.34 / 1M                         = 0.0234
  //   total  = 0.117
  const estimate = estimateUsageCost({
    usage: { input: 200_000, output: 10_000 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.117, 4);
});
|
||||
|
||||
it("bills overflow at last tier when only a single small-range tier exists (e.g. <30K)", () => {
  // One tier covering [0, 30k) while the request carries 100k input tokens.
  const schedule: PricingTier[] = [
    { range: [0, 30_000], input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0 },
  ];
  const pricing = {
    input: 1.0,
    output: 3.0,
    cacheRead: 0.5,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 100k input, 5k output, 2k cacheRead.
  //   in-range share 30k (0.3) + overflow share 70k (0.7), both at tier-1 rates;
  //   the shares add up to 1.0, so output and cache are billed in full.
  //   input     = (30000*1.0 + 70000*1.0) / 1M       = 0.1
  //   output    = 5000 * (0.3*3.0 + 0.7*3.0) / 1M    = 0.015
  //   cacheRead = 2000 * (0.3*0.5 + 0.7*0.5) / 1M    = 0.001
  //   total     = 0.116
  const estimate = estimateUsageCost({
    usage: { input: 100_000, output: 5_000, cacheRead: 2_000 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.116, 4);
});
|
||||
|
||||
it("supports open-ended range [start] in tiered pricing (greater-than syntax)", () => {
  // Schedule whose last tier has no upper bound:
  //   [0, 32k)          input 0.30, output 1.50
  //   [32k, Infinity)   input 0.50, output 2.50
  const schedule: PricingTier[] = [
    { range: [0, 32_000], input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0 },
    { range: [32_000, Infinity], input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 0.3,
    output: 1.5,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 200k input, 10k output. Nothing overflows: the open-ended tier
  // absorbs everything past 32k.
  //   input shares: 32k (0.16), 168k (0.84)
  //   input  = (32000*0.3 + 168000*0.5) / 1M          = 93600 / 1M = 0.0936
  //   output = 10000 * (0.16*1.5 + 0.84*2.5) / 1M     = 23400 / 1M = 0.0234
  //   total  = 0.117
  const estimate = estimateUsageCost({
    usage: { input: 200_000, output: 10_000 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.117, 4);
});
|
||||
|
||||
it("uses declared tier ranges instead of sequential widths", () => {
  // Tiers are deliberately listed out of order; billing must follow each
  // tier's declared range rather than its position in the array.
  const schedule: PricingTier[] = [
    { range: [100, 200], input: 1, output: 10, cacheRead: 0, cacheWrite: 0 },
    { range: [0, 100], input: 2, output: 20, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 1,
    output: 10,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 150 input, 60 output.
  //   [0, 100) holds 100 tokens (2/3), [100, 200) holds 50 tokens (1/3)
  //   input  = (100*2 + 50*1) / 1M              = 250 / 1M
  //   output = 60 * (2/3*20 + 1/3*10) / 1M      = 1000 / 1M
  //   total  = 1250 / 1M = 0.00125
  const estimate = estimateUsageCost({
    usage: { input: 150, output: 60 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.00125, 8);
});
|
||||
|
||||
it("bills malformed tier gaps at a fallback tier instead of dropping them", () => {
  // The schedule leaves [50, 100) uncovered.
  const schedule: PricingTier[] = [
    { range: [0, 50], input: 1, output: 10, cacheRead: 0, cacheWrite: 0 },
    { range: [100, 150], input: 3, output: 30, cacheRead: 0, cacheWrite: 0 },
  ];
  const pricing = {
    input: 1,
    output: 10,
    cacheRead: 0,
    cacheWrite: 0,
    tieredPricing: schedule,
  };

  // Usage: 150 input, 60 output.
  //   [0, 50) holds 50 tokens, [100, 150) holds 50 tokens, and the 50 tokens
  //   in the gap are billed at the last matched tier (rates 3 / 30).
  //   input  = (50*1 + 50*3 + 50*3) / 1M                  = 350 / 1M
  //   output = 60 * (1/3*10 + 1/3*30 + 1/3*30) / 1M       = 1400 / 1M
  //   total  = 1750 / 1M = 0.00175
  const estimate = estimateUsageCost({
    usage: { input: 150, output: 60 },
    cost: pricing,
  });

  expect(estimate).toBeCloseTo(0.00175, 8);
});
|
||||
|
||||
it("normalizes open-ended range from models.json ([start] and [start, -1])", async () => {
  // Both models share the bounded first tier and differ only in how the
  // second, open-ended tier spells its range.
  const boundedTier = { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] };
  const modelWithOpenRange = (id: string, openRange: number[]) => ({
    id,
    cost: {
      input: 0.46,
      output: 2.3,
      cacheRead: 0,
      cacheWrite: 0,
      tieredPricing: [
        boundedTier,
        { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: openRange },
      ],
    },
  });
  const modelsFile = {
    providers: {
      volcengine: {
        models: [
          modelWithOpenRange("doubao-open-ended", [32000]),
          modelWithOpenRange("doubao-neg-one", [32000, -1]),
        ],
      },
    },
  };
  await fs.writeFile(
    path.join(agentDir, "models.json"),
    JSON.stringify(modelsFile, null, 2),
    "utf8",
  );

  // `[32000]` and `[32000, -1]` must both come back as `[32000, Infinity]`.
  for (const modelId of ["doubao-open-ended", "doubao-neg-one"]) {
    const resolved = resolveModelCostConfig({ provider: "volcengine", model: modelId });
    expect(resolved).toBeDefined();
    expect(resolved?.tieredPricing).toHaveLength(2);
    expect(resolved?.tieredPricing?.[1]?.range).toEqual([32000, Infinity]);
  }
});
|
||||
|
||||
it("resolves tiered pricing from models.json", async () => {
  // A provider entry whose model declares a two-tier schedule.
  const modelsFile = {
    providers: {
      volcengine: {
        models: [
          {
            id: "doubao-seed-2-0-pro",
            cost: {
              input: 0.46,
              output: 2.3,
              cacheRead: 0,
              cacheWrite: 0,
              tieredPricing: [
                { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
                { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000, 128000] },
              ],
            },
          },
        ],
      },
    },
  };
  const modelsPath = path.join(agentDir, "models.json");
  await fs.writeFile(modelsPath, JSON.stringify(modelsFile, null, 2), "utf8");

  const resolved = resolveModelCostConfig({
    provider: "volcengine",
    model: "doubao-seed-2-0-pro",
  });

  expect(resolved).toBeDefined();
  expect(resolved?.tieredPricing).toHaveLength(2);
  const [lowTier, highTier] = resolved?.tieredPricing ?? [];
  expect(lowTier?.range).toEqual([0, 32000]);
  expect(highTier?.input).toBe(0.7);
});
|
||||
|
||||
it("resolves tiered pricing from cached gateway (LiteLLM)", () => {
  // Seed the gateway pricing cache with a two-tier schedule for the model.
  const tieredPricing = [
    { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] as [number, number] },
    {
      input: 0.7,
      output: 3.5,
      cacheRead: 0,
      cacheWrite: 0,
      range: [32000, 128000] as [number, number],
    },
  ];
  __setGatewayModelPricingForTest([
    {
      provider: "volcengine",
      model: "doubao-seed",
      pricing: { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, tieredPricing },
    },
  ]);

  const resolved = resolveModelCostConfig({ provider: "volcengine", model: "doubao-seed" });

  expect(resolved).toBeDefined();
  expect(resolved?.tieredPricing).toHaveLength(2);
});
|
||||
});
|
||||
|
||||
@@ -8,11 +8,41 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { getCachedGatewayModelPricing } from "../gateway/model-pricing-cache.js";
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
|
||||
/**
 * One step of a tiered-pricing schedule. Rates are USD per million tokens,
 * the same unit as the flat `ModelCostConfig` fields.
 *
 * `range` is a half-open `[start, end)` interval measured in *input* tokens.
 * Schedules are expected to be ascending by `range[0]` and contiguous.
 */
export type PricingTier = {
  /** Rate for input tokens. */
  input: number;
  /** Rate for output tokens. */
  output: number;
  /** Rate for cache-read tokens. */
  cacheRead: number;
  /** Rate for cache-write tokens. */
  cacheWrite: number;
  /** Half-open `[start, end)` interval on the input-token axis. */
  range: [start: number, end: number];
};
|
||||
|
||||
/**
 * Tier shape before normalization. It matches `PricingTier` except that
 * `range` may omit its end (`[start]`) to mean "no upper bound".
 */
type RawPricingTier = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  /** Either a bounded `[start, end]` pair or an open-ended `[start]`. */
  range: [start: number, end: number] | [start: number];
};
|
||||
|
||||
/** Per-model pricing: flat rates plus an optional tiered schedule. */
export type ModelCostConfig = {
  /** Flat rate for input tokens. */
  input: number;
  /** Flat rate for output tokens. */
  output: number;
  /** Flat rate for cache-read tokens. */
  cacheRead: number;
  /** Flat rate for cache-write tokens. */
  cacheWrite: number;
  /**
   * Optional tiered schedule. When it is present and non-empty,
   * `estimateUsageCost` prices usage from these tiers instead of the flat
   * rates; the flat rates remain as the default / first-tier values for
   * callers that do not know about tiers.
   */
  tieredPricing?: PricingTier[];
};
|
||||
|
||||
export type UsageTotals = {
|
||||
@@ -99,6 +129,47 @@ function shouldUseNormalizedCostLookup(params: { provider?: string; model?: stri
|
||||
return provider === "anthropic" || provider === "openrouter" || provider === "vercel-ai-gateway";
|
||||
}
|
||||
|
||||
/**
 * Normalize a raw `tieredPricing` array from models.json / config.
 *
 * - Open-ended ranges such as `[128000]` or `[128000, -1]` become
 *   `[128000, Infinity]`; any end that is not a finite number greater than
 *   the start is treated as open-ended as well.
 * - Entries that are not objects, have no usable range start, or carry a
 *   non-finite rate are dropped.
 * - The surviving tiers are returned sorted by ascending range start.
 *
 * @param raw Untrusted tier list; may be undefined, and at runtime may not
 *   match its declared type because it originates from user-edited JSON.
 * @returns The normalized tiers, or `undefined` when none are usable.
 */
function normalizeTieredPricing(raw: RawPricingTier[] | undefined): PricingTier[] | undefined {
  // A non-array value (e.g. an object written by mistake) must not reach the
  // loop below, where iterating it would throw.
  if (!Array.isArray(raw) || raw.length === 0) {
    return undefined;
  }
  const result: PricingTier[] = [];
  for (const tier of raw) {
    // Skip `null` / primitive entries instead of throwing on property access.
    if (!tier || typeof tier !== "object") {
      continue;
    }
    const range = tier.range;
    if (!Array.isArray(range) || range.length < 1) {
      continue;
    }
    const start = typeof range[0] === "number" ? range[0] : NaN;
    if (!Number.isFinite(start)) {
      continue;
    }
    const rawEnd = range.length >= 2 ? range[1] : null;
    // Missing, -1, non-finite, or end <= start all mean "no upper bound".
    const end =
      typeof rawEnd === "number" && Number.isFinite(rawEnd) && rawEnd > start ? rawEnd : Infinity;
    if (
      !Number.isFinite(tier.input) ||
      !Number.isFinite(tier.output) ||
      !Number.isFinite(tier.cacheRead) ||
      !Number.isFinite(tier.cacheWrite)
    ) {
      continue;
    }
    result.push({
      input: tier.input,
      output: tier.output,
      cacheRead: tier.cacheRead,
      cacheWrite: tier.cacheWrite,
      range: [start, end],
    });
  }
  if (result.length === 0) {
    return undefined;
  }
  // `result` is a fresh local array, so sorting it in place is safe.
  return result.sort((a, b) => a.range[0] - b.range[0]);
}
|
||||
|
||||
function buildProviderCostIndex(
|
||||
providers: Record<string, ModelProviderConfig> | undefined,
|
||||
options?: { allowPluginNormalization?: boolean },
|
||||
@@ -113,7 +184,16 @@ function buildProviderCostIndex(
|
||||
const normalized = normalizeModelRef(normalizedProvider, model.id, {
|
||||
allowPluginNormalization: options?.allowPluginNormalization,
|
||||
});
|
||||
entries.set(modelKey(normalized.provider, normalized.model), model.cost);
|
||||
const cost = { ...model.cost };
|
||||
const normalizedTiers = normalizeTieredPricing(cost.tieredPricing);
|
||||
const costConfig: ModelCostConfig = {
|
||||
input: cost.input,
|
||||
output: cost.output,
|
||||
cacheRead: cost.cacheRead,
|
||||
cacheWrite: cost.cacheWrite,
|
||||
...(normalizedTiers ? { tieredPricing: normalizedTiers } : {}),
|
||||
};
|
||||
entries.set(modelKey(normalized.provider, normalized.model), costConfig);
|
||||
}
|
||||
}
|
||||
return entries;
|
||||
@@ -233,6 +313,87 @@ export function resolveModelCostConfig(params: {
|
||||
const toNumber = (value: number | undefined): number =>
|
||||
typeof value === "number" && Number.isFinite(value) ? value : 0;
|
||||
|
||||
/**
 * Price one usage record against a tiered schedule, covering input, output,
 * cache-read and cache-write tokens together.
 *
 * Tier ranges live on the **input** token axis. The share of the input that
 * lands in a tier is also the share of output / cache tokens billed at that
 * tier's rates. Example with 40 000 input tokens and tiers
 *   [0, 32000)      → $0.30/M input, $1.50/M output
 *   [32000, 128000) → $0.50/M input, $2.50/M output
 * 80 % of every dimension is billed at the first tier and 20 % at the second.
 *
 * Tiers are sorted by range start before use. Overlapping ranges are billed
 * once, and input not covered by any tier (gaps or overflow) is billed at the
 * last tier that received tokens, or at the highest tier if none did.
 * With no input tokens, output and cache usage are billed at the first tier.
 *
 * @returns The summed cost in per-million units; the caller divides by
 *   1 000 000.
 */
function computeTieredCost(
  tiers: PricingTier[],
  input: number,
  output: number,
  cacheRead: number,
  cacheWrite: number,
): number {
  const ordered = [...tiers].sort((a, b) => a.range[0] - b.range[0]);

  if (input <= 0) {
    // No input means no tier split; price residual usage at the first tier.
    const first = ordered[0];
    return first
      ? output * first.output + cacheRead * first.cacheRead + cacheWrite * first.cacheWrite
      : 0;
  }

  // Cost of `tokens` input tokens in `tier`, plus the matching share of the
  // output and cache dimensions.
  const chargeFor = (tier: PricingTier, tokens: number): number => {
    const share = tokens / input;
    return (
      tokens * tier.input +
      output * share * tier.output +
      cacheRead * share * tier.cacheRead +
      cacheWrite * share * tier.cacheWrite
    );
  };

  let sum = 0;
  let billed = 0;
  // Highest range end seen so far; keeps overlapping tiers from double billing.
  let frontier = 0;
  let lastUsed: PricingTier | undefined;

  for (const tier of ordered) {
    const [lo, hi] = tier.range;
    const from = Math.max(0, lo, frontier);
    const to = Math.min(input, hi);
    const tokens = Math.max(0, to - from);
    frontier = hi > frontier ? hi : frontier;
    if (tokens <= 0) {
      continue;
    }
    sum += chargeFor(tier, tokens);
    billed += tokens;
    lastUsed = tier;
  }

  // Whatever no tier claimed is still billed, so malformed remote or user
  // ranges cannot lead to an underestimate.
  const remainder = input - billed;
  if (remainder > 0) {
    const fallback = lastUsed ?? ordered[ordered.length - 1];
    if (!fallback) {
      return sum;
    }
    sum += chargeFor(fallback, remainder);
  }

  return sum;
}
|
||||
|
||||
export function estimateUsageCost(params: {
|
||||
usage?: NormalizedUsage | UsageTotals | null;
|
||||
cost?: ModelCostConfig;
|
||||
@@ -246,11 +407,18 @@ export function estimateUsageCost(params: {
|
||||
const output = toNumber(usage.output);
|
||||
const cacheRead = toNumber(usage.cacheRead);
|
||||
const cacheWrite = toNumber(usage.cacheWrite);
|
||||
const total =
|
||||
input * cost.input +
|
||||
output * cost.output +
|
||||
cacheRead * cost.cacheRead +
|
||||
cacheWrite * cost.cacheWrite;
|
||||
|
||||
let total: number;
|
||||
if (cost.tieredPricing && cost.tieredPricing.length > 0) {
|
||||
total = computeTieredCost(cost.tieredPricing, input, output, cacheRead, cacheWrite);
|
||||
} else {
|
||||
total =
|
||||
input * cost.input +
|
||||
output * cost.output +
|
||||
cacheRead * cost.cacheRead +
|
||||
cacheWrite * cost.cacheWrite;
|
||||
}
|
||||
|
||||
if (!Number.isFinite(total)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user