feat: add tiered model pricing support (#67605)

Adds tiered model pricing support for cost tracking, keeps configured pricing ahead of cached catalog values, and includes latest Moonshot Kimi K2.6/K2.5 cost estimates.\n\nThanks @sliverp.
2026-05-06 05:10:44 +00:00 · 2026-04-21 10:02:57 +08:00
parent 8d747d20b8
commit b938e6398b
18 changed files with 1351 additions and 118 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai

 ### Changes

+- Models/costs: support tiered model pricing from cached catalogs and configured models, and include bundled Moonshot Kimi K2.6/K2.5 cost estimates for token-usage reports. (#67605) Thanks @sliverp.
 - Plugins/tests: reuse plugin loader alias and Jiti config resolution across repeated same-context loads, reducing import-heavy test overhead. (#69316) Thanks @amknight.
 - Cron: split runtime execution state into `jobs-state.json` so `jobs.json` stays stable for git-tracked job definitions. (#63105) Thanks @Feelw00.
 - Agents/compaction: send opt-in start and completion notices during context compaction. (#67830) Thanks @feniix.
--- a/docs/providers/moonshot.md
+++ b/docs/providers/moonshot.md
@@ -31,6 +31,12 @@ Moonshot and Kimi Coding are **separate providers**. Keys are not interchangeabl

 [//]: # "moonshot-kimi-k2-ids:end"

+Bundled cost estimates for current Moonshot-hosted K2 models use Moonshot's
+published pay-as-you-go rates: Kimi K2.6 is $0.16/MTok cache hit,
+$0.95/MTok input, and $4.00/MTok output; Kimi K2.5 is $0.10/MTok cache hit,
+$0.60/MTok input, and $3.00/MTok output. Other legacy catalog entries keep
+zero-cost placeholders unless you override them in config.
+
 ## Getting started

 Choose your provider and follow the setup steps.
@@ -108,7 +114,7 @@ Choose your provider and follow the setup steps.
                name: "Kimi K2.6",
                reasoning: false,
                input: ["text", "image"],
-                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+                cost: { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 },
                contextWindow: 262144,
                maxTokens: 262144,
              },
@@ -117,7 +123,7 @@ Choose your provider and follow the setup steps.
                name: "Kimi K2.5",
                reasoning: false,
                input: ["text", "image"],
-                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+                cost: { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 },
                contextWindow: 262144,
                maxTokens: 262144,
              },
--- a/extensions/moonshot/provider-catalog.test.ts
+++ b/extensions/moonshot/provider-catalog.test.ts
@@ -19,6 +19,18 @@ describe("moonshot provider catalog", () => {
      "kimi-k2-thinking-turbo",
      "kimi-k2-turbo",
    ]);
+    expect(provider.models.find((model) => model.id === "kimi-k2.6")?.cost).toEqual({
+      input: 0.95,
+      output: 4,
+      cacheRead: 0.16,
+      cacheWrite: 0,
+    });
+    expect(provider.models.find((model) => model.id === "kimi-k2.5")?.cost).toEqual({
+      input: 0.6,
+      output: 3,
+      cacheRead: 0.1,
+      cacheWrite: 0,
+    });
  });

  it("opts native Moonshot baseUrls into streaming usage only inside the extension", () => {
--- a/extensions/moonshot/provider-catalog.ts
+++ b/extensions/moonshot/provider-catalog.ts
@@ -15,6 +15,18 @@ const MOONSHOT_DEFAULT_COST = {
  cacheRead: 0,
  cacheWrite: 0,
 };
+const MOONSHOT_K2_6_COST = {
+  input: 0.95,
+  output: 4,
+  cacheRead: 0.16,
+  cacheWrite: 0,
+};
+const MOONSHOT_K2_5_COST = {
+  input: 0.6,
+  output: 3,
+  cacheRead: 0.1,
+  cacheWrite: 0,
+};

 const MOONSHOT_MODEL_CATALOG = [
  {
@@ -22,7 +34,7 @@ const MOONSHOT_MODEL_CATALOG = [
    name: "Kimi K2.6",
    reasoning: false,
    input: ["text", "image"],
-    cost: MOONSHOT_DEFAULT_COST,
+    cost: MOONSHOT_K2_6_COST,
    contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW,
    maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS,
  },
@@ -31,7 +43,7 @@ const MOONSHOT_MODEL_CATALOG = [
    name: "Kimi K2.5",
    reasoning: false,
    input: ["text", "image"],
-    cost: MOONSHOT_DEFAULT_COST,
+    cost: MOONSHOT_K2_5_COST,
    contextWindow: MOONSHOT_DEFAULT_CONTEXT_WINDOW,
    maxTokens: MOONSHOT_DEFAULT_MAX_TOKENS,
  },
--- a/src/auto-reply/reply/agent-runner-usage-line.ts
+++ b/src/auto-reply/reply/agent-runner-usage-line.ts
@@ -1,4 +1,9 @@
-import { estimateUsageCost, formatTokenCount, formatUsd } from "../../utils/usage-format.js";
+import {
+  estimateUsageCost,
+  formatTokenCount,
+  formatUsd,
+  type ModelCostConfig,
+} from "../../utils/usage-format.js";
 import type { ReplyPayload } from "../types.js";

 export const formatResponseUsageLine = (params: {
@@ -9,12 +14,7 @@ export const formatResponseUsageLine = (params: {
    cacheWrite?: number;
  };
  showCost: boolean;
-  costConfig?: {
-    input: number;
-    output: number;
-    cacheRead: number;
-    cacheWrite: number;
-  };
+  costConfig?: ModelCostConfig;
 }): string | null => {
  const usage = params.usage;
  if (!usage) {
--- a/src/commands/channels/status.ts
+++ b/src/commands/channels/status.ts
@@ -148,7 +148,7 @@ export async function channelsStatusCommand(
  opts: ChannelsStatusOptions,
  runtime: RuntimeEnv = defaultRuntime,
 ) {
-  const timeoutMs = Number(opts.timeout ?? 10_000);
+  const timeoutMs = Number(opts.timeout ?? (opts.probe ? 30_000 : 10_000));
  const statusLabel = opts.probe ? "Checking channel status (probe)…" : "Checking channel status…";
  const shouldLogStatus = opts.json !== true && !process.stderr.isTTY;
  if (shouldLogStatus) {
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -62,6 +62,7 @@ function resolveModelCost(
    cacheRead: typeof raw?.cacheRead === "number" ? raw.cacheRead : DEFAULT_MODEL_COST.cacheRead,
    cacheWrite:
      typeof raw?.cacheWrite === "number" ? raw.cacheWrite : DEFAULT_MODEL_COST.cacheWrite,
+    ...(raw?.tieredPricing ? { tieredPricing: raw.tieredPricing } : {}),
  };
 }

--- a/src/config/schema.base.generated.ts
+++ b/src/config/schema.base.generated.ts
@@ -2767,6 +2767,51 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
                          cacheWrite: {
                            type: "number",
                          },
+                          tieredPricing: {
+                            type: "array",
+                            items: {
+                              type: "object",
+                              properties: {
+                                input: {
+                                  type: "number",
+                                },
+                                output: {
+                                  type: "number",
+                                },
+                                cacheRead: {
+                                  type: "number",
+                                },
+                                cacheWrite: {
+                                  type: "number",
+                                },
+                                range: {
+                                  anyOf: [
+                                    {
+                                      type: "array",
+                                      items: [
+                                        {
+                                          type: "number",
+                                        },
+                                        {
+                                          type: "number",
+                                        },
+                                      ],
+                                    },
+                                    {
+                                      type: "array",
+                                      items: [
+                                        {
+                                          type: "number",
+                                        },
+                                      ],
+                                    },
+                                  ],
+                                },
+                              },
+                              required: ["input", "output", "cacheRead", "cacheWrite", "range"],
+                              additionalProperties: false,
+                            },
+                          },
                        },
                        additionalProperties: false,
                      },
--- a/src/config/types.models.ts
+++ b/src/config/types.models.ts
@@ -61,6 +61,18 @@ export type ModelDefinitionConfig = {
    output: number;
    cacheRead: number;
    cacheWrite: number;
+    /** Optional tiered pricing.  When present, cost calculation uses
+     *  per-tier rates instead of the flat rates above.  Prices are
+     *  USD / million tokens; ranges are half-open `[start, end)` on the
+     *  input-token axis. */
+    tieredPricing?: Array<{
+      input: number;
+      output: number;
+      cacheRead: number;
+      cacheWrite: number;
+      /** Bounded tier: `[start, end)`. Open-ended top tier: `[start]` (normalized to `[start, Infinity]` at load time). */
+      range: [number, number] | [number];
+    }>;
  };
  contextWindow: number;
  /**
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -316,6 +316,19 @@ export const ModelDefinitionSchema = z
        output: z.number().optional(),
        cacheRead: z.number().optional(),
        cacheWrite: z.number().optional(),
+        tieredPricing: z
+          .array(
+            z
+              .object({
+                input: z.number(),
+                output: z.number(),
+                cacheRead: z.number(),
+                cacheWrite: z.number(),
+                range: z.union([z.tuple([z.number(), z.number()]), z.tuple([z.number()])]),
+              })
+              .strict(),
+          )
+          .optional(),
      })
      .strict()
      .optional(),
--- a/src/gateway/model-pricing-cache-state.ts
+++ b/src/gateway/model-pricing-cache-state.ts
@@ -2,11 +2,22 @@ import { normalizeModelRef } from "../agents/model-selection.js";
 import { normalizeProviderId } from "../agents/provider-id.js";
 import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";

+export type CachedPricingTier = {
+  input: number;
+  output: number;
+  cacheRead: number;
+  cacheWrite: number;
+  /** [startTokens, endTokens) — half-open interval on the input token axis. */
+  range: [number, number];
+};
+
 export type CachedModelPricing = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
+  /** Optional tiered pricing tiers sourced from LiteLLM or local config. */
+  tieredPricing?: CachedPricingTier[];
 };

 let cachedPricing = new Map<string, CachedModelPricing>();
--- a/src/gateway/model-pricing-cache.test.ts
+++ b/src/gateway/model-pricing-cache.test.ts
@@ -134,9 +134,10 @@ describe("model-pricing-cache", () => {
      },
    } as unknown as OpenClawConfig;

-    const fetchImpl = withFetchPreconnect(
-      async () =>
-        new Response(
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        return new Response(
          JSON.stringify({
            data: [
              {
@@ -169,8 +170,14 @@ describe("model-pricing-cache", () => {
            status: 200,
            headers: { "Content-Type": "application/json" },
          },
-        ),
-    );
+        );
+      }
+      // LiteLLM — return empty object (no tiered pricing for these models)
+      return new Response(JSON.stringify({}), {
+        status: 200,
+        headers: { "Content-Type": "application/json" },
+      });
+    });

    await refreshGatewayModelPricingCache({ config, fetchImpl });

@@ -210,9 +217,10 @@ describe("model-pricing-cache", () => {
      },
    } as unknown as OpenClawConfig;

-    const fetchImpl = withFetchPreconnect(
-      async () =>
-        new Response(
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        return new Response(
          JSON.stringify({
            data: [
              {
@@ -228,8 +236,13 @@ describe("model-pricing-cache", () => {
            status: 200,
            headers: { "Content-Type": "application/json" },
          },
-        ),
-    );
+        );
+      }
+      return new Response(JSON.stringify({}), {
+        status: 200,
+        headers: { "Content-Type": "application/json" },
+      });
+    });

    await expect(refreshGatewayModelPricingCache({ config, fetchImpl })).resolves.toBeUndefined();
    expect(
@@ -241,4 +254,303 @@ describe("model-pricing-cache", () => {
      cacheWrite: 0,
    });
  });
+
+  it("loads tiered pricing from LiteLLM and merges with OpenRouter flat pricing", async () => {
+    const config = {
+      agents: {
+        defaults: {
+          model: { primary: "volcengine/doubao-seed-2-0-pro" },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        // OpenRouter does not have this model
+        return new Response(JSON.stringify({ data: [] }), {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+      // LiteLLM catalog
+      return new Response(
+        JSON.stringify({
+          "volcengine/doubao-seed-2-0-pro": {
+            input_cost_per_token: 4.6e-7,
+            output_cost_per_token: 2.3e-6,
+            litellm_provider: "volcengine",
+            tiered_pricing: [
+              {
+                input_cost_per_token: 4.6e-7,
+                output_cost_per_token: 2.3e-6,
+                range: [0, 32000],
+              },
+              {
+                input_cost_per_token: 7e-7,
+                output_cost_per_token: 3.5e-6,
+                range: [32000, 128000],
+              },
+              {
+                input_cost_per_token: 1.4e-6,
+                output_cost_per_token: 7e-6,
+                range: [128000, 256000],
+              },
+            ],
+          },
+        }),
+        {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        },
+      );
+    });
+
+    await refreshGatewayModelPricingCache({ config, fetchImpl });
+
+    const pricing = getCachedGatewayModelPricing({
+      provider: "volcengine",
+      model: "doubao-seed-2-0-pro",
+    });
+
+    expect(pricing).toBeDefined();
+    expect(pricing!.input).toBeCloseTo(0.46);
+    expect(pricing!.output).toBeCloseTo(2.3);
+    expect(pricing!.tieredPricing).toHaveLength(3);
+    expect(pricing!.tieredPricing![0]).toEqual({
+      input: expect.closeTo(0.46),
+      output: expect.closeTo(2.3),
+      cacheRead: 0,
+      cacheWrite: 0,
+      range: [0, 32000],
+    });
+    expect(pricing!.tieredPricing![2].range).toEqual([128000, 256000]);
+  });
+
+  it("normalizes LiteLLM open-ended range [start] to [start, Infinity]", async () => {
+    const config = {
+      agents: {
+        defaults: {
+          model: { primary: "volcengine/doubao-open" },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        return new Response(JSON.stringify({ data: [] }), {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+      return new Response(
+        JSON.stringify({
+          "volcengine/doubao-open": {
+            input_cost_per_token: 4.6e-7,
+            output_cost_per_token: 2.3e-6,
+            litellm_provider: "volcengine",
+            tiered_pricing: [
+              {
+                input_cost_per_token: 4.6e-7,
+                output_cost_per_token: 2.3e-6,
+                range: [0, 32000],
+              },
+              {
+                input_cost_per_token: 7e-7,
+                output_cost_per_token: 3.5e-6,
+                range: [32000],
+              },
+            ],
+          },
+        }),
+        {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        },
+      );
+    });
+
+    await refreshGatewayModelPricingCache({ config, fetchImpl });
+
+    const pricing = getCachedGatewayModelPricing({
+      provider: "volcengine",
+      model: "doubao-open",
+    });
+
+    expect(pricing).toBeDefined();
+    expect(pricing!.tieredPricing).toHaveLength(2);
+    expect(pricing!.tieredPricing![0].range).toEqual([0, 32000]);
+    expect(pricing!.tieredPricing![1].range).toEqual([32000, Infinity]);
+  });
+
+  it("merges OpenRouter flat pricing with LiteLLM tiered pricing", async () => {
+    const config = {
+      agents: {
+        defaults: {
+          model: { primary: "dashscope/qwen-plus" },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        return new Response(
+          JSON.stringify({
+            data: [
+              {
+                id: "dashscope/qwen-plus",
+                pricing: {
+                  prompt: "0.0000004",
+                  completion: "0.0000024",
+                },
+              },
+            ],
+          }),
+          {
+            status: 200,
+            headers: { "Content-Type": "application/json" },
+          },
+        );
+      }
+      return new Response(
+        JSON.stringify({
+          "dashscope/qwen-plus": {
+            input_cost_per_token: 4e-7,
+            output_cost_per_token: 2.4e-6,
+            litellm_provider: "dashscope",
+            tiered_pricing: [
+              {
+                input_cost_per_token: 4e-7,
+                output_cost_per_token: 2.4e-6,
+                range: [0, 256000],
+              },
+              {
+                input_cost_per_token: 5e-7,
+                output_cost_per_token: 3e-6,
+                range: [256000, 1000000],
+              },
+            ],
+          },
+        }),
+        {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        },
+      );
+    });
+
+    await refreshGatewayModelPricingCache({ config, fetchImpl });
+
+    const pricing = getCachedGatewayModelPricing({
+      provider: "dashscope",
+      model: "qwen-plus",
+    });
+
+    expect(pricing).toBeDefined();
+    // OpenRouter base flat pricing is used
+    expect(pricing!.input).toBeCloseTo(0.4);
+    expect(pricing!.output).toBeCloseTo(2.4);
+    // LiteLLM tiered pricing is merged in
+    expect(pricing!.tieredPricing).toHaveLength(2);
+    expect(pricing!.tieredPricing![1].range).toEqual([256000, 1000000]);
+  });
+
+  it("falls back gracefully when LiteLLM fetch fails", async () => {
+    const config = {
+      agents: {
+        defaults: {
+          model: { primary: "anthropic/claude-opus-4-6" },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        return new Response(
+          JSON.stringify({
+            data: [
+              {
+                id: "anthropic/claude-opus-4.6",
+                pricing: {
+                  prompt: "0.000005",
+                  completion: "0.000025",
+                },
+              },
+            ],
+          }),
+          {
+            status: 200,
+            headers: { "Content-Type": "application/json" },
+          },
+        );
+      }
+      // LiteLLM fails
+      return new Response("Internal Server Error", { status: 500 });
+    });
+
+    await refreshGatewayModelPricingCache({ config, fetchImpl });
+
+    // OpenRouter pricing still works
+    expect(
+      getCachedGatewayModelPricing({ provider: "anthropic", model: "claude-opus-4-6" }),
+    ).toEqual({
+      input: 5,
+      output: 25,
+      cacheRead: 0,
+      cacheWrite: 0,
+    });
+  });
+
+  it("treats oversized LiteLLM catalog responses as source failures", async () => {
+    const config = {
+      agents: {
+        defaults: {
+          model: { primary: "moonshot/kimi-k2.6" },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const fetchImpl = withFetchPreconnect(async (input: RequestInfo | URL) => {
+      const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      if (url.includes("openrouter.ai")) {
+        return new Response(
+          JSON.stringify({
+            data: [
+              {
+                id: "moonshotai/kimi-k2.6",
+                pricing: {
+                  prompt: "0.00000095",
+                  completion: "0.000004",
+                  input_cache_read: "0.00000016",
+                },
+              },
+            ],
+          }),
+          {
+            status: 200,
+            headers: { "Content-Type": "application/json" },
+          },
+        );
+      }
+      return new Response("{}", {
+        status: 200,
+        headers: {
+          "Content-Type": "application/json",
+          "Content-Length": "6000000",
+        },
+      });
+    });
+
+    await refreshGatewayModelPricingCache({ config, fetchImpl });
+
+    expect(getCachedGatewayModelPricing({ provider: "moonshot", model: "kimi-k2.6" })).toEqual({
+      input: 0.95,
+      output: 4,
+      cacheRead: 0.16,
+      cacheWrite: 0,
+    });
+  });
 });
--- a/src/gateway/model-pricing-cache.ts
+++ b/src/gateway/model-pricing-cache.ts
@@ -19,6 +19,7 @@ import {
  getGatewayModelPricingCacheMeta as getGatewayModelPricingCacheMetaState,
  replaceGatewayModelPricingCache,
  type CachedModelPricing,
+  type CachedPricingTier,
 } from "./model-pricing-cache-state.js";

 type OpenRouterPricingEntry = {
@@ -36,8 +37,11 @@ type OpenRouterModelPayload = {
 export { getCachedGatewayModelPricing };

 const OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models";
+const LITELLM_PRICING_URL =
+  "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
 const CACHE_TTL_MS = 24 * 60 * 60_000;
 const FETCH_TIMEOUT_MS = 15_000;
+const MAX_PRICING_CATALOG_BYTES = 5 * 1024 * 1024;
 const PROVIDER_ALIAS_TO_OPENROUTER: Record<string, string> = {
  "google-gemini-cli": "google",
  kimi: "moonshotai",
@@ -98,7 +102,8 @@ function toPricePerMillion(value: number | null): number {
  if (value === null || value < 0 || !Number.isFinite(value)) {
    return 0;
  }
-  return value * 1_000_000;
+  const scaled = value * 1_000_000;
+  return Number.isFinite(scaled) ? scaled : 0;
 }

 function parseOpenRouterPricing(value: unknown): CachedModelPricing | null {
@@ -119,6 +124,136 @@ function parseOpenRouterPricing(value: unknown): CachedModelPricing | null {
  };
 }

+async function readPricingJsonObject(
+  response: Response,
+  source: string,
+): Promise<Record<string, unknown>> {
+  const contentLength = parseNumberString(response.headers.get("content-length"));
+  if (contentLength !== null && contentLength > MAX_PRICING_CATALOG_BYTES) {
+    throw new Error(`${source} pricing response too large: ${contentLength} bytes`);
+  }
+  const buffer = await response.arrayBuffer();
+  if (buffer.byteLength > MAX_PRICING_CATALOG_BYTES) {
+    throw new Error(`${source} pricing response too large: ${buffer.byteLength} bytes`);
+  }
+  const payload = JSON.parse(Buffer.from(buffer).toString("utf8")) as unknown;
+  if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
+    throw new Error(`${source} pricing response is not a JSON object`);
+  }
+  return payload as Record<string, unknown>;
+}
+
+// ---------------------------------------------------------------------------
+// LiteLLM tiered-pricing parsing
+// ---------------------------------------------------------------------------
+
+type LiteLLMModelEntry = Record<string, unknown>;
+
+type LiteLLMTierRaw = {
+  input_cost_per_token?: unknown;
+  output_cost_per_token?: unknown;
+  cache_read_input_token_cost?: unknown;
+  range?: unknown;
+};
+
+function parseLiteLLMTieredPricing(tiers: unknown): CachedPricingTier[] | undefined {
+  if (!Array.isArray(tiers) || tiers.length === 0) {
+    return undefined;
+  }
+  const result: CachedPricingTier[] = [];
+  for (const raw of tiers) {
+    if (!raw || typeof raw !== "object") {
+      continue;
+    }
+    const tier = raw as LiteLLMTierRaw;
+    const inputPerToken = parseNumberString(tier.input_cost_per_token);
+    const outputPerToken = parseNumberString(tier.output_cost_per_token);
+    if (inputPerToken === null || outputPerToken === null) {
+      continue;
+    }
+    const range = tier.range;
+    if (!Array.isArray(range) || range.length < 1) {
+      continue;
+    }
+    const start = parseNumberString(range[0]);
+    if (start === null) {
+      continue;
+    }
+    // Allow open-ended ranges: [128000], [128000, -1], [128000, null]
+    const rawEnd = range.length >= 2 ? parseNumberString(range[1]) : null;
+    const end = rawEnd === null || rawEnd <= start ? Infinity : rawEnd;
+    if (
+      !Number.isFinite(inputPerToken) ||
+      !Number.isFinite(outputPerToken) ||
+      inputPerToken < 0 ||
+      outputPerToken < 0
+    ) {
+      continue;
+    }
+    result.push({
+      input: toPricePerMillion(inputPerToken),
+      output: toPricePerMillion(outputPerToken),
+      cacheRead: toPricePerMillion(parseNumberString(tier.cache_read_input_token_cost)),
+      cacheWrite: 0,
+      range: [start, end],
+    });
+  }
+  return result.length > 0 ? result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined;
+}
+
+function parseLiteLLMPricing(entry: LiteLLMModelEntry): CachedModelPricing | null {
+  const inputPerToken = parseNumberString(entry.input_cost_per_token);
+  const outputPerToken = parseNumberString(entry.output_cost_per_token);
+  if (inputPerToken === null || outputPerToken === null) {
+    return null;
+  }
+  const pricing: CachedModelPricing = {
+    input: toPricePerMillion(inputPerToken),
+    output: toPricePerMillion(outputPerToken),
+    cacheRead: toPricePerMillion(parseNumberString(entry.cache_read_input_token_cost)),
+    cacheWrite: 0,
+  };
+  const tieredPricing = parseLiteLLMTieredPricing(entry.tiered_pricing);
+  if (tieredPricing) {
+    pricing.tieredPricing = tieredPricing;
+  }
+  return pricing;
+}
+
+type LiteLLMPricingCatalog = Map<string, CachedModelPricing>;
+
+async function fetchLiteLLMPricingCatalog(fetchImpl: typeof fetch): Promise<LiteLLMPricingCatalog> {
+  const response = await fetchImpl(LITELLM_PRICING_URL, {
+    headers: { Accept: "application/json" },
+    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
+  });
+  if (!response.ok) {
+    throw new Error(`LiteLLM pricing fetch failed: HTTP ${response.status}`);
+  }
+  const payload = await readPricingJsonObject(response, "LiteLLM");
+  const catalog: LiteLLMPricingCatalog = new Map();
+  for (const [key, value] of Object.entries(payload)) {
+    if (!value || typeof value !== "object") {
+      continue;
+    }
+    const entry = value as LiteLLMModelEntry;
+    const pricing = parseLiteLLMPricing(entry);
+    if (!pricing) {
+      continue;
+    }
+    catalog.set(key, pricing);
+  }
+  return catalog;
+}
+
+function resolveLiteLLMPricingForRef(params: {
+  ref: ModelRef;
+  catalog: LiteLLMPricingCatalog;
+}): CachedModelPricing | undefined {
+  // Only use provider-qualified key to avoid cross-provider pricing collisions.
+  return params.catalog.get(`${params.ref.provider}/${params.ref.model}`);
+}
+
 function canonicalizeOpenRouterProvider(provider: string): string {
  const normalized = normalizeModelRef(provider, "placeholder").provider;
  return PROVIDER_ALIAS_TO_OPENROUTER[normalized] ?? normalized;
@@ -328,7 +463,7 @@ async function fetchOpenRouterPricingCatalog(
  if (!response.ok) {
    throw new Error(`OpenRouter /models failed: HTTP ${response.status}`);
  }
-  const payload = (await response.json()) as { data?: unknown };
+  const payload = await readPricingJsonObject(response, "OpenRouter");
  const entries = Array.isArray(payload.data) ? payload.data : [];
  const catalog = new Map<string, OpenRouterPricingEntry>();
  for (const entry of entries) {
@@ -393,7 +528,23 @@ export async function refreshGatewayModelPricingCache(params: {
      return;
    }

-    const catalogById = await fetchOpenRouterPricingCatalog(fetchImpl);
+    // Fetch both pricing catalogs in parallel.  Each source is
+    // independently optional — a failure in one does not block the other.
+    let openRouterFailed = false;
+    let litellmFailed = false;
+    const [catalogById, litellmCatalog] = await Promise.all([
+      fetchOpenRouterPricingCatalog(fetchImpl).catch((error: unknown) => {
+        log.warn(`OpenRouter pricing fetch failed: ${String(error)}`);
+        openRouterFailed = true;
+        return new Map<string, OpenRouterPricingEntry>();
+      }),
+      fetchLiteLLMPricingCatalog(fetchImpl).catch((error: unknown) => {
+        log.warn(`LiteLLM pricing fetch failed: ${String(error)}`);
+        litellmFailed = true;
+        return new Map<string, CachedModelPricing>() as LiteLLMPricingCatalog;
+      }),
+    ]);
+
    const catalogByNormalizedId = new Map<string, OpenRouterPricingEntry>();
    for (const entry of catalogById.values()) {
      const normalizedId = canonicalizeOpenRouterLookupId(entry.id);
@@ -405,15 +556,62 @@ export async function refreshGatewayModelPricingCache(params: {

    const nextPricing = new Map<string, CachedModelPricing>();
    for (const ref of refs) {
-      const pricing = resolveCatalogPricingForRef({
+      // 1. Try OpenRouter first (existing behavior — flat pricing)
+      const openRouterPricing = resolveCatalogPricingForRef({
        ref,
        catalogById,
        catalogByNormalizedId,
      });
-      if (!pricing) {
-        continue;
+
+      // 2. Try LiteLLM (may contain tiered pricing)
+      const litellmPricing = resolveLiteLLMPricingForRef({
+        ref,
+        catalog: litellmCatalog,
+      });
+
+      // Merge strategy: OpenRouter provides the base flat pricing;
+      // LiteLLM enriches with tieredPricing when available.
+      // If only one source has data, use that one.
+      if (openRouterPricing && litellmPricing?.tieredPricing) {
+        // Both sources present and LiteLLM has tiers — merge.
+        nextPricing.set(modelKey(ref.provider, ref.model), {
+          ...openRouterPricing,
+          tieredPricing: litellmPricing.tieredPricing,
+        });
+      } else if (openRouterPricing) {
+        // Prefer OpenRouter flat pricing when LiteLLM has no tiers to contribute.
+        nextPricing.set(modelKey(ref.provider, ref.model), openRouterPricing);
+      } else if (litellmPricing) {
+        // Only LiteLLM has data — use it as-is.
+        nextPricing.set(modelKey(ref.provider, ref.model), litellmPricing);
+      }
+    }
+
+    // When either upstream source failed, preserve previously-cached entries
+    // for any models that the refresh could not resolve.  This prevents a
+    // single-source outage from silently dropping pricing for models that
+    // depended on the failed source.
+    if (openRouterFailed || litellmFailed) {
+      const existingMeta = getGatewayModelPricingCacheMetaState();
+      if (nextPricing.size === 0 && existingMeta.size > 0) {
+        // Both sources failed — retain the entire existing cache.
+        log.warn("Both pricing sources returned empty data — retaining existing cache");
+        scheduleRefresh({ config: params.config, fetchImpl });
+        return;
+      }
+      // Partial failure — back-fill missing models from the existing cache.
+      for (const ref of refs) {
+        const key = modelKey(ref.provider, ref.model);
+        if (!nextPricing.has(key)) {
+          const existing = getCachedGatewayModelPricing({
+            provider: ref.provider,
+            model: ref.model,
+          });
+          if (existing) {
+            nextPricing.set(key, existing);
+          }
+        }
      }
-      nextPricing.set(modelKey(ref.provider, ref.model), pricing);
    }

    replaceGatewayModelPricingCache(nextPricing);
--- a/src/gateway/server-methods/channels.status.test.ts
+++ b/src/gateway/server-methods/channels.status.test.ts
@@ -133,4 +133,33 @@ describe("channelsHandlers channels.status", () => {
      undefined,
    );
  });
+
+  it("caps probe timeout before passing it to channel plugins", async () => {
+    const autoEnabledConfig = { autoEnabled: true };
+    const probeAccount = vi.fn(async () => ({ ok: true }));
+    mocks.applyPluginAutoEnable.mockReturnValue({ config: autoEnabledConfig, changes: [] });
+    mocks.listChannelPlugins.mockReturnValue([
+      {
+        id: "whatsapp",
+        config: {
+          listAccountIds: () => ["default"],
+          resolveAccount: () => ({}),
+          isEnabled: () => true,
+          isConfigured: async () => true,
+        },
+        status: {
+          probeAccount,
+        },
+      },
+    ]);
+
+    await channelsHandlers["channels.status"](createOptions({ probe: true, timeoutMs: 999_999 }));
+
+    expect(probeAccount).toHaveBeenCalledWith(
+      expect.objectContaining({
+        timeoutMs: 30_000,
+        cfg: autoEnabledConfig,
+      }),
+    );
+  });
 });
--- a/src/gateway/server-methods/channels.ts
+++ b/src/gateway/server-methods/channels.ts
@@ -16,6 +16,7 @@ import { getChannelActivity } from "../../infra/channel-activity.js";
 import { DEFAULT_ACCOUNT_ID } from "../../routing/session-key.js";
 import { defaultRuntime } from "../../runtime.js";
 import { normalizeOptionalString } from "../../shared/string-coerce.js";
+import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js";
 import {
  ErrorCodes,
  errorShape,
@@ -41,6 +42,17 @@ type ChannelStartPayload = {
  started: boolean;
 };

+const CHANNEL_STATUS_MAX_TIMEOUT_MS = 30_000;
+const CHANNEL_STATUS_PROBE_CONCURRENCY = 5;
+
+function resolveChannelsStatusTimeoutMs(params: { probe: boolean; timeoutMsRaw: unknown }): number {
+  const fallback = params.probe ? CHANNEL_STATUS_MAX_TIMEOUT_MS : 10_000;
+  if (typeof params.timeoutMsRaw !== "number" || !Number.isFinite(params.timeoutMsRaw)) {
+    return fallback;
+  }
+  return Math.min(Math.max(1000, params.timeoutMsRaw), CHANNEL_STATUS_MAX_TIMEOUT_MS);
+}
+
 function resolveRuntimeAccountSnapshot(params: {
  runtime: ChannelRuntimeSnapshot;
  channelId: ChannelId;
@@ -141,7 +153,7 @@ export const channelsHandlers: GatewayRequestHandlers = {
    }
    const probe = (params as { probe?: boolean }).probe === true;
    const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs;
-    const timeoutMs = typeof timeoutMsRaw === "number" ? Math.max(1000, timeoutMsRaw) : 10_000;
+    const timeoutMs = resolveChannelsStatusTimeoutMs({ probe, timeoutMsRaw });
    const cfg = applyPluginAutoEnable({
      config: loadConfig(),
      env: process.env,
@@ -174,6 +186,70 @@ export const channelsHandlers: GatewayRequestHandlers = {
          typeof account !== "object" ||
          (account as { enabled?: boolean }).enabled !== false;

+    const buildAccountSnapshot = async (
+      channelId: ChannelId,
+      plugin: ChannelPlugin,
+      accountId: string,
+      defaultAccountId: string,
+    ) => {
+      const account = plugin.config.resolveAccount(cfg, accountId);
+      const enabled = isAccountEnabled(plugin, account);
+      let probeResult: unknown;
+      let lastProbeAt: number | null = null;
+      if (probe && enabled && plugin.status?.probeAccount) {
+        let configured = true;
+        if (plugin.config.isConfigured) {
+          configured = await plugin.config.isConfigured(account, cfg);
+        }
+        if (configured) {
+          probeResult = await plugin.status.probeAccount({
+            account,
+            timeoutMs,
+            cfg,
+          });
+          lastProbeAt = Date.now();
+        }
+      }
+      let auditResult: unknown;
+      if (probe && enabled && plugin.status?.auditAccount) {
+        let configured = true;
+        if (plugin.config.isConfigured) {
+          configured = await plugin.config.isConfigured(account, cfg);
+        }
+        if (configured) {
+          auditResult = await plugin.status.auditAccount({
+            account,
+            timeoutMs,
+            cfg,
+            probe: probeResult,
+          });
+        }
+      }
+      const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId);
+      const snapshot = await buildChannelAccountSnapshot({
+        plugin,
+        cfg,
+        accountId,
+        runtime: runtimeSnapshot,
+        probe: probeResult,
+        audit: auditResult,
+      });
+      if (lastProbeAt) {
+        snapshot.lastProbeAt = lastProbeAt;
+      }
+      const activity = getChannelActivity({
+        channel: channelId as never,
+        accountId,
+      });
+      if (snapshot.lastInboundAt == null) {
+        snapshot.lastInboundAt = activity.inboundAt;
+      }
+      if (snapshot.lastOutboundAt == null) {
+        snapshot.lastOutboundAt = activity.outboundAt;
+      }
+      return { accountId: accountId, account, snapshot };
+    };
+
    const buildChannelAccounts = async (channelId: ChannelId) => {
      const plugin = pluginMap.get(channelId);
      if (!plugin) {
@@ -190,66 +266,20 @@ export const channelsHandlers: GatewayRequestHandlers = {
        cfg,
        accountIds,
      });
-      const accounts: ChannelAccountSnapshot[] = [];
      const resolvedAccounts: Record<string, unknown> = {};
-      for (const accountId of accountIds) {
-        const account = plugin.config.resolveAccount(cfg, accountId);
-        const enabled = isAccountEnabled(plugin, account);
-        resolvedAccounts[accountId] = account;
-        let probeResult: unknown;
-        let lastProbeAt: number | null = null;
-        if (probe && enabled && plugin.status?.probeAccount) {
-          let configured = true;
-          if (plugin.config.isConfigured) {
-            configured = await plugin.config.isConfigured(account, cfg);
-          }
-          if (configured) {
-            probeResult = await plugin.status.probeAccount({
-              account,
-              timeoutMs,
-              cfg,
-            });
-            lastProbeAt = Date.now();
-          }
+      const { results } = await runTasksWithConcurrency({
+        tasks: accountIds.map(
+          (accountId) => async () =>
+            await buildAccountSnapshot(channelId, plugin, accountId, defaultAccountId),
+        ),
+        limit: probe ? CHANNEL_STATUS_PROBE_CONCURRENCY : accountIds.length || 1,
+      });
+      const accounts: ChannelAccountSnapshot[] = [];
+      for (const result of results) {
+        if (result) {
+          resolvedAccounts[result.accountId] = result.account;
+          accounts.push(result.snapshot);
        }
-        let auditResult: unknown;
-        if (probe && enabled && plugin.status?.auditAccount) {
-          let configured = true;
-          if (plugin.config.isConfigured) {
-            configured = await plugin.config.isConfigured(account, cfg);
-          }
-          if (configured) {
-            auditResult = await plugin.status.auditAccount({
-              account,
-              timeoutMs,
-              cfg,
-              probe: probeResult,
-            });
-          }
-        }
-        const runtimeSnapshot = resolveRuntimeSnapshot(channelId, accountId, defaultAccountId);
-        const snapshot = await buildChannelAccountSnapshot({
-          plugin,
-          cfg,
-          accountId,
-          runtime: runtimeSnapshot,
-          probe: probeResult,
-          audit: auditResult,
-        });
-        if (lastProbeAt) {
-          snapshot.lastProbeAt = lastProbeAt;
-        }
-        const activity = getChannelActivity({
-          channel: channelId as never,
-          accountId,
-        });
-        if (snapshot.lastInboundAt == null) {
-          snapshot.lastInboundAt = activity.inboundAt;
-        }
-        if (snapshot.lastOutboundAt == null) {
-          snapshot.lastOutboundAt = activity.outboundAt;
-        }
-        accounts.push(snapshot);
      }
      const defaultAccount =
        accounts.find((entry) => entry.accountId === defaultAccountId) ?? accounts[0];
@@ -271,28 +301,36 @@ export const channelsHandlers: GatewayRequestHandlers = {
    const channelsMap = payload.channels as Record<string, unknown>;
    const accountsMap = payload.channelAccounts as Record<string, unknown>;
    const defaultAccountIdMap = payload.channelDefaultAccountId as Record<string, unknown>;
-    for (const plugin of plugins) {
-      const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } =
-        await buildChannelAccounts(plugin.id);
-      const fallbackAccount =
-        resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId);
-      const summary = plugin.status?.buildChannelSummary
-        ? await plugin.status.buildChannelSummary({
-            account: fallbackAccount,
-            cfg,
-            defaultAccountId,
-            snapshot:
-              defaultAccount ??
-              ({
-                accountId: defaultAccountId,
-              } as ChannelAccountSnapshot),
-          })
-        : {
-            configured: defaultAccount?.configured ?? false,
-          };
-      channelsMap[plugin.id] = summary;
-      accountsMap[plugin.id] = accounts;
-      defaultAccountIdMap[plugin.id] = defaultAccountId;
+    const { results: channelResults } = await runTasksWithConcurrency({
+      tasks: plugins.map((plugin) => async () => {
+        const { accounts, defaultAccountId, defaultAccount, resolvedAccounts } =
+          await buildChannelAccounts(plugin.id);
+        const fallbackAccount =
+          resolvedAccounts[defaultAccountId] ?? plugin.config.resolveAccount(cfg, defaultAccountId);
+        const summary = plugin.status?.buildChannelSummary
+          ? await plugin.status.buildChannelSummary({
+              account: fallbackAccount,
+              cfg,
+              defaultAccountId,
+              snapshot:
+                defaultAccount ??
+                ({
+                  accountId: defaultAccountId,
+                } as ChannelAccountSnapshot),
+            })
+          : {
+              configured: defaultAccount?.configured ?? false,
+            };
+        return { pluginId: plugin.id, summary, accounts, defaultAccountId };
+      }),
+      limit: probe ? CHANNEL_STATUS_PROBE_CONCURRENCY : plugins.length || 1,
+    });
+    for (const result of channelResults) {
+      if (result) {
+        channelsMap[result.pluginId] = result.summary;
+        accountsMap[result.pluginId] = result.accounts;
+        defaultAccountIdMap[result.pluginId] = result.defaultAccountId;
+      }
    }

    respond(true, payload, undefined);
--- a/src/infra/session-cost-usage.ts
+++ b/src/infra/session-cost-usage.ts
@@ -249,13 +249,23 @@ async function scanTranscriptFile(params: {
      continue;
    }

-    if (entry.usage && entry.costTotal === undefined) {
+    if (entry.usage) {
      const cost = resolveModelCostConfig({
        provider: entry.provider,
        model: entry.model,
        config: params.config,
      });
-      entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
+      if (cost?.tieredPricing && cost.tieredPricing.length > 0) {
+        // When tiered pricing is configured, always recompute to override
+        // the flat-rate cost that the transport layer wrote into the transcript.
+        // Clear costBreakdown so downstream aggregation uses the recomputed total
+        // instead of the stale flat-rate breakdown from the transport layer.
+        entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
+        entry.costBreakdown = undefined;
+      } else if (entry.costTotal === undefined) {
+        // Fill in missing cost estimates.
+        entry.costTotal = estimateUsageCost({ usage: entry.usage, cost });
+      }
    }

    params.onEntry(entry);
--- a/src/utils/usage-format.test.ts
+++ b/src/utils/usage-format.test.ts
@@ -13,6 +13,7 @@ import {
  formatTokenCount,
  formatUsd,
  resolveModelCostConfig,
+  type PricingTier,
 } from "./usage-format.js";

 describe("usage-format", () => {
@@ -254,4 +255,368 @@ describe("usage-format", () => {
      cacheWrite: 0.8,
    });
  });
+
+  // -----------------------------------------------------------------------
+  // Tiered pricing tests
+  // -----------------------------------------------------------------------
+
+  it("uses flat pricing when tieredPricing is absent", () => {
+    const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0 };
+    const total = estimateUsageCost({
+      usage: { input: 1000, output: 500, cacheRead: 2000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.003);
+  });
+
+  it("estimates cost with single-tier tiered pricing (equivalent to flat)", () => {
+    const tiers: PricingTier[] = [
+      { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, range: [0, 1_000_000] },
+    ];
+    const cost = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers };
+    const total = estimateUsageCost({
+      usage: { input: 1000, output: 500, cacheRead: 2000 },
+      cost,
+    });
+    // Same as flat: (1000*1 + 500*2 + 2000*0.5) / 1M = 3000/1M = 0.003
+    expect(total).toBeCloseTo(0.003);
+  });
+
+  it("estimates cost with two tiers — input split across tiers", () => {
+    // Tier 1: [0, 32000) → input $0.30/M, output $1.50/M
+    // Tier 2: [32000, 128000) → input $0.50/M, output $2.50/M
+    const tiers: PricingTier[] = [
+      { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
+      { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
+    ];
+    const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    // 40000 input tokens, 10000 output tokens
+    // Tier 1 gets 32000/40000 = 80% of input → 32000 input tokens
+    // Tier 2 gets 8000/40000 = 20% of input → 8000 input tokens
+    // Input cost = (32000 * 0.3 + 8000 * 0.5) / 1M = (9600 + 4000) / 1M = 0.0136
+    // Output cost = (10000 * 0.8 * 1.5 + 10000 * 0.2 * 2.5) / 1M = (12000 + 5000) / 1M = 0.017
+    // Total = 0.0136 + 0.017 = 0.0306
+    const total = estimateUsageCost({
+      usage: { input: 40_000, output: 10_000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.0306, 4);
+  });
+
+  it("estimates cost with three tiers — volcengine-style pricing", () => {
+    // Simulates volcengine/doubao pricing (per-million):
+    // Tier 1: [0, 32000) → in $0.46, out $2.30
+    // Tier 2: [32000, 128000) → in $0.70, out $3.50
+    // Tier 3: [128000, 256000) → in $1.40, out $7.00
+    const tiers: PricingTier[] = [
+      { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
+      { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
+      { input: 1.4, output: 7.0, cacheRead: 0, cacheWrite: 0, range: [128_000, 256_000] },
+    ];
+    const cost = { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    // 200000 input tokens, 5000 output tokens
+    // Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16
+    // Tier 2: 96000 tokens, fraction = 96000/200000 = 0.48
+    // Tier 3: 72000 tokens, fraction = 72000/200000 = 0.36
+    //
+    // Input cost = (32000*0.46 + 96000*0.70 + 72000*1.40) / 1M
+    //            = (14720 + 67200 + 100800) / 1M = 182720 / 1M = 0.18272
+    // Output cost = 5000 * (0.16*2.3 + 0.48*3.5 + 0.36*7.0) / 1M
+    //             = 5000 * (0.368 + 1.68 + 2.52) / 1M
+    //             = 5000 * 4.568 / 1M = 22840 / 1M = 0.02284
+    // Total = 0.18272 + 0.02284 = 0.20556
+    const total = estimateUsageCost({
+      usage: { input: 200_000, output: 5_000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.20556, 4);
+  });
+
+  it("uses first tier rates for output when input is zero", () => {
+    const tiers: PricingTier[] = [
+      { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
+      { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
+    ];
+    const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    const total = estimateUsageCost({
+      usage: { input: 0, output: 10_000 },
+      cost,
+    });
+    // Falls back to first tier: 10000 * 1.5 / 1M = 0.015
+    expect(total).toBeCloseTo(0.015, 6);
+  });
+
+  it("falls back to flat pricing when tieredPricing is empty array", () => {
+    const cost = {
+      input: 1,
+      output: 2,
+      cacheRead: 0.5,
+      cacheWrite: 0,
+      tieredPricing: [] as PricingTier[],
+    };
+    const total = estimateUsageCost({
+      usage: { input: 1000, output: 500, cacheRead: 2000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.003);
+  });
+
+  it("bills overflow input tokens at last tier rate when input exceeds max range", () => {
+    // Tiers only cover up to 128000, but input is 200000
+    // Tier 1: [0, 32000) → in $0.30/M, out $1.50/M
+    // Tier 2: [32000, 128000) → in $0.50/M, out $2.50/M
+    // Overflow: 72000 tokens billed at Tier 2 rates
+    const tiers: PricingTier[] = [
+      { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
+      { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, 128_000] },
+    ];
+    const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    // 200000 input, 10000 output
+    // Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16
+    // Tier 2: 96000 tokens, fraction = 96000/200000 = 0.48
+    // Overflow (at Tier 2 rates): 72000 tokens, fraction = 72000/200000 = 0.36
+    //
+    // Input cost = (32000*0.3 + 96000*0.5 + 72000*0.5) / 1M
+    //            = (9600 + 48000 + 36000) / 1M = 93600/1M = 0.0936
+    // Output cost = 10000 * (0.16*1.5 + 0.48*2.5 + 0.36*2.5) / 1M
+    //             = 10000 * (0.24 + 1.2 + 0.9) / 1M
+    //             = 10000 * 2.34 / 1M = 23400/1M = 0.0234
+    // Total = 0.0936 + 0.0234 = 0.117
+    const total = estimateUsageCost({
+      usage: { input: 200_000, output: 10_000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.117, 4);
+  });
+
+  it("bills overflow at last tier when only a single small-range tier exists (e.g. <30K)", () => {
+    // Only one tier covering [0, 30000), input is 100000
+    const tiers: PricingTier[] = [
+      { input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0, range: [0, 30_000] },
+    ];
+    const cost = { input: 1.0, output: 3.0, cacheRead: 0.5, cacheWrite: 0, tieredPricing: tiers };
+
+    // 100000 input, 5000 output, 2000 cacheRead
+    // Tier 1: 30000 tokens, fraction = 30000/100000 = 0.3
+    // Overflow (at Tier 1 rates): 70000 tokens, fraction = 70000/100000 = 0.7
+    // Fractions sum to 1.0 — all output/cache fully billed
+    //
+    // Input cost = (30000*1.0 + 70000*1.0) / 1M = 100000/1M = 0.1
+    // Output cost = 5000 * (0.3*3.0 + 0.7*3.0) / 1M = 5000*3.0/1M = 0.015
+    // CacheRead cost = 2000 * (0.3*0.5 + 0.7*0.5) / 1M = 2000*0.5/1M = 0.001
+    // Total = 0.1 + 0.015 + 0.001 = 0.116
+    const total = estimateUsageCost({
+      usage: { input: 100_000, output: 5_000, cacheRead: 2_000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.116, 4);
+  });
+
+  it("supports open-ended range [start] in tiered pricing (greater-than syntax)", () => {
+    // Tier 1: [0, 32000) → in $0.30/M, out $1.50/M
+    // Tier 2: [32000, Infinity) → in $0.50/M, out $2.50/M  (open-ended)
+    const tiers: PricingTier[] = [
+      { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, range: [0, 32_000] },
+      { input: 0.5, output: 2.5, cacheRead: 0, cacheWrite: 0, range: [32_000, Infinity] },
+    ];
+    const cost = { input: 0.3, output: 1.5, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    // 200000 input, 10000 output
+    // Tier 1: 32000 tokens, fraction = 32000/200000 = 0.16
+    // Tier 2: 168000 tokens, fraction = 168000/200000 = 0.84
+    // No overflow — Tier 2 absorbs everything beyond 32K
+    //
+    // Input cost = (32000*0.3 + 168000*0.5) / 1M = (9600 + 84000) / 1M = 0.0936
+    // Output cost = 10000 * (0.16*1.5 + 0.84*2.5) / 1M = 10000 * (0.24 + 2.1) / 1M = 0.0234
+    // Total = 0.0936 + 0.0234 = 0.117
+    const total = estimateUsageCost({
+      usage: { input: 200_000, output: 10_000 },
+      cost,
+    });
+    expect(total).toBeCloseTo(0.117, 4);
+  });
+
+  it("uses declared tier ranges instead of sequential widths", () => {
+    const tiers: PricingTier[] = [
+      { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, range: [100, 200] },
+      { input: 2, output: 20, cacheRead: 0, cacheWrite: 0, range: [0, 100] },
+    ];
+    const cost = { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    const total = estimateUsageCost({
+      usage: { input: 150, output: 60 },
+      cost,
+    });
+
+    expect(total).toBeCloseTo(0.00125, 8);
+  });
+
+  it("bills malformed tier gaps at a fallback tier instead of dropping them", () => {
+    const tiers: PricingTier[] = [
+      { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, range: [0, 50] },
+      { input: 3, output: 30, cacheRead: 0, cacheWrite: 0, range: [100, 150] },
+    ];
+    const cost = { input: 1, output: 10, cacheRead: 0, cacheWrite: 0, tieredPricing: tiers };
+
+    const total = estimateUsageCost({
+      usage: { input: 150, output: 60 },
+      cost,
+    });
+
+    expect(total).toBeCloseTo(0.00175, 8);
+  });
+
+  it("normalizes open-ended range from models.json ([start] and [start, -1])", async () => {
+    await fs.writeFile(
+      path.join(agentDir, "models.json"),
+      JSON.stringify(
+        {
+          providers: {
+            volcengine: {
+              models: [
+                {
+                  id: "doubao-open-ended",
+                  cost: {
+                    input: 0.46,
+                    output: 2.3,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    tieredPricing: [
+                      { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
+                      { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000] },
+                    ],
+                  },
+                },
+                {
+                  id: "doubao-neg-one",
+                  cost: {
+                    input: 0.46,
+                    output: 2.3,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    tieredPricing: [
+                      { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
+                      { input: 0.7, output: 3.5, cacheRead: 0, cacheWrite: 0, range: [32000, -1] },
+                    ],
+                  },
+                },
+              ],
+            },
+          },
+        },
+        null,
+        2,
+      ),
+      "utf8",
+    );
+
+    // [32000] should be normalized to [32000, Infinity]
+    const cost1 = resolveModelCostConfig({
+      provider: "volcengine",
+      model: "doubao-open-ended",
+    });
+    expect(cost1).toBeDefined();
+    expect(cost1!.tieredPricing).toHaveLength(2);
+    expect(cost1!.tieredPricing![1].range).toEqual([32000, Infinity]);
+
+    // [32000, -1] should also be normalized to [32000, Infinity]
+    const cost2 = resolveModelCostConfig({
+      provider: "volcengine",
+      model: "doubao-neg-one",
+    });
+    expect(cost2).toBeDefined();
+    expect(cost2!.tieredPricing).toHaveLength(2);
+    expect(cost2!.tieredPricing![1].range).toEqual([32000, Infinity]);
+  });
+
+  it("resolves tiered pricing from models.json", async () => {
+    await fs.writeFile(
+      path.join(agentDir, "models.json"),
+      JSON.stringify(
+        {
+          providers: {
+            volcengine: {
+              models: [
+                {
+                  id: "doubao-seed-2-0-pro",
+                  cost: {
+                    input: 0.46,
+                    output: 2.3,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    tieredPricing: [
+                      { input: 0.46, output: 2.3, cacheRead: 0, cacheWrite: 0, range: [0, 32000] },
+                      {
+                        input: 0.7,
+                        output: 3.5,
+                        cacheRead: 0,
+                        cacheWrite: 0,
+                        range: [32000, 128000],
+                      },
+                    ],
+                  },
+                },
+              ],
+            },
+          },
+        },
+        null,
+        2,
+      ),
+      "utf8",
+    );
+
+    const cost = resolveModelCostConfig({
+      provider: "volcengine",
+      model: "doubao-seed-2-0-pro",
+    });
+
+    expect(cost).toBeDefined();
+    expect(cost!.tieredPricing).toHaveLength(2);
+    expect(cost!.tieredPricing![0].range).toEqual([0, 32000]);
+    expect(cost!.tieredPricing![1].input).toBe(0.7);
+  });
+
+  it("resolves tiered pricing from cached gateway (LiteLLM)", () => {
+    __setGatewayModelPricingForTest([
+      {
+        provider: "volcengine",
+        model: "doubao-seed",
+        pricing: {
+          input: 0.46,
+          output: 2.3,
+          cacheRead: 0,
+          cacheWrite: 0,
+          tieredPricing: [
+            {
+              input: 0.46,
+              output: 2.3,
+              cacheRead: 0,
+              cacheWrite: 0,
+              range: [0, 32000] as [number, number],
+            },
+            {
+              input: 0.7,
+              output: 3.5,
+              cacheRead: 0,
+              cacheWrite: 0,
+              range: [32000, 128000] as [number, number],
+            },
+          ],
+        },
+      },
+    ]);
+
+    const cost = resolveModelCostConfig({
+      provider: "volcengine",
+      model: "doubao-seed",
+    });
+
+    expect(cost).toBeDefined();
+    expect(cost!.tieredPricing).toHaveLength(2);
+  });
 });
--- a/src/utils/usage-format.ts
+++ b/src/utils/usage-format.ts
@@ -8,11 +8,41 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { getCachedGatewayModelPricing } from "../gateway/model-pricing-cache.js";
 import { normalizeOptionalString } from "../shared/string-coerce.js";

+/**
+ * A single tier in a tiered-pricing schedule.  Prices are expressed as
+ * USD per-million tokens, just like the flat `ModelCostConfig` fields.
+ *
+ * `range` is a half-open interval `[start, end)` expressed in *input*
+ * token counts.  The tiers MUST be sorted in ascending `range[0]` order
+ * with no gaps.
+ */
+export type PricingTier = {
+  input: number;
+  output: number;
+  cacheRead: number;
+  cacheWrite: number;
+  /** [startTokens, endTokens) — half-open interval on the input token axis. */
+  range: [number, number];
+};
+
+type RawPricingTier = {
+  input: number;
+  output: number;
+  cacheRead: number;
+  cacheWrite: number;
+  range: [number, number] | [number];
+};
+
 export type ModelCostConfig = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
+  /** Optional tiered pricing tiers.  When present, `estimateUsageCost`
+   *  uses them instead of the flat rates above.  The flat rates still
+   *  serve as the "default / first-tier" fallback for callers that are
+   *  unaware of tiered pricing. */
+  tieredPricing?: PricingTier[];
 };

 export type UsageTotals = {
@@ -99,6 +129,47 @@ function shouldUseNormalizedCostLookup(params: { provider?: string; model?: stri
  return provider === "anthropic" || provider === "openrouter" || provider === "vercel-ai-gateway";
 }

+/**
+ * Normalize a raw tieredPricing array from models.json / config.
+ * Supports open-ended ranges such as `[128000]` or `[128000, -1]`,
+ * which are converted to `[128000, Infinity]`.
+ */
+function normalizeTieredPricing(raw: RawPricingTier[] | undefined): PricingTier[] | undefined {
+  if (!raw || raw.length === 0) {
+    return undefined;
+  }
+  const result: PricingTier[] = [];
+  for (const tier of raw) {
+    const range = tier.range;
+    if (!Array.isArray(range) || range.length < 1) {
+      continue;
+    }
+    const start = typeof range[0] === "number" ? range[0] : NaN;
+    if (!Number.isFinite(start)) {
+      continue;
+    }
+    const rawEnd = range.length >= 2 ? range[1] : null;
+    const end =
+      typeof rawEnd === "number" && Number.isFinite(rawEnd) && rawEnd > start ? rawEnd : Infinity;
+    if (
+      !Number.isFinite(tier.input) ||
+      !Number.isFinite(tier.output) ||
+      !Number.isFinite(tier.cacheRead) ||
+      !Number.isFinite(tier.cacheWrite)
+    ) {
+      continue;
+    }
+    result.push({
+      input: tier.input,
+      output: tier.output,
+      cacheRead: tier.cacheRead,
+      cacheWrite: tier.cacheWrite,
+      range: [start, end],
+    });
+  }
+  return result.length > 0 ? result.toSorted((a, b) => a.range[0] - b.range[0]) : undefined;
+}
+
 function buildProviderCostIndex(
  providers: Record<string, ModelProviderConfig> | undefined,
  options?: { allowPluginNormalization?: boolean },
@@ -113,7 +184,16 @@ function buildProviderCostIndex(
      const normalized = normalizeModelRef(normalizedProvider, model.id, {
        allowPluginNormalization: options?.allowPluginNormalization,
      });
-      entries.set(modelKey(normalized.provider, normalized.model), model.cost);
+      const cost = { ...model.cost };
+      const normalizedTiers = normalizeTieredPricing(cost.tieredPricing);
+      const costConfig: ModelCostConfig = {
+        input: cost.input,
+        output: cost.output,
+        cacheRead: cost.cacheRead,
+        cacheWrite: cost.cacheWrite,
+        ...(normalizedTiers ? { tieredPricing: normalizedTiers } : {}),
+      };
+      entries.set(modelKey(normalized.provider, normalized.model), costConfig);
    }
  }
  return entries;
@@ -233,6 +313,87 @@ export function resolveModelCostConfig(params: {
 const toNumber = (value: number | undefined): number =>
  typeof value === "number" && Number.isFinite(value) ? value : 0;

+/**
+ * Compute the cost for a single token dimension (input, output, cacheRead,
+ * or cacheWrite) across a set of sorted tiered-pricing tiers.
+ *
+ * The tiers define ranges on the **input** token axis.  For each tier,
+ * the proportion of the total input that falls into that range determines
+ * the fraction of *all* token types billed at that tier's rates.
+ *
+ * For example, if the input is 40 000 tokens and the tiers are:
+ *   [0, 32000)  → $0.30/M input, $1.50/M output
+ *   [32000, 128000) → $0.50/M input, $2.50/M output
+ *
+ * Then 80 % of every dimension is billed at the first tier and 20 % at the
+ * second tier.
+ *
+ * Prices are per-million; the caller divides by 1 000 000 after summing.
+ */
+function computeTieredCost(
+  tiers: PricingTier[],
+  input: number,
+  output: number,
+  cacheRead: number,
+  cacheWrite: number,
+): number {
+  const totalInputTokens = input;
+  const sortedTiers = tiers.toSorted((a, b) => a.range[0] - b.range[0]);
+  if (totalInputTokens <= 0) {
+    // If there are no input tokens the tier proportion is undefined;
+    // fall back to the first tier for any residual output/cache usage.
+    const tier = sortedTiers[0];
+    if (!tier) {
+      return 0;
+    }
+    return output * tier.output + cacheRead * tier.cacheRead + cacheWrite * tier.cacheWrite;
+  }
+
+  let total = 0;
+  let billedInput = 0;
+  let coveredUntil = 0;
+  let lastTier: PricingTier | undefined;
+
+  for (const tier of sortedTiers) {
+    const [start, end] = tier.range;
+    const tierStart = Math.max(0, start, coveredUntil);
+    const tierEnd = Math.min(totalInputTokens, end);
+    const inputInTier = Math.max(0, tierEnd - tierStart);
+    if (end > coveredUntil) {
+      coveredUntil = end;
+    }
+    if (inputInTier <= 0) {
+      continue;
+    }
+    const fraction = inputInTier / totalInputTokens;
+    total +=
+      inputInTier * tier.input +
+      output * fraction * tier.output +
+      cacheRead * fraction * tier.cacheRead +
+      cacheWrite * fraction * tier.cacheWrite;
+    billedInput += inputInTier;
+    lastTier = tier;
+  }
+
+  // Bill any uncovered gaps or overflow at the highest matched tier's rate.
+  // This keeps malformed remote/user tier ranges from underestimating cost.
+  const unbilledInput = totalInputTokens - billedInput;
+  if (unbilledInput > 0) {
+    const fallbackTier = lastTier ?? sortedTiers[sortedTiers.length - 1];
+    if (!fallbackTier) {
+      return total;
+    }
+    const fraction = unbilledInput / totalInputTokens;
+    total +=
+      unbilledInput * fallbackTier.input +
+      output * fraction * fallbackTier.output +
+      cacheRead * fraction * fallbackTier.cacheRead +
+      cacheWrite * fraction * fallbackTier.cacheWrite;
+  }
+
+  return total;
+}
+
 export function estimateUsageCost(params: {
  usage?: NormalizedUsage | UsageTotals | null;
  cost?: ModelCostConfig;
@@ -246,11 +407,18 @@ export function estimateUsageCost(params: {
  const output = toNumber(usage.output);
  const cacheRead = toNumber(usage.cacheRead);
  const cacheWrite = toNumber(usage.cacheWrite);
-  const total =
-    input * cost.input +
-    output * cost.output +
-    cacheRead * cost.cacheRead +
-    cacheWrite * cost.cacheWrite;
+
+  let total: number;
+  if (cost.tieredPricing && cost.tieredPricing.length > 0) {
+    total = computeTieredCost(cost.tieredPricing, input, output, cacheRead, cacheWrite);
+  } else {
+    total =
+      input * cost.input +
+      output * cost.output +
+      cacheRead * cost.cacheRead +
+      cacheWrite * cost.cacheWrite;
+  }
+
  if (!Number.isFinite(total)) {
    return undefined;
  }