mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 17:51:22 +00:00
fix: normalize cached prompt token accounting
This commit is contained in:
@@ -44,6 +44,9 @@ export interface UsageInfo {
|
||||
total_tokens?: number;
|
||||
prompt_tokens?: number;
|
||||
completion_tokens?: number;
|
||||
input_tokens_details?: {
|
||||
cached_tokens?: number;
|
||||
};
|
||||
}
|
||||
|
||||
export type OpenAIResponsesAssistantPhase = "commentary" | "final_answer";
|
||||
|
||||
82
src/agents/openai-ws-message-conversion.test.ts
Normal file
82
src/agents/openai-ws-message-conversion.test.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { ResponseObject } from "./openai-ws-connection.js";
|
||||
import { buildAssistantMessageFromResponse } from "./openai-ws-message-conversion.js";
|
||||
|
||||
// Test suite for buildAssistantMessageFromResponse: checks how the `usage`
// section of a Responses-style payload (input_tokens, output_tokens,
// total_tokens, input_tokens_details.cached_tokens) is reflected in the
// `usage` of the assistant message that the function returns.
describe("openai ws message conversion", () => {
|
||||
// Case 1: the payload carries an explicit total_tokens value.
it("preserves cached token usage from responses usage details", () => {
|
||||
const response: ResponseObject = {
|
||||
id: "resp_123",
|
||||
object: "response",
|
||||
created_at: Date.now(),
|
||||
status: "completed",
|
||||
model: "gpt-5",
|
||||
output: [
|
||||
{
|
||||
type: "message",
|
||||
id: "msg_123",
|
||||
role: "assistant",
|
||||
status: "completed",
|
||||
content: [{ type: "output_text", text: "hello" }],
|
||||
},
|
||||
|
||||
],
|
||||
// 100 of the 120 reported input tokens are marked as cached. The reported
// total (250) differs from 120 + 30, so the assertion below can tell a
// passed-through total apart from one recomputed from the parts.
usage: {
|
||||
input_tokens: 120,
|
||||
output_tokens: 30,
|
||||
total_tokens: 250,
|
||||
input_tokens_details: { cached_tokens: 100 },
|
||||
},
|
||||
};
|
||||
|
||||
const message = buildAssistantMessageFromResponse(response, {
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
id: "gpt-5",
|
||||
});
|
||||
|
||||
// Expected: input is the uncached remainder (120 - 100 = 20), the cached
// count is surfaced as cacheRead, cacheWrite is 0, and totalTokens equals
// the reported total_tokens. toMatchObject only checks the listed fields.
expect(message.usage).toMatchObject({
|
||||
input: 20,
|
||||
output: 30,
|
||||
cacheRead: 100,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 250,
|
||||
});
|
||||
});
|
||||
|
||||
// Case 2: same payload shape but without total_tokens.
it("derives cache-inclusive total tokens when responses total is missing", () => {
|
||||
const response: ResponseObject = {
|
||||
id: "resp_124",
|
||||
object: "response",
|
||||
created_at: Date.now(),
|
||||
status: "completed",
|
||||
model: "gpt-5",
|
||||
output: [
|
||||
{
|
||||
type: "message",
|
||||
id: "msg_124",
|
||||
role: "assistant",
|
||||
status: "completed",
|
||||
content: [{ type: "output_text", text: "hello" }],
|
||||
},
|
||||
|
||||
],
|
||||
// No total_tokens field here, so the total has to be derived.
usage: {
|
||||
input_tokens: 120,
|
||||
output_tokens: 30,
|
||||
input_tokens_details: { cached_tokens: 100 },
|
||||
},
|
||||
};
|
||||
|
||||
const message = buildAssistantMessageFromResponse(response, {
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
id: "gpt-5",
|
||||
});
|
||||
|
||||
// Expected total: 20 (uncached input) + 30 (output) + 100 (cache read)
// + 0 (cache write) = 150, i.e. cached tokens are counted exactly once.
expect(message.usage).toMatchObject({
|
||||
input: 20,
|
||||
output: 30,
|
||||
cacheRead: 100,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 150,
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -563,6 +563,13 @@ export function buildAssistantMessageFromResponse(
|
||||
const stopReason: StopReason = hasToolCalls ? "toolUse" : "stop";
|
||||
const normalizedUsage = normalizeUsage(response.usage);
|
||||
const rawTotalTokens = normalizedUsage?.total;
|
||||
const resolvedTotalTokens =
|
||||
rawTotalTokens && rawTotalTokens > 0
|
||||
? rawTotalTokens
|
||||
: (normalizedUsage?.input ?? 0) +
|
||||
(normalizedUsage?.output ?? 0) +
|
||||
(normalizedUsage?.cacheRead ?? 0) +
|
||||
(normalizedUsage?.cacheWrite ?? 0);
|
||||
|
||||
const message = buildAssistantMessage({
|
||||
model: modelInfo,
|
||||
@@ -571,7 +578,9 @@ export function buildAssistantMessageFromResponse(
|
||||
usage: buildUsageWithNoCost({
|
||||
input: normalizedUsage?.input ?? 0,
|
||||
output: normalizedUsage?.output ?? 0,
|
||||
totalTokens: rawTotalTokens && rawTotalTokens > 0 ? rawTotalTokens : undefined,
|
||||
cacheRead: normalizedUsage?.cacheRead ?? 0,
|
||||
cacheWrite: normalizedUsage?.cacheWrite ?? 0,
|
||||
totalTokens: resolvedTotalTokens > 0 ? resolvedTotalTokens : undefined,
|
||||
}),
|
||||
});
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ describe("normalizeUsage", () => {
|
||||
cached_tokens: 19,
|
||||
});
|
||||
expect(usage).toEqual({
|
||||
input: 30,
|
||||
input: 11,
|
||||
output: 9,
|
||||
cacheRead: 19,
|
||||
cacheWrite: undefined,
|
||||
@@ -80,7 +80,7 @@ describe("normalizeUsage", () => {
|
||||
prompt_tokens_details: { cached_tokens: 1024 },
|
||||
});
|
||||
expect(usage).toEqual({
|
||||
input: 1113,
|
||||
input: 89,
|
||||
output: 5,
|
||||
cacheRead: 1024,
|
||||
cacheWrite: undefined,
|
||||
@@ -88,6 +88,22 @@ describe("normalizeUsage", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("handles OpenAI Responses input_tokens_details.cached_tokens field", () => {
|
||||
const usage = normalizeUsage({
|
||||
input_tokens: 120,
|
||||
output_tokens: 30,
|
||||
total_tokens: 250,
|
||||
input_tokens_details: { cached_tokens: 100 },
|
||||
});
|
||||
expect(usage).toEqual({
|
||||
input: 20,
|
||||
output: 30,
|
||||
cacheRead: 100,
|
||||
cacheWrite: undefined,
|
||||
total: 250,
|
||||
});
|
||||
});
|
||||
|
||||
it("clamps negative input to zero (pre-subtracted cached_tokens > prompt_tokens)", () => {
|
||||
// pi-ai OpenAI-format providers subtract cached_tokens from prompt_tokens
|
||||
// upstream. When cached_tokens exceeds prompt_tokens the result is negative.
|
||||
|
||||
@@ -17,6 +17,8 @@ export type UsageLike = {
|
||||
cache_creation_input_tokens?: number;
|
||||
// Moonshot/Kimi uses cached_tokens for cache read count (explicit caching API).
|
||||
cached_tokens?: number;
|
||||
// OpenAI Responses reports cached prompt reuse here.
|
||||
input_tokens_details?: { cached_tokens?: number };
|
||||
// Kimi K2 uses prompt_tokens_details.cached_tokens for automatic prefix caching.
|
||||
prompt_tokens_details?: { cached_tokens?: number };
|
||||
// Some agents/logs emit alternate naming.
|
||||
@@ -90,13 +92,34 @@ export function normalizeUsage(raw?: UsageLike | null): NormalizedUsage | undefi
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Some providers (pi-ai OpenAI-format) pre-subtract cached_tokens from
|
||||
// prompt_tokens upstream. When cached_tokens > prompt_tokens the result is
|
||||
// negative, which is nonsensical. Clamp to 0.
|
||||
const rawInput = asFiniteNumber(
|
||||
raw.input ?? raw.inputTokens ?? raw.input_tokens ?? raw.promptTokens ?? raw.prompt_tokens,
|
||||
const cacheRead = asFiniteNumber(
|
||||
raw.cacheRead ??
|
||||
raw.cache_read ??
|
||||
raw.cache_read_input_tokens ??
|
||||
raw.cached_tokens ??
|
||||
raw.input_tokens_details?.cached_tokens ??
|
||||
raw.prompt_tokens_details?.cached_tokens,
|
||||
);
|
||||
const input = rawInput !== undefined && rawInput < 0 ? 0 : rawInput;
|
||||
|
||||
const rawInputValue =
|
||||
raw.input ?? raw.inputTokens ?? raw.input_tokens ?? raw.promptTokens ?? raw.prompt_tokens;
|
||||
|
||||
const usesOpenAIStylePromptTotals =
|
||||
raw.cached_tokens !== undefined ||
|
||||
raw.input_tokens_details?.cached_tokens !== undefined ||
|
||||
raw.prompt_tokens_details?.cached_tokens !== undefined;
|
||||
|
||||
// Some providers (pi-ai OpenAI-format) pre-subtract cached_tokens from
|
||||
// prompt/input totals upstream, while OpenAI-style prompt/input aliases
|
||||
// include cached tokens in the reported prompt total. Normalize both cases
|
||||
// to uncached input tokens so downstream prompt-token math does not double-
|
||||
// count cache reads.
|
||||
const rawInput = asFiniteNumber(rawInputValue);
|
||||
const normalizedInput =
|
||||
rawInput !== undefined && usesOpenAIStylePromptTotals && cacheRead !== undefined
|
||||
? rawInput - cacheRead
|
||||
: rawInput;
|
||||
const input = normalizedInput !== undefined && normalizedInput < 0 ? 0 : normalizedInput;
|
||||
const output = asFiniteNumber(
|
||||
raw.output ??
|
||||
raw.outputTokens ??
|
||||
@@ -104,13 +127,6 @@ export function normalizeUsage(raw?: UsageLike | null): NormalizedUsage | undefi
|
||||
raw.completionTokens ??
|
||||
raw.completion_tokens,
|
||||
);
|
||||
const cacheRead = asFiniteNumber(
|
||||
raw.cacheRead ??
|
||||
raw.cache_read ??
|
||||
raw.cache_read_input_tokens ??
|
||||
raw.cached_tokens ??
|
||||
raw.prompt_tokens_details?.cached_tokens,
|
||||
);
|
||||
const cacheWrite = asFiniteNumber(
|
||||
raw.cacheWrite ?? raw.cache_write ?? raw.cache_creation_input_tokens,
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user