mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-24 09:29:48 +00:00
* chore(lint): reduce underscore-dangle exceptions * chore(lint): reduce more underscore exceptions * chore(lint): remove underscore-dangle allow list * fix(lint): repair underscore cleanup regressions * test(lint): track version define suppression
243 lines
6.2 KiB
TypeScript
243 lines
6.2 KiB
TypeScript
import { describe, expect, it } from "vitest";
|
|
import { testing } from "./live-cache-regression-runner.js";
|
|
|
|
describe("live cache regression runner", () => {
|
|
it("keeps OpenAI image cache floors observable without blocking release validation", () => {
|
|
const regressions: string[] = [];
|
|
const warnings: string[] = [];
|
|
|
|
testing.assertAgainstBaseline({
|
|
lane: "image",
|
|
provider: "openai",
|
|
result: {
|
|
best: {
|
|
hitRate: 0,
|
|
suffix: "image-hit",
|
|
text: "CACHE-OK image-hit",
|
|
usage: { cacheRead: 0, cacheWrite: 0, input: 5_096 },
|
|
},
|
|
},
|
|
regressions,
|
|
warnings,
|
|
});
|
|
|
|
expect(regressions).toStrictEqual([]);
|
|
expect(warnings).toEqual([
|
|
"openai:image cacheRead=0 < min=3840",
|
|
"openai:image hitRate=0.000 < min=0.820",
|
|
]);
|
|
});
|
|
|
|
it("keeps OpenAI text cache floor misses advisory", () => {
|
|
const regressions: string[] = [];
|
|
const warnings: string[] = [];
|
|
|
|
testing.assertAgainstBaseline({
|
|
lane: "stable",
|
|
provider: "openai",
|
|
result: {
|
|
best: {
|
|
hitRate: 0,
|
|
suffix: "stable-hit",
|
|
text: "CACHE-OK stable-hit",
|
|
usage: { cacheRead: 0, cacheWrite: 0, input: 5_034 },
|
|
},
|
|
},
|
|
regressions,
|
|
warnings,
|
|
});
|
|
|
|
expect(regressions).toStrictEqual([]);
|
|
expect(warnings).toEqual([
|
|
"openai:stable cacheRead=0 < min=4608",
|
|
"openai:stable hitRate=0.000 < min=0.900",
|
|
]);
|
|
});
|
|
|
|
it("retries hard cache baseline misses once", () => {
|
|
expect(
|
|
testing.shouldRetryBaselineFindings(
|
|
{
|
|
regressions: ["anthropic:image cacheRead=0 < min=4500"],
|
|
warnings: [],
|
|
},
|
|
1,
|
|
),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldRetryBaselineFindings(
|
|
{
|
|
regressions: ["anthropic:image cacheRead=0 < min=4500"],
|
|
warnings: [],
|
|
},
|
|
2,
|
|
),
|
|
).toBe(false);
|
|
expect(
|
|
testing.shouldRetryBaselineFindings(
|
|
{
|
|
regressions: [],
|
|
warnings: ["openai:image cacheRead=0 < min=3840"],
|
|
},
|
|
1,
|
|
),
|
|
).toBe(false);
|
|
});
|
|
|
|
it("retries a cache probe twice when provider text misses the sentinel", () => {
|
|
expect(
|
|
testing.shouldRetryCacheProbeText({
|
|
attempt: 1,
|
|
suffix: "openai-stable-hit-a",
|
|
text: "",
|
|
}),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldRetryCacheProbeText({
|
|
attempt: 2,
|
|
suffix: "openai-stable-hit-a",
|
|
text: "",
|
|
}),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldRetryCacheProbeText({
|
|
attempt: 3,
|
|
suffix: "openai-stable-hit-a",
|
|
text: "",
|
|
}),
|
|
).toBe(false);
|
|
expect(
|
|
testing.shouldRetryCacheProbeText({
|
|
attempt: 1,
|
|
suffix: "openai-stable-hit-a",
|
|
text: "I saw openai-stable-hit-a.",
|
|
}),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldRetryCacheProbeText({
|
|
attempt: 1,
|
|
suffix: "openai-stable-hit-a",
|
|
text: "CACHE-OK openai-stable-hit-a",
|
|
}),
|
|
).toBe(false);
|
|
});
|
|
|
|
it("keeps cache probes above the provider empty-output floor", () => {
|
|
expect(
|
|
testing.resolveCacheProbeMaxTokens({
|
|
maxTokens: 32,
|
|
providerTag: "openai",
|
|
}),
|
|
).toBe(256);
|
|
expect(
|
|
testing.resolveCacheProbeMaxTokens({
|
|
maxTokens: 512,
|
|
providerTag: "openai",
|
|
}),
|
|
).toBe(512);
|
|
expect(
|
|
testing.resolveCacheProbeMaxTokens({
|
|
maxTokens: 32,
|
|
providerTag: "anthropic",
|
|
}),
|
|
).toBe(1024);
|
|
});
|
|
|
|
it("classifies Anthropic tool-only probe misses as provider drift", () => {
|
|
expect(testing.isAnthropicToolProbeDrift(new Error("expected tool call for noop"))).toBe(true);
|
|
expect(
|
|
testing.isAnthropicToolProbeDrift(
|
|
new Error('expected tool-only response for noop, got "ok"'),
|
|
),
|
|
).toBe(true);
|
|
expect(testing.isAnthropicToolProbeDrift(new Error("other failure"))).toBe(false);
|
|
});
|
|
|
|
it("accepts empty cache probe text only when usage is observable", () => {
|
|
expect(
|
|
testing.shouldAcceptEmptyCacheProbe({
|
|
providerTag: "openai",
|
|
text: "",
|
|
usage: { input: 5_000 },
|
|
}),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldAcceptEmptyCacheProbe({
|
|
providerTag: "openai",
|
|
text: "",
|
|
usage: { cacheRead: 4_608 },
|
|
}),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldAcceptEmptyCacheProbe({
|
|
providerTag: "openai",
|
|
text: "wrong",
|
|
usage: { input: 5_000 },
|
|
}),
|
|
).toBe(false);
|
|
expect(
|
|
testing.shouldAcceptEmptyCacheProbe({
|
|
providerTag: "anthropic",
|
|
text: "",
|
|
usage: { input: 5_000 },
|
|
}),
|
|
).toBe(true);
|
|
expect(
|
|
testing.shouldAcceptEmptyCacheProbe({
|
|
providerTag: "openai",
|
|
text: "",
|
|
usage: {},
|
|
}),
|
|
).toBe(false);
|
|
});
|
|
|
|
it("accepts a warmup that already hits the provider cache", () => {
|
|
const findings = testing.evaluateAgainstBaseline({
|
|
lane: "image",
|
|
provider: "anthropic",
|
|
result: {
|
|
best: {
|
|
hitRate: 0.999,
|
|
suffix: "image-hit",
|
|
text: "CACHE-OK image-hit",
|
|
usage: { cacheRead: 5_742, cacheWrite: 0, input: 3 },
|
|
},
|
|
warmup: {
|
|
hitRate: 0.999,
|
|
suffix: "image-warmup",
|
|
text: "CACHE-OK image-warmup",
|
|
usage: { cacheRead: 5_741, cacheWrite: 0, input: 3 },
|
|
},
|
|
},
|
|
});
|
|
|
|
expect(findings).toEqual({ regressions: [], warnings: [] });
|
|
});
|
|
|
|
it("still rejects warmups with no cache write or cache hit evidence", () => {
|
|
const findings = testing.evaluateAgainstBaseline({
|
|
lane: "image",
|
|
provider: "anthropic",
|
|
result: {
|
|
best: {
|
|
hitRate: 0.999,
|
|
suffix: "image-hit",
|
|
text: "CACHE-OK image-hit",
|
|
usage: { cacheRead: 5_742, cacheWrite: 0, input: 3 },
|
|
},
|
|
warmup: {
|
|
hitRate: 0,
|
|
suffix: "image-warmup",
|
|
text: "CACHE-OK image-warmup",
|
|
usage: { cacheRead: 0, cacheWrite: 0, input: 5_741 },
|
|
},
|
|
},
|
|
});
|
|
|
|
expect(findings).toEqual({
|
|
regressions: ["anthropic:image warmup cacheWrite=0 < min=1"],
|
|
warnings: [],
|
|
});
|
|
});
|
|
});
|