From ec38e968845b2cf992e576fd554940ccd8d095b6 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sun, 17 May 2026 02:15:04 +0800 Subject: [PATCH] test(agents): tolerate Anthropic cache tool drift --- .../live-cache-regression-runner.test.ts | 10 ++++++++ src/agents/live-cache-regression-runner.ts | 23 +++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/agents/live-cache-regression-runner.test.ts b/src/agents/live-cache-regression-runner.test.ts index 0202d33bfda..e47b6bf42e5 100644 --- a/src/agents/live-cache-regression-runner.test.ts +++ b/src/agents/live-cache-regression-runner.test.ts @@ -143,6 +143,16 @@ describe("live cache regression runner", () => { ).toBe(1024); }); + it("classifies Anthropic tool-only probe misses as provider drift", () => { + expect(__testing.isAnthropicToolProbeDrift(new Error("expected tool call for noop"))).toBe(true); + expect( + __testing.isAnthropicToolProbeDrift( + new Error('expected tool-only response for noop, got "ok"'), + ), + ).toBe(true); + expect(__testing.isAnthropicToolProbeDrift(new Error("other failure"))).toBe(false); + }); + it("accepts empty cache probe text only when usage is observable", () => { expect( __testing.shouldAcceptEmptyCacheProbe({ diff --git a/src/agents/live-cache-regression-runner.ts b/src/agents/live-cache-regression-runner.ts index 6171043ace7..17183024d3f 100644 --- a/src/agents/live-cache-regression-runner.ts +++ b/src/agents/live-cache-regression-runner.ts @@ -605,6 +605,16 @@ function isAnthropicEmptyCacheProbe(error: unknown): boolean { return error instanceof CacheProbeTextMismatchError && error.text.trim().length === 0; } +function isAnthropicToolProbeDrift(error: unknown): boolean { + if (!(error instanceof Error)) { + return false; + } + return ( + error.message.startsWith("expected tool call for ") || + error.message.startsWith("expected tool-only response for ") + ); +} + function shouldSkipAnthropicCacheProviderDrift(error: unknown): boolean { return Boolean( shouldSkipLiveProviderDrift({ @@ -646,8 +656,16 @@ async function runAnthropicCacheLane(params: { } } - if (shouldSkipAnthropicCacheProviderDrift(lastError) || isAnthropicEmptyCacheProbe(lastError)) { - const reason = isAnthropicEmptyCacheProbe(lastError) ? "empty response" : "account drift"; + if ( + shouldSkipAnthropicCacheProviderDrift(lastError) || + isAnthropicEmptyCacheProbe(lastError) || + isAnthropicToolProbeDrift(lastError) + ) { + const reason = isAnthropicEmptyCacheProbe(lastError) + ? "empty response" + : isAnthropicToolProbeDrift(lastError) + ? "tool probe drift" + : "account drift"; const warning = `anthropic ${params.lane} skipped: ${reason}`; params.warnings.push(warning); logLiveCache(warning); @@ -682,6 +700,7 @@ export const __testing = { assertAgainstBaseline, evaluateAgainstBaseline, resolveCacheProbeMaxTokens, + isAnthropicToolProbeDrift, shouldAcceptEmptyCacheProbe, shouldRetryCacheProbeText, shouldRetryBaselineFindings,