From b378a912573e1d0d32cf1e20ff51e4d425ada6d1 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 4 May 2026 17:44:22 -0700 Subject: [PATCH] test(live): retry cache probe text misses --- .../live-cache-regression-runner.test.ts | 9 +++- src/agents/live-cache-regression-runner.ts | 42 +++++++++++++------ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/agents/live-cache-regression-runner.test.ts b/src/agents/live-cache-regression-runner.test.ts index 97bc267836f..f462b1e214d 100644 --- a/src/agents/live-cache-regression-runner.test.ts +++ b/src/agents/live-cache-regression-runner.test.ts @@ -84,7 +84,7 @@ describe("live cache regression runner", () => { ).toBe(false); }); - it("retries a cache probe once when provider text misses the sentinel", () => { + it("retries a cache probe twice when provider text misses the sentinel", () => { expect( __testing.shouldRetryCacheProbeText({ attempt: 1, @@ -98,6 +98,13 @@ describe("live cache regression runner", () => { suffix: "openai-stable-hit-a", text: "", }), + ).toBe(true); + expect( + __testing.shouldRetryCacheProbeText({ + attempt: 3, + suffix: "openai-stable-hit-a", + text: "", + }), ).toBe(false); expect( __testing.shouldRetryCacheProbeText({ diff --git a/src/agents/live-cache-regression-runner.ts b/src/agents/live-cache-regression-runner.ts index c0c54d046d9..559f775c585 100644 --- a/src/agents/live-cache-regression-runner.ts +++ b/src/agents/live-cache-regression-runner.ts @@ -20,7 +20,7 @@ import { const OPENAI_TIMEOUT_MS = 120_000; const ANTHROPIC_TIMEOUT_MS = 120_000; const LIVE_CACHE_LANE_RETRIES = 1; -const LIVE_CACHE_RESPONSE_RETRIES = 1; +const LIVE_CACHE_RESPONSE_RETRIES = 2; const OPENAI_PREFIX = buildStableCachePrefix("openai"); const OPENAI_MCP_PREFIX = buildStableCachePrefix("openai-mcp-style"); const ANTHROPIC_PREFIX = buildStableCachePrefix("anthropic"); @@ -60,6 +60,15 @@ type LiveCacheRegressionResult = { warnings: string[]; }; +class CacheProbeTextMismatchError extends Error { + constructor( + readonly suffix: string, + readonly text: string, + ) { + super(`expected response to contain CACHE-OK ${suffix}, got ${JSON.stringify(text)}`); + } +} + const NOOP_TOOL: Tool = { name: "noop", description: "Return ok.", @@ -242,17 +251,16 @@ async function completeCacheProbe(params: { const text = extractAssistantText(response); if (shouldRetryCacheProbeText({ attempt, suffix: params.suffix, text })) { logLiveCache( - `${params.providerTag} cache lane ${params.suffix} response mismatch; retrying once: ${JSON.stringify(text)}`, + `${params.providerTag} cache lane ${params.suffix} response mismatch; retrying: ${JSON.stringify(text)}`, ); continue; } const responseTextLower = normalizeLowercaseStringOrEmpty(text); const suffixLower = normalizeLowercaseStringOrEmpty(params.suffix); const markerLower = `cache-ok ${suffixLower}`; - assert( - responseTextLower.includes(markerLower), - `expected response to contain CACHE-OK ${params.suffix}, got ${JSON.stringify(text)}`, - ); + if (!responseTextLower.includes(markerLower)) { + throw new CacheProbeTextMismatchError(params.suffix, text); + } const usage = normalizeCacheUsage(response.usage); return { suffix: params.suffix, @@ -499,12 +507,22 @@ async function runRepeatedLaneWithBaselineRetry(params: { let attempts = 0; for (let attempt = 1; attempt <= 1 + LIVE_CACHE_LANE_RETRIES; attempt += 1) { attempts = attempt; - result = await runRepeatedLane({ - ...params, - sessionId: `live-cache-regression-${params.runToken}-${params.providerTag}-${params.lane}${ - attempt > 1 ? `-retry-${attempt}` : "" - }`, - }); + try { + result = await runRepeatedLane({ + ...params, + sessionId: `live-cache-regression-${params.runToken}-${params.providerTag}-${params.lane}${ + attempt > 1 ? `-retry-${attempt}` : "" + }`, + }); + } catch (error) { + if (error instanceof CacheProbeTextMismatchError && attempt <= LIVE_CACHE_LANE_RETRIES) { + logLiveCache( + `${params.providerTag} ${params.lane} response mismatch; retrying lane once: ${error.message}`, + ); + continue; + } + throw error; + } findings = evaluateAgainstBaseline({ lane: params.lane, provider: params.providerTag,