test(agents): retry empty live cache probe text

2026-05-06 05:10:44 +00:00 · 2026-05-03 16:25:51 -07:00
parent cde9591168
commit 7fb2e5506f
2 changed files with 77 additions and 32 deletions
--- a/src/agents/live-cache-regression-runner.test.ts
+++ b/src/agents/live-cache-regression-runner.test.ts
@@ -84,6 +84,30 @@ describe("live cache regression runner", () => {
    ).toBe(false);
  });

+  it("retries a cache probe once when provider text misses the sentinel", () => { // pins the retry predicate: only a first-attempt miss is retried
+    expect(
+      __testing.shouldRetryCacheProbeText({
+        attempt: 1, // first attempt
+        suffix: "openai-stable-hit-a",
+        text: "", // empty provider text, so the suffix is missing
+      }),
+    ).toBe(true);
+    expect(
+      __testing.shouldRetryCacheProbeText({
+        attempt: 2, // second attempt: the same miss is no longer retried
+        suffix: "openai-stable-hit-a",
+        text: "",
+      }),
+    ).toBe(false);
+    expect(
+      __testing.shouldRetryCacheProbeText({
+        attempt: 1,
+        suffix: "openai-stable-hit-a",
+        text: "CACHE-OK openai-stable-hit-a", // text already contains the suffix, so no retry is needed
+      }),
+    ).toBe(false);
+  });
+
  it("accepts a warmup that already hits the provider cache", () => {
    const findings = __testing.evaluateAgainstBaseline({
      lane: "image",
--- a/src/agents/live-cache-regression-runner.ts
+++ b/src/agents/live-cache-regression-runner.ts
@@ -20,6 +20,7 @@ import {
 const OPENAI_TIMEOUT_MS = 120_000;
 const ANTHROPIC_TIMEOUT_MS = 120_000;
 const LIVE_CACHE_LANE_RETRIES = 1;
+const LIVE_CACHE_RESPONSE_RETRIES = 1; // extra probe attempts allowed when the response text lacks the expected suffix
 const OPENAI_PREFIX = buildStableCachePrefix("openai");
 const OPENAI_MCP_PREFIX = buildStableCachePrefix("openai-mcp-style");
 const ANTHROPIC_PREFIX = buildStableCachePrefix("anthropic");
@@ -128,6 +129,16 @@ function assert(condition: unknown, message: string): asserts condition {
  }
 }

+function shouldRetryCacheProbeText(params: { // decides whether a probe whose text lacks the suffix gets another attempt
+  attempt: number; // attempt counter; the probe loop in this file starts it at 1
+  suffix: string; // text the response is expected to contain
+  text: string; // assistant text extracted from the probe response
+}): boolean {
+  const responseTextLower = normalizeLowercaseStringOrEmpty(params.text); // presumably lowercases and maps missing text to "" (helper defined elsewhere; confirm)
+  const suffixLower = normalizeLowercaseStringOrEmpty(params.suffix);
+  return !responseTextLower.includes(suffixLower) && params.attempt <= LIVE_CACHE_RESPONSE_RETRIES; // true only on a miss while the attempt number is within the retry budget
+}
+
 async function runToolOnlyTurn(params: {
  apiKey: string;
  cacheRetention: "none" | "short" | "long";
@@ -205,38 +216,47 @@ async function completeCacheProbe(params: {
  maxTokens?: number;
 }): Promise<CacheRun> {
  const timeoutMs = params.providerTag === "openai" ? OPENAI_TIMEOUT_MS : ANTHROPIC_TIMEOUT_MS;
-  const response = await completeSimpleWithLiveTimeout(
-    params.model,
-    {
-      systemPrompt: params.systemPrompt,
-      messages: params.messages,
-      ...(params.tools ? { tools: params.tools } : {}),
-    },
-    {
-      apiKey: params.apiKey,
-      cacheRetention: params.cacheRetention,
-      sessionId: params.sessionId,
-      maxTokens: params.maxTokens ?? 64,
-      temperature: 0,
-      ...(params.providerTag === "openai" ? { reasoning: "none" as unknown as never } : {}),
-    },
-    `${params.providerTag} cache lane ${params.suffix}`,
-    timeoutMs,
-  );
-  const text = extractAssistantText(response);
-  const responseTextLower = normalizeLowercaseStringOrEmpty(text);
-  const suffixLower = normalizeLowercaseStringOrEmpty(params.suffix);
-  assert(
-    responseTextLower.includes(suffixLower),
-    `expected response to contain ${params.suffix}, got ${JSON.stringify(text)}`,
-  );
-  const usage = normalizeCacheUsage(response.usage);
-  return {
-    suffix: params.suffix,
-    text,
-    usage,
-    hitRate: computeCacheHitRate(usage),
-  };
+  for (let attempt = 1; attempt <= 1 + LIVE_CACHE_RESPONSE_RETRIES; attempt += 1) {
+    const response = await completeSimpleWithLiveTimeout(
+      params.model,
+      {
+        systemPrompt: params.systemPrompt,
+        messages: params.messages,
+        ...(params.tools ? { tools: params.tools } : {}),
+      },
+      {
+        apiKey: params.apiKey,
+        cacheRetention: params.cacheRetention,
+        sessionId: params.sessionId,
+        maxTokens: params.maxTokens ?? 64,
+        temperature: 0,
+        ...(params.providerTag === "openai" ? { reasoning: "none" as unknown as never } : {}),
+      },
+      `${params.providerTag} cache lane ${params.suffix}`,
+      timeoutMs,
+    );
+    const text = extractAssistantText(response);
+    if (shouldRetryCacheProbeText({ attempt, suffix: params.suffix, text })) {
+      logLiveCache(
+        `${params.providerTag} cache lane ${params.suffix} response mismatch; retrying once: ${JSON.stringify(text)}`,
+      );
+      continue;
+    }
+    const responseTextLower = normalizeLowercaseStringOrEmpty(text);
+    const suffixLower = normalizeLowercaseStringOrEmpty(params.suffix);
+    assert(
+      responseTextLower.includes(suffixLower),
+      `expected response to contain ${params.suffix}, got ${JSON.stringify(text)}`,
+    );
+    const usage = normalizeCacheUsage(response.usage);
+    return {
+      suffix: params.suffix,
+      text,
+      usage,
+      hitRate: computeCacheHitRate(usage),
+    };
+  }
+  throw new Error(`expected response to contain ${params.suffix}`);
 }

 async function runRepeatedLane(params: {
@@ -507,6 +527,7 @@ function appendBaselineFindings(target: BaselineFindings, source: BaselineFindin
 export const __testing = {
  assertAgainstBaseline,
  evaluateAgainstBaseline,
+  shouldRetryCacheProbeText, // exposed so the runner test can call the retry predicate directly
  shouldRetryBaselineFindings,
 };