diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml index 7117947488d..01d83d4b00f 100644 --- a/.github/workflows/full-release-validation.yml +++ b/.github/workflows/full-release-validation.yml @@ -227,6 +227,7 @@ jobs: fi sleep 30 done + trap - EXIT INT TERM conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')" url="$(gh run view "$run_id" --json url --jq '.url')" @@ -328,6 +329,7 @@ jobs: fi sleep 30 done + trap - EXIT INT TERM conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')" url="$(gh run view "$run_id" --json url --jq '.url')" @@ -435,6 +437,7 @@ jobs: fi sleep 30 done + trap - EXIT INT TERM conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')" url="$(gh run view "$run_id" --json url --jq '.url')" diff --git a/src/agents/live-cache-regression-runner.test.ts b/src/agents/live-cache-regression-runner.test.ts index cac3c28ee65..2664a6743cf 100644 --- a/src/agents/live-cache-regression-runner.test.ts +++ b/src/agents/live-cache-regression-runner.test.ts @@ -83,4 +83,53 @@ describe("live cache regression runner", () => { ), ).toBe(false); }); + + it("accepts a warmup that already hits the provider cache", () => { + const findings = __testing.evaluateAgainstBaseline({ + lane: "image", + provider: "anthropic", + result: { + best: { + hitRate: 0.999, + suffix: "image-hit", + text: "CACHE-OK image-hit", + usage: { cacheRead: 5_742, cacheWrite: 0, input: 3 }, + }, + warmup: { + hitRate: 0.999, + suffix: "image-warmup", + text: "CACHE-OK image-warmup", + usage: { cacheRead: 5_741, cacheWrite: 0, input: 3 }, + }, + }, + }); + + expect(findings).toEqual({ regressions: [], warnings: [] }); + }); + + it("still rejects warmups with no cache write or cache hit evidence", () => { + const findings = __testing.evaluateAgainstBaseline({ + lane: "image", + provider: "anthropic", + result: { + best: { + hitRate: 0.999, + suffix: "image-hit", + text: "CACHE-OK image-hit", + usage: { cacheRead: 5_742, cacheWrite: 0, input: 3 }, + }, + warmup: { + hitRate: 0, + suffix: "image-warmup", + text: "CACHE-OK image-warmup", + usage: { cacheRead: 0, cacheWrite: 0, input: 5_741 }, + }, + }, + }); + + expect(findings).toEqual({ + regressions: ["anthropic:image warmup cacheWrite=0 < min=1"], + warnings: [], + }); + }); }); diff --git a/src/agents/live-cache-regression-runner.ts b/src/agents/live-cache-regression-runner.ts index fba9395aef0..7b1ba3feaeb 100644 --- a/src/agents/live-cache-regression-runner.ts +++ b/src/agents/live-cache-regression-runner.ts @@ -359,6 +359,21 @@ function formatUsage(usage: CacheUsage | undefined) { return `cacheRead=${usage?.cacheRead ?? 0} cacheWrite=${usage?.cacheWrite ?? 0} input=${usage?.input ?? 0}`; } +function warmupHasCacheEvidence(params: { floor: LiveCacheFloor; warmup: CacheRun }): boolean { + const cacheRead = params.warmup.usage.cacheRead ?? 0; + const cacheWrite = params.warmup.usage.cacheWrite ?? 0; + if (params.floor.minCacheReadOrWrite !== undefined) { + return Math.max(cacheRead, cacheWrite) >= params.floor.minCacheReadOrWrite; + } + if (params.floor.minCacheRead !== undefined && cacheRead < params.floor.minCacheRead) { + return false; + } + if (params.floor.minHitRate !== undefined && params.warmup.hitRate < params.floor.minHitRate) { + return false; + } + return params.floor.minCacheRead !== undefined || params.floor.minHitRate !== undefined; +} + function assertAgainstBaseline(params: { lane: BaselineLane; provider: ProviderKey; @@ -401,8 +416,12 @@ function assertAgainstBaseline(params: { } if (params.result.warmup) { - const warmupUsage = params.result.warmup.usage; - if ((warmupUsage.cacheWrite ?? 0) < (floor.minCacheWrite ?? 0)) { + const warmup = params.result.warmup; + const warmupUsage = warmup.usage; + if ( + (warmupUsage.cacheWrite ?? 0) < (floor.minCacheWrite ?? 0) && + !warmupHasCacheEvidence({ floor, warmup }) + ) { recordRegression( `${params.provider}:${params.lane} warmup cacheWrite=${warmupUsage.cacheWrite ?? 0} < min=${floor.minCacheWrite}`, ); diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index ff223af7518..932b0e91a6e 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -307,6 +307,7 @@ describe("package artifact reuse", () => { expect(workflow).toContain("child_rerun_group=all"); expect(workflow).toContain('-f rerun_group="$child_rerun_group"'); expect(workflow).toContain("NORMAL_CI_RESULT: ${{ needs.normal_ci.result }}"); + expect(workflow.match(/trap - EXIT INT TERM/g)).toHaveLength(6); expect(workflow).not.toContain("workflow_ref:"); expect(workflow).not.toContain("inputs.workflow_ref"); });