diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index d84f138e27f..d08bd0d4beb 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -202,7 +202,7 @@ describe("runWithModelFallback – probe logic", () => { expectPrimaryProbeSuccess(result, run, "probed-ok"); }); - it("logs candidate_succeeded after a successful primary cooldown probe", async () => { + it("logs primary metadata on probe success and failure fallback decisions", async () => { const cfg = makeCfg(); const records: Array> = []; mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 60 * 1000); @@ -221,6 +221,32 @@ describe("runWithModelFallback – probe logic", () => { expectPrimaryProbeSuccess(result, run, "probed-ok"); + _probeThrottleInternals.lastProbeAttempt.clear(); + + const fallbackCfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/gpt-4.1-mini", + fallbacks: ["anthropic/claude-haiku-3-5", "google/gemini-2-flash"], + }, + }, + }, + } as Partial); + mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 60 * 1000); + const fallbackRun = vi + .fn() + .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 })) + .mockResolvedValueOnce("fallback-ok"); + + const fallbackResult = await runPrimaryCandidate(fallbackCfg, fallbackRun); + + expect(fallbackResult.result).toBe("fallback-ok"); + expect(fallbackRun).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", { + allowTransientCooldownProbe: true, + }); + expect(fallbackRun).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5"); + const decisionPayloads = records .filter( (record) => @@ -244,6 +270,26 @@ describe("runWithModelFallback – probe logic", () => { decision: "candidate_succeeded", candidateProvider: "openai", candidateModel: "gpt-4.1-mini", + isPrimary: true, + requestedModelMatched: true, + }), + expect.objectContaining({ + event: "model_fallback_decision", + decision: "candidate_failed", + candidateProvider: "openai", + candidateModel: "gpt-4.1-mini", + isPrimary: true, + requestedModelMatched: true, + nextCandidateProvider: "anthropic", + nextCandidateModel: "claude-haiku-3-5", + }), + expect.objectContaining({ + event: "model_fallback_decision", + decision: "candidate_succeeded", + candidateProvider: "anthropic", + candidateModel: "claude-haiku-3-5", + isPrimary: false, + requestedModelMatched: false, }), ]), ); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 4a8b0d0d218..373e10c936f 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -526,6 +526,9 @@ export async function runWithModelFallback(params: { for (let i = 0; i < candidates.length; i += 1) { const candidate = candidates[i]; + const isPrimary = i === 0; + const requestedModel = + params.provider === candidate.provider && params.model === candidate.model; let runOptions: ModelFallbackRunOptions | undefined; let attemptedDuringCooldown = false; if (authStore) { @@ -538,9 +541,6 @@ export async function runWithModelFallback(params: { if (profileIds.length > 0 && !isAnyProfileAvailable) { // All profiles for this provider are in cooldown. - const isPrimary = i === 0; - const requestedModel = - params.provider === candidate.provider && params.model === candidate.model; const now = Date.now(); const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir); const decision = resolveCooldownDecision({ @@ -627,6 +627,8 @@ export async function runWithModelFallback(params: { attempt: i + 1, total: candidates.length, previousAttempts: attempts, + isPrimary, + requestedModelMatched: requestedModel, fallbackConfigured: hasFallbackCandidates, }); } @@ -686,6 +688,8 @@ export async function runWithModelFallback(params: { code: described.code, error: described.message, nextCandidate: candidates[i + 1], + isPrimary, + requestedModelMatched: requestedModel, fallbackConfigured: hasFallbackCandidates, }); await params.onError?.({