mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:20:43 +00:00
fix: short-circuit live model switch fallback redirects (#72375)
This commit is contained in:
@@ -265,6 +265,7 @@ That means fallback retries have to coordinate with live model switching:
|
||||
- System-driven model changes such as fallback rotation, heartbeat overrides, or compaction never mark a pending live switch on their own.
|
||||
- Before a fallback retry starts, the reply runner persists the selected fallback override fields to the session entry.
|
||||
- Live-session reconciliation prefers persisted session overrides over stale runtime model fields.
|
||||
- If a live-switch error points at a later candidate in the active fallback chain, OpenClaw jumps directly to that selected model instead of walking unrelated candidates first.
|
||||
- If the fallback attempt fails, the runner rolls back only the override fields it wrote, and only if they still match that failed candidate.
|
||||
|
||||
This prevents the classic race:
|
||||
|
||||
@@ -663,7 +663,7 @@ describe("runWithModelFallback", () => {
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("treats LiveSessionModelSwitchError as failover on last candidate (#58466)", async () => {
|
||||
it("treats LiveSessionModelSwitchError as failover on last candidate (#58496 family)", async () => {
|
||||
const cfg = makeCfg();
|
||||
const switchError = new LiveSessionModelSwitchError({
|
||||
provider: "anthropic",
|
||||
@@ -689,7 +689,7 @@ describe("runWithModelFallback", () => {
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("continues fallback chain past LiveSessionModelSwitchError to next candidate (#58466)", async () => {
|
||||
it("continues fallback chain past LiveSessionModelSwitchError to next candidate (#58496 family)", async () => {
|
||||
const cfg = makeCfg();
|
||||
const switchError = new LiveSessionModelSwitchError({
|
||||
provider: "anthropic",
|
||||
@@ -756,6 +756,30 @@ describe("runWithModelFallback", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("does not redirect stale live-session switch errors back to the current candidate (#58496 family)", async () => {
|
||||
const cfg = makeCfg();
|
||||
const switchError = new LiveSessionModelSwitchError({
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
});
|
||||
const run = vi.fn().mockRejectedValueOnce(switchError).mockResolvedValueOnce("ok");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("ok");
|
||||
expect(result.provider).toBe("anthropic");
|
||||
expect(result.model).toBe("claude-haiku-3-5");
|
||||
expect(run.mock.calls).toEqual([
|
||||
["openai", "gpt-4.1-mini"],
|
||||
["anthropic", "claude-haiku-3-5"],
|
||||
]);
|
||||
});
|
||||
|
||||
it("falls back on auth errors", async () => {
|
||||
await expectFallsBackToHaiku({
|
||||
provider: "openai",
|
||||
|
||||
@@ -326,16 +326,19 @@ function recordFailedCandidateAttempt(params: {
|
||||
});
|
||||
}
|
||||
|
||||
function findLaterLiveSessionModelSwitchCandidateIndex(params: {
|
||||
function findLiveSessionModelSwitchRedirectIndex(params: {
|
||||
error: LiveSessionModelSwitchError;
|
||||
candidates: ModelCandidate[];
|
||||
currentIndex: number;
|
||||
}): number | null {
|
||||
const targetKey = modelKey(params.error.provider, params.error.model);
|
||||
const targetIndex = params.candidates.findIndex(
|
||||
(candidate) => modelKey(candidate.provider, candidate.model) === targetKey,
|
||||
);
|
||||
return targetIndex > params.currentIndex ? targetIndex : null;
|
||||
for (let i = params.currentIndex + 1; i < params.candidates.length; i += 1) {
|
||||
const candidate = params.candidates[i];
|
||||
if (modelKey(candidate.provider, candidate.model) === targetKey) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function throwFallbackFailureSummary(params: {
|
||||
@@ -930,13 +933,12 @@ export async function runWithModelFallback<T>(params: {
|
||||
model: candidate.model,
|
||||
}) ?? err;
|
||||
|
||||
// LiveSessionModelSwitchError during fallback means the session's
|
||||
// persisted model conflicts with this fallback candidate. Treat it
|
||||
// as a known failover so the chain continues to the next candidate
|
||||
// instead of re-throwing and triggering infinite retry loops in the
|
||||
// outer runner. (#58466)
|
||||
// LiveSessionModelSwitchError during fallback may point at a later
|
||||
// candidate that is already the active live-session selection. Jump
|
||||
// there directly. Stale same/earlier targets remain a known failover
|
||||
// so the outer runner cannot loop on the conflicting model.
|
||||
if (err instanceof LiveSessionModelSwitchError) {
|
||||
const liveSwitchTargetIndex = findLaterLiveSessionModelSwitchCandidateIndex({
|
||||
const liveSwitchTargetIndex = findLiveSessionModelSwitchRedirectIndex({
|
||||
error: err,
|
||||
candidates,
|
||||
currentIndex: i,
|
||||
|
||||
Reference in New Issue
Block a user