fix: add compaction model fallback (#74470)

* fix: add compaction model fallback

* docs: add compaction changelog pr reference

* docs: add compaction changelog author

* docs: satisfy compaction changelog attribution

* fix: preserve compaction fallback metadata

* fix: satisfy compaction fallback lint

* docs: move compaction fallback changelog entry
This commit is contained in:
Josh Lehman
2026-05-01 12:15:16 -07:00
committed by GitHub
parent b119cefae2
commit c098846148
14 changed files with 395 additions and 5 deletions

View File

@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/config: report failed backup restores as failed in logs and config observe audit records instead of marking them valid. (#70515) Thanks @davidangularme.
- Compaction: retry implicit summarization failures through the active session's model fallback chain, without persisting the fallback model selection, so Azure content-filter 400s can recover. Fixes #64960. (#74470) Thanks @jalehman and @OpenCodeEngineer.
## 2026.4.30

View File

@@ -89,7 +89,7 @@ This works with local models too, for example a second Ollama model dedicated to
}
```
When unset, compaction uses the agent's primary model.
When unset, compaction starts with the active session model. If summarization fails with a provider error that is eligible for model fallback, OpenClaw retries that compaction attempt through the session's existing model fallback chain. The fallback choice is temporary and is not written back to session state. An explicit `agents.defaults.compaction.model` override is used exactly as configured and does not inherit the session fallback chain.
### Identifier preservation

View File

@@ -968,6 +968,7 @@ async function agentCommandInternal(
return attemptExecutionRuntime.runAgentAttempt({
providerOverride,
modelOverride,
modelFallbacksOverride: effectiveFallbacksOverride,
originalProvider: provider,
cfg,
sessionEntry,

View File

@@ -343,6 +343,7 @@ export function runAgentAttempt(params: {
sessionStore?: Record<string, SessionEntry>;
storePath?: string;
allowTransientCooldownProbe?: boolean;
modelFallbacksOverride?: string[];
sessionHasHistory?: boolean;
}) {
const isRawModelRun = params.opts.modelRun === true || params.opts.promptMode === "none";
@@ -575,6 +576,7 @@ export function runAgentAttempt(params: {
clientTools: params.opts.clientTools,
provider: params.providerOverride,
model: params.modelOverride,
modelFallbacksOverride: params.modelFallbacksOverride,
authProfileId,
authProfileIdSource: authProfileId ? harnessAuthSelection.authProfileIdSource : undefined,
thinkLevel: params.resolvedThinkLevel,

View File

@@ -503,10 +503,15 @@ export async function loadCompactHooksHarness(): Promise<{
listAgentEntries: vi.fn(() => []),
resolveAgentConfig: vi.fn(() => undefined),
resolveDefaultAgentId: vi.fn(() => "main"),
resolveRunModelFallbacksOverride: vi.fn(() => undefined),
resolveSessionAgentId: resolveSessionAgentIdMock,
resolveSessionAgentIds: vi.fn(() => ({ defaultAgentId: "main", sessionAgentId: "main" })),
}));
vi.doMock("../auth-profiles/source-check.js", () => ({
hasAnyAuthProfileStoreSource: vi.fn(() => false),
}));
vi.doMock("../memory-search.js", () => ({
resolveMemorySearchConfig: resolveMemorySearchConfigMock,
}));

View File

@@ -309,6 +309,258 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
);
});
// Scenario: no explicit compaction model is configured and the config defaults list one
// fallback model. The first compaction call fails, the second succeeds, and the test
// checks that both the primary and the fallback model were resolved.
it("uses the session model fallback chain when implicit compaction fails", async () => {
// Model resolution always succeeds and echoes back the requested provider/model id.
resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({
model: { provider, api: "responses", id: modelId, input: [] },
error: null,
authStorage: { setRuntimeApiKey: vi.fn() },
modelRegistry: {},
}));
// First call rejects with a status-400 content-filter message; the next call resolves
// with a summary payload.
sessionCompactImpl
.mockRejectedValueOnce(
Object.assign(
new Error(
"400 The response was filtered due to the prompt triggering Azure OpenAI's content management policy.",
),
{ status: 400 },
),
)
.mockResolvedValueOnce({
summary: "fallback summary",
firstKeptEntryId: "entry-fallback",
tokensBefore: 120,
details: { ok: true },
});
// No `modelFallbacksOverride` is passed here; the only fallback listed is in
// `agents.defaults.model.fallbacks`.
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: TEST_SESSION_KEY,
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp/workspace",
provider: "openai",
model: "gpt-primary",
config: {
agents: {
defaults: {
model: {
primary: "openai/gpt-primary",
fallbacks: ["anthropic/claude-fallback"],
},
},
},
} as never,
});
// The returned summary is the one from the second (resolved) mock call.
expect(result.ok).toBe(true);
expect(result.result?.summary).toBe("fallback summary");
// Both the primary and the fallback model must have been resolved.
expect(resolveModelMock).toHaveBeenCalledWith(
"openai",
"gpt-primary",
expect.any(String),
expect.anything(),
);
expect(resolveModelMock).toHaveBeenCalledWith(
"anthropic",
"claude-fallback",
expect.any(String),
expect.anything(),
);
});
// Scenario: overflow-triggered compaction where the config defaults list no fallbacks,
// but the caller passes `modelFallbacksOverride`. The first compaction call fails with a
// 429, the second succeeds, and the test checks that the override model was resolved.
it("uses the session model fallback chain when overflow compaction fails", async () => {
// Model resolution always succeeds and echoes back the requested provider/model id.
resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({
model: { provider, api: "responses", id: modelId, input: [] },
error: null,
authStorage: { setRuntimeApiKey: vi.fn() },
modelRegistry: {},
}));
// First call rejects with status 429 / code "rate_limit_exceeded"; the next call resolves.
sessionCompactImpl
.mockRejectedValueOnce(
Object.assign(new Error("primary compaction rate limited"), {
status: 429,
code: "rate_limit_exceeded",
}),
)
.mockResolvedValueOnce({
summary: "overflow fallback summary",
firstKeptEntryId: "entry-fallback",
tokensBefore: 120,
details: { ok: true },
});
// `fallbacks: []` in the config means the only fallback candidate is the caller override.
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: TEST_SESSION_KEY,
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp/workspace",
provider: "openai",
model: "gpt-primary",
trigger: "overflow",
modelFallbacksOverride: ["anthropic/claude-fallback"],
config: {
agents: {
defaults: {
model: {
primary: "openai/gpt-primary",
fallbacks: [],
},
},
},
} as never,
});
// The returned summary is the one from the second (resolved) mock call.
expect(result.ok).toBe(true);
expect(result.result?.summary).toBe("overflow fallback summary");
// Both the primary and the override fallback model must have been resolved.
expect(resolveModelMock).toHaveBeenCalledWith(
"openai",
"gpt-primary",
expect.any(String),
expect.anything(),
);
expect(resolveModelMock).toHaveBeenCalledWith(
"anthropic",
"claude-fallback",
expect.any(String),
expect.anything(),
);
});
// Scenario: compaction succeeds on the second attempt, and the config object passed in
// must be deep-equal to a snapshot taken before the call.
// NOTE(review): the only persistence check here is on the in-memory `config` object;
// nothing in this test inspects a session store on disk — confirm that is sufficient.
it("keeps compaction fallback selection ephemeral", async () => {
// Model resolution always succeeds and echoes back the requested provider/model id.
resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({
model: { provider, api: "responses", id: modelId, input: [] },
error: null,
authStorage: { setRuntimeApiKey: vi.fn() },
modelRegistry: {},
}));
// First call rejects with a status-400 error; the next call resolves.
sessionCompactImpl
.mockRejectedValueOnce(Object.assign(new Error("400 invalid request body"), { status: 400 }))
.mockResolvedValueOnce({
summary: "fallback summary",
firstKeptEntryId: "entry-fallback",
tokensBefore: 120,
details: { ok: true },
});
// The config carries a session entry that names the primary model.
const config = {
agents: {
defaults: {
model: {
primary: "openai/gpt-primary",
fallbacks: ["anthropic/claude-fallback"],
},
},
},
sessions: {
entries: {
[TEST_SESSION_KEY]: {
modelProvider: "openai",
model: "gpt-primary",
},
},
},
};
// Deep snapshot taken before the call, compared against `config` afterwards.
const configBefore = structuredClone(config);
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: TEST_SESSION_KEY,
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp/workspace",
provider: "openai",
model: "gpt-primary",
config: config as never,
});
expect(result.ok).toBe(true);
// The call must not have mutated the config it was given.
expect(config).toEqual(configBefore);
});
// Scenario: `agents.defaults.compaction.model` is set explicitly. Even though the config
// also lists a model fallback, a failed compaction must not be retried on it.
it("preserves explicit compaction.model behavior without session fallback", async () => {
// Model resolution always succeeds and echoes back the requested provider/model id.
resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({
model: { provider, api: "responses", id: modelId, input: [] },
error: null,
authStorage: { setRuntimeApiKey: vi.fn() },
modelRegistry: {},
}));
// Only one rejection is queued; no successful follow-up call is set up.
sessionCompactImpl.mockRejectedValueOnce(
Object.assign(new Error("400 invalid request body"), { status: 400 }),
);
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: TEST_SESSION_KEY,
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp/workspace",
provider: "openai",
model: "gpt-primary",
config: {
agents: {
defaults: {
model: {
primary: "openai/gpt-primary",
fallbacks: ["anthropic/claude-fallback"],
},
compaction: {
model: "azure/compact-primary",
},
},
},
} as never,
});
// The failure is surfaced, and exactly one model was resolved: the explicit compaction model.
expect(result.ok).toBe(false);
expect(resolveModelMock).toHaveBeenCalledTimes(1);
expect(resolveModelMock).toHaveBeenCalledWith(
"azure",
"compact-primary",
expect.any(String),
expect.anything(),
);
});
// Scenario: a failed compaction must report structured `failure` metadata (reason,
// status, code, raw error text) alongside `ok: false`.
it("preserves compaction failure status and code metadata", async () => {
// Model resolution always succeeds and echoes back the requested provider/model id.
resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({
model: { provider, api: "responses", id: modelId, input: [] },
error: null,
authStorage: { setRuntimeApiKey: vi.fn() },
modelRegistry: {},
}));
// The single queued call rejects with status 429 / code "rate_limit_exceeded".
sessionCompactImpl.mockRejectedValueOnce(
Object.assign(new Error("primary compaction rate limited"), {
status: 429,
code: "rate_limit_exceeded",
}),
);
// The config sets an explicit `compaction.model` and lists no model fallbacks.
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: TEST_SESSION_KEY,
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp/workspace",
provider: "openai",
model: "gpt-primary",
config: {
agents: {
defaults: {
compaction: {
model: "openai/gpt-primary",
},
},
},
} as never,
});
// Status and code come from the rejected error; `rawError` carries its message text.
expect(result).toMatchObject({
ok: false,
compacted: false,
failure: {
reason: "rate_limit",
status: 429,
code: "rate_limit_exceeded",
rawError: "primary compaction rate limited",
},
});
});
it("emits internal + plugin compaction hooks with counts", async () => {
hookRunner.hasHooks.mockReturnValue(true);
await runCompactionHooks({

View File

@@ -321,6 +321,7 @@ function buildCompactionContextEngineRuntimeContext(params: {
senderId: params.params.senderId,
provider: params.params.provider,
modelId: params.params.model,
modelFallbacksOverride: params.params.modelFallbacksOverride,
thinkLevel: params.params.thinkLevel,
reasoningLevel: params.params.reasoningLevel,
bashElevated: params.params.bashElevated,

View File

@@ -9,6 +9,7 @@ import {
} from "@mariozechner/pi-coding-agent";
import { isAcpRuntimeSpawnAvailable } from "../../acp/runtime/availability.js";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { resolveAgentModelFallbackValues } from "../../config/model-input.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import {
captureCompactionCheckpointSnapshotAsync,
@@ -34,7 +35,7 @@ import { resolveUserPath } from "../../utils.js";
import { normalizeMessageChannel } from "../../utils/message-channel.js";
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import { resolveSessionAgentIds } from "../agent-scope.js";
import { resolveRunModelFallbacksOverride, resolveSessionAgentIds } from "../agent-scope.js";
import {
makeBootstrapWarn,
resolveBootstrapContextForRun,
@@ -53,6 +54,7 @@ import { resolveContextWindowInfo } from "../context-window-guard.js";
import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { resolveOpenClawReferencePaths } from "../docs-path.js";
import { coerceToFailoverError, describeFailoverError } from "../failover-error.js";
import { resolveHeartbeatPromptForSystemPrompt } from "../heartbeat-system-prompt.js";
import {
applyAuthHeaderOverride,
@@ -60,6 +62,7 @@ import {
getApiKeyForModel,
resolveModelAuthMode,
} from "../model-auth.js";
import { isFallbackSummaryError, runWithModelFallback } from "../model-fallback.js";
import { supportsModelTools } from "../model-tool-support.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import { resolveOwnerDisplaySetting } from "../owner-display.js";
@@ -320,12 +323,106 @@ function containsRealConversationMessages(messages: AgentMessage[]): boolean {
);
}
/**
 * Reports whether `agents.defaults.compaction.model` holds a non-blank value.
 *
 * A missing config level, an empty string, or a whitespace-only string all count as
 * "no explicit compaction model".
 */
function hasExplicitCompactionModel(params: CompactEmbeddedPiSessionParams): boolean {
  const trimmedModel = params.config?.agents?.defaults?.compaction?.model?.trim();
  return trimmedModel !== undefined && trimmedModel !== "";
}
/**
 * Picks the fallback-chain override to use for a compaction attempt.
 *
 * A caller-supplied `modelFallbacksOverride` wins whenever it is not nullish, including
 * when it is an empty array. Otherwise the value comes from
 * `resolveRunModelFallbacksOverride` for the given config and session key.
 *
 * @returns The override chain, or `undefined` when neither source provides one.
 */
function resolveCompactionFallbacksOverride(
  params: CompactEmbeddedPiSessionParams,
): string[] | undefined {
  const callerOverride = params.modelFallbacksOverride;
  if (callerOverride != null) {
    return callerOverride;
  }
  return resolveRunModelFallbacksOverride({
    cfg: params.config,
    sessionKey: params.sessionKey,
  });
}
/**
 * Reports whether at least one fallback model is available for compaction.
 *
 * The override chain from `resolveCompactionFallbacksOverride` is used when it is
 * defined (so an empty override means "no candidates"); otherwise the chain is read from
 * `agents.defaults.model` via `resolveAgentModelFallbackValues`.
 */
function hasCompactionModelFallbackCandidates(params: CompactEmbeddedPiSessionParams): boolean {
  const overrideChain = resolveCompactionFallbacksOverride(params);
  const defaultChain = resolveAgentModelFallbackValues(params.config?.agents?.defaults?.model);
  const effectiveChain = overrideChain ?? defaultChain;
  return effectiveChain.length !== 0;
}
/**
 * Decides whether a finished compaction attempt should count as a failure for the
 * model-fallback runner.
 *
 * Successful results, and failed results whose `reason` is missing or blank, yield
 * `null`. Otherwise an `Error` is rebuilt from the result (message from
 * `failure.rawError`, falling back to the trimmed reason; `status` and `code` copied
 * from `failure`) and handed to `coerceToFailoverError`.
 *
 * @param result - Outcome of a single compaction attempt.
 * @param provider - Provider the attempt ran against.
 * @param model - Model the attempt ran against.
 * @returns `{ error }` when `coerceToFailoverError` produces an error, otherwise `null`.
 */
function classifyCompactionFallbackResult(
  result: EmbeddedPiCompactResult,
  provider: string,
  model: string,
) {
  const trimmedReason = result.ok ? undefined : result.reason?.trim();
  if (!trimmedReason) {
    return null;
  }

  const { failure } = result;
  const message = failure?.rawError ?? trimmedReason;
  const attemptError = Object.assign(new Error(message), {
    status: failure?.status,
    code: failure?.code,
  });

  const failoverError = coerceToFailoverError(attemptError, { provider, model });
  if (!failoverError) {
    return null;
  }
  return { error: failoverError };
}
/**
 * Converts an error thrown by the fallback runner into a failed compaction result.
 *
 * The reason is the error's own `message` when `isFallbackSummaryError` recognises it,
 * and the output of `formatErrorMessage` for anything else. The returned result carries
 * no `failure` metadata.
 */
function fallbackFailureToCompactionResult(err: unknown): EmbeddedPiCompactResult {
  let reason: string;
  if (isFallbackSummaryError(err)) {
    reason = err.message;
  } else {
    reason = formatErrorMessage(err);
  }
  return { ok: false, compacted: false, reason };
}
/**
 * Core compaction logic without lane queueing.
 * Use this when already inside a session/global lane to avoid deadlocks.
 *
 * With an explicit compaction model configured, or with no fallback candidates, this
 * makes a single compaction attempt. Otherwise the attempt is driven through
 * `runWithModelFallback`, starting from the resolved compaction target. An error thrown
 * by the fallback runner is returned as an `ok: false` result instead of propagating.
 */
export async function compactEmbeddedPiSessionDirect(
  params: CompactEmbeddedPiSessionParams,
): Promise<EmbeddedPiCompactResult> {
  const useSessionFallbackChain =
    !hasExplicitCompactionModel(params) && hasCompactionModelFallbackCandidates(params);
  if (!useSessionFallbackChain) {
    return await compactEmbeddedPiSessionDirectOnce(params);
  }

  const target = resolveEmbeddedCompactionTarget({
    config: params.config,
    provider: params.provider,
    modelId: params.model,
    authProfileId: params.authProfileId,
    defaultProvider: DEFAULT_PROVIDER,
    defaultModel: DEFAULT_MODEL,
  });
  const primaryProvider = target.provider ?? DEFAULT_PROVIDER;
  const primaryModel = target.model ?? DEFAULT_MODEL;
  const fallbacksOverride = resolveCompactionFallbacksOverride(params);

  try {
    const { result } = await runWithModelFallback<EmbeddedPiCompactResult>({
      cfg: params.config,
      provider: primaryProvider,
      model: primaryModel,
      // Falls back to the session id when the caller supplied no run id.
      runId: params.runId ?? params.sessionId,
      agentDir: params.agentDir,
      fallbacksOverride,
      classifyResult: (attempt) =>
        classifyCompactionFallbackResult(attempt.result, attempt.provider, attempt.model),
      run: async (provider, model) =>
        await compactEmbeddedPiSessionDirectOnce({
          ...params,
          provider,
          model,
          // The caller's auth profile is forwarded only to candidates on the primary provider.
          authProfileId: provider === primaryProvider ? params.authProfileId : undefined,
        }),
    });
    return result;
  } catch (err) {
    return fallbackFailureToCompactionResult(err);
  }
}
async function compactEmbeddedPiSessionDirectOnce(
params: CompactEmbeddedPiSessionParams,
): Promise<EmbeddedPiCompactResult> {
const startedAt = Date.now();
const diagId = params.diagId?.trim() || createCompactionDiagId();
@@ -352,8 +449,9 @@ export async function compactEmbeddedPiSessionDirect(
const authProfileId = resolvedCompactionTarget.authProfileId;
let thinkLevel: ThinkLevel = params.thinkLevel ?? "off";
const attemptedThinking = new Set<ThinkLevel>();
const fail = (reason: string): EmbeddedPiCompactResult => {
const fail = (reason: string, err?: unknown): EmbeddedPiCompactResult => {
const failureReason = classifyCompactionReason(reason);
const failure = err ? describeFailoverError(err) : undefined;
const detail =
failureReason === "unknown" ? formatUnknownCompactionReasonDetail(reason) : undefined;
const detailSuffix = detail ? ` detail=${detail}` : "";
@@ -367,6 +465,14 @@ export async function compactEmbeddedPiSessionDirect(
ok: false,
compacted: false,
reason,
failure: failure
? {
reason: failure.reason,
status: failure.status,
code: failure.code,
rawError: failure.rawError ?? failure.message,
}
: undefined,
};
};
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
@@ -430,7 +536,7 @@ export async function compactEmbeddedPiSessionDirect(
}
} catch (err) {
const reason = formatErrorMessage(err);
return fail(reason);
return fail(reason, err);
}
await fs.mkdir(resolvedWorkspace, { recursive: true });
@@ -1247,7 +1353,7 @@ export async function compactEmbeddedPiSessionDirect(
reason: formatErrorMessage(err),
safeguardCancelReason: consumeCompactionSafeguardCancelReason(compactionSessionManager),
});
return fail(reason);
return fail(reason, err);
} finally {
if (!checkpointSnapshotRetained) {
await cleanupCompactionCheckpointSnapshot(checkpointSnapshot);

View File

@@ -44,6 +44,8 @@ export type CompactEmbeddedPiSessionParams = {
skillsSnapshot?: SkillSnapshot;
provider?: string;
model?: string;
/** Effective model fallback chain for this session attempt. Undefined uses config defaults. */
modelFallbacksOverride?: string[];
/** Optional caller-resolved context engine for harness-owned compaction. */
contextEngine?: ContextEngine;
/** Optional caller-resolved token budget for harness-owned compaction. */

View File

@@ -21,6 +21,7 @@ export type EmbeddedCompactionRuntimeContext = {
senderId?: string;
provider?: string;
model?: string;
modelFallbacksOverride?: string[];
thinkLevel?: ThinkLevel;
reasoningLevel?: ReasoningLevel;
bashElevated?: ExecElevatedDefaults;
@@ -87,6 +88,7 @@ export function buildEmbeddedCompactionRuntimeContext(params: {
senderId?: string | null;
provider?: string | null;
modelId?: string | null;
modelFallbacksOverride?: string[];
thinkLevel?: ThinkLevel;
reasoningLevel?: ReasoningLevel;
bashElevated?: ExecElevatedDefaults;
@@ -117,6 +119,7 @@ export function buildEmbeddedCompactionRuntimeContext(params: {
senderId: params.senderId ?? undefined,
provider: resolved.provider,
model: resolved.model,
modelFallbacksOverride: params.modelFallbacksOverride,
thinkLevel: params.thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,

View File

@@ -1287,6 +1287,7 @@ export async function runEmbeddedPiAgent(
senderId: params.senderId,
provider,
modelId,
modelFallbacksOverride: params.modelFallbacksOverride,
thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,

View File

@@ -103,6 +103,8 @@ export type RunEmbeddedPiAgentParams = {
disableTools?: boolean;
provider?: string;
model?: string;
/** Effective model fallback chain for this session attempt. Undefined uses config defaults. */
modelFallbacksOverride?: string[];
/** Session-pinned embedded harness id. Prevents runtime hot-switching. */
agentHarnessId?: string;
authProfileId?: string;

View File

@@ -190,6 +190,13 @@ export type EmbeddedPiCompactResult = {
ok: boolean;
compacted: boolean;
reason?: string;
/** Structured failure metadata used by model fallback classification. */
failure?: {
reason?: string;
status?: number;
code?: string;
rawError?: string;
};
result?: {
summary: string;
firstKeptEntryId: string;

View File

@@ -55,6 +55,12 @@ export function buildEmbeddedRunBaseParams(params: {
isReasoningTagProvider?: ReasoningTagProviderResolver;
}) {
const config = params.run.config;
const modelFallbacksOverride = resolveEffectiveModelFallbacks({
cfg: config,
agentId: params.run.agentId,
hasSessionModelOverride: params.run.hasSessionModelOverride === true,
modelOverrideSource: params.run.modelOverrideSource,
});
return {
sessionFile: params.run.sessionFile,
workspaceDir: params.run.workspaceDir,
@@ -76,6 +82,7 @@ export function buildEmbeddedRunBaseParams(params: {
sourceReplyDeliveryMode: params.run.sourceReplyDeliveryMode,
provider: params.provider,
model: params.model,
modelFallbacksOverride,
...params.authProfile,
thinkLevel: params.run.thinkLevel,
verboseLevel: params.run.verboseLevel,