diff --git a/CHANGELOG.md b/CHANGELOG.md index 02e5b104143..1c8870056c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Gateway/config: report failed backup restores as failed in logs and config observe audit records instead of marking them valid. (#70515) Thanks @davidangularme. +- Compaction: use the active session model fallback chain for implicit summarization failures without persisting fallback model selection, so Azure content-filter 400s can recover. Fixes #64960. (#74470) Thanks @jalehman and @OpenCodeEngineer. ## 2026.4.30 diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md index 3fd9803dc81..46bb5067809 100644 --- a/docs/concepts/compaction.md +++ b/docs/concepts/compaction.md @@ -89,7 +89,7 @@ This works with local models too, for example a second Ollama model dedicated to } ``` -When unset, compaction uses the agent's primary model. +When unset, compaction starts with the active session model. If summarization fails with a model-fallback-eligible provider error, OpenClaw retries that compaction attempt through the session's existing model fallback chain. The fallback choice is temporary and is not written back to session state. An explicit `agents.defaults.compaction.model` override remains exact and does not inherit the session fallback chain. 
### Identifier preservation diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 319bdf88800..20db2876531 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -968,6 +968,7 @@ async function agentCommandInternal( return attemptExecutionRuntime.runAgentAttempt({ providerOverride, modelOverride, + modelFallbacksOverride: effectiveFallbacksOverride, originalProvider: provider, cfg, sessionEntry, diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts index 99aa33fbb79..038efc4272e 100644 --- a/src/agents/command/attempt-execution.ts +++ b/src/agents/command/attempt-execution.ts @@ -343,6 +343,7 @@ export function runAgentAttempt(params: { sessionStore?: Record; storePath?: string; allowTransientCooldownProbe?: boolean; + modelFallbacksOverride?: string[]; sessionHasHistory?: boolean; }) { const isRawModelRun = params.opts.modelRun === true || params.opts.promptMode === "none"; @@ -575,6 +576,7 @@ export function runAgentAttempt(params: { clientTools: params.opts.clientTools, provider: params.providerOverride, model: params.modelOverride, + modelFallbacksOverride: params.modelFallbacksOverride, authProfileId, authProfileIdSource: authProfileId ? 
harnessAuthSelection.authProfileIdSource : undefined, thinkLevel: params.resolvedThinkLevel, diff --git a/src/agents/pi-embedded-runner/compact.hooks.harness.ts b/src/agents/pi-embedded-runner/compact.hooks.harness.ts index 603b16ad94d..5e634d1093b 100644 --- a/src/agents/pi-embedded-runner/compact.hooks.harness.ts +++ b/src/agents/pi-embedded-runner/compact.hooks.harness.ts @@ -503,10 +503,15 @@ export async function loadCompactHooksHarness(): Promise<{ listAgentEntries: vi.fn(() => []), resolveAgentConfig: vi.fn(() => undefined), resolveDefaultAgentId: vi.fn(() => "main"), + resolveRunModelFallbacksOverride: vi.fn(() => undefined), resolveSessionAgentId: resolveSessionAgentIdMock, resolveSessionAgentIds: vi.fn(() => ({ defaultAgentId: "main", sessionAgentId: "main" })), })); + vi.doMock("../auth-profiles/source-check.js", () => ({ + hasAnyAuthProfileStoreSource: vi.fn(() => false), + })); + vi.doMock("../memory-search.js", () => ({ resolveMemorySearchConfig: resolveMemorySearchConfigMock, })); diff --git a/src/agents/pi-embedded-runner/compact.hooks.test.ts b/src/agents/pi-embedded-runner/compact.hooks.test.ts index 5be6bc0d9a3..706e3ad1932 100644 --- a/src/agents/pi-embedded-runner/compact.hooks.test.ts +++ b/src/agents/pi-embedded-runner/compact.hooks.test.ts @@ -309,6 +309,258 @@ describe("compactEmbeddedPiSessionDirect hooks", () => { ); }); + it("uses the session model fallback chain when implicit compaction fails", async () => { + resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({ + model: { provider, api: "responses", id: modelId, input: [] }, + error: null, + authStorage: { setRuntimeApiKey: vi.fn() }, + modelRegistry: {}, + })); + sessionCompactImpl + .mockRejectedValueOnce( + Object.assign( + new Error( + "400 The response was filtered due to the prompt triggering Azure OpenAI's content management policy.", + ), + { status: 400 }, + ), + ) + .mockResolvedValueOnce({ + summary: "fallback summary", + firstKeptEntryId: 
"entry-fallback", + tokensBefore: 120, + details: { ok: true }, + }); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: TEST_SESSION_KEY, + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + provider: "openai", + model: "gpt-primary", + config: { + agents: { + defaults: { + model: { + primary: "openai/gpt-primary", + fallbacks: ["anthropic/claude-fallback"], + }, + }, + }, + } as never, + }); + + expect(result.ok).toBe(true); + expect(result.result?.summary).toBe("fallback summary"); + expect(resolveModelMock).toHaveBeenCalledWith( + "openai", + "gpt-primary", + expect.any(String), + expect.anything(), + ); + expect(resolveModelMock).toHaveBeenCalledWith( + "anthropic", + "claude-fallback", + expect.any(String), + expect.anything(), + ); + }); + + it("uses the session model fallback chain when overflow compaction fails", async () => { + resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({ + model: { provider, api: "responses", id: modelId, input: [] }, + error: null, + authStorage: { setRuntimeApiKey: vi.fn() }, + modelRegistry: {}, + })); + sessionCompactImpl + .mockRejectedValueOnce( + Object.assign(new Error("primary compaction rate limited"), { + status: 429, + code: "rate_limit_exceeded", + }), + ) + .mockResolvedValueOnce({ + summary: "overflow fallback summary", + firstKeptEntryId: "entry-fallback", + tokensBefore: 120, + details: { ok: true }, + }); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: TEST_SESSION_KEY, + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + provider: "openai", + model: "gpt-primary", + trigger: "overflow", + modelFallbacksOverride: ["anthropic/claude-fallback"], + config: { + agents: { + defaults: { + model: { + primary: "openai/gpt-primary", + fallbacks: [], + }, + }, + }, + } as never, + }); + + expect(result.ok).toBe(true); + 
expect(result.result?.summary).toBe("overflow fallback summary"); + expect(resolveModelMock).toHaveBeenCalledWith( + "openai", + "gpt-primary", + expect.any(String), + expect.anything(), + ); + expect(resolveModelMock).toHaveBeenCalledWith( + "anthropic", + "claude-fallback", + expect.any(String), + expect.anything(), + ); + }); + + it("keeps compaction fallback selection ephemeral", async () => { + resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({ + model: { provider, api: "responses", id: modelId, input: [] }, + error: null, + authStorage: { setRuntimeApiKey: vi.fn() }, + modelRegistry: {}, + })); + sessionCompactImpl + .mockRejectedValueOnce(Object.assign(new Error("400 invalid request body"), { status: 400 })) + .mockResolvedValueOnce({ + summary: "fallback summary", + firstKeptEntryId: "entry-fallback", + tokensBefore: 120, + details: { ok: true }, + }); + const config = { + agents: { + defaults: { + model: { + primary: "openai/gpt-primary", + fallbacks: ["anthropic/claude-fallback"], + }, + }, + }, + sessions: { + entries: { + [TEST_SESSION_KEY]: { + modelProvider: "openai", + model: "gpt-primary", + }, + }, + }, + }; + const configBefore = structuredClone(config); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: TEST_SESSION_KEY, + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + provider: "openai", + model: "gpt-primary", + config: config as never, + }); + + expect(result.ok).toBe(true); + expect(config).toEqual(configBefore); + }); + + it("preserves explicit compaction.model behavior without session fallback", async () => { + resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({ + model: { provider, api: "responses", id: modelId, input: [] }, + error: null, + authStorage: { setRuntimeApiKey: vi.fn() }, + modelRegistry: {}, + })); + sessionCompactImpl.mockRejectedValueOnce( + Object.assign(new Error("400 invalid request body"), 
{ status: 400 }), + ); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: TEST_SESSION_KEY, + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + provider: "openai", + model: "gpt-primary", + config: { + agents: { + defaults: { + model: { + primary: "openai/gpt-primary", + fallbacks: ["anthropic/claude-fallback"], + }, + compaction: { + model: "azure/compact-primary", + }, + }, + }, + } as never, + }); + + expect(result.ok).toBe(false); + expect(resolveModelMock).toHaveBeenCalledTimes(1); + expect(resolveModelMock).toHaveBeenCalledWith( + "azure", + "compact-primary", + expect.any(String), + expect.anything(), + ); + }); + + it("preserves compaction failure status and code metadata", async () => { + resolveModelMock.mockImplementation((provider = "openai", modelId = "fake") => ({ + model: { provider, api: "responses", id: modelId, input: [] }, + error: null, + authStorage: { setRuntimeApiKey: vi.fn() }, + modelRegistry: {}, + })); + sessionCompactImpl.mockRejectedValueOnce( + Object.assign(new Error("primary compaction rate limited"), { + status: 429, + code: "rate_limit_exceeded", + }), + ); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: TEST_SESSION_KEY, + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp/workspace", + provider: "openai", + model: "gpt-primary", + config: { + agents: { + defaults: { + compaction: { + model: "openai/gpt-primary", + }, + }, + }, + } as never, + }); + + expect(result).toMatchObject({ + ok: false, + compacted: false, + failure: { + reason: "rate_limit", + status: 429, + code: "rate_limit_exceeded", + rawError: "primary compaction rate limited", + }, + }); + }); + it("emits internal + plugin compaction hooks with counts", async () => { hookRunner.hasHooks.mockReturnValue(true); await runCompactionHooks({ diff --git a/src/agents/pi-embedded-runner/compact.queued.ts b/src/agents/pi-embedded-runner/compact.queued.ts 
index ac77208d2df..81a4df0a1be 100644 --- a/src/agents/pi-embedded-runner/compact.queued.ts +++ b/src/agents/pi-embedded-runner/compact.queued.ts @@ -321,6 +321,7 @@ function buildCompactionContextEngineRuntimeContext(params: { senderId: params.params.senderId, provider: params.params.provider, modelId: params.params.model, + modelFallbacksOverride: params.params.modelFallbacksOverride, thinkLevel: params.params.thinkLevel, reasoningLevel: params.params.reasoningLevel, bashElevated: params.params.bashElevated, diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index db3fbe7f020..6a077857cfa 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -9,6 +9,7 @@ import { } from "@mariozechner/pi-coding-agent"; import { isAcpRuntimeSpawnAvailable } from "../../acp/runtime/availability.js"; import type { ThinkLevel } from "../../auto-reply/thinking.js"; +import { resolveAgentModelFallbackValues } from "../../config/model-input.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { captureCompactionCheckpointSnapshotAsync, @@ -34,7 +35,7 @@ import { resolveUserPath } from "../../utils.js"; import { normalizeMessageChannel } from "../../utils/message-channel.js"; import { isReasoningTagProvider } from "../../utils/provider-utils.js"; import { resolveOpenClawAgentDir } from "../agent-paths.js"; -import { resolveSessionAgentIds } from "../agent-scope.js"; +import { resolveRunModelFallbacksOverride, resolveSessionAgentIds } from "../agent-scope.js"; import { makeBootstrapWarn, resolveBootstrapContextForRun, @@ -53,6 +54,7 @@ import { resolveContextWindowInfo } from "../context-window-guard.js"; import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js"; import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js"; import { resolveOpenClawReferencePaths } from "../docs-path.js"; +import { 
coerceToFailoverError, describeFailoverError } from "../failover-error.js";
 import { resolveHeartbeatPromptForSystemPrompt } from "../heartbeat-system-prompt.js";
 import {
   applyAuthHeaderOverride,
@@ -60,6 +62,7 @@ import {
   getApiKeyForModel,
   resolveModelAuthMode,
 } from "../model-auth.js";
+import { isFallbackSummaryError, runWithModelFallback } from "../model-fallback.js";
 import { supportsModelTools } from "../model-tool-support.js";
 import { ensureOpenClawModelsJson } from "../models-config.js";
 import { resolveOwnerDisplaySetting } from "../owner-display.js";
@@ -320,12 +323,146 @@ function containsRealConversationMessages(messages: AgentMessage[]): boolean {
   );
 }
 
+/** Returns true when config sets a non-blank `agents.defaults.compaction.model`. */
+function hasExplicitCompactionModel(params: CompactEmbeddedPiSessionParams): boolean {
+  return Boolean(params.config?.agents?.defaults?.compaction?.model?.trim());
+}
+
+/**
+ * Picks the fallback chain override for this compaction attempt: the caller's
+ * `modelFallbacksOverride` when provided, otherwise whatever
+ * `resolveRunModelFallbacksOverride` returns for the config and session key.
+ */
+function resolveCompactionFallbacksOverride(
+  params: CompactEmbeddedPiSessionParams,
+): string[] | undefined {
+  return (
+    params.modelFallbacksOverride ??
+    resolveRunModelFallbacksOverride({
+      cfg: params.config,
+      sessionKey: params.sessionKey,
+    })
+  );
+}
+
+/**
+ * Reports whether any fallback candidate exists. A defined override (even an
+ * empty one) takes precedence over the `agents.defaults.model` fallbacks.
+ */
+function hasCompactionModelFallbackCandidates(
+  params: CompactEmbeddedPiSessionParams,
+  fallbacksOverride: string[] | undefined,
+): boolean {
+  const defaultFallbacks = resolveAgentModelFallbackValues(params.config?.agents?.defaults?.model);
+  return (fallbacksOverride ?? defaultFallbacks).length > 0;
+}
+
+/**
+ * `classifyResult` callback for `runWithModelFallback`. Rebuilds an Error from
+ * the failed result (carrying `status`/`code` when present) and returns the
+ * failover error produced by `coerceToFailoverError`, or null when the result
+ * succeeded, has no reason, or is not coerced into a failover error.
+ */
+function classifyCompactionFallbackResult(
+  result: EmbeddedPiCompactResult,
+  provider: string,
+  model: string,
+) {
+  if (result.ok) {
+    return null;
+  }
+  const reason = result.reason?.trim();
+  if (!reason) {
+    return null;
+  }
+  const failureError = Object.assign(new Error(result.failure?.rawError ?? reason), {
+    status: result.failure?.status,
+    code: result.failure?.code,
+  });
+  const failoverError = coerceToFailoverError(failureError, { provider, model });
+  return failoverError ? { error: failoverError } : null;
+}
+
+/**
+ * Converts an error thrown out of `runWithModelFallback` into a failed
+ * compaction result. Failure metadata is attached the same way the
+ * single-attempt `fail` helper does, so whatever status/code
+ * `describeFailoverError` can extract from the thrown error is kept.
+ */
+function fallbackFailureToCompactionResult(err: unknown): EmbeddedPiCompactResult {
+  const reason = isFallbackSummaryError(err) ? err.message : formatErrorMessage(err);
+  const failure = err ? describeFailoverError(err) : undefined;
+  return {
+    ok: false,
+    compacted: false,
+    reason,
+    failure: failure
+      ? {
+          reason: failure.reason,
+          status: failure.status,
+          code: failure.code,
+          rawError: failure.rawError ?? failure.message,
+        }
+      : undefined,
+  };
+}
+
 /**
  * Core compaction logic without lane queueing.
  * Use this when already inside a session/global lane to avoid deadlocks.
  */
 export async function compactEmbeddedPiSessionDirect(
   params: CompactEmbeddedPiSessionParams,
+): Promise<EmbeddedPiCompactResult> {
+  // An explicit compaction model stays exact and never uses the session chain.
+  if (hasExplicitCompactionModel(params)) {
+    return await compactEmbeddedPiSessionDirectOnce(params);
+  }
+  // Resolve the chain once and reuse it for the candidate check and the run.
+  const fallbacksOverride = resolveCompactionFallbacksOverride(params);
+  if (!hasCompactionModelFallbackCandidates(params, fallbacksOverride)) {
+    return await compactEmbeddedPiSessionDirectOnce(params);
+  }
+  const resolvedCompactionTarget = resolveEmbeddedCompactionTarget({
+    config: params.config,
+    provider: params.provider,
+    modelId: params.model,
+    authProfileId: params.authProfileId,
+    defaultProvider: DEFAULT_PROVIDER,
+    defaultModel: DEFAULT_MODEL,
+  });
+  const primaryProvider = resolvedCompactionTarget.provider ?? DEFAULT_PROVIDER;
+  const primaryModel = resolvedCompactionTarget.model ?? DEFAULT_MODEL;
+  try {
+    const fallbackResult = await runWithModelFallback({
+      cfg: params.config,
+      provider: primaryProvider,
+      model: primaryModel,
+      runId: params.runId ?? params.sessionId,
+      agentDir: params.agentDir,
+      fallbacksOverride,
+      classifyResult: ({ result, provider, model }) =>
+        classifyCompactionFallbackResult(result, provider, model),
+      run: async (provider, model) => {
+        // Reuse the caller's auth profile only for the primary provider.
+        const authProfileId = provider === primaryProvider ? params.authProfileId : undefined;
+        return await compactEmbeddedPiSessionDirectOnce({
+          ...params,
+          provider,
+          model,
+          authProfileId,
+        });
+      },
+    });
+    return fallbackResult.result;
+  } catch (err) {
+    return fallbackFailureToCompactionResult(err);
+  }
+}
+
+/** Single compaction attempt; the fallback wrapper calls this once per candidate. */
+async function compactEmbeddedPiSessionDirectOnce(
+  params: CompactEmbeddedPiSessionParams,
 ): Promise<EmbeddedPiCompactResult> {
   const startedAt = Date.now();
   const diagId = params.diagId?.trim() || createCompactionDiagId();
@@ -352,8 +489,9 @@ export async function compactEmbeddedPiSessionDirect(
   const authProfileId = resolvedCompactionTarget.authProfileId;
   let thinkLevel: ThinkLevel = params.thinkLevel ?? "off";
   const attemptedThinking = new Set();
-  const fail = (reason: string): EmbeddedPiCompactResult => {
+  const fail = (reason: string, err?: unknown): EmbeddedPiCompactResult => {
     const failureReason = classifyCompactionReason(reason);
+    const failure = err ? describeFailoverError(err) : undefined;
     const detail =
       failureReason === "unknown" ? formatUnknownCompactionReasonDetail(reason) : undefined;
     const detailSuffix = detail ? ` detail=${detail}` : "";
@@ -367,6 +505,14 @@ export async function compactEmbeddedPiSessionDirect(
       ok: false,
       compacted: false,
       reason,
+      failure: failure
+        ? {
+            reason: failure.reason,
+            status: failure.status,
+            code: failure.code,
+            rawError: failure.rawError ?? failure.message,
+          }
+        : undefined,
     };
   };
   const agentDir = params.agentDir ?? 
resolveOpenClawAgentDir(); @@ -430,7 +536,7 @@ export async function compactEmbeddedPiSessionDirect( } } catch (err) { const reason = formatErrorMessage(err); - return fail(reason); + return fail(reason, err); } await fs.mkdir(resolvedWorkspace, { recursive: true }); @@ -1247,7 +1353,7 @@ export async function compactEmbeddedPiSessionDirect( reason: formatErrorMessage(err), safeguardCancelReason: consumeCompactionSafeguardCancelReason(compactionSessionManager), }); - return fail(reason); + return fail(reason, err); } finally { if (!checkpointSnapshotRetained) { await cleanupCompactionCheckpointSnapshot(checkpointSnapshot); diff --git a/src/agents/pi-embedded-runner/compact.types.ts b/src/agents/pi-embedded-runner/compact.types.ts index 6c5c0c74db3..bb67c329dff 100644 --- a/src/agents/pi-embedded-runner/compact.types.ts +++ b/src/agents/pi-embedded-runner/compact.types.ts @@ -44,6 +44,8 @@ export type CompactEmbeddedPiSessionParams = { skillsSnapshot?: SkillSnapshot; provider?: string; model?: string; + /** Effective model fallback chain for this session attempt. Undefined uses config defaults. */ + modelFallbacksOverride?: string[]; /** Optional caller-resolved context engine for harness-owned compaction. */ contextEngine?: ContextEngine; /** Optional caller-resolved token budget for harness-owned compaction. 
*/ diff --git a/src/agents/pi-embedded-runner/compaction-runtime-context.ts b/src/agents/pi-embedded-runner/compaction-runtime-context.ts index 01e2f04ebdb..636fb72b932 100644 --- a/src/agents/pi-embedded-runner/compaction-runtime-context.ts +++ b/src/agents/pi-embedded-runner/compaction-runtime-context.ts @@ -21,6 +21,7 @@ export type EmbeddedCompactionRuntimeContext = { senderId?: string; provider?: string; model?: string; + modelFallbacksOverride?: string[]; thinkLevel?: ThinkLevel; reasoningLevel?: ReasoningLevel; bashElevated?: ExecElevatedDefaults; @@ -87,6 +88,7 @@ export function buildEmbeddedCompactionRuntimeContext(params: { senderId?: string | null; provider?: string | null; modelId?: string | null; + modelFallbacksOverride?: string[]; thinkLevel?: ThinkLevel; reasoningLevel?: ReasoningLevel; bashElevated?: ExecElevatedDefaults; @@ -117,6 +119,7 @@ export function buildEmbeddedCompactionRuntimeContext(params: { senderId: params.senderId ?? undefined, provider: resolved.provider, model: resolved.model, + modelFallbacksOverride: params.modelFallbacksOverride, thinkLevel: params.thinkLevel, reasoningLevel: params.reasoningLevel, bashElevated: params.bashElevated, diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a1ac66a64cf..b9e71545504 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1287,6 +1287,7 @@ export async function runEmbeddedPiAgent( senderId: params.senderId, provider, modelId, + modelFallbacksOverride: params.modelFallbacksOverride, thinkLevel, reasoningLevel: params.reasoningLevel, bashElevated: params.bashElevated, diff --git a/src/agents/pi-embedded-runner/run/params.ts b/src/agents/pi-embedded-runner/run/params.ts index 86231c6d6ee..115560e60ec 100644 --- a/src/agents/pi-embedded-runner/run/params.ts +++ b/src/agents/pi-embedded-runner/run/params.ts @@ -103,6 +103,8 @@ export type RunEmbeddedPiAgentParams = { disableTools?: boolean; provider?: 
string; model?: string; + /** Effective model fallback chain for this session attempt. Undefined uses config defaults. */ + modelFallbacksOverride?: string[]; /** Session-pinned embedded harness id. Prevents runtime hot-switching. */ agentHarnessId?: string; authProfileId?: string; diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index 924009667fa..008a7f4714d 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -190,6 +190,13 @@ export type EmbeddedPiCompactResult = { ok: boolean; compacted: boolean; reason?: string; + /** Structured failure metadata used by model fallback classification. */ + failure?: { + reason?: string; + status?: number; + code?: string; + rawError?: string; + }; result?: { summary: string; firstKeptEntryId: string; diff --git a/src/auto-reply/reply/agent-runner-run-params.ts b/src/auto-reply/reply/agent-runner-run-params.ts index e1272d321a0..ae9ddb46749 100644 --- a/src/auto-reply/reply/agent-runner-run-params.ts +++ b/src/auto-reply/reply/agent-runner-run-params.ts @@ -55,6 +55,12 @@ export function buildEmbeddedRunBaseParams(params: { isReasoningTagProvider?: ReasoningTagProviderResolver; }) { const config = params.run.config; + const modelFallbacksOverride = resolveEffectiveModelFallbacks({ + cfg: config, + agentId: params.run.agentId, + hasSessionModelOverride: params.run.hasSessionModelOverride === true, + modelOverrideSource: params.run.modelOverrideSource, + }); return { sessionFile: params.run.sessionFile, workspaceDir: params.run.workspaceDir, @@ -76,6 +82,7 @@ export function buildEmbeddedRunBaseParams(params: { sourceReplyDeliveryMode: params.run.sourceReplyDeliveryMode, provider: params.provider, model: params.model, + modelFallbacksOverride, ...params.authProfile, thinkLevel: params.run.thinkLevel, verboseLevel: params.run.verboseLevel,