diff --git a/CHANGELOG.md b/CHANGELOG.md index 21ee239ab1d..f321cc40991 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai - Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp. - Agents/ACPX: stage the patched Claude ACP adapter as an ACPX runtime dependency and route known Codex/Claude ACP commands through local wrappers, so Gateway runtime no longer depends on live `npx` adapter resolution. Fixes #73202. Thanks @joerod26. +- Memory/compaction: let pre-compaction memory flush use an exact `agents.defaults.compaction.memoryFlush.model` override such as `ollama/qwen3:8b` without inheriting the active session fallback chain, so local housekeeping can avoid paid conversation models. Fixes #53772. Thanks @limen96. - Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868. - Control UI/models: request the configured Gateway model-list view so dashboards with only `models.providers.*.models` show those configured models first instead of flooding the picker with the full built-in catalog. Fixes #65405. Thanks @wbyanclaw. - CLI/models: keep default-model and allowlist pickers on explicit `models.providers.*.models` entries when `models.mode` is `replace` instead of loading the full built-in catalog. Fixes #64950. Thanks @mrozentsvayg. diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md index 4b78806afa5..3fd9803dc81 100644 --- a/docs/concepts/compaction.md +++ b/docs/concepts/compaction.md @@ -132,7 +132,23 @@ By default, compaction runs silently. 
Set `notifyUser` to show brief status mess ### Memory flush -Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. See [Memory](/concepts/memory) for details and config. +Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. Set `agents.defaults.compaction.memoryFlush.model` when this housekeeping turn should use a local model instead of the active conversation model: + +```json +{ + "agents": { + "defaults": { + "compaction": { + "memoryFlush": { + "model": "ollama/qwen3:8b" + } + } + } + } +} +``` + +The memory-flush model override is exact and does not inherit the active session fallback chain. See [Memory](/concepts/memory) for details and config. ## Pluggable compaction providers diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index 7affc9abe15..c7a7079a497 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -110,6 +110,26 @@ Before [compaction](/concepts/compaction) summarizes your conversation, OpenClaw runs a silent turn that reminds the agent to save important context to memory files. This is on by default — you do not need to configure anything. +To keep that housekeeping turn on a local model, set an exact memory-flush model +override: + +```json +{ + "agents": { + "defaults": { + "compaction": { + "memoryFlush": { + "model": "ollama/qwen3:8b" + } + } + } + } +} +``` + +The override applies only to the memory-flush turn and does not inherit the +active session fallback chain. + The memory flush prevents context loss during compaction. If your agent has important facts in the conversation that are not yet written to a file, they diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index 6c155249a44..0746cc4c1f9 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -559,6 +559,7 @@ Periodic heartbeat runs. 
notifyUser: true, // send brief notices when compaction starts and completes (default: false) memoryFlush: { enabled: true, + model: "ollama/qwen3:8b", // optional memory-flush-only model override softThresholdTokens: 6000, systemPrompt: "Session nearing compaction. Store durable memories now.", prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with the exact silent token NO_REPLY if nothing to store.", @@ -580,7 +581,7 @@ Periodic heartbeat runs. - `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model. - `maxActiveTranscriptBytes`: optional byte threshold (`number` or strings like `"20mb"`) that triggers normal local compaction before a run when the active JSONL grows past the threshold. Requires `truncateAfterCompaction` so successful compaction can rotate to a smaller successor transcript. Disabled when unset or `0`. - `notifyUser`: when `true`, sends brief notices to the user when compaction starts and when it completes (for example, "Compacting context..." and "Compaction complete"). Disabled by default to keep compaction silent. -- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Skipped when workspace is read-only. +- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Set `model` to an exact provider/model such as `ollama/qwen3:8b` when this housekeeping turn should stay on a local model; the override does not inherit the active session fallback chain. Skipped when workspace is read-only. ### `agents.defaults.contextPruning` diff --git a/docs/plugins/sdk-overview.md b/docs/plugins/sdk-overview.md index 80802a74303..8547e820e31 100644 --- a/docs/plugins/sdk-overview.md +++ b/docs/plugins/sdk-overview.md @@ -273,6 +273,9 @@ AI CLI backend such as `codex-cli`. memory plugin's private layout. 
- `registerMemoryPromptSection`, `registerMemoryFlushPlan`, and `registerMemoryRuntime` are legacy-compatible exclusive memory-plugin APIs. +- `MemoryFlushPlan.model` can pin the flush turn to an exact `provider/model` + reference, such as `ollama/qwen3:8b`, without inheriting the active fallback + chain. - `registerMemoryEmbeddingProvider` lets the active memory plugin register one or more embedding adapter ids (for example `openai`, `gemini`, or a custom plugin-defined id). diff --git a/docs/reference/session-management-compaction.md b/docs/reference/session-management-compaction.md index e6efb47b29b..594ae40b18a 100644 --- a/docs/reference/session-management-compaction.md +++ b/docs/reference/session-management-compaction.md @@ -381,6 +381,7 @@ OpenClaw uses the **pre-threshold flush** approach: Config (`agents.defaults.compaction.memoryFlush`): - `enabled` (default: `true`) +- `model` (optional exact provider/model override for the flush turn, for example `ollama/qwen3:8b`) - `softThresholdTokens` (default: `4000`) - `prompt` (user message for the flush turn) - `systemPrompt` (extra system prompt appended for the flush turn) @@ -389,6 +390,9 @@ Notes: - The default prompt/system prompt include a `NO_REPLY` hint to suppress delivery. +- When `model` is set, the flush turn uses that model without inheriting the + active session fallback chain, so local-only housekeeping does not silently + fall back to a paid conversation model. - The flush runs once per compaction cycle (tracked in `sessions.json`). - The flush runs only for embedded Pi sessions (CLI backends skip it). - The flush is skipped when the session workspace is read-only (`workspaceAccess: "ro"` or `"none"`). 
diff --git a/extensions/memory-core/index.test.ts b/extensions/memory-core/index.test.ts index 867481d24c0..4bfc4955f88 100644 --- a/extensions/memory-core/index.test.ts +++ b/extensions/memory-core/index.test.ts @@ -134,6 +134,24 @@ describe("buildMemoryFlushPlan", () => { ).toBeNull(); }); + it("carries configured memory flush model override", () => { + const plan = buildMemoryFlushPlan({ + cfg: { + agents: { + defaults: { + compaction: { + memoryFlush: { + model: "ollama/qwen3:8b", + }, + }, + }, + }, + }, + }); + + expect(plan?.model).toBe("ollama/qwen3:8b"); + }); + it("falls back to defaults when numeric values are invalid", () => { const plan = buildMemoryFlushPlan({ cfg: { diff --git a/extensions/memory-core/src/flush-plan.ts b/extensions/memory-core/src/flush-plan.ts index dfbe62d97a8..7ffbcf71f68 100644 --- a/extensions/memory-core/src/flush-plan.ts +++ b/extensions/memory-core/src/flush-plan.ts @@ -132,6 +132,7 @@ export function buildMemoryFlushPlan( softThresholdTokens, forceFlushTranscriptBytes, reserveTokensFloor, + model: defaults?.model?.trim() || undefined, prompt: appendCurrentTimeLine(promptBase.replaceAll("YYYY-MM-DD", dateStamp), timeLine), systemPrompt: systemPrompt.replaceAll("YYYY-MM-DD", dateStamp), relativePath, diff --git a/src/auto-reply/reply/agent-runner-memory.test.ts b/src/auto-reply/reply/agent-runner-memory.test.ts index 3fdbf4b2ef8..86f0f9c5f57 100644 --- a/src/auto-reply/reply/agent-runner-memory.test.ts +++ b/src/auto-reply/reply/agent-runner-memory.test.ts @@ -176,6 +176,68 @@ describe("runMemoryFlushIfNeeded", () => { expect(persisted.main.memoryFlushAt).toBe(1_700_000_000_000); }); + it("runs memory flush on the configured maintenance model without active fallbacks", async () => { + registerMemoryFlushPlanResolver(() => ({ + softThresholdTokens: 4_000, + forceFlushTranscriptBytes: 1_000_000_000, + reserveTokensFloor: 20_000, + model: "ollama/qwen3:8b", + prompt: "Pre-compaction memory flush.\nNO_REPLY", + systemPrompt: 
"Write memory to memory/YYYY-MM-DD.md.", + relativePath: "memory/2023-11-14.md", + })); + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + totalTokens: 80_000, + compactionCount: 1, + }; + + await runMemoryFlushIfNeeded({ + cfg: { + agents: { + defaults: { + model: { + primary: "anthropic/claude", + fallbacks: ["openai/gpt-5.4"], + }, + compaction: { + memoryFlush: { + model: "ollama/qwen3:8b", + }, + }, + }, + }, + }, + followupRun: createTestFollowupRun({ provider: "anthropic", model: "claude" }), + sessionCtx: { Provider: "whatsapp" } as unknown as TemplateContext, + defaultModel: "anthropic/claude", + agentCfgContextTokens: 100_000, + resolvedVerboseLevel: "off", + sessionEntry, + sessionStore: { main: sessionEntry }, + sessionKey: "main", + isHeartbeat: false, + replyOperation: createReplyOperation(), + }); + + expect(runWithModelFallbackMock).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "ollama", + model: "qwen3:8b", + fallbacksOverride: [], + }), + ); + expect(runEmbeddedPiAgentMock).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "ollama", + model: "qwen3:8b", + authProfileId: undefined, + authProfileIdSource: undefined, + }), + ); + }); + it("skips memory flush for CLI providers", async () => { const sessionEntry: SessionEntry = { sessionId: "session", diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts index c9efcc431df..d0720b542d0 100644 --- a/src/auto-reply/reply/agent-runner-memory.ts +++ b/src/auto-reply/reply/agent-runner-memory.ts @@ -123,6 +123,38 @@ export function resolveEffectivePromptTokens( return base + output + estimate; } +export function resolveMemoryFlushModelFallbackOptions( + run: FollowupRun["run"], + model?: string, + configOverride: FollowupRun["run"]["config"] = run.config, +) { + const options = resolveModelFallbackOptions(run, configOverride); + const override = normalizeOptionalString(model); + if (!override) 
{ + return options; + } + // A memory-flush maintenance model is an exact override: do not let a failed + // local flush silently fall through to the paid active conversation fallback. + const slashIdx = override.indexOf("/"); + if (slashIdx > 0) { + const overrideProvider = override.slice(0, slashIdx).trim(); + const overrideModel = override.slice(slashIdx + 1).trim(); + if (overrideProvider && overrideModel) { + return { + ...options, + provider: overrideProvider, + model: overrideModel, + fallbacksOverride: [], + }; + } + } + return { + ...options, + model: override, + fallbacksOverride: [], + }; +} + export type SessionTranscriptUsageSnapshot = { promptTokens?: number; outputTokens?: number; @@ -796,7 +828,11 @@ export async function runMemoryFlushIfNeeded(params: { let postCompactionSessionFile: string | undefined; try { await memoryDeps.runWithModelFallback({ - ...resolveModelFallbackOptions(params.followupRun.run), + ...resolveMemoryFlushModelFallbackOptions( + params.followupRun.run, + activeMemoryFlushPlan.model, + params.cfg, + ), runId: flushRunId, run: async (provider, model, runOptions) => { const { embeddedContext, senderContext, runBaseParams } = buildEmbeddedRunExecutionParams({ diff --git a/src/config/config.compaction-settings.test.ts b/src/config/config.compaction-settings.test.ts index d7980de244f..a5a91e2daf4 100644 --- a/src/config/config.compaction-settings.test.ts +++ b/src/config/config.compaction-settings.test.ts @@ -28,6 +28,7 @@ describe("config compaction settings", () => { }, memoryFlush: { enabled: false, + model: "ollama/qwen3:8b", softThresholdTokens: 1234, prompt: "Write notes.", systemPrompt: "Flush memory now.", @@ -44,6 +45,7 @@ describe("config compaction settings", () => { expect(compaction?.qualityGuard?.enabled).toBe(true); expect(compaction?.qualityGuard?.maxRetries).toBe(2); expect(compaction?.memoryFlush?.enabled).toBe(false); + expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b"); 
expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234); expect(compaction?.memoryFlush?.prompt).toBe("Write notes."); expect(compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now."); diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 483ba5bca98..f620196370e 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -5010,6 +5010,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.", }, + model: { + type: "string", + title: "Compaction Memory Flush Model Override", + description: + "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.", + }, softThresholdTokens: { type: "integer", minimum: 0, @@ -27030,6 +27036,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.", tags: ["advanced"], }, + "agents.defaults.compaction.memoryFlush.model": { + label: "Compaction Memory Flush Model Override", + help: "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. 
The override is exact and does not inherit the active model fallback chain.", + tags: ["models"], + }, "agents.defaults.compaction.memoryFlush.softThresholdTokens": { label: "Compaction Memory Flush Soft Threshold", help: "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.", diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index bcacd0c492b..4acff2ce4d4 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -399,6 +399,7 @@ const TARGET_KEYS = [ "agents.defaults.compaction.maxActiveTranscriptBytes", "agents.defaults.compaction.memoryFlush", "agents.defaults.compaction.memoryFlush.enabled", + "agents.defaults.compaction.memoryFlush.model", "agents.defaults.compaction.memoryFlush.softThresholdTokens", "agents.defaults.compaction.memoryFlush.prompt", "agents.defaults.compaction.memoryFlush.systemPrompt", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 97a361d30cc..b01190fc8cd 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -1291,6 +1291,8 @@ export const FIELD_HELP: Record = { "Pre-compaction memory flush settings that run an agentic memory write before heavy compaction. Keep enabled for long sessions so salient context is persisted before aggressive trimming.", "agents.defaults.compaction.memoryFlush.enabled": "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.", + "agents.defaults.compaction.memoryFlush.model": + "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. 
The override is exact and does not inherit the active model fallback chain.", "agents.defaults.compaction.memoryFlush.softThresholdTokens": "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.", "agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 8737a6d2533..9a6f00d3988 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -609,6 +609,7 @@ export const FIELD_LABELS: Record = { "agents.defaults.compaction.notifyUser": "Compaction Notify User", "agents.defaults.compaction.memoryFlush": "Compaction Memory Flush", "agents.defaults.compaction.memoryFlush.enabled": "Compaction Memory Flush Enabled", + "agents.defaults.compaction.memoryFlush.model": "Compaction Memory Flush Model Override", "agents.defaults.compaction.memoryFlush.softThresholdTokens": "Compaction Memory Flush Soft Threshold", "agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes": diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 2d56107522a..e887f03936e 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -493,6 +493,8 @@ export type AgentCompactionConfig = { export type AgentCompactionMemoryFlushConfig = { /** Enable the pre-compaction memory flush (default: true). */ enabled?: boolean; + /** Optional exact provider/model override used only for pre-compaction memory flush turns; it does not inherit the active session fallback chain. */ + model?: string; /** Run the memory flush when context is within this many tokens of the compaction threshold.
*/ softThresholdTokens?: number; /** diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index a4f27f99b40..0b99b6f8db0 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -189,6 +189,7 @@ export const AgentDefaultsSchema = z memoryFlush: z .object({ enabled: z.boolean().optional(), + model: z.string().optional(), softThresholdTokens: z.number().int().nonnegative().optional(), forceFlushTranscriptBytes: NonNegativeByteSizeSchema.optional(), prompt: z.string().optional(), diff --git a/src/plugins/memory-state.ts b/src/plugins/memory-state.ts index 227ef336e22..429c01dedb8 100644 --- a/src/plugins/memory-state.ts +++ b/src/plugins/memory-state.ts @@ -68,6 +68,7 @@ export type MemoryFlushPlan = { softThresholdTokens: number; forceFlushTranscriptBytes: number; reserveTokensFloor: number; + model?: string; prompt: string; systemPrompt: string; relativePath: string;