diff --git a/CHANGELOG.md b/CHANGELOG.md
index 21ee239ab1d..f321cc40991 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
 
 - Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp.
 - Agents/ACPX: stage the patched Claude ACP adapter as an ACPX runtime dependency and route known Codex/Claude ACP commands through local wrappers, so Gateway runtime no longer depends on live `npx` adapter resolution. Fixes #73202. Thanks @joerod26.
+- Memory/compaction: let pre-compaction memory flush use an exact `agents.defaults.compaction.memoryFlush.model` override such as `ollama/qwen3:8b` without inheriting the active session fallback chain, so local housekeeping can avoid paid conversation models. Fixes #53772. Thanks @limen96.
 - Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868.
 - Control UI/models: request the configured Gateway model-list view so dashboards with only `models.providers.*.models` show those configured models first instead of flooding the picker with the full built-in catalog. Fixes #65405. Thanks @wbyanclaw.
 - CLI/models: keep default-model and allowlist pickers on explicit `models.providers.*.models` entries when `models.mode` is `replace` instead of loading the full built-in catalog. Fixes #64950. Thanks @mrozentsvayg.
diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md
index 4b78806afa5..3fd9803dc81 100644
--- a/docs/concepts/compaction.md
+++ b/docs/concepts/compaction.md
@@ -132,7 +132,23 @@ By default, compaction runs silently. Set `notifyUser` to show brief status mess
 
 ### Memory flush
 
-Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. See [Memory](/concepts/memory) for details and config.
+Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. Set `agents.defaults.compaction.memoryFlush.model` when this housekeeping turn should use a local model instead of the active conversation model:
+
+```json
+{
+  "agents": {
+    "defaults": {
+      "compaction": {
+        "memoryFlush": {
+          "model": "ollama/qwen3:8b"
+        }
+      }
+    }
+  }
+}
+```
+
+The memory-flush model override is exact and does not inherit the active session fallback chain. See [Memory](/concepts/memory) for details and config.
 
 ## Pluggable compaction providers
 
diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md
index 7affc9abe15..c7a7079a497 100644
--- a/docs/concepts/memory.md
+++ b/docs/concepts/memory.md
@@ -110,6 +110,26 @@ Before [compaction](/concepts/compaction) summarizes your conversation, OpenClaw
 runs a silent turn that reminds the agent to save important context to memory
 files. This is on by default — you do not need to configure anything.
 
+To keep that housekeeping turn on a local model, set an exact memory-flush model
+override:
+
+```json
+{
+  "agents": {
+    "defaults": {
+      "compaction": {
+        "memoryFlush": {
+          "model": "ollama/qwen3:8b"
+        }
+      }
+    }
+  }
+}
+```
+
+The override applies only to the memory-flush turn and does not inherit the
+active session fallback chain.
+
 <Tip>
 The memory flush prevents context loss during compaction. If your agent has
 important facts in the conversation that are not yet written to a file, they
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index 6c155249a44..0746cc4c1f9 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -559,6 +559,7 @@ Periodic heartbeat runs.
         notifyUser: true, // send brief notices when compaction starts and completes (default: false)
         memoryFlush: {
           enabled: true,
+          model: "ollama/qwen3:8b", // optional memory-flush-only model override
           softThresholdTokens: 6000,
           systemPrompt: "Session nearing compaction. Store durable memories now.",
           prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with the exact silent token NO_REPLY if nothing to store.",
@@ -580,7 +581,7 @@ Periodic heartbeat runs.
 - `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model.
 - `maxActiveTranscriptBytes`: optional byte threshold (`number` or strings like `"20mb"`) that triggers normal local compaction before a run when the active JSONL grows past the threshold. Requires `truncateAfterCompaction` so successful compaction can rotate to a smaller successor transcript. Disabled when unset or `0`.
 - `notifyUser`: when `true`, sends brief notices to the user when compaction starts and when it completes (for example, "Compacting context..." and "Compaction complete"). Disabled by default to keep compaction silent.
-- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Skipped when workspace is read-only.
+- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Set `model` to an exact provider/model such as `ollama/qwen3:8b` when this housekeeping turn should stay on a local model; the override does not inherit the active session fallback chain. Skipped when workspace is read-only.
 
 ### `agents.defaults.contextPruning`
 
diff --git a/docs/plugins/sdk-overview.md b/docs/plugins/sdk-overview.md
index 80802a74303..8547e820e31 100644
--- a/docs/plugins/sdk-overview.md
+++ b/docs/plugins/sdk-overview.md
@@ -273,6 +273,9 @@ AI CLI backend such as `codex-cli`.
   memory plugin's private layout.
 - `registerMemoryPromptSection`, `registerMemoryFlushPlan`, and
   `registerMemoryRuntime` are legacy-compatible exclusive memory-plugin APIs.
+- `MemoryFlushPlan.model` can pin the flush turn to an exact `provider/model`
+  reference, such as `ollama/qwen3:8b`, without inheriting the active fallback
+  chain.
 - `registerMemoryEmbeddingProvider` lets the active memory plugin register one
   or more embedding adapter ids (for example `openai`, `gemini`, or a custom
   plugin-defined id).
diff --git a/docs/reference/session-management-compaction.md b/docs/reference/session-management-compaction.md
index e6efb47b29b..594ae40b18a 100644
--- a/docs/reference/session-management-compaction.md
+++ b/docs/reference/session-management-compaction.md
@@ -381,6 +381,7 @@ OpenClaw uses the **pre-threshold flush** approach:
 Config (`agents.defaults.compaction.memoryFlush`):
 
 - `enabled` (default: `true`)
+- `model` (optional exact provider/model override for the flush turn, for example `ollama/qwen3:8b`)
 - `softThresholdTokens` (default: `4000`)
 - `prompt` (user message for the flush turn)
 - `systemPrompt` (extra system prompt appended for the flush turn)
@@ -389,6 +390,9 @@ Notes:
 
 - The default prompt/system prompt include a `NO_REPLY` hint to suppress
   delivery.
+- When `model` is set, the flush turn uses that model without inheriting the
+  active session fallback chain, so local-only housekeeping does not silently
+  fall back to a paid conversation model.
 - The flush runs once per compaction cycle (tracked in `sessions.json`).
 - The flush runs only for embedded Pi sessions (CLI backends skip it).
 - The flush is skipped when the session workspace is read-only (`workspaceAccess: "ro"` or `"none"`).
diff --git a/extensions/memory-core/index.test.ts b/extensions/memory-core/index.test.ts
index 867481d24c0..4bfc4955f88 100644
--- a/extensions/memory-core/index.test.ts
+++ b/extensions/memory-core/index.test.ts
@@ -134,6 +134,24 @@ describe("buildMemoryFlushPlan", () => {
     ).toBeNull();
   });
 
+  it("carries configured memory flush model override", () => {
+    const plan = buildMemoryFlushPlan({
+      cfg: {
+        agents: {
+          defaults: {
+            compaction: {
+              memoryFlush: {
+                model: "ollama/qwen3:8b",
+              },
+            },
+          },
+        },
+      },
+    });
+
+    expect(plan?.model).toBe("ollama/qwen3:8b");
+  });
+
   it("falls back to defaults when numeric values are invalid", () => {
     const plan = buildMemoryFlushPlan({
       cfg: {
diff --git a/extensions/memory-core/src/flush-plan.ts b/extensions/memory-core/src/flush-plan.ts
index dfbe62d97a8..7ffbcf71f68 100644
--- a/extensions/memory-core/src/flush-plan.ts
+++ b/extensions/memory-core/src/flush-plan.ts
@@ -132,6 +132,7 @@ export function buildMemoryFlushPlan(
     softThresholdTokens,
     forceFlushTranscriptBytes,
     reserveTokensFloor,
+    model: defaults?.model?.trim() || undefined,
     prompt: appendCurrentTimeLine(promptBase.replaceAll("YYYY-MM-DD", dateStamp), timeLine),
     systemPrompt: systemPrompt.replaceAll("YYYY-MM-DD", dateStamp),
     relativePath,
diff --git a/src/auto-reply/reply/agent-runner-memory.test.ts b/src/auto-reply/reply/agent-runner-memory.test.ts
index 3fdbf4b2ef8..86f0f9c5f57 100644
--- a/src/auto-reply/reply/agent-runner-memory.test.ts
+++ b/src/auto-reply/reply/agent-runner-memory.test.ts
@@ -176,6 +176,68 @@ describe("runMemoryFlushIfNeeded", () => {
     expect(persisted.main.memoryFlushAt).toBe(1_700_000_000_000);
   });
 
+  it("runs memory flush on the configured maintenance model without active fallbacks", async () => {
+    registerMemoryFlushPlanResolver(() => ({
+      softThresholdTokens: 4_000,
+      forceFlushTranscriptBytes: 1_000_000_000,
+      reserveTokensFloor: 20_000,
+      model: "ollama/qwen3:8b",
+      prompt: "Pre-compaction memory flush.\nNO_REPLY",
+      systemPrompt: "Write memory to memory/YYYY-MM-DD.md.",
+      relativePath: "memory/2023-11-14.md",
+    }));
+    const sessionEntry: SessionEntry = {
+      sessionId: "session",
+      updatedAt: Date.now(),
+      totalTokens: 80_000,
+      compactionCount: 1,
+    };
+
+    await runMemoryFlushIfNeeded({
+      cfg: {
+        agents: {
+          defaults: {
+            model: {
+              primary: "anthropic/claude",
+              fallbacks: ["openai/gpt-5.4"],
+            },
+            compaction: {
+              memoryFlush: {
+                model: "ollama/qwen3:8b",
+              },
+            },
+          },
+        },
+      },
+      followupRun: createTestFollowupRun({ provider: "anthropic", model: "claude" }),
+      sessionCtx: { Provider: "whatsapp" } as unknown as TemplateContext,
+      defaultModel: "anthropic/claude",
+      agentCfgContextTokens: 100_000,
+      resolvedVerboseLevel: "off",
+      sessionEntry,
+      sessionStore: { main: sessionEntry },
+      sessionKey: "main",
+      isHeartbeat: false,
+      replyOperation: createReplyOperation(),
+    });
+
+    expect(runWithModelFallbackMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        provider: "ollama",
+        model: "qwen3:8b",
+        fallbacksOverride: [],
+      }),
+    );
+    expect(runEmbeddedPiAgentMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        provider: "ollama",
+        model: "qwen3:8b",
+        authProfileId: undefined,
+        authProfileIdSource: undefined,
+      }),
+    );
+  });
+
   it("skips memory flush for CLI providers", async () => {
     const sessionEntry: SessionEntry = {
       sessionId: "session",
diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts
index c9efcc431df..d0720b542d0 100644
--- a/src/auto-reply/reply/agent-runner-memory.ts
+++ b/src/auto-reply/reply/agent-runner-memory.ts
@@ -123,6 +123,38 @@ export function resolveEffectivePromptTokens(
   return base + output + estimate;
 }
 
+export function resolveMemoryFlushModelFallbackOptions(
+  run: FollowupRun["run"],
+  model?: string,
+  configOverride: FollowupRun["run"]["config"] = run.config,
+) {
+  const baseOptions = resolveModelFallbackOptions(run, configOverride);
+  const pinned = normalizeOptionalString(model);
+  if (!pinned) {
+    return baseOptions;
+  }
+  // The override is exact: the empty fallback list keeps a failed local flush from
+  // silently falling through to the paid active conversation fallback chain.
+  const separatorAt = pinned.indexOf("/");
+  const providerPart = separatorAt > 0 ? pinned.slice(0, separatorAt).trim() : "";
+  const modelPart = separatorAt > 0 ? pinned.slice(separatorAt + 1).trim() : "";
+  if (providerPart && modelPart) {
+    return {
+      ...baseOptions,
+      provider: providerPart,
+      model: modelPart,
+      fallbacksOverride: [],
+    };
+  }
+  // No usable "provider/model" split: only the model id is replaced, and the
+  // provider from the resolved options is left as-is.
+  return {
+    ...baseOptions,
+    model: pinned,
+    fallbacksOverride: [],
+  };
+}
+
 export type SessionTranscriptUsageSnapshot = {
   promptTokens?: number;
   outputTokens?: number;
@@ -796,7 +828,11 @@ export async function runMemoryFlushIfNeeded(params: {
   let postCompactionSessionFile: string | undefined;
   try {
     await memoryDeps.runWithModelFallback({
-      ...resolveModelFallbackOptions(params.followupRun.run),
+      ...resolveMemoryFlushModelFallbackOptions(
+        params.followupRun.run,
+        activeMemoryFlushPlan.model,
+        params.cfg,
+      ),
       runId: flushRunId,
       run: async (provider, model, runOptions) => {
         const { embeddedContext, senderContext, runBaseParams } = buildEmbeddedRunExecutionParams({
diff --git a/src/config/config.compaction-settings.test.ts b/src/config/config.compaction-settings.test.ts
index d7980de244f..a5a91e2daf4 100644
--- a/src/config/config.compaction-settings.test.ts
+++ b/src/config/config.compaction-settings.test.ts
@@ -28,6 +28,7 @@ describe("config compaction settings", () => {
       },
       memoryFlush: {
         enabled: false,
+        model: "ollama/qwen3:8b",
         softThresholdTokens: 1234,
         prompt: "Write notes.",
         systemPrompt: "Flush memory now.",
@@ -44,6 +45,7 @@ describe("config compaction settings", () => {
     expect(compaction?.qualityGuard?.enabled).toBe(true);
     expect(compaction?.qualityGuard?.maxRetries).toBe(2);
     expect(compaction?.memoryFlush?.enabled).toBe(false);
+    expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b");
     expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234);
     expect(compaction?.memoryFlush?.prompt).toBe("Write notes.");
     expect(compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts
index 483ba5bca98..f620196370e 100644
--- a/src/config/schema.base.generated.ts
+++ b/src/config/schema.base.generated.ts
@@ -5010,6 +5010,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
                         description:
                           "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
                       },
+                      model: {
+                        type: "string",
+                        title: "Compaction Memory Flush Model Override",
+                        description:
+                          "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
+                      },
                       softThresholdTokens: {
                         type: "integer",
                         minimum: 0,
@@ -27030,6 +27036,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
       help: "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
       tags: ["advanced"],
     },
+    "agents.defaults.compaction.memoryFlush.model": {
+      label: "Compaction Memory Flush Model Override",
+      help: "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
+      tags: ["models"],
+    },
     "agents.defaults.compaction.memoryFlush.softThresholdTokens": {
       label: "Compaction Memory Flush Soft Threshold",
       help: "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",
diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts
index bcacd0c492b..4acff2ce4d4 100644
--- a/src/config/schema.help.quality.test.ts
+++ b/src/config/schema.help.quality.test.ts
@@ -399,6 +399,7 @@ const TARGET_KEYS = [
   "agents.defaults.compaction.maxActiveTranscriptBytes",
   "agents.defaults.compaction.memoryFlush",
   "agents.defaults.compaction.memoryFlush.enabled",
+  "agents.defaults.compaction.memoryFlush.model",
   "agents.defaults.compaction.memoryFlush.softThresholdTokens",
   "agents.defaults.compaction.memoryFlush.prompt",
   "agents.defaults.compaction.memoryFlush.systemPrompt",
diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts
index 97a361d30cc..b01190fc8cd 100644
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@@ -1291,6 +1291,8 @@ export const FIELD_HELP: Record<string, string> = {
     "Pre-compaction memory flush settings that run an agentic memory write before heavy compaction. Keep enabled for long sessions so salient context is persisted before aggressive trimming.",
   "agents.defaults.compaction.memoryFlush.enabled":
     "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
+  "agents.defaults.compaction.memoryFlush.model":
+    "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
   "agents.defaults.compaction.memoryFlush.softThresholdTokens":
     "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",
   "agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":
diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts
index 8737a6d2533..9a6f00d3988 100644
--- a/src/config/schema.labels.ts
+++ b/src/config/schema.labels.ts
@@ -609,6 +609,7 @@ export const FIELD_LABELS: Record<string, string> = {
   "agents.defaults.compaction.notifyUser": "Compaction Notify User",
   "agents.defaults.compaction.memoryFlush": "Compaction Memory Flush",
   "agents.defaults.compaction.memoryFlush.enabled": "Compaction Memory Flush Enabled",
+  "agents.defaults.compaction.memoryFlush.model": "Compaction Memory Flush Model Override",
   "agents.defaults.compaction.memoryFlush.softThresholdTokens":
     "Compaction Memory Flush Soft Threshold",
   "agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":
diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts
index 2d56107522a..e887f03936e 100644
--- a/src/config/types.agent-defaults.ts
+++ b/src/config/types.agent-defaults.ts
@@ -493,6 +493,8 @@ export type AgentCompactionConfig = {
 export type AgentCompactionMemoryFlushConfig = {
   /** Enable the pre-compaction memory flush (default: true). */
   enabled?: boolean;
+  /** Exact provider/model override for memory flush turns only; ignores session fallbacks. */
+  model?: string;
   /** Run the memory flush when context is within this many tokens of the compaction threshold. */
   softThresholdTokens?: number;
   /**
diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts
index a4f27f99b40..0b99b6f8db0 100644
--- a/src/config/zod-schema.agent-defaults.ts
+++ b/src/config/zod-schema.agent-defaults.ts
@@ -189,6 +189,7 @@ export const AgentDefaultsSchema = z
         memoryFlush: z
           .object({
             enabled: z.boolean().optional(),
+            model: z.string().optional(),
             softThresholdTokens: z.number().int().nonnegative().optional(),
             forceFlushTranscriptBytes: NonNegativeByteSizeSchema.optional(),
             prompt: z.string().optional(),
diff --git a/src/plugins/memory-state.ts b/src/plugins/memory-state.ts
index 227ef336e22..429c01dedb8 100644
--- a/src/plugins/memory-state.ts
+++ b/src/plugins/memory-state.ts
@@ -68,6 +68,7 @@ export type MemoryFlushPlan = {
   softThresholdTokens: number;
   forceFlushTranscriptBytes: number;
   reserveTokensFloor: number;
+  model?: string;
   prompt: string;
   systemPrompt: string;
   relativePath: string;