From 5a2e5446a4a13b7283f7c1754481d153f2e31819 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Tue, 28 Apr 2026 04:32:23 +0100
Subject: [PATCH] fix: explain heartbeat model bleed overflows

---
 CHANGELOG.md                                  |   1 +
 docs/gateway/heartbeat.md                     |   6 +
 .../reply/agent-runner-execution.test.ts      |  80 +++++++-
 .../reply/agent-runner-execution.ts           | 190 +++++++++++++++++-
 4 files changed, 273 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96f4647dba4..2cc5777237a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
 - Gateway/startup: keep value-option foreground starts on the gateway fast path and skip proxy bootstrap unless proxy env is configured, reducing normal gateway startup RSS and avoiding full CLI graph loading. Thanks @vincentkoc.
+- Heartbeat/models: show heartbeat model bleed guidance on context-overflow resets when the last runtime model matches configured `heartbeat.model`, so smaller local heartbeat models point users to `isolatedSession` or `lightContext` instead of only compaction-buffer tuning. Fixes #67314. Thanks @Knightmare6890.
 - Subagents/models: persist `sessions_spawn.model` and configured subagent models as child-session model overrides before the first turn, so spawned subagents actually run on the requested provider/model instead of reverting to the target agent default. Fixes #73180. Thanks @danielzinhu99.
 - Backup: skip installed plugin `extensions/*/node_modules` dependency trees while keeping plugin manifests and source files in archives, so local backups avoid rebuildable npm payload bloat. Fixes #64144. Thanks @BrilliantWang.
 - Cron/models: fail isolated cron runs closed when an explicit `payload.model` is not allowed or cannot be resolved, so scheduled jobs do not silently fall back to an unrelated agent default or paid route before configured provider proxies such as LiteLLM can run. Fixes #73146. Thanks @oneandrewwang.
diff --git a/docs/gateway/heartbeat.md b/docs/gateway/heartbeat.md
index 9232e7dc223..6f71a970c33 100644
--- a/docs/gateway/heartbeat.md
+++ b/docs/gateway/heartbeat.md
@@ -455,6 +455,12 @@ Heartbeats run full agent turns. Shorter intervals burn more tokens. To reduce c
 - Keep `HEARTBEAT.md` small.
 - Use `target: "none"` if you only want internal state updates.
 
+## Context overflow after heartbeat
+
+If a heartbeat uses a smaller local model, for example an Ollama model with a 32k window, and the next main-session turn reports context overflow, check whether the previous heartbeat left the session on the heartbeat model. OpenClaw's reset message calls this out when the last runtime model matches configured `heartbeat.model`.
+
+Use `isolatedSession: true` to run heartbeats in a fresh session, combine it with `lightContext: true` for the smallest prompt, or choose a heartbeat model with a context window large enough for the shared session.
+
 ## Related
 
 - [Automation & Tasks](/automation) — all automation mechanisms at a glance
diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts
index 63926840ce1..0af108e2880 100644
--- a/src/auto-reply/reply/agent-runner-execution.test.ts
+++ b/src/auto-reply/reply/agent-runner-execution.test.ts
@@ -5,7 +5,10 @@ import { CommandLaneClearedError, GatewayDrainingError } from "../../process/com
 import type { TemplateContext } from "../templating.js";
 import { SILENT_REPLY_TOKEN } from "../tokens.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
-import { MAX_LIVE_SWITCH_RETRIES } from "./agent-runner-execution.js";
+import {
+  buildContextOverflowRecoveryText,
+  MAX_LIVE_SWITCH_RETRIES,
+} from "./agent-runner-execution.js";
 import type { FollowupRun } from "./queue.js";
 import type { ReplyOperation } from "./reply-run-registry.js";
 import type { TypingSignaler } from "./typing-mode.js";
@@ -293,6 +296,81 @@ function createMinimalRunAgentTurnParams(overrides?: {
   };
 }
 
+describe("buildContextOverflowRecoveryText", () => {
+  it("keeps the generic compaction-buffer hint without heartbeat model evidence", () => {
+    const text = buildContextOverflowRecoveryText({
+      cfg: {},
+      primaryProvider: "openrouter",
+      primaryModel: "qwen3.6-plus",
+    });
+
+    expect(text).toContain("reserveTokensFloor");
+    expect(text).not.toContain("heartbeat model bleed");
+  });
+
+  it("points to heartbeat model bleed when the last runtime model matches configured heartbeat.model", () => {
+    const text = buildContextOverflowRecoveryText({
+      cfg: {
+        models: {
+          providers: {
+            openrouter: {
+              models: [{ id: "qwen3.6-plus", contextTokens: 1_000_000 }],
+            },
+            ollama: {
+              models: [{ id: "qwen3.5-9b-32k:latest", contextTokens: 32_768 }],
+            },
+          },
+        },
+        agents: {
+          defaults: {
+            heartbeat: { model: "ollama/qwen3.5-9b-32k:latest" },
+          },
+        },
+      },
+      agentId: "agent",
+      primaryProvider: "openrouter",
+      primaryModel: "qwen3.6-plus",
+      activeSessionEntry: {
+        sessionId: "session",
+        updatedAt: 1,
+        modelProvider: "ollama",
+        model: "qwen3.5-9b-32k:latest",
+        contextTokens: 32_768,
+      },
+    });
+
+    expect(text).toContain("ollama/qwen3.5-9b-32k:latest (32k context)");
+    expect(text).toContain("openrouter/qwen3.6-plus");
+    expect(text).toContain("heartbeat model bleed");
+    expect(text).toContain("heartbeat.isolatedSession");
+    expect(text).not.toContain("reserveTokensFloor");
+  });
+
+  it("does not blame heartbeat when the smaller runtime model is not the configured heartbeat model", () => {
+    const text = buildContextOverflowRecoveryText({
+      cfg: {
+        agents: {
+          defaults: {
+            heartbeat: { model: "ollama/qwen3.5-9b-32k:latest" },
+          },
+        },
+      },
+      primaryProvider: "openrouter",
+      primaryModel: "qwen3.6-plus",
+      activeSessionEntry: {
+        sessionId: "session",
+        updatedAt: 1,
+        modelProvider: "anthropic",
+        model: "claude-haiku-4-5",
+        contextTokens: 32_768,
+      },
+    });
+
+    expect(text).toContain("reserveTokensFloor");
+    expect(text).not.toContain("heartbeat model bleed");
+  });
+});
+
 describe("runAgentTurnWithFallback", () => {
   beforeEach(() => {
     state.runEmbeddedPiAgentMock.mockReset();
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 9f1ff98d38d..714430e63ac 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -11,13 +11,14 @@ import {
 import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
 import { runCliAgent } from "../../agents/cli-runner.js";
 import { getCliSessionBinding } from "../../agents/cli-session.js";
+import { resolveContextTokensForModel } from "../../agents/context.js";
 import { LiveSessionModelSwitchError } from "../../agents/live-model-switch-error.js";
 import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js";
 import {
   isCliRuntimeAlias,
   resolveCliRuntimeExecutionProvider,
 } from "../../agents/model-runtime-aliases.js";
-import { isCliProvider } from "../../agents/model-selection.js";
+import { isCliProvider, resolveModelRefFromString } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
   formatRateLimitOrOverloadedErrorCopy,
@@ -457,6 +458,176 @@ function buildExternalRunFailureReply(
   };
 }
 
+const CONTEXT_OVERFLOW_RESET_HINT =
+  "\n\nTo prevent this, increase your compaction buffer by setting " +
+  "`agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.";
+
+type ModelRefLike = {
+  provider: string;
+  model: string;
+};
+
+function resolveAgentHeartbeatModelRaw(params: {
+  cfg: FollowupRun["run"]["config"];
+  agentId?: string;
+}): string | undefined {
+  const defaultModel = normalizeOptionalString(params.cfg.agents?.defaults?.heartbeat?.model);
+  const agentId = normalizeLowercaseStringOrEmpty(params.agentId);
+  const agentModel = agentId
+    ? normalizeOptionalString(
+        params.cfg.agents?.list?.find(
+          (entry) => normalizeLowercaseStringOrEmpty(entry?.id) === agentId,
+        )?.heartbeat?.model,
+      )
+    : undefined;
+  return agentModel ?? defaultModel;
+}
+
+function normalizeModelRefForCompare(ref: ModelRefLike | undefined) {
+  if (!ref) {
+    return undefined;
+  }
+  const provider = normalizeLowercaseStringOrEmpty(ref.provider);
+  const model = normalizeLowercaseStringOrEmpty(ref.model);
+  return provider && model ? { provider, model } : undefined;
+}
+
+function modelRefsEqual(left: ModelRefLike | undefined, right: ModelRefLike | undefined) {
+  const normalizedLeft = normalizeModelRefForCompare(left);
+  const normalizedRight = normalizeModelRefForCompare(right);
+  if (!normalizedLeft || !normalizedRight) {
+    return false;
+  }
+  return (
+    normalizedLeft.provider === normalizedRight.provider &&
+    normalizedLeft.model === normalizedRight.model
+  );
+}
+
+function formatContextWindowLabel(tokens: number): string {
+  if (tokens >= 1_000_000) {
+    return `${Math.round((tokens / 1_000_000) * 10) / 10}M`;
+  }
+  return `${Math.round(tokens / 1024)}k`;
+}
+
+function resolveContextWindowForHint(params: {
+  cfg: FollowupRun["run"]["config"];
+  ref: ModelRefLike;
+  activeSessionEntry?: SessionEntry;
+}) {
+  const activeContextTokens =
+    typeof params.activeSessionEntry?.contextTokens === "number" &&
+    Number.isFinite(params.activeSessionEntry.contextTokens) &&
+    params.activeSessionEntry.contextTokens > 0
+      ? Math.floor(params.activeSessionEntry.contextTokens)
+      : undefined;
+  return (
+    activeContextTokens ??
+    resolveContextTokensForModel({
+      cfg: params.cfg,
+      provider: params.ref.provider,
+      model: params.ref.model,
+      allowAsyncLoad: false,
+    })
+  );
+}
+
+function resolveHeartbeatBleedHint(params: {
+  cfg: FollowupRun["run"]["config"];
+  agentId?: string;
+  primaryProvider?: string;
+  primaryModel?: string;
+  activeSessionEntry?: SessionEntry;
+}): string | undefined {
+  const primaryProvider = normalizeOptionalString(params.primaryProvider);
+  const primaryModel = normalizeOptionalString(params.primaryModel);
+  if (!primaryProvider || !primaryModel) {
+    return undefined;
+  }
+
+  const runtimeProvider = normalizeOptionalString(params.activeSessionEntry?.modelProvider);
+  const runtimeModel = normalizeOptionalString(params.activeSessionEntry?.model);
+  if (!runtimeProvider || !runtimeModel) {
+    return undefined;
+  }
+
+  const primaryRef = { provider: primaryProvider, model: primaryModel };
+  const runtimeRef = { provider: runtimeProvider, model: runtimeModel };
+  if (modelRefsEqual(primaryRef, runtimeRef)) {
+    return undefined;
+  }
+
+  const heartbeatModelRaw = resolveAgentHeartbeatModelRaw({
+    cfg: params.cfg,
+    agentId: params.agentId,
+  });
+  const heartbeatRef = heartbeatModelRaw
+    ? resolveModelRefFromString({
+        cfg: params.cfg,
+        raw: heartbeatModelRaw,
+        defaultProvider: primaryProvider,
+      })?.ref
+    : undefined;
+  if (!modelRefsEqual(runtimeRef, heartbeatRef)) {
+    return undefined;
+  }
+
+  const runtimeWindow = resolveContextWindowForHint({
+    cfg: params.cfg,
+    ref: runtimeRef,
+    activeSessionEntry: params.activeSessionEntry,
+  });
+  const primaryWindow = resolveContextTokensForModel({
+    cfg: params.cfg,
+    provider: primaryRef.provider,
+    model: primaryRef.model,
+    allowAsyncLoad: false,
+  });
+  if (
+    typeof runtimeWindow === "number" &&
+    typeof primaryWindow === "number" &&
+    runtimeWindow >= primaryWindow
+  ) {
+    return undefined;
+  }
+
+  const runtimeLabel =
+    typeof runtimeWindow === "number" && runtimeWindow > 0
+      ? ` (${formatContextWindowLabel(runtimeWindow)} context)`
+      : "";
+  return (
+    `\n\nThe previous heartbeat turn left this session on ${runtimeProvider}/${runtimeModel}` +
+    `${runtimeLabel} instead of ${primaryProvider}/${primaryModel}. This matches the configured ` +
+    "`heartbeat.model`, so the overflow is likely heartbeat model bleed rather than a " +
+    "compaction-buffer problem. Set `heartbeat.isolatedSession: true`, enable " +
+    "`heartbeat.lightContext: true`, or use a heartbeat model with a larger context window."
+  );
+}
+
+export function buildContextOverflowRecoveryText(params: {
+  duringCompaction?: boolean;
+  cfg: FollowupRun["run"]["config"];
+  agentId?: string;
+  primaryProvider?: string;
+  primaryModel?: string;
+  activeSessionEntry?: SessionEntry;
+}): string {
+  const prefix = params.duringCompaction
+    ? "⚠️ Context limit exceeded during compaction. I've reset our conversation to start fresh - please try again."
+    : "⚠️ Context limit exceeded. I've reset our conversation to start fresh - please try again.";
+  return (
+    prefix +
+    (resolveHeartbeatBleedHint({
+      cfg: params.cfg,
+      agentId: params.agentId,
+      primaryProvider: params.primaryProvider,
+      primaryModel: params.primaryModel,
+      activeSessionEntry: params.activeSessionEntry,
+    }) ?? CONTEXT_OVERFLOW_RESET_HINT)
+  );
+}
+
 function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
   if (params.provider !== "openai" && params.provider !== "openai-codex") {
     return false;
@@ -1475,7 +1646,13 @@ export async function runAgentTurnWithFallback(params: {
         return {
           kind: "final",
           payload: {
-            text: "⚠️ Context limit exceeded. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
+            text: buildContextOverflowRecoveryText({
+              cfg: runtimeConfig,
+              agentId: params.followupRun.run.agentId,
+              primaryProvider: params.followupRun.run.provider,
+              primaryModel: params.followupRun.run.model,
+              activeSessionEntry: params.getActiveSessionEntry(),
+            }),
           },
         };
       }
@@ -1595,7 +1772,14 @@ export async function runAgentTurnWithFallback(params: {
         return {
           kind: "final",
           payload: {
-            text: "⚠️ Context limit exceeded during compaction. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
+            text: buildContextOverflowRecoveryText({
+              duringCompaction: true,
+              cfg: runtimeConfig,
+              agentId: params.followupRun.run.agentId,
+              primaryProvider: params.followupRun.run.provider,
+              primaryModel: params.followupRun.run.model,
+              activeSessionEntry: params.getActiveSessionEntry(),
+            }),
           },
         };
       }