fix: explain heartbeat model bleed overflows

This commit is contained in:
Peter Steinberger
2026-04-28 04:32:23 +01:00
parent 68561a8c94
commit 5a2e5446a4
4 changed files with 273 additions and 4 deletions

View File

@@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/startup: keep value-option foreground starts on the gateway fast path and skip proxy bootstrap unless proxy env is configured, reducing normal gateway startup RSS and avoiding full CLI graph loading. Thanks @vincentkoc.
- Heartbeat/models: show heartbeat model bleed guidance on context-overflow resets when the last runtime model matches configured `heartbeat.model`, so smaller local heartbeat models point users to `isolatedSession` or `lightContext` instead of only compaction-buffer tuning. Fixes #67314. Thanks @Knightmare6890.
- Subagents/models: persist `sessions_spawn.model` and configured subagent models as child-session model overrides before the first turn, so spawned subagents actually run on the requested provider/model instead of reverting to the target agent default. Fixes #73180. Thanks @danielzinhu99.
- Backup: skip installed plugin `extensions/*/node_modules` dependency trees while keeping plugin manifests and source files in archives, so local backups avoid rebuildable npm payload bloat. Fixes #64144. Thanks @BrilliantWang.
- Cron/models: fail isolated cron runs closed when an explicit `payload.model` is not allowed or cannot be resolved, so scheduled jobs do not silently fall back to an unrelated agent default or paid route before configured provider proxies such as LiteLLM can run. Fixes #73146. Thanks @oneandrewwang.

View File

@@ -455,6 +455,12 @@ Heartbeats run full agent turns. Shorter intervals burn more tokens. To reduce c
- Keep `HEARTBEAT.md` small.
- Use `target: "none"` if you only want internal state updates.
## Context overflow after heartbeat
If a heartbeat uses a smaller local model, for example an Ollama model with a 32k window, and the next main-session turn reports context overflow, check whether the previous heartbeat left the session on the heartbeat model. OpenClaw's reset message calls this out when the last runtime model matches configured `heartbeat.model`.
Use `isolatedSession: true` to run heartbeats in a fresh session, combine it with `lightContext: true` for the smallest prompt, or choose a heartbeat model with a context window large enough for the shared session.
## Related
- [Automation & Tasks](/automation) — all automation mechanisms at a glance

View File

@@ -5,7 +5,10 @@ import { CommandLaneClearedError, GatewayDrainingError } from "../../process/com
import type { TemplateContext } from "../templating.js";
import { SILENT_REPLY_TOKEN } from "../tokens.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { MAX_LIVE_SWITCH_RETRIES } from "./agent-runner-execution.js";
import {
buildContextOverflowRecoveryText,
MAX_LIVE_SWITCH_RETRIES,
} from "./agent-runner-execution.js";
import type { FollowupRun } from "./queue.js";
import type { ReplyOperation } from "./reply-run-registry.js";
import type { TypingSignaler } from "./typing-mode.js";
@@ -293,6 +296,81 @@ function createMinimalRunAgentTurnParams(overrides?: {
};
}
// Coverage for buildContextOverflowRecoveryText: the overflow-reset reply should
// mention "heartbeat model bleed" only when the session's last runtime model is
// the configured `heartbeat.model`; otherwise it keeps the generic
// compaction-buffer (`reserveTokensFloor`) hint.
describe("buildContextOverflowRecoveryText", () => {
  it("keeps the generic compaction-buffer hint without heartbeat model evidence", () => {
    // No heartbeat config and no active session entry -> generic hint only.
    const text = buildContextOverflowRecoveryText({
      cfg: {},
      primaryProvider: "openrouter",
      primaryModel: "qwen3.6-plus",
    });
    expect(text).toContain("reserveTokensFloor");
    expect(text).not.toContain("heartbeat model bleed");
  });
  it("points to heartbeat model bleed when the last runtime model matches configured heartbeat.model", () => {
    // Runtime model (ollama 32k) differs from primary (openrouter 1M) and
    // matches agents.defaults.heartbeat.model -> expect the bleed guidance.
    const text = buildContextOverflowRecoveryText({
      cfg: {
        models: {
          providers: {
            openrouter: {
              models: [{ id: "qwen3.6-plus", contextTokens: 1_000_000 }],
            },
            ollama: {
              models: [{ id: "qwen3.5-9b-32k:latest", contextTokens: 32_768 }],
            },
          },
        },
        agents: {
          defaults: {
            heartbeat: { model: "ollama/qwen3.5-9b-32k:latest" },
          },
        },
      },
      agentId: "agent",
      primaryProvider: "openrouter",
      primaryModel: "qwen3.6-plus",
      activeSessionEntry: {
        sessionId: "session",
        updatedAt: 1,
        modelProvider: "ollama",
        model: "qwen3.5-9b-32k:latest",
        contextTokens: 32_768,
      },
    });
    // Message names both models (with the runtime window label) and the fixes.
    expect(text).toContain("ollama/qwen3.5-9b-32k:latest (32k context)");
    expect(text).toContain("openrouter/qwen3.6-plus");
    expect(text).toContain("heartbeat model bleed");
    expect(text).toContain("heartbeat.isolatedSession");
    expect(text).not.toContain("reserveTokensFloor");
  });
  it("does not blame heartbeat when the smaller runtime model is not the configured heartbeat model", () => {
    // Runtime model differs from primary but is NOT the configured heartbeat
    // model -> fall back to the generic compaction-buffer hint.
    const text = buildContextOverflowRecoveryText({
      cfg: {
        agents: {
          defaults: {
            heartbeat: { model: "ollama/qwen3.5-9b-32k:latest" },
          },
        },
      },
      primaryProvider: "openrouter",
      primaryModel: "qwen3.6-plus",
      activeSessionEntry: {
        sessionId: "session",
        updatedAt: 1,
        modelProvider: "anthropic",
        model: "claude-haiku-4-5",
        contextTokens: 32_768,
      },
    });
    expect(text).toContain("reserveTokensFloor");
    expect(text).not.toContain("heartbeat model bleed");
  });
});
describe("runAgentTurnWithFallback", () => {
beforeEach(() => {
state.runEmbeddedPiAgentMock.mockReset();

View File

@@ -11,13 +11,14 @@ import {
import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
import { runCliAgent } from "../../agents/cli-runner.js";
import { getCliSessionBinding } from "../../agents/cli-session.js";
import { resolveContextTokensForModel } from "../../agents/context.js";
import { LiveSessionModelSwitchError } from "../../agents/live-model-switch-error.js";
import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js";
import {
isCliRuntimeAlias,
resolveCliRuntimeExecutionProvider,
} from "../../agents/model-runtime-aliases.js";
import { isCliProvider } from "../../agents/model-selection.js";
import { isCliProvider, resolveModelRefFromString } from "../../agents/model-selection.js";
import {
BILLING_ERROR_USER_MESSAGE,
formatRateLimitOrOverloadedErrorCopy,
@@ -457,6 +458,176 @@ function buildExternalRunFailureReply(
};
}
// Generic recovery guidance appended to context-overflow reset replies when no
// heartbeat-model-bleed evidence is found (see resolveHeartbeatBleedHint below).
const CONTEXT_OVERFLOW_RESET_HINT =
  "\n\nTo prevent this, increase your compaction buffer by setting " +
  "`agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.";
// Minimal provider/model pair used for comparing model references.
type ModelRefLike = {
  provider: string;
  model: string;
};
/**
 * Looks up the raw configured heartbeat model string for an agent.
 * A per-agent `heartbeat.model` (agent matched by case-insensitive id in
 * `agents.list`) takes precedence over `agents.defaults.heartbeat.model`.
 * Returns undefined when neither is configured.
 */
function resolveAgentHeartbeatModelRaw(params: {
  cfg: FollowupRun["run"]["config"];
  agentId?: string;
}): string | undefined {
  const wantedId = normalizeLowercaseStringOrEmpty(params.agentId);
  let perAgentModel: string | undefined;
  if (wantedId) {
    const agentEntry = params.cfg.agents?.list?.find(
      (entry) => normalizeLowercaseStringOrEmpty(entry?.id) === wantedId,
    );
    perAgentModel = normalizeOptionalString(agentEntry?.heartbeat?.model);
  }
  return perAgentModel ?? normalizeOptionalString(params.cfg.agents?.defaults?.heartbeat?.model);
}
/**
 * Lowercases both halves of a model ref for comparison.
 * Returns undefined when the ref is missing or either half normalizes empty.
 */
function normalizeModelRefForCompare(ref: ModelRefLike | undefined) {
  if (!ref) {
    return undefined;
  }
  const provider = normalizeLowercaseStringOrEmpty(ref.provider);
  const model = normalizeLowercaseStringOrEmpty(ref.model);
  if (!provider || !model) {
    return undefined;
  }
  return { provider, model };
}
/**
 * Case-insensitive model-ref equality. False whenever either side is missing
 * or fails normalization, so partial refs never compare equal.
 */
function modelRefsEqual(left: ModelRefLike | undefined, right: ModelRefLike | undefined) {
  const a = normalizeModelRefForCompare(left);
  const b = normalizeModelRefForCompare(right);
  if (!a || !b) {
    return false;
  }
  return a.provider === b.provider && a.model === b.model;
}
/**
 * Renders a context-window size as a short human label:
 * below 1M tokens -> whole KiB units ("32k"); 1M and above -> millions
 * rounded to one decimal place ("1M", "1.5M").
 */
function formatContextWindowLabel(tokens: number): string {
  if (tokens < 1_000_000) {
    return `${Math.round(tokens / 1024)}k`;
  }
  const millions = Math.round((tokens / 1_000_000) * 10) / 10;
  return `${millions}M`;
}
/**
 * Picks the context window to report for the runtime model: the active session
 * entry's recorded contextTokens when it is a positive finite number (floored),
 * otherwise the window resolved from config for the given provider/model
 * (synchronous lookup only — no async catalog load).
 */
function resolveContextWindowForHint(params: {
  cfg: FollowupRun["run"]["config"];
  ref: ModelRefLike;
  activeSessionEntry?: SessionEntry;
}) {
  const recorded = params.activeSessionEntry?.contextTokens;
  if (typeof recorded === "number" && Number.isFinite(recorded) && recorded > 0) {
    return Math.floor(recorded);
  }
  return resolveContextTokensForModel({
    cfg: params.cfg,
    provider: params.ref.provider,
    model: params.ref.model,
    allowAsyncLoad: false,
  });
}
/**
 * Detects "heartbeat model bleed": the previous heartbeat turn left the shared
 * session on the configured heartbeat model instead of the agent's primary
 * model, and the heartbeat model's context window is not known to be at least
 * as large as the primary's. Returns the explanatory hint text to append to
 * the overflow-reset reply, or undefined when the generic compaction-buffer
 * hint should be used instead.
 */
function resolveHeartbeatBleedHint(params: {
  cfg: FollowupRun["run"]["config"];
  agentId?: string;
  primaryProvider?: string;
  primaryModel?: string;
  activeSessionEntry?: SessionEntry;
}): string | undefined {
  const primaryProvider = normalizeOptionalString(params.primaryProvider);
  const primaryModel = normalizeOptionalString(params.primaryModel);
  if (!primaryProvider || !primaryModel) {
    return undefined;
  }
  // Runtime model = what the session actually ran last, per the session entry.
  const runtimeProvider = normalizeOptionalString(params.activeSessionEntry?.modelProvider);
  const runtimeModel = normalizeOptionalString(params.activeSessionEntry?.model);
  if (!runtimeProvider || !runtimeModel) {
    return undefined;
  }
  const primaryRef = { provider: primaryProvider, model: primaryModel };
  const runtimeRef = { provider: runtimeProvider, model: runtimeModel };
  // Session is still on the primary model -> nothing bled, stay generic.
  if (modelRefsEqual(primaryRef, runtimeRef)) {
    return undefined;
  }
  const heartbeatModelRaw = resolveAgentHeartbeatModelRaw({
    cfg: params.cfg,
    agentId: params.agentId,
  });
  // Resolve the raw `heartbeat.model` string into a comparable ref, falling
  // back to the primary provider when the string carries no provider prefix.
  const heartbeatRef = heartbeatModelRaw
    ? resolveModelRefFromString({
        cfg: params.cfg,
        raw: heartbeatModelRaw,
        defaultProvider: primaryProvider,
      })?.ref
    : undefined;
  // Only blame the heartbeat when the runtime model IS the heartbeat model.
  if (!modelRefsEqual(runtimeRef, heartbeatRef)) {
    return undefined;
  }
  const runtimeWindow = resolveContextWindowForHint({
    cfg: params.cfg,
    ref: runtimeRef,
    activeSessionEntry: params.activeSessionEntry,
  });
  const primaryWindow = resolveContextTokensForModel({
    cfg: params.cfg,
    provider: primaryRef.provider,
    model: primaryRef.model,
    allowAsyncLoad: false,
  });
  // When both windows are known and the heartbeat model is NOT smaller, a
  // smaller heartbeat window cannot explain the overflow; stay generic.
  // (Unknown windows fall through and still produce the hint.)
  if (
    typeof runtimeWindow === "number" &&
    typeof primaryWindow === "number" &&
    runtimeWindow >= primaryWindow
  ) {
    return undefined;
  }
  const runtimeLabel =
    typeof runtimeWindow === "number" && runtimeWindow > 0
      ? ` (${formatContextWindowLabel(runtimeWindow)} context)`
      : "";
  return (
    `\n\nThe previous heartbeat turn left this session on ${runtimeProvider}/${runtimeModel}` +
    `${runtimeLabel} instead of ${primaryProvider}/${primaryModel}. This matches the configured ` +
    "`heartbeat.model`, so the overflow is likely heartbeat model bleed rather than a " +
    "compaction-buffer problem. Set `heartbeat.isolatedSession: true`, enable " +
    "`heartbeat.lightContext: true`, or use a heartbeat model with a larger context window."
  );
}
/**
 * Builds the user-facing reply for a context-overflow reset. The prefix
 * reflects whether the overflow happened during compaction; the suffix is the
 * heartbeat-model-bleed guidance when the evidence supports it, otherwise the
 * generic compaction-buffer hint.
 */
export function buildContextOverflowRecoveryText(params: {
  duringCompaction?: boolean;
  cfg: FollowupRun["run"]["config"];
  agentId?: string;
  primaryProvider?: string;
  primaryModel?: string;
  activeSessionEntry?: SessionEntry;
}): string {
  const prefix = params.duringCompaction
    ? "⚠️ Context limit exceeded during compaction. I've reset our conversation to start fresh - please try again."
    : "⚠️ Context limit exceeded. I've reset our conversation to start fresh - please try again.";
  const bleedHint = resolveHeartbeatBleedHint({
    cfg: params.cfg,
    agentId: params.agentId,
    primaryProvider: params.primaryProvider,
    primaryModel: params.primaryModel,
    activeSessionEntry: params.activeSessionEntry,
  });
  return prefix + (bleedHint ?? CONTEXT_OVERFLOW_RESET_HINT);
}
function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
if (params.provider !== "openai" && params.provider !== "openai-codex") {
return false;
@@ -1475,7 +1646,13 @@ export async function runAgentTurnWithFallback(params: {
return {
kind: "final",
payload: {
text: "⚠️ Context limit exceeded. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
text: buildContextOverflowRecoveryText({
cfg: runtimeConfig,
agentId: params.followupRun.run.agentId,
primaryProvider: params.followupRun.run.provider,
primaryModel: params.followupRun.run.model,
activeSessionEntry: params.getActiveSessionEntry(),
}),
},
};
}
@@ -1595,7 +1772,14 @@ export async function runAgentTurnWithFallback(params: {
return {
kind: "final",
payload: {
text: "⚠️ Context limit exceeded during compaction. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
text: buildContextOverflowRecoveryText({
duringCompaction: true,
cfg: runtimeConfig,
agentId: params.followupRun.run.agentId,
primaryProvider: params.followupRun.run.provider,
primaryModel: params.followupRun.run.model,
activeSessionEntry: params.getActiveSessionEntry(),
}),
},
};
}