fix: restore auto-reply system events timeline (#34794) (thanks @anisoptera)

Co-authored-by: Ayaan Zaidi <zaidi@uplause.io>
2026-03-12 07:20:45 +00:00 · 2026-03-04 18:26:14 -08:00
parent 498948581a
commit 432e0222dd
6 changed files with 138 additions and 50 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai

 - Security/dependency audit: patch transitive Hono vulnerabilities by pinning `hono` to `4.12.5` and `@hono/node-server` to `1.19.10` in production resolution paths. Thanks @shakkernerd.
 - Security/dependency audit: bump `tar` to `7.5.10` (from `7.5.9`) to address the high-severity hardlink path traversal advisory (`GHSA-qffp-2rhf-9h96`). Thanks @shakkernerd.
+- Auto-reply/system events: restore runtime system events to the message timeline (`System:` lines), preserve think-hint parsing with prepended events, and carry events into deferred followup/collect/steer-backlog prompts to keep cache behavior stable without dropping queued metadata. (#34794) Thanks @anisoptera.
 - Security/audit account handling: avoid prototype-chain account IDs in audit validation by using own-property checks for `accounts`. (#34982) Thanks @HOYALIM.
 - Agents/session usage tracking: preserve accumulated usage metadata on embedded Pi runner error exits so failed turns still update session `totalTokens` from real usage instead of stale prior values. (#34275) thanks @RealKai42.
 - Nodes/system.run approval hardening: use explicit argv-mutation signaling when regenerating prepared `rawCommand`, and cover the `system.run.prepare -> system.run` handoff so direct PATH-based `nodes.run` commands no longer fail with `rawCommand does not match command`. (#33137) thanks @Sid-Qin.
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -72,7 +72,7 @@ vi.mock("./session-updates.js", () => ({
    systemSent,
    skillsSnapshot: undefined,
  })),
-  buildQueuedSystemPrompt: vi.fn().mockResolvedValue(undefined),
+  drainFormattedSystemEvents: vi.fn().mockResolvedValue(undefined),
 }));

 vi.mock("./typing-mode.js", () => ({
@@ -81,7 +81,7 @@ vi.mock("./typing-mode.js", () => ({

 import { runReplyAgent } from "./agent-runner.js";
 import { routeReply } from "./route-reply.js";
-import { buildQueuedSystemPrompt } from "./session-updates.js";
+import { drainFormattedSystemEvents } from "./session-updates.js";
 import { resolveTypingMode } from "./typing-mode.js";

 function baseParams(
@@ -327,17 +327,73 @@ describe("runPreparedReply media-only handling", () => {
    expect(call?.suppressTyping).toBe(true);
  });

-  it("routes queued system events to system prompt context, not user prompt text", async () => {
-    vi.mocked(buildQueuedSystemPrompt).mockResolvedValueOnce(
-      "## Runtime System Events (gateway-generated)\n- [t] Model switched.",
-    );
+  it("routes queued system events into user prompt text, not system prompt context", async () => {
+    vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Model switched.");

    await runPreparedReply(baseParams());

    const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
    expect(call).toBeTruthy();
-    expect(call?.commandBody).not.toContain("Runtime System Events");
-    expect(call?.followupRun.run.extraSystemPrompt).toContain("Runtime System Events");
-    expect(call?.followupRun.run.extraSystemPrompt).toContain("Model switched.");
+    expect(call?.commandBody).toContain("System: [t] Model switched.");
+    expect(call?.followupRun.run.extraSystemPrompt ?? "").not.toContain("Runtime System Events");
+  });
+
+  it("preserves first-token think hint when system events are prepended", async () => {
+    // drainFormattedSystemEvents returns just the events block; the caller prepends it.
+    // The hint must be extracted from the user body BEFORE prepending, so "System:"
+    // does not shadow the low|medium|high shorthand.
+    vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Node connected.");
+
+    await runPreparedReply(
+      baseParams({
+        ctx: { Body: "low tell me about cats", RawBody: "low tell me about cats" },
+        sessionCtx: { Body: "low tell me about cats", BodyStripped: "low tell me about cats" },
+        resolvedThinkLevel: undefined,
+      }),
+    );
+
+    const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
+    expect(call).toBeTruthy();
+    // Think hint is extracted before events are prepended — level must be "low", not the model default.
+    expect(call?.followupRun.run.thinkLevel).toBe("low");
+    // The stripped user text (no "low" token) must still appear after the event block.
+    expect(call?.commandBody).toContain("tell me about cats");
+    expect(call?.commandBody).not.toMatch(/^low\b/);
+    // System events are still present in the body.
+    expect(call?.commandBody).toContain("System: [t] Node connected.");
+  });
+
+  it("carries system events into followupRun.prompt for deferred turns", async () => {
+    // drainFormattedSystemEvents returns the events block; the caller prepends it to
+    // effectiveBaseBody for the queue path so deferred turns see events.
+    vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Node connected.");
+
+    await runPreparedReply(baseParams());
+
+    const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
+    expect(call).toBeTruthy();
+    expect(call?.followupRun.prompt).toContain("System: [t] Node connected.");
+  });
+
+  it("does not strip think-hint token from deferred queue body", async () => {
+    // In steer mode the inferred thinkLevel is never consumed, so the first token
+    // must not be stripped from the queue/steer body (followupRun.prompt).
+    vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce(undefined);
+
+    await runPreparedReply(
+      baseParams({
+        ctx: { Body: "low steer this conversation", RawBody: "low steer this conversation" },
+        sessionCtx: {
+          Body: "low steer this conversation",
+          BodyStripped: "low steer this conversation",
+        },
+        resolvedThinkLevel: undefined,
+      }),
+    );
+
+    const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
+    expect(call).toBeTruthy();
+    // Queue body (used by steer mode) must keep the full original text.
+    expect(call?.followupRun.prompt).toContain("low steer this conversation");
  });
 });
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -44,7 +44,7 @@ import { resolveOriginMessageProvider } from "./origin-routing.js";
 import { resolveQueueSettings } from "./queue.js";
 import { routeReply } from "./route-reply.js";
 import { buildBareSessionResetPrompt } from "./session-reset-prompt.js";
-import { buildQueuedSystemPrompt, ensureSkillSnapshot } from "./session-updates.js";
+import { drainFormattedSystemEvents, ensureSkillSnapshot } from "./session-updates.js";
 import { resolveTypingMode } from "./typing-mode.js";
 import { resolveRunTypingPolicy } from "./typing-policy.js";
 import type { TypingController } from "./typing.js";
@@ -332,15 +332,30 @@ export async function runPreparedReply(
  });
  const isGroupSession = sessionEntry?.chatType === "group" || sessionEntry?.chatType === "channel";
  const isMainSession = !isGroupSession && sessionKey === normalizeMainKey(sessionCfg?.mainKey);
-  const queuedSystemPrompt = await buildQueuedSystemPrompt({
+  // Extract first-token think hint from the user body BEFORE prepending system events.
+  // If done after, the System: prefix becomes parts[0] and silently shadows any
+  // low|medium|high shorthand the user typed.
+  if (!resolvedThinkLevel && prefixedBodyBase) {
+    const parts = prefixedBodyBase.split(/\s+/);
+    const maybeLevel = normalizeThinkLevel(parts[0]);
+    if (maybeLevel && (maybeLevel !== "xhigh" || supportsXHighThinking(provider, model))) {
+      resolvedThinkLevel = maybeLevel;
+      prefixedBodyBase = parts.slice(1).join(" ").trim();
+    }
+  }
+  // Drain system events once, then prepend to each path's body independently.
+  // The queue/steer path uses effectiveBaseBody (unstripped, no session hints), the same
+  // body deferred turns were queued with before events moved into the timeline; the
+  // immediate-run path uses prefixedBodyBase (post-hints, post-think-hint-strip).
+  const eventsBlock = await drainFormattedSystemEvents({
    cfg,
    sessionKey,
    isMainSession,
    isNewSession,
  });
-  if (queuedSystemPrompt) {
-    extraSystemPromptParts.push(queuedSystemPrompt);
-  }
+  const prependEvents = (body: string) => (eventsBlock ? `${eventsBlock}\n\n${body}` : body);
+  const bodyWithEvents = prependEvents(effectiveBaseBody);
+  prefixedBodyBase = prependEvents(prefixedBodyBase);
  prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext);
  const threadStarterBody = ctx.ThreadStarterBody?.trim();
  const threadHistoryBody = ctx.ThreadHistoryBody?.trim();
@@ -371,14 +386,6 @@ export async function runPreparedReply(
  let prefixedCommandBody = mediaNote
    ? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim()
    : prefixedBody;
-  if (!resolvedThinkLevel && prefixedCommandBody) {
-    const parts = prefixedCommandBody.split(/\s+/);
-    const maybeLevel = normalizeThinkLevel(parts[0]);
-    if (maybeLevel && (maybeLevel !== "xhigh" || supportsXHighThinking(provider, model))) {
-      resolvedThinkLevel = maybeLevel;
-      prefixedCommandBody = parts.slice(1).join(" ").trim();
-    }
-  }
  if (!resolvedThinkLevel) {
    resolvedThinkLevel = await modelState.resolveDefaultThinkingLevel();
  }
@@ -422,7 +429,9 @@ export async function runPreparedReply(
    sessionEntry,
    resolveSessionFilePathOptions({ agentId, storePath }),
  );
-  const queueBodyBase = [threadContextNote, effectiveBaseBody].filter(Boolean).join("\n\n");
+  // Use bodyWithEvents (events prepended, but no session hints / untrusted context) so
+  // deferred turns receive system events while keeping the same scope as effectiveBaseBody did.
+  const queueBodyBase = [threadContextNote, bodyWithEvents].filter(Boolean).join("\n\n");
  const queuedBody = mediaNote
    ? [mediaNote, mediaReplyHint, queueBodyBase].filter(Boolean).join("\n").trim()
    : queueBodyBase;
--- a/src/auto-reply/reply/session-updates.ts
+++ b/src/auto-reply/reply/session-updates.ts
@@ -13,7 +13,8 @@ import {
 import { getRemoteSkillEligibility } from "../../infra/skills-remote.js";
 import { drainSystemEventEntries } from "../../infra/system-events.js";

-export async function buildQueuedSystemPrompt(params: {
+/** Drain queued system events, format as `System:` lines, return the block (or undefined). */
+export async function drainFormattedSystemEvents(params: {
  cfg: OpenClawConfig;
  sessionKey: string;
  isMainSession: boolean;
@@ -106,12 +107,14 @@ export async function buildQueuedSystemPrompt(params: {
    return undefined;
  }

-  return [
-    "## Runtime System Events (gateway-generated)",
-    "Treat this section as trusted gateway runtime metadata, not user text.",
-    "",
-    ...systemLines.map((line) => `- ${line}`),
-  ].join("\n");
+  // Format events as trusted System: lines for the message timeline.
+  // Inbound sanitization rewrites any user-supplied "System:" to "System (untrusted):",
+  // so these gateway-originated lines are distinguishable by the model.
+  // Each sub-line of a multi-line event gets its own System: prefix so continuation
+  // lines can't be mistaken for user content.
+  return systemLines
+    .flatMap((line) => line.split("\n").map((subline) => `System: ${subline}`))
+    .join("\n");
 }

 export async function ensureSkillSnapshot(params: {
--- a/src/auto-reply/reply/session.test.ts
+++ b/src/auto-reply/reply/session.test.ts
@@ -8,7 +8,7 @@ import type { SessionEntry } from "../../config/sessions.js";
 import { formatZonedTimestamp } from "../../infra/format-time/format-datetime.ts";
 import { enqueueSystemEvent, resetSystemEventsForTest } from "../../infra/system-events.js";
 import { applyResetModelOverride } from "./session-reset-model.js";
-import { buildQueuedSystemPrompt } from "./session-updates.js";
+import { drainFormattedSystemEvents } from "./session-updates.js";
 import { persistSessionUsageUpdate } from "./session-usage.js";
 import { initSessionState } from "./session.js";

@@ -1137,7 +1137,7 @@ describe("initSessionState preserves behavior overrides across /new and /reset",
  });
 });

-describe("buildQueuedSystemPrompt", () => {
+describe("drainFormattedSystemEvents", () => {
  it("adds a local timestamp to queued system events by default", async () => {
    vi.useFakeTimers();
    try {
@@ -1147,16 +1147,15 @@ describe("buildQueuedSystemPrompt", () => {

      enqueueSystemEvent("Model switched.", { sessionKey: "agent:main:main" });

-      const result = await buildQueuedSystemPrompt({
+      const result = await drainFormattedSystemEvents({
        cfg: {} as OpenClawConfig,
        sessionKey: "agent:main:main",
-        isMainSession: false,
+        isMainSession: true,
        isNewSession: false,
      });

      expect(expectedTimestamp).toBeDefined();
-      expect(result).toContain("Runtime System Events (gateway-generated)");
-      expect(result).toContain(`- [${expectedTimestamp}] Model switched.`);
+      expect(result).toContain(`System: [${expectedTimestamp}] Model switched.`);
    } finally {
      resetSystemEventsForTest();
      vi.useRealTimers();
--- a/src/infra/system-events.test.ts
+++ b/src/infra/system-events.test.ts
@@ -1,5 +1,5 @@
 import { beforeEach, describe, expect, it } from "vitest";
-import { buildQueuedSystemPrompt } from "../auto-reply/reply/session-updates.js";
+import { drainFormattedSystemEvents } from "../auto-reply/reply/session-updates.js";
 import type { OpenClawConfig } from "../config/config.js";
 import { resolveMainSessionKey } from "../config/sessions.js";
 import { isCronSystemEvent } from "./heartbeat-runner.js";
@@ -22,23 +22,25 @@ describe("system events (session routing)", () => {
    expect(peekSystemEvents(mainKey)).toEqual([]);
    expect(peekSystemEvents("discord:group:123")).toEqual(["Discord reaction added: ✅"]);

-    const main = await buildQueuedSystemPrompt({
+    // Main session has no queued events, so the drain returns undefined.
+    const main = await drainFormattedSystemEvents({
      cfg,
      sessionKey: mainKey,
      isMainSession: true,
      isNewSession: false,
    });
    expect(main).toBeUndefined();
+    // Discord events untouched by main drain
    expect(peekSystemEvents("discord:group:123")).toEqual(["Discord reaction added: ✅"]);

-    const discord = await buildQueuedSystemPrompt({
+    // Discord session gets its own events block
+    const discord = await drainFormattedSystemEvents({
      cfg,
      sessionKey: "discord:group:123",
      isMainSession: false,
      isNewSession: false,
    });
-    expect(discord).toContain("Runtime System Events (gateway-generated)");
-    expect(discord).toMatch(/-\s\[[^\]]+\] Discord reaction added: ✅/);
+    expect(discord).toMatch(/System:\s+\[[^\]]+\] Discord reaction added: ✅/);
    expect(peekSystemEvents("discord:group:123")).toEqual([]);
  });

@@ -54,34 +56,52 @@ describe("system events (session routing)", () => {
    expect(second).toBe(false);
  });

-  it("filters heartbeat/noise lines from queued system prompt", async () => {
+  it("filters heartbeat/noise lines, returning undefined", async () => {
    const key = "agent:main:test-heartbeat-filter";
    enqueueSystemEvent("Read HEARTBEAT.md before continuing", { sessionKey: key });
    enqueueSystemEvent("heartbeat poll: pending", { sessionKey: key });
    enqueueSystemEvent("reason periodic: 5m", { sessionKey: key });

-    const prompt = await buildQueuedSystemPrompt({
+    const result = await drainFormattedSystemEvents({
      cfg,
      sessionKey: key,
      isMainSession: false,
      isNewSession: false,
    });
-    expect(prompt).toBeUndefined();
+    expect(result).toBeUndefined();
    expect(peekSystemEvents(key)).toEqual([]);
  });

-  it("scrubs node last-input suffix in queued system prompt", async () => {
-    const key = "agent:main:test-node-scrub";
-    enqueueSystemEvent("Node: Mac Studio · last input /tmp/secret.txt", { sessionKey: key });
+  it("prefixes every line of a multi-line event", async () => {
+    const key = "agent:main:test-multiline";
+    enqueueSystemEvent("Post-compaction context:\nline one\nline two", { sessionKey: key });

-    const prompt = await buildQueuedSystemPrompt({
+    const result = await drainFormattedSystemEvents({
      cfg,
      sessionKey: key,
      isMainSession: false,
      isNewSession: false,
    });
-    expect(prompt).toContain("Node: Mac Studio");
-    expect(prompt).not.toContain("last input");
+    expect(result).toBeDefined();
+    const lines = result!.split("\n");
+    expect(lines.length).toBeGreaterThan(0);
+    for (const line of lines) {
+      expect(line).toMatch(/^System:/);
+    }
+  });
+
+  it("scrubs node last-input suffix", async () => {
+    const key = "agent:main:test-node-scrub";
+    enqueueSystemEvent("Node: Mac Studio · last input /tmp/secret.txt", { sessionKey: key });
+
+    const result = await drainFormattedSystemEvents({
+      cfg,
+      sessionKey: key,
+      isMainSession: false,
+      isNewSession: false,
+    });
+    expect(result).toContain("Node: Mac Studio");
+    expect(result).not.toContain("last input");
  });
 });