fix: restore auto-reply system events timeline (#34794) (thanks @anisoptera) (#34794)

Co-authored-by: Ayaan Zaidi <zaidi@uplause.io>
This commit is contained in:
Isis Anisoptera
2026-03-04 18:26:14 -08:00
committed by GitHub
parent 498948581a
commit 432e0222dd
6 changed files with 138 additions and 50 deletions

View File

@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- Security/dependency audit: patch transitive Hono vulnerabilities by pinning `hono` to `4.12.5` and `@hono/node-server` to `1.19.10` in production resolution paths. Thanks @shakkernerd.
- Security/dependency audit: bump `tar` to `7.5.10` (from `7.5.9`) to address the high-severity hardlink path traversal advisory (`GHSA-qffp-2rhf-9h96`). Thanks @shakkernerd.
- Auto-reply/system events: restore runtime system events to the message timeline (`System:` lines), preserve think-hint parsing with prepended events, and carry events into deferred followup/collect/steer-backlog prompts to keep cache behavior stable without dropping queued metadata. (#34794) Thanks @anisoptera.
- Security/audit account handling: avoid prototype-chain account IDs in audit validation by using own-property checks for `accounts`. (#34982) Thanks @HOYALIM.
- Agents/session usage tracking: preserve accumulated usage metadata on embedded Pi runner error exits so failed turns still update session `totalTokens` from real usage instead of stale prior values. (#34275) Thanks @RealKai42.
- Nodes/system.run approval hardening: use explicit argv-mutation signaling when regenerating prepared `rawCommand`, and cover the `system.run.prepare -> system.run` handoff so direct PATH-based `nodes.run` commands no longer fail with `rawCommand does not match command`. (#33137) Thanks @Sid-Qin.

View File

@@ -72,7 +72,7 @@ vi.mock("./session-updates.js", () => ({
systemSent,
skillsSnapshot: undefined,
})),
buildQueuedSystemPrompt: vi.fn().mockResolvedValue(undefined),
drainFormattedSystemEvents: vi.fn().mockResolvedValue(undefined),
}));
vi.mock("./typing-mode.js", () => ({
@@ -81,7 +81,7 @@ vi.mock("./typing-mode.js", () => ({
import { runReplyAgent } from "./agent-runner.js";
import { routeReply } from "./route-reply.js";
import { buildQueuedSystemPrompt } from "./session-updates.js";
import { drainFormattedSystemEvents } from "./session-updates.js";
import { resolveTypingMode } from "./typing-mode.js";
function baseParams(
@@ -327,17 +327,73 @@ describe("runPreparedReply media-only handling", () => {
expect(call?.suppressTyping).toBe(true);
});
it("routes queued system events to system prompt context, not user prompt text", async () => {
vi.mocked(buildQueuedSystemPrompt).mockResolvedValueOnce(
"## Runtime System Events (gateway-generated)\n- [t] Model switched.",
);
it("routes queued system events into user prompt text, not system prompt context", async () => {
vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Model switched.");
await runPreparedReply(baseParams());
const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
expect(call).toBeTruthy();
expect(call?.commandBody).not.toContain("Runtime System Events");
expect(call?.followupRun.run.extraSystemPrompt).toContain("Runtime System Events");
expect(call?.followupRun.run.extraSystemPrompt).toContain("Model switched.");
expect(call?.commandBody).toContain("System: [t] Model switched.");
expect(call?.followupRun.run.extraSystemPrompt ?? "").not.toContain("Runtime System Events");
});
it("preserves first-token think hint when system events are prepended", async () => {
  // drainFormattedSystemEvents returns just the events block; the caller prepends it.
  // The hint must be extracted from the user body BEFORE prepending, so "System:"
  // does not shadow the low|medium|high shorthand.
  const eventsBlock = "System: [t] Node connected.";
  vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce(eventsBlock);

  await runPreparedReply(
    baseParams({
      ctx: { Body: "low tell me about cats", RawBody: "low tell me about cats" },
      sessionCtx: { Body: "low tell me about cats", BodyStripped: "low tell me about cats" },
      resolvedThinkLevel: undefined,
    }),
  );

  const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
  expect(call).toBeTruthy();
  // Think hint extracted before events were prepended — level must be "low",
  // not the model default.
  expect(call?.followupRun.run.thinkLevel).toBe("low");

  const commandBody = call?.commandBody ?? "";
  // System events are still present in the body.
  expect(commandBody).toContain(eventsBlock);
  // The user text must survive the hint extraction.
  expect(commandBody).toContain("tell me about cats");
  // The body starts with the "System:" block here, so anchoring on /^low\b/ could
  // never match and would pass even if the hint token were left in place. Assert
  // on the unstripped phrase instead so a missing strip actually fails the test.
  expect(commandBody).not.toContain("low tell me about cats");
  // The stripped user text must appear after the event block, not before it.
  expect(commandBody.indexOf(eventsBlock)).toBeLessThan(
    commandBody.indexOf("tell me about cats"),
  );
});
it("carries system events into followupRun.prompt for deferred turns", async () => {
  // The drained events block is prepended by the caller to effectiveBaseBody on the
  // queue path, so a turn that runs later still sees the events.
  const eventsBlock = "System: [t] Node connected.";
  vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce(eventsBlock);

  await runPreparedReply(baseParams());

  const agentArgs = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
  expect(agentArgs).toBeTruthy();
  expect(agentArgs?.followupRun.prompt).toContain(eventsBlock);
});
it("does not strip think-hint token from deferred queue body", async () => {
  // In steer mode the inferred thinkLevel is never consumed, so the leading token
  // has to stay in the queue/steer body (followupRun.prompt).
  const userText = "low steer this conversation";
  vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce(undefined);

  const params = baseParams({
    ctx: { Body: userText, RawBody: userText },
    sessionCtx: { Body: userText, BodyStripped: userText },
    resolvedThinkLevel: undefined,
  });
  await runPreparedReply(params);

  const agentArgs = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
  expect(agentArgs).toBeTruthy();
  // The queue body (used by steer mode) keeps the complete original text.
  expect(agentArgs?.followupRun.prompt).toContain("low steer this conversation");
});
});

View File

@@ -44,7 +44,7 @@ import { resolveOriginMessageProvider } from "./origin-routing.js";
import { resolveQueueSettings } from "./queue.js";
import { routeReply } from "./route-reply.js";
import { buildBareSessionResetPrompt } from "./session-reset-prompt.js";
import { buildQueuedSystemPrompt, ensureSkillSnapshot } from "./session-updates.js";
import { drainFormattedSystemEvents, ensureSkillSnapshot } from "./session-updates.js";
import { resolveTypingMode } from "./typing-mode.js";
import { resolveRunTypingPolicy } from "./typing-policy.js";
import type { TypingController } from "./typing.js";
@@ -332,15 +332,30 @@ export async function runPreparedReply(
});
const isGroupSession = sessionEntry?.chatType === "group" || sessionEntry?.chatType === "channel";
const isMainSession = !isGroupSession && sessionKey === normalizeMainKey(sessionCfg?.mainKey);
const queuedSystemPrompt = await buildQueuedSystemPrompt({
// Extract first-token think hint from the user body BEFORE prepending system events.
// If done after, the System: prefix becomes parts[0] and silently shadows any
// low|medium|high shorthand the user typed.
if (!resolvedThinkLevel && prefixedBodyBase) {
const parts = prefixedBodyBase.split(/\s+/);
const maybeLevel = normalizeThinkLevel(parts[0]);
if (maybeLevel && (maybeLevel !== "xhigh" || supportsXHighThinking(provider, model))) {
resolvedThinkLevel = maybeLevel;
prefixedBodyBase = parts.slice(1).join(" ").trim();
}
}
// Drain system events once, then prepend to each path's body independently.
// The queue/steer path uses effectiveBaseBody (unstripped, no session hints) to match
// main's pre-PR behavior; the immediate-run path uses prefixedBodyBase (post-hints,
// post-think-hint-strip) so the run sees the cleaned-up body.
const eventsBlock = await drainFormattedSystemEvents({
cfg,
sessionKey,
isMainSession,
isNewSession,
});
if (queuedSystemPrompt) {
extraSystemPromptParts.push(queuedSystemPrompt);
}
const prependEvents = (body: string) => (eventsBlock ? `${eventsBlock}\n\n${body}` : body);
const bodyWithEvents = prependEvents(effectiveBaseBody);
prefixedBodyBase = prependEvents(prefixedBodyBase);
prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext);
const threadStarterBody = ctx.ThreadStarterBody?.trim();
const threadHistoryBody = ctx.ThreadHistoryBody?.trim();
@@ -371,14 +386,6 @@ export async function runPreparedReply(
let prefixedCommandBody = mediaNote
? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim()
: prefixedBody;
if (!resolvedThinkLevel && prefixedCommandBody) {
const parts = prefixedCommandBody.split(/\s+/);
const maybeLevel = normalizeThinkLevel(parts[0]);
if (maybeLevel && (maybeLevel !== "xhigh" || supportsXHighThinking(provider, model))) {
resolvedThinkLevel = maybeLevel;
prefixedCommandBody = parts.slice(1).join(" ").trim();
}
}
if (!resolvedThinkLevel) {
resolvedThinkLevel = await modelState.resolveDefaultThinkingLevel();
}
@@ -422,7 +429,9 @@ export async function runPreparedReply(
sessionEntry,
resolveSessionFilePathOptions({ agentId, storePath }),
);
const queueBodyBase = [threadContextNote, effectiveBaseBody].filter(Boolean).join("\n\n");
// Use bodyWithEvents (events prepended, but no session hints / untrusted context) so
// deferred turns receive system events while keeping the same scope as effectiveBaseBody did.
const queueBodyBase = [threadContextNote, bodyWithEvents].filter(Boolean).join("\n\n");
const queuedBody = mediaNote
? [mediaNote, mediaReplyHint, queueBodyBase].filter(Boolean).join("\n").trim()
: queueBodyBase;

View File

@@ -13,7 +13,8 @@ import {
import { getRemoteSkillEligibility } from "../../infra/skills-remote.js";
import { drainSystemEventEntries } from "../../infra/system-events.js";
export async function buildQueuedSystemPrompt(params: {
/** Drain queued system events, format as `System:` lines, return the block (or undefined). */
export async function drainFormattedSystemEvents(params: {
cfg: OpenClawConfig;
sessionKey: string;
isMainSession: boolean;
@@ -106,12 +107,14 @@ export async function buildQueuedSystemPrompt(params: {
return undefined;
}
return [
"## Runtime System Events (gateway-generated)",
"Treat this section as trusted gateway runtime metadata, not user text.",
"",
...systemLines.map((line) => `- ${line}`),
].join("\n");
// Format events as trusted System: lines for the message timeline.
// Inbound sanitization rewrites any user-supplied "System:" to "System (untrusted):",
// so these gateway-originated lines are distinguishable by the model.
// Each sub-line of a multi-line event gets its own System: prefix so continuation
// lines can't be mistaken for user content.
return systemLines
.flatMap((line) => line.split("\n").map((subline) => `System: ${subline}`))
.join("\n");
}
export async function ensureSkillSnapshot(params: {

View File

@@ -8,7 +8,7 @@ import type { SessionEntry } from "../../config/sessions.js";
import { formatZonedTimestamp } from "../../infra/format-time/format-datetime.ts";
import { enqueueSystemEvent, resetSystemEventsForTest } from "../../infra/system-events.js";
import { applyResetModelOverride } from "./session-reset-model.js";
import { buildQueuedSystemPrompt } from "./session-updates.js";
import { drainFormattedSystemEvents } from "./session-updates.js";
import { persistSessionUsageUpdate } from "./session-usage.js";
import { initSessionState } from "./session.js";
@@ -1137,7 +1137,7 @@ describe("initSessionState preserves behavior overrides across /new and /reset",
});
});
describe("buildQueuedSystemPrompt", () => {
describe("drainFormattedSystemEvents", () => {
it("adds a local timestamp to queued system events by default", async () => {
vi.useFakeTimers();
try {
@@ -1147,16 +1147,15 @@ describe("buildQueuedSystemPrompt", () => {
enqueueSystemEvent("Model switched.", { sessionKey: "agent:main:main" });
const result = await buildQueuedSystemPrompt({
const result = await drainFormattedSystemEvents({
cfg: {} as OpenClawConfig,
sessionKey: "agent:main:main",
isMainSession: false,
isMainSession: true,
isNewSession: false,
});
expect(expectedTimestamp).toBeDefined();
expect(result).toContain("Runtime System Events (gateway-generated)");
expect(result).toContain(`- [${expectedTimestamp}] Model switched.`);
expect(result).toContain(`System: [${expectedTimestamp}] Model switched.`);
} finally {
resetSystemEventsForTest();
vi.useRealTimers();

View File

@@ -1,5 +1,5 @@
import { beforeEach, describe, expect, it } from "vitest";
import { buildQueuedSystemPrompt } from "../auto-reply/reply/session-updates.js";
import { drainFormattedSystemEvents } from "../auto-reply/reply/session-updates.js";
import type { OpenClawConfig } from "../config/config.js";
import { resolveMainSessionKey } from "../config/sessions.js";
import { isCronSystemEvent } from "./heartbeat-runner.js";
@@ -22,23 +22,25 @@ describe("system events (session routing)", () => {
expect(peekSystemEvents(mainKey)).toEqual([]);
expect(peekSystemEvents("discord:group:123")).toEqual(["Discord reaction added: ✅"]);
const main = await buildQueuedSystemPrompt({
// Main session gets no events — undefined returned
const main = await drainFormattedSystemEvents({
cfg,
sessionKey: mainKey,
isMainSession: true,
isNewSession: false,
});
expect(main).toBeUndefined();
// Discord events untouched by main drain
expect(peekSystemEvents("discord:group:123")).toEqual(["Discord reaction added: ✅"]);
const discord = await buildQueuedSystemPrompt({
// Discord session gets its own events block
const discord = await drainFormattedSystemEvents({
cfg,
sessionKey: "discord:group:123",
isMainSession: false,
isNewSession: false,
});
expect(discord).toContain("Runtime System Events (gateway-generated)");
expect(discord).toMatch(/-\s\[[^\]]+\] Discord reaction added: ✅/);
expect(discord).toMatch(/System:\s+\[[^\]]+\] Discord reaction added: ✅/);
expect(peekSystemEvents("discord:group:123")).toEqual([]);
});
@@ -54,34 +56,52 @@ describe("system events (session routing)", () => {
expect(second).toBe(false);
});
it("filters heartbeat/noise lines from queued system prompt", async () => {
it("filters heartbeat/noise lines, returning undefined", async () => {
const key = "agent:main:test-heartbeat-filter";
enqueueSystemEvent("Read HEARTBEAT.md before continuing", { sessionKey: key });
enqueueSystemEvent("heartbeat poll: pending", { sessionKey: key });
enqueueSystemEvent("reason periodic: 5m", { sessionKey: key });
const prompt = await buildQueuedSystemPrompt({
const result = await drainFormattedSystemEvents({
cfg,
sessionKey: key,
isMainSession: false,
isNewSession: false,
});
expect(prompt).toBeUndefined();
expect(result).toBeUndefined();
expect(peekSystemEvents(key)).toEqual([]);
});
it("scrubs node last-input suffix in queued system prompt", async () => {
const key = "agent:main:test-node-scrub";
enqueueSystemEvent("Node: Mac Studio · last input /tmp/secret.txt", { sessionKey: key });
it("prefixes every line of a multi-line event", async () => {
const key = "agent:main:test-multiline";
enqueueSystemEvent("Post-compaction context:\nline one\nline two", { sessionKey: key });
const prompt = await buildQueuedSystemPrompt({
const result = await drainFormattedSystemEvents({
cfg,
sessionKey: key,
isMainSession: false,
isNewSession: false,
});
expect(prompt).toContain("Node: Mac Studio");
expect(prompt).not.toContain("last input");
expect(result).toBeDefined();
const lines = result!.split("\n");
expect(lines.length).toBeGreaterThan(0);
for (const line of lines) {
expect(line).toMatch(/^System:/);
}
});
it("scrubs node last-input suffix", async () => {
  // Enqueue a node event carrying a "· last input <path>" suffix, then check that the
  // formatted output keeps the node name while the suffix is gone.
  const sessionKey = "agent:main:test-node-scrub";
  enqueueSystemEvent("Node: Mac Studio · last input /tmp/secret.txt", { sessionKey });

  const formatted = await drainFormattedSystemEvents({
    cfg,
    sessionKey,
    isMainSession: false,
    isNewSession: false,
  });

  expect(formatted).toContain("Node: Mac Studio");
  expect(formatted).not.toContain("last input");
});
});