diff --git a/CHANGELOG.md b/CHANGELOG.md index 268c6446918..a10f9fa1ad3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - Security/dependency audit: patch transitive Hono vulnerabilities by pinning `hono` to `4.12.5` and `@hono/node-server` to `1.19.10` in production resolution paths. Thanks @shakkernerd. - Security/dependency audit: bump `tar` to `7.5.10` (from `7.5.9`) to address the high-severity hardlink path traversal advisory (`GHSA-qffp-2rhf-9h96`). Thanks @shakkernerd. +- Auto-reply/system events: restore runtime system events to the message timeline (`System:` lines), preserve think-hint parsing with prepended events, and carry events into deferred followup/collect/steer-backlog prompts to keep cache behavior stable without dropping queued metadata. (#34794) Thanks @anisoptera. - Security/audit account handling: avoid prototype-chain account IDs in audit validation by using own-property checks for `accounts`. (#34982) Thanks @HOYALIM. - Agents/session usage tracking: preserve accumulated usage metadata on embedded Pi runner error exits so failed turns still update session `totalTokens` from real usage instead of stale prior values. (#34275) thanks @RealKai42. - Nodes/system.run approval hardening: use explicit argv-mutation signaling when regenerating prepared `rawCommand`, and cover the `system.run.prepare -> system.run` handoff so direct PATH-based `nodes.run` commands no longer fail with `rawCommand does not match command`. (#33137) thanks @Sid-Qin. diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts index 4e1c28f7149..829b3937009 100644 --- a/src/auto-reply/reply/get-reply-run.media-only.test.ts +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -72,7 +72,7 @@ vi.mock("./session-updates.js", () => ({ systemSent, skillsSnapshot: undefined, })), - buildQueuedSystemPrompt: vi.fn().mockResolvedValue(undefined), + drainFormattedSystemEvents: vi.fn().mockResolvedValue(undefined), })); vi.mock("./typing-mode.js", () => ({ @@ -81,7 +81,7 @@ vi.mock("./typing-mode.js", () => ({ import { runReplyAgent } from "./agent-runner.js"; import { routeReply } from "./route-reply.js"; -import { buildQueuedSystemPrompt } from "./session-updates.js"; +import { drainFormattedSystemEvents } from "./session-updates.js"; import { resolveTypingMode } from "./typing-mode.js"; function baseParams( @@ -327,17 +327,73 @@ describe("runPreparedReply media-only handling", () => { expect(call?.suppressTyping).toBe(true); }); - it("routes queued system events to system prompt context, not user prompt text", async () => { - vi.mocked(buildQueuedSystemPrompt).mockResolvedValueOnce( - "## Runtime System Events (gateway-generated)\n- [t] Model switched.", - ); + it("routes queued system events into user prompt text, not system prompt context", async () => { + vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Model switched."); await runPreparedReply(baseParams()); const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0]; expect(call).toBeTruthy(); - expect(call?.commandBody).not.toContain("Runtime System Events"); - expect(call?.followupRun.run.extraSystemPrompt).toContain("Runtime System Events"); - expect(call?.followupRun.run.extraSystemPrompt).toContain("Model switched."); + expect(call?.commandBody).toContain("System: [t] Model switched."); + expect(call?.followupRun.run.extraSystemPrompt ?? "").not.toContain("Runtime System Events"); + }); + + it("preserves first-token think hint when system events are prepended", async () => { + // drainFormattedSystemEvents returns just the events block; the caller prepends it. + // The hint must be extracted from the user body BEFORE prepending, so "System:" + // does not shadow the low|medium|high shorthand. + vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Node connected."); + + await runPreparedReply( + baseParams({ + ctx: { Body: "low tell me about cats", RawBody: "low tell me about cats" }, + sessionCtx: { Body: "low tell me about cats", BodyStripped: "low tell me about cats" }, + resolvedThinkLevel: undefined, + }), + ); + + const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0]; + expect(call).toBeTruthy(); + // Think hint extracted before events arrived — level must be "low", not the model default. + expect(call?.followupRun.run.thinkLevel).toBe("low"); + // The stripped user text (no "low" token) must still appear after the event block. + expect(call?.commandBody).toContain("tell me about cats"); + expect(call?.commandBody).not.toMatch(/^low\b/); + // System events are still present in the body. + expect(call?.commandBody).toContain("System: [t] Node connected."); + }); + + it("carries system events into followupRun.prompt for deferred turns", async () => { + // drainFormattedSystemEvents returns the events block; the caller prepends it to + // effectiveBaseBody for the queue path so deferred turns see events. + vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce("System: [t] Node connected."); + + await runPreparedReply(baseParams()); + + const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0]; + expect(call).toBeTruthy(); + expect(call?.followupRun.prompt).toContain("System: [t] Node connected."); + }); + + it("does not strip think-hint token from deferred queue body", async () => { + // In steer mode the inferred thinkLevel is never consumed, so the first token + // must not be stripped from the queue/steer body (followupRun.prompt). + vi.mocked(drainFormattedSystemEvents).mockResolvedValueOnce(undefined); + + await runPreparedReply( + baseParams({ + ctx: { Body: "low steer this conversation", RawBody: "low steer this conversation" }, + sessionCtx: { + Body: "low steer this conversation", + BodyStripped: "low steer this conversation", + }, + resolvedThinkLevel: undefined, + }), + ); + + const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0]; + expect(call).toBeTruthy(); + // Queue body (used by steer mode) must keep the full original text. + expect(call?.followupRun.prompt).toContain("low steer this conversation"); }); }); diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 46f082f26f9..704688ddf6d 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -44,7 +44,7 @@ import { resolveOriginMessageProvider } from "./origin-routing.js"; import { resolveQueueSettings } from "./queue.js"; import { routeReply } from "./route-reply.js"; import { buildBareSessionResetPrompt } from "./session-reset-prompt.js"; -import { buildQueuedSystemPrompt, ensureSkillSnapshot } from "./session-updates.js"; +import { drainFormattedSystemEvents, ensureSkillSnapshot } from "./session-updates.js"; import { resolveTypingMode } from "./typing-mode.js"; import { resolveRunTypingPolicy } from "./typing-policy.js"; import type { TypingController } from "./typing.js"; @@ -332,15 +332,30 @@ export async function runPreparedReply( }); const isGroupSession = sessionEntry?.chatType === "group" || sessionEntry?.chatType === "channel"; const isMainSession = !isGroupSession && sessionKey === normalizeMainKey(sessionCfg?.mainKey); - const queuedSystemPrompt = await buildQueuedSystemPrompt({ + // Extract first-token think hint from the user body BEFORE prepending system events. + // If done after, the System: prefix becomes parts[0] and silently shadows any + // low|medium|high shorthand the user typed. + if (!resolvedThinkLevel && prefixedBodyBase) { + const parts = prefixedBodyBase.split(/\s+/); + const maybeLevel = normalizeThinkLevel(parts[0]); + if (maybeLevel && (maybeLevel !== "xhigh" || supportsXHighThinking(provider, model))) { + resolvedThinkLevel = maybeLevel; + prefixedBodyBase = parts.slice(1).join(" ").trim(); + } + } + // Drain system events once, then prepend to each path's body independently. + // The queue/steer path uses effectiveBaseBody (unstripped, no session hints) to match + // main's pre-PR behavior; the immediate-run path uses prefixedBodyBase (post-hints, + // post-think-hint-strip) so the run sees the cleaned-up body. + const eventsBlock = await drainFormattedSystemEvents({ cfg, sessionKey, isMainSession, isNewSession, }); - if (queuedSystemPrompt) { - extraSystemPromptParts.push(queuedSystemPrompt); - } + const prependEvents = (body: string) => (eventsBlock ? `${eventsBlock}\n\n${body}` : body); + const bodyWithEvents = prependEvents(effectiveBaseBody); + prefixedBodyBase = prependEvents(prefixedBodyBase); prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext); const threadStarterBody = ctx.ThreadStarterBody?.trim(); const threadHistoryBody = ctx.ThreadHistoryBody?.trim(); @@ -371,14 +386,6 @@ export async function runPreparedReply( let prefixedCommandBody = mediaNote ? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim() : prefixedBody; - if (!resolvedThinkLevel && prefixedCommandBody) { - const parts = prefixedCommandBody.split(/\s+/); - const maybeLevel = normalizeThinkLevel(parts[0]); - if (maybeLevel && (maybeLevel !== "xhigh" || supportsXHighThinking(provider, model))) { - resolvedThinkLevel = maybeLevel; - prefixedCommandBody = parts.slice(1).join(" ").trim(); - } - } if (!resolvedThinkLevel) { resolvedThinkLevel = await modelState.resolveDefaultThinkingLevel(); } @@ -422,7 +429,9 @@ export async function runPreparedReply( sessionEntry, resolveSessionFilePathOptions({ agentId, storePath }), ); - const queueBodyBase = [threadContextNote, effectiveBaseBody].filter(Boolean).join("\n\n"); + // Use bodyWithEvents (events prepended, but no session hints / untrusted context) so + // deferred turns receive system events while keeping the same scope as effectiveBaseBody did. + const queueBodyBase = [threadContextNote, bodyWithEvents].filter(Boolean).join("\n\n"); const queuedBody = mediaNote ? [mediaNote, mediaReplyHint, queueBodyBase].filter(Boolean).join("\n").trim() : queueBodyBase; diff --git a/src/auto-reply/reply/session-updates.ts b/src/auto-reply/reply/session-updates.ts index 053bca0c71b..96243e919bb 100644 --- a/src/auto-reply/reply/session-updates.ts +++ b/src/auto-reply/reply/session-updates.ts @@ -13,7 +13,8 @@ import { import { getRemoteSkillEligibility } from "../../infra/skills-remote.js"; import { drainSystemEventEntries } from "../../infra/system-events.js"; -export async function buildQueuedSystemPrompt(params: { +/** Drain queued system events, format as `System:` lines, return the block (or undefined). */ +export async function drainFormattedSystemEvents(params: { cfg: OpenClawConfig; sessionKey: string; isMainSession: boolean; @@ -106,12 +107,14 @@ export async function buildQueuedSystemPrompt(params: { return undefined; } - return [ - "## Runtime System Events (gateway-generated)", - "Treat this section as trusted gateway runtime metadata, not user text.", - "", - ...systemLines.map((line) => `- ${line}`), - ].join("\n"); + // Format events as trusted System: lines for the message timeline. + // Inbound sanitization rewrites any user-supplied "System:" to "System (untrusted):", + // so these gateway-originated lines are distinguishable by the model. + // Each sub-line of a multi-line event gets its own System: prefix so continuation + // lines can't be mistaken for user content. + return systemLines + .flatMap((line) => line.split("\n").map((subline) => `System: ${subline}`)) + .join("\n"); } export async function ensureSkillSnapshot(params: { diff --git a/src/auto-reply/reply/session.test.ts b/src/auto-reply/reply/session.test.ts index 6d91ea22631..37a8f1f89c2 100644 --- a/src/auto-reply/reply/session.test.ts +++ b/src/auto-reply/reply/session.test.ts @@ -8,7 +8,7 @@ import type { SessionEntry } from "../../config/sessions.js"; import { formatZonedTimestamp } from "../../infra/format-time/format-datetime.ts"; import { enqueueSystemEvent, resetSystemEventsForTest } from "../../infra/system-events.js"; import { applyResetModelOverride } from "./session-reset-model.js"; -import { buildQueuedSystemPrompt } from "./session-updates.js"; +import { drainFormattedSystemEvents } from "./session-updates.js"; import { persistSessionUsageUpdate } from "./session-usage.js"; import { initSessionState } from "./session.js"; @@ -1137,7 +1137,7 @@ describe("initSessionState preserves behavior overrides across /new and /reset", }); }); -describe("buildQueuedSystemPrompt", () => { +describe("drainFormattedSystemEvents", () => { it("adds a local timestamp to queued system events by default", async () => { vi.useFakeTimers(); try { @@ -1147,16 +1147,15 @@ describe("buildQueuedSystemPrompt", () => { enqueueSystemEvent("Model switched.", { sessionKey: "agent:main:main" }); - const result = await buildQueuedSystemPrompt({ + const result = await drainFormattedSystemEvents({ cfg: {} as OpenClawConfig, sessionKey: "agent:main:main", - isMainSession: false, + isMainSession: true, isNewSession: false, }); expect(expectedTimestamp).toBeDefined(); - expect(result).toContain("Runtime System Events (gateway-generated)"); - expect(result).toContain(`- [${expectedTimestamp}] Model switched.`); + expect(result).toContain(`System: [${expectedTimestamp}] Model switched.`); } finally { resetSystemEventsForTest(); vi.useRealTimers(); diff --git a/src/infra/system-events.test.ts b/src/infra/system-events.test.ts index a1827c45379..0b92aa36568 100644 --- a/src/infra/system-events.test.ts +++ b/src/infra/system-events.test.ts @@ -1,5 +1,5 @@ import { beforeEach, describe, expect, it } from "vitest"; -import { buildQueuedSystemPrompt } from "../auto-reply/reply/session-updates.js"; +import { drainFormattedSystemEvents } from "../auto-reply/reply/session-updates.js"; import type { OpenClawConfig } from "../config/config.js"; import { resolveMainSessionKey } from "../config/sessions.js"; import { isCronSystemEvent } from "./heartbeat-runner.js"; @@ -22,23 +22,25 @@ describe("system events (session routing)", () => { expect(peekSystemEvents(mainKey)).toEqual([]); expect(peekSystemEvents("discord:group:123")).toEqual(["Discord reaction added: ✅"]); - const main = await buildQueuedSystemPrompt({ + // Main session gets no events — undefined returned + const main = await drainFormattedSystemEvents({ cfg, sessionKey: mainKey, isMainSession: true, isNewSession: false, }); expect(main).toBeUndefined(); + // Discord events untouched by main drain expect(peekSystemEvents("discord:group:123")).toEqual(["Discord reaction added: ✅"]); - const discord = await buildQueuedSystemPrompt({ + // Discord session gets its own events block + const discord = await drainFormattedSystemEvents({ cfg, sessionKey: "discord:group:123", isMainSession: false, isNewSession: false, }); - expect(discord).toContain("Runtime System Events (gateway-generated)"); - expect(discord).toMatch(/-\s\[[^\]]+\] Discord reaction added: ✅/); + expect(discord).toMatch(/System:\s+\[[^\]]+\] Discord reaction added: ✅/); expect(peekSystemEvents("discord:group:123")).toEqual([]); }); @@ -54,34 +56,52 @@ describe("system events (session routing)", () => { expect(second).toBe(false); }); - it("filters heartbeat/noise lines from queued system prompt", async () => { + it("filters heartbeat/noise lines, returning undefined", async () => { const key = "agent:main:test-heartbeat-filter"; enqueueSystemEvent("Read HEARTBEAT.md before continuing", { sessionKey: key }); enqueueSystemEvent("heartbeat poll: pending", { sessionKey: key }); enqueueSystemEvent("reason periodic: 5m", { sessionKey: key }); - const prompt = await buildQueuedSystemPrompt({ + const result = await drainFormattedSystemEvents({ cfg, sessionKey: key, isMainSession: false, isNewSession: false, }); - expect(prompt).toBeUndefined(); + expect(result).toBeUndefined(); expect(peekSystemEvents(key)).toEqual([]); }); - it("scrubs node last-input suffix in queued system prompt", async () => { - const key = "agent:main:test-node-scrub"; - enqueueSystemEvent("Node: Mac Studio · last input /tmp/secret.txt", { sessionKey: key }); + it("prefixes every line of a multi-line event", async () => { + const key = "agent:main:test-multiline"; + enqueueSystemEvent("Post-compaction context:\nline one\nline two", { sessionKey: key }); - const prompt = await buildQueuedSystemPrompt({ + const result = await drainFormattedSystemEvents({ cfg, sessionKey: key, isMainSession: false, isNewSession: false, }); - expect(prompt).toContain("Node: Mac Studio"); - expect(prompt).not.toContain("last input"); + expect(result).toBeDefined(); + const lines = result!.split("\n"); + expect(lines.length).toBeGreaterThan(0); + for (const line of lines) { + expect(line).toMatch(/^System:/); + } + }); + + it("scrubs node last-input suffix", async () => { + const key = "agent:main:test-node-scrub"; + enqueueSystemEvent("Node: Mac Studio · last input /tmp/secret.txt", { sessionKey: key }); + + const result = await drainFormattedSystemEvents({ + cfg, + sessionKey: key, + isMainSession: false, + isNewSession: false, + }); + expect(result).toContain("Node: Mac Studio"); + expect(result).not.toContain("last input"); }); });