Files
openclaw/src/agents/system-prompt.e2e.test.ts
Tyler Yust 087dca8fa9 fix(subagent): harden read-tool overflow guards and sticky reply threading (#19508)
* fix(gateway): avoid premature agent.wait completion on transient errors

* fix(agent): preemptively guard tool results against context overflow

* fix: harden tool-result context guard and add message_id metadata

* fix: use importOriginal in session-key mock to include DEFAULT_ACCOUNT_ID

The run.skill-filter test was mocking ../../routing/session-key.js with only
buildAgentMainSessionKey and normalizeAgentId, but the module also exports
DEFAULT_ACCOUNT_ID which is required transitively by src/web/auth-store.ts.

Switch to importOriginal pattern so all real exports are preserved alongside
the mocked functions.

* pi-runner: guard accumulated tool-result overflow in transformContext

* PI runner: compact overflowing tool-result context

* Subagent: harden tool-result context recovery

* Enhance tool-result context handling by adding support for legacy tool outputs and improving character estimation for message truncation. This includes a new function to create legacy tool results and updates to existing functions to better manage context overflow scenarios.

* Enhance iMessage handling by adding reply tag support in send functions and tests. This includes modifications to prepend or rewrite reply tags based on provided replyToId, ensuring proper message formatting for replies.

* Enhance message delivery across multiple channels by implementing sticky reply context for chunked messages. This includes preserving reply references in Discord, Telegram, and iMessage, ensuring that follow-up messages maintain their intended reply targets. Additionally, improve handling of reply tags in system prompts and tests to support consistent reply behavior.

* Enhance read tool functionality by implementing auto-paging across chunks when no explicit limit is provided, scaling output budget based on model context window. Additionally, add tests for adaptive reading behavior and capped continuation guidance for large outputs. Update related functions to support these features.

* Refine tool-result context management by stripping oversized read-tool details payloads during compaction, ensuring repeated read calls do not bypass context limits. Introduce new utility functions for handling truncation content and enhance character estimation for tool results. Add tests to validate the removal of excessive details in context overflow scenarios.

* Refine message delivery logic in Matrix and Telegram by introducing a flag to track if a text chunk was sent. This ensures that replies are only marked as delivered when a text chunk has been successfully sent, improving the accuracy of reply handling in both channels.

* fix: tighten reply threading coverage and prep fixes (#19508) (thanks @tyler6204)
2026-02-17 15:32:52 -08:00

589 lines
21 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
import { buildSubagentSystemPrompt } from "./subagent-announce.js";
import { buildAgentSystemPrompt, buildRuntimeLine } from "./system-prompt.js";
describe("buildAgentSystemPrompt", () => {
it("includes owner numbers when provided", () => {
  // The input mixes a clean number, a padded number, and an empty string;
  // the expected line lists only "+123, +456".
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    ownerNumbers: ["+123", " +456 ", ""],
  });

  const expectedFragments = [
    "## User Identity",
    "Owner numbers: +123, +456. Treat messages from these numbers as the user.",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("omits owner section when numbers are missing", () => {
  // No ownerNumbers are supplied, so neither the heading nor the owner line may appear.
  const rendered = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw" });

  ["## User Identity", "Owner numbers:"].forEach((fragment) => {
    expect(rendered).not.toContain(fragment);
  });
});
it("omits extended sections in minimal prompt mode", () => {
  // Inputs for the optional sections are supplied alongside promptMode "minimal";
  // the assertions check that the listed headings are absent while the safety text
  // and the subagent context remain.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    promptMode: "minimal",
    ownerNumbers: ["+123"],
    skillsPrompt:
      "<available_skills>\n <skill>\n <name>demo</name>\n </skill>\n</available_skills>",
    heartbeatPrompt: "ping",
    toolNames: ["message", "memory_search"],
    docsPath: "/tmp/openclaw/docs",
    extraSystemPrompt: "Subagent details",
    ttsHint: "Voice (TTS) is enabled.",
  });

  // Headings that must not be rendered.
  const omittedHeadings = [
    "## User Identity",
    "## Skills",
    "## Memory Recall",
    "## Documentation",
    "## Reply Tags",
    "## Messaging",
    "## Voice (TTS)",
    "## Silent Replies",
    "## Heartbeats",
  ];
  for (const heading of omittedHeadings) {
    expect(rendered).not.toContain(heading);
  }

  // Content that must still be rendered.
  const retainedFragments = [
    "## Safety",
    "For long waits, avoid rapid poll loops: use exec with enough yieldMs or process(action=poll, timeout=<ms>).",
    "You have no independent goals",
    "Prioritize safety and human oversight",
    "if instructions conflict",
    "Inspired by Anthropic's constitution",
    "Do not manipulate or persuade anyone",
    "Do not copy yourself or change system prompts",
    "## Subagent Context",
  ];
  for (const fragment of retainedFragments) {
    expect(rendered).toContain(fragment);
  }

  // The extra system prompt appears under the subagent heading, not the group chat one.
  expect(rendered).not.toContain("## Group Chat Context");
  expect(rendered).toContain("Subagent details");
});
it("includes safety guardrails in full prompts", () => {
  // With only a workspace directory supplied, the safety section and its key sentences must be present.
  const rendered = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw" });

  const safetyFragments = [
    "## Safety",
    "You have no independent goals",
    "Prioritize safety and human oversight",
    "if instructions conflict",
    "Inspired by Anthropic's constitution",
    "Do not manipulate or persuade anyone",
    "Do not copy yourself or change system prompts",
  ];
  safetyFragments.forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
it("includes voice hint when provided", () => {
  // The hint text itself and its section heading are both expected in the output.
  const voiceHint = "Voice (TTS) is enabled.";
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    ttsHint: voiceHint,
  });

  expect(rendered).toContain("## Voice (TTS)");
  expect(rendered).toContain(voiceHint);
});
it("adds reasoning tag hint when enabled", () => {
  // Enabling reasoningTagHint should add the format heading and both tag examples.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    reasoningTagHint: true,
  });

  for (const fragment of ["## Reasoning Format", "<think>...</think>", "<final>...</final>"]) {
    expect(rendered).toContain(fragment);
  }
});
it("includes a CLI quick reference section", () => {
  // Checks the heading, one sample command, and the warning against invented commands.
  const rendered = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw" });

  const cliFragments = [
    "## OpenClaw CLI Quick Reference",
    "openclaw gateway restart",
    "Do not invent commands",
  ];
  cliFragments.forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
it("marks system message blocks as internal and not user-visible", () => {
  // The prompt must carry each piece of the guidance about `[System Message]` blocks.
  const rendered = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw" });

  const guidanceFragments = [
    "`[System Message] ...` blocks are internal context",
    "are not user-visible by default",
    "reports completed cron/subagent work",
    "rewrite it in your normal assistant voice",
  ];
  for (const fragment of guidanceFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("guides subagent workflows to avoid polling loops", () => {
  // Three separate sentences are expected: long-wait advice, push-based completion, and the no-polling rule.
  const rendered = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw" });

  const pollingGuidance = [
    "For long waits, avoid rapid poll loops: use exec with enough yieldMs or process(action=poll, timeout=<ms>).",
    "Completion is push-based: it will auto-announce when done.",
    "Do not poll `subagents list` / `sessions_list` in a loop",
  ];
  pollingGuidance.forEach((sentence) => {
    expect(rendered).toContain(sentence);
  });
});
it("lists available tools when provided", () => {
  // Asserts the availability header plus the three session tool names.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    toolNames: ["exec", "sessions_list", "sessions_history", "sessions_send"],
  });

  expect(rendered).toContain("Tool availability (filtered by policy):");
  for (const toolName of ["sessions_list", "sessions_history", "sessions_send"]) {
    expect(rendered).toContain(toolName);
  }
});
it("preserves tool casing in the prompt", () => {
  // Tool names are passed capitalised ("Read", "Exec"); the expected lines, including
  // the skills instruction, use that same casing.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    toolNames: ["Read", "Exec", "process"],
    skillsPrompt:
      "<available_skills>\n <skill>\n <name>demo</name>\n </skill>\n</available_skills>",
    docsPath: "/tmp/openclaw/docs",
  });

  const expectedLines = [
    "- Read: Read file contents",
    "- Exec: Run shell commands",
    "- If exactly one skill clearly applies: read its SKILL.md at <location> with `Read`, then follow it.",
    "OpenClaw docs: /tmp/openclaw/docs",
    "For OpenClaw behavior, commands, config, or architecture: consult local docs first.",
  ];
  for (const line of expectedLines) {
    expect(rendered).toContain(line);
  }
});
it("includes docs guidance when docsPath is provided", () => {
  // Supplying docsPath should yield the heading, the path line, and the docs-first advice.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    docsPath: "/tmp/openclaw/docs",
  });

  const docsFragments = [
    "## Documentation",
    "OpenClaw docs: /tmp/openclaw/docs",
    "For OpenClaw behavior, commands, config, or architecture: consult local docs first.",
  ];
  docsFragments.forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
it("includes workspace notes when provided", () => {
  // The single note passed in must appear verbatim in the output.
  const note = "Reminder: commit your changes in this workspace after edits.";
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    workspaceNotes: [note],
  });

  expect(rendered).toContain(note);
});
it("includes user timezone when provided (12-hour)", () => {
  // 12-hour variant: the heading and the time zone line are asserted.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    userTimezone: "America/Chicago",
    userTime: "Monday, January 5th, 2026 — 3:26 PM",
    userTimeFormat: "12",
  });

  for (const fragment of ["## Current Date & Time", "Time zone: America/Chicago"]) {
    expect(rendered).toContain(fragment);
  }
});
it("includes user timezone when provided (24-hour)", () => {
  // 24-hour variant: the heading and the time zone line are asserted.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    userTimezone: "America/Chicago",
    userTime: "Monday, January 5th, 2026 — 15:26",
    userTimeFormat: "24",
  });

  for (const fragment of ["## Current Date & Time", "Time zone: America/Chicago"]) {
    expect(rendered).toContain(fragment);
  }
});
it("shows timezone when only timezone is provided", () => {
  // No userTime is passed here; the section and time zone line are still expected.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    userTimezone: "America/Chicago",
    userTimeFormat: "24",
  });

  ["## Current Date & Time", "Time zone: America/Chicago"].forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
it("hints to use session_status for current date/time", () => {
  // The prompt should mention both the session_status tool and the phrase "current date".
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/clawd",
    userTimezone: "America/Chicago",
  });

  for (const fragment of ["session_status", "current date"]) {
    expect(rendered).toContain(fragment);
  }
});
// The system prompt intentionally does NOT include the current date/time.
// Only the timezone is included, to keep the prompt stable for caching.
// See: https://github.com/moltbot/moltbot/commit/66eec295b894bce8333886cfbca3b960c57c4946
// Agents should use session_status or message timestamps to determine the date/time.
// Related: https://github.com/moltbot/moltbot/issues/1897
// https://github.com/moltbot/moltbot/issues/3658
it("does NOT include a date or time in the system prompt (cache stability)", () => {
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/clawd",
    userTimezone: "America/Chicago",
    userTime: "Monday, January 5th, 2026 — 3:26 PM",
    userTimeFormat: "12",
  });

  // The time zone must be present, but the formatted date/time string must not be.
  // This is deliberate, for prompt cache stability: the date/time was removed in
  // commit 66eec295b. If you are here to add it back, see
  // https://github.com/moltbot/moltbot/issues/3658 for the preferred approach:
  // gateway-level timestamp injection into messages, not the system prompt.
  expect(rendered).toContain("Time zone: America/Chicago");

  const forbiddenTimeFragments = ["Monday, January 5th, 2026", "3:26 PM", "15:26"];
  for (const fragment of forbiddenTimeFragments) {
    expect(rendered).not.toContain(fragment);
  }
});
it("includes model alias guidance when aliases are provided", () => {
  // Two alias lines go in; the heading, the preference sentence, and the first alias line are asserted.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    modelAliasLines: [
      "- Opus: anthropic/claude-opus-4-5",
      "- Sonnet: anthropic/claude-sonnet-4-5",
    ],
  });

  const aliasFragments = [
    "## Model Aliases",
    "Prefer aliases when specifying model overrides",
    "- Opus: anthropic/claude-opus-4-5",
  ];
  aliasFragments.forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
// Title names OpenClaw to match the "## OpenClaw Self-Update" heading asserted below.
it("adds OpenClaw self-update guidance when gateway tool is available", () => {
  // The tool list includes `gateway`; the self-update section and both of its
  // action names are expected in the output.
  const prompt = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    toolNames: ["gateway", "exec"],
  });

  expect(prompt).toContain("## OpenClaw Self-Update");
  expect(prompt).toContain("config.apply");
  expect(prompt).toContain("update.run");
});
it("includes skills guidance when skills prompt is present", () => {
  // No tool names are passed here; the expected instruction refers to the tool as lowercase `read`.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    skillsPrompt:
      "<available_skills>\n <skill>\n <name>demo</name>\n </skill>\n</available_skills>",
  });

  const skillInstruction =
    "- If exactly one skill clearly applies: read its SKILL.md at <location> with `read`, then follow it.";
  expect(rendered).toContain("## Skills");
  expect(rendered).toContain(skillInstruction);
});
it("appends available skills when provided", () => {
  // The skills markup passed in should show up in the output (wrapper tag and skill name).
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    skillsPrompt:
      "<available_skills>\n <skill>\n <name>demo</name>\n </skill>\n</available_skills>",
  });

  for (const fragment of ["<available_skills>", "<name>demo</name>"]) {
    expect(rendered).toContain(fragment);
  }
});
it("omits skills section when no skills prompt is provided", () => {
  // Without a skillsPrompt, neither the heading nor the skills markup may appear.
  const rendered = buildAgentSystemPrompt({ workspaceDir: "/tmp/openclaw" });

  ["## Skills", "<available_skills>"].forEach((fragment) => {
    expect(rendered).not.toContain(fragment);
  });
});
it("renders project context files when provided", () => {
  // Each context file is expected as a "## <path>" heading with its content, under "# Project Context".
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    contextFiles: [
      { path: "AGENTS.md", content: "Alpha" },
      { path: "IDENTITY.md", content: "Bravo" },
    ],
  });

  const expectedFragments = [
    "# Project Context",
    "## AGENTS.md",
    "Alpha",
    "## IDENTITY.md",
    "Bravo",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("ignores context files with missing or blank paths", () => {
  // Entries with an undefined or whitespace-only path must be dropped; the valid entry is still rendered.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    contextFiles: [
      { path: undefined as unknown as string, content: "Missing path" },
      { path: " ", content: "Blank path" },
      { path: "AGENTS.md", content: "Alpha" },
    ],
  });

  for (const fragment of ["# Project Context", "## AGENTS.md", "Alpha"]) {
    expect(rendered).toContain(fragment);
  }
  for (const droppedContent of ["Missing path", "Blank path"]) {
    expect(rendered).not.toContain(droppedContent);
  }
});
it("adds SOUL guidance when a soul file is present", () => {
  // SOUL.md is supplied via both a "./" relative path and a backslash-separated path.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    contextFiles: [
      { path: "./SOUL.md", content: "Persona" },
      { path: "dir\\SOUL.md", content: "Persona Windows" },
    ],
  });

  const soulGuidance =
    "If SOUL.md is present, embody its persona and tone. Avoid stiff, generic replies; follow its guidance unless higher-priority instructions override it.";
  expect(rendered).toContain(soulGuidance);
});
it("summarizes the message tool when available", () => {
  // Expects the tool summary line, the dedicated subsection, and the silent-reply instruction.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    toolNames: ["message"],
  });

  const expectedFragments = [
    "message: Send messages and channel actions",
    "### message tool",
    `respond with ONLY: ${SILENT_REPLY_TOKEN}`,
  ];
  expectedFragments.forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
it("includes inline button style guidance when runtime supports inline buttons", () => {
  // The message tool plus the inlineButtons capability should produce the button schema and style notes.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    toolNames: ["message"],
    runtimeInfo: {
      channel: "telegram",
      capabilities: ["inlineButtons"],
    },
  });

  const buttonGuidance = [
    "buttons=[[{text,callback_data,style?}]]",
    "`style` can be `primary`, `success`, or `danger`",
  ];
  for (const fragment of buttonGuidance) {
    expect(rendered).toContain(fragment);
  }
});
it("includes runtime provider capabilities when present", () => {
  // Channel and capabilities from runtimeInfo are expected as key=value pairs.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    runtimeInfo: {
      channel: "telegram",
      capabilities: ["inlineButtons"],
    },
  });

  ["channel=telegram", "capabilities=inlineButtons"].forEach((pair) => {
    expect(rendered).toContain(pair);
  });
});
it("includes agent id in runtime when provided", () => {
  // Only the agent id pair is asserted, although a full runtimeInfo record is supplied.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    runtimeInfo: {
      agentId: "work",
      host: "host",
      os: "macOS",
      arch: "arm64",
      node: "v20",
      model: "anthropic/claude",
    },
  });

  const agentPair = "agent=work";
  expect(rendered).toContain(agentPair);
});
it("includes reasoning visibility hint", () => {
  // With reasoningLevel "off", the prompt should state the level and mention the related commands.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    reasoningLevel: "off",
  });

  for (const fragment of ["Reasoning: off", "/reasoning", "/status shows Reasoning"]) {
    expect(rendered).toContain(fragment);
  }
});
it("builds runtime line with agent and channel details", () => {
  // Calls buildRuntimeLine directly and checks every key=value pair expected in the result.
  const runtimeLine = buildRuntimeLine(
    {
      agentId: "work",
      host: "host",
      repoRoot: "/repo",
      os: "macOS",
      arch: "arm64",
      node: "v20",
      model: "anthropic/claude",
      defaultModel: "anthropic/claude-opus-4-5",
    },
    "telegram",
    ["inlineButtons"],
    "low",
  );

  const expectedPairs = [
    "agent=work",
    "host=host",
    "repo=/repo",
    "os=macOS (arm64)",
    "node=v20",
    "model=anthropic/claude",
    "default_model=anthropic/claude-opus-4-5",
    "channel=telegram",
    "capabilities=inlineButtons",
    "thinking=low",
  ];
  for (const pair of expectedPairs) {
    expect(runtimeLine).toContain(pair);
  }
});
it("describes sandboxed runtime and elevated when allowed", () => {
  // Sandbox is enabled with distinct host and container paths and elevated access allowed;
  // the expected text refers to each path in its own role.
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    sandboxInfo: {
      enabled: true,
      workspaceDir: "/tmp/sandbox",
      containerWorkspaceDir: "/workspace",
      workspaceAccess: "ro",
      agentWorkspaceMount: "/agent",
      elevated: { allowed: true, defaultLevel: "on" },
    },
  });

  const sandboxFragments = [
    "Your working directory is: /workspace",
    "For read/write/edit/apply_patch, file paths resolve against host workspace: /tmp/openclaw. For bash/exec commands, use sandbox container paths under /workspace (or relative paths from that workdir), not host paths.",
    "Sandbox container workdir: /workspace",
    "Sandbox host mount source (file tools bridge only; not valid inside sandbox exec): /tmp/sandbox",
    "You are running in a sandboxed runtime",
    "Sub-agents stay sandboxed",
    "User can toggle with /elevated on|off|ask|full.",
    "Current elevated level: on",
  ];
  for (const fragment of sandboxFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("includes reaction guidance when provided", () => {
  // Level "minimal" is passed lowercase; the expected sentence shows it as "MINIMAL".
  const rendered = buildAgentSystemPrompt({
    workspaceDir: "/tmp/openclaw",
    reactionGuidance: {
      level: "minimal",
      channel: "Telegram",
    },
  });

  ["## Reactions", "Reactions are enabled for Telegram in MINIMAL mode."].forEach((fragment) => {
    expect(rendered).toContain(fragment);
  });
});
});
describe("buildSubagentSystemPrompt", () => {
it("includes sub-agent spawning guidance for depth-1 orchestrator when maxSpawnDepth >= 2", () => {
  // Depth 1 with maxSpawnDepth 2: the spawning section and its guidance are expected.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc",
    task: "research task",
    childDepth: 1,
    maxSpawnDepth: 2,
  });

  const spawningFragments = [
    "## Sub-Agent Spawning",
    "You CAN spawn your own sub-agents",
    "sessions_spawn",
    "`subagents` tool",
    "announce their results back to you automatically",
    "Do NOT repeatedly poll `subagents list`",
  ];
  for (const fragment of spawningFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("does not include spawning guidance for depth-1 leaf when maxSpawnDepth == 1", () => {
  // Depth 1 with maxSpawnDepth 1: no spawning section and no "You CAN spawn" text.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc",
    task: "research task",
    childDepth: 1,
    maxSpawnDepth: 1,
  });

  ["## Sub-Agent Spawning", "You CAN spawn"].forEach((fragment) => {
    expect(rendered).not.toContain(fragment);
  });
});
it("includes leaf worker note for depth-2 sub-sub-agents", () => {
  // Depth 2 with maxSpawnDepth 2: the spawning section is present but states that no further spawning is possible.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc:subagent:def",
    task: "leaf task",
    childDepth: 2,
    maxSpawnDepth: 2,
  });

  for (const fragment of [
    "## Sub-Agent Spawning",
    "leaf worker",
    "CANNOT spawn further sub-agents",
  ]) {
    expect(rendered).toContain(fragment);
  }
});
it("uses 'parent orchestrator' label for depth-2 agents", () => {
  // At depth 2 the spawner is referred to as "the parent orchestrator" in both phrases.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc:subagent:def",
    task: "leaf task",
    childDepth: 2,
    maxSpawnDepth: 2,
  });

  const labelPhrases = [
    "spawned by the parent orchestrator",
    "reported to the parent orchestrator",
  ];
  labelPhrases.forEach((phrase) => {
    expect(rendered).toContain(phrase);
  });
});
it("uses 'main agent' label for depth-1 agents", () => {
  // At depth 1 the spawner is referred to as "the main agent" in both phrases.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc",
    task: "orchestrator task",
    childDepth: 1,
    maxSpawnDepth: 2,
  });

  const labelPhrases = ["spawned by the main agent", "reported to the main agent"];
  labelPhrases.forEach((phrase) => {
    expect(rendered).toContain(phrase);
  });
});
it("includes recovery guidance for compacted/truncated tool output", () => {
  // The prompt must quote both placeholder markers and point to offset/limit reads instead of `cat`.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc",
    task: "investigate logs",
    childDepth: 1,
    maxSpawnDepth: 2,
  });

  const recoveryFragments = [
    "[compacted: tool output removed to free context]",
    "[truncated: output exceeded context limit]",
    "offset/limit",
    "instead of full-file `cat`",
  ];
  for (const fragment of recoveryFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("defaults to depth 1 and maxSpawnDepth 1 when not provided", () => {
  // Neither childDepth nor maxSpawnDepth is passed.
  const rendered = buildSubagentSystemPrompt({
    childSessionKey: "agent:main:subagent:abc",
    task: "basic task",
  });

  // With the defaults (maxSpawnDepth 1, depth 1 is a leaf) there is no spawning guidance.
  expect(rendered).not.toContain("## Sub-Agent Spawning");
  // The depth-1 label is used.
  expect(rendered).toContain("spawned by the main agent");
});
});