From 55d1a2e0e0d760bd3649bca05d50c6070662ff74 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sun, 26 Apr 2026 11:38:38 -0700 Subject: [PATCH] fix(logging): redact persisted transcript text (cherry picked from commit 406ae72fd2789c78053f6b94006ec8f488bc1c00) --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 4 +- docs/gateway/configuration-reference.md | 1 + docs/gateway/logging.md | 8 +- docs/gateway/security/index.md | 2 +- docs/logging.md | 10 +- src/agents/pi-embedded-runner.guard.test.ts | 43 ++++++++ .../session-tool-result-guard-wrapper.ts | 97 +++++++++++++++++-- src/config/schema.base.generated.ts | 8 +- src/config/schema.help.ts | 4 +- src/config/types.base.ts | 4 +- src/logging/redact.test.ts | 10 ++ src/security/safe-regex.test.ts | 1 + src/security/safe-regex.ts | 16 +-- 14 files changed, 178 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ceb24b63304..b7fad6e3bfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -122,6 +122,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Auto-reply: poison inbound message dedupe after replay-unsafe provider/runtime failures so retries stay safe before visible progress but cannot duplicate messages after block output, tool side effects, or session progress. Fixes #69303; keeps #58549 and #64606 as duplicate validation. Thanks @martingarramon, @NikolaFC, and @zeroth-blip. +- Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000. - Agents/OpenAI: keep Responses web search compatible with minimal thinking by raising `web_search` requests to the lowest supported reasoning effort instead of sending a rejected minimal payload. - Agents/tools: honor the `bundle-mcp` allowlist token when deciding whether bundled MCP tools are available, so restricted tool policies can still enable bundled MCP without exposing unrelated tools. - Agents/model fallback: jump directly to a known later live-session model redirect instead of walking unrelated fallback candidates, while preserving the already-landed live-session/fallback loop guard. Fixes #57471; related loop family already closed via #58496. Thanks @yuxiaoyang2007-prog. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 4270a1983a3..ba8cafbb740 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -91a6cec6c5bdf4b0bf955a058955278990a1af31f32f8fcf2ac26d7548fb99e5 config-baseline.json -5f5fb87fd46f9cbb84d8af17e00ae3c4b74062e8ad517bc2260ba83da2e9014f config-baseline.core.json +a62ead999508b18d9ea3e1c129e3cdd44244af0ff0e6f81653dfced9aa52019a config-baseline.json +3245c9a013c55ee8a24db52d5e88c42bc86e26f822d4a144fc7f37fc71e05fa8 config-baseline.core.json 080c0a4f2d4175d6d7ab1e38f76b21de32669055c518d75c96e784865d89bf25 config-baseline.channel.json f9e0174988718959fe1923a54496ec5b9262721fe1e7306f32ccb1316d9d9c3f config-baseline.plugin.json diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index 5458564605b..f88afcff490 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -859,6 +859,7 @@ Notes: - Set `logging.file` for a stable path. - `consoleLevel` bumps to `debug` when `--verbose`. - `maxFileBytes`: maximum active log file size in bytes before rotation (positive integer; default: `104857600` = 100 MB). OpenClaw keeps up to five numbered archives beside the active file. +- `redactSensitive` / `redactPatterns`: best-effort masking for console output, file logs, OTLP log records, and persisted session transcript text. --- diff --git a/docs/gateway/logging.md b/docs/gateway/logging.md index 7fe1a17d1c1..c4c3d9d883b 100644 --- a/docs/gateway/logging.md +++ b/docs/gateway/logging.md @@ -52,10 +52,12 @@ You can tune console verbosity independently via: - `logging.consoleLevel` (default `info`) - `logging.consoleStyle` (`pretty` | `compact` | `json`) -## Tool summary redaction +## Redaction -Verbose tool summaries (e.g. `🛠️ Exec: ...`) can mask sensitive tokens before they hit the -console stream. This is **tools-only** and does not alter file logs. +OpenClaw can mask sensitive tokens before log or transcript output leaves the +process. The same redaction policy is applied at console, file-log, OTLP +log-record, and session transcript text sinks, so matching secret values are +masked before JSONL lines or messages are written to disk. - `logging.redactSensitive`: `off` | `tools` (default: `tools`) - `logging.redactPatterns`: array of regex strings (overrides defaults) diff --git a/docs/gateway/security/index.md b/docs/gateway/security/index.md index 55ac84121ab..6f70c5c39d1 100644 --- a/docs/gateway/security/index.md +++ b/docs/gateway/security/index.md @@ -999,7 +999,7 @@ Logs and transcripts can leak sensitive info even when access controls are corre Recommendations: -- Keep tool summary redaction on (`logging.redactSensitive: "tools"`; default). +- Keep log and transcript redaction on (`logging.redactSensitive: "tools"`; default). - Add custom patterns for your environment via `logging.redactPatterns` (tokens, hostnames, internal URLs). - When sharing diagnostics, prefer `openclaw status --all` (pasteable, secrets redacted) over raw logs. - Prune old session transcripts and log files if you don’t need long retention. diff --git a/docs/logging.md b/docs/logging.md index 8fb1489439c..c73aff54b82 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -167,14 +167,16 @@ file log levels. ### Redaction -Tool summaries can redact sensitive tokens before they hit the console: +OpenClaw can redact sensitive tokens before they hit console output, file logs, +OTLP log records, or persisted session transcript text: - `logging.redactSensitive`: `off` | `tools` (default: `tools`) - `logging.redactPatterns`: list of regex strings to override the default set -Redaction applies at the logging sinks for **console output**, **stderr-routed -console diagnostics**, and **file logs**. File logs stay JSONL, but matching -secret values are masked before the line is written to disk. +File logs and session transcripts stay JSONL, but matching secret values are +masked before the line or message is written to disk. Redaction is best-effort: +it applies to text-bearing message content and log strings, not every +identifier or binary payload field. ## Diagnostics and OpenTelemetry diff --git a/src/agents/pi-embedded-runner.guard.test.ts b/src/agents/pi-embedded-runner.guard.test.ts index d93a3520325..fbc1df5bb6f 100644 --- a/src/agents/pi-embedded-runner.guard.test.ts +++ b/src/agents/pi-embedded-runner.guard.test.ts @@ -1,6 +1,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { SessionManager } from "@mariozechner/pi-coding-agent"; import { describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; import { guardSessionManager } from "./session-tool-result-guard-wrapper.js"; import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; @@ -35,4 +36,46 @@ describe("guardSessionManager integration", () => { "assistant", ]); }); + + it("redacts configured text patterns before persisting transcript messages", () => { + const cfg = { + logging: { + redactSensitive: "tools", + redactPatterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`], + }, + } satisfies OpenClawConfig; + const sm = guardSessionManager(SessionManager.inMemory(), { config: cfg }); + const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void; + + appendMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "the email is peter@dc.io", thinkingSignature: "sig" }, + { type: "text", text: "contact peter@dc.io" }, + { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp/peter@dc.io" } }, + ], + stopReason: "toolUse", + } as AgentMessage); + appendMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "peter@dc.io\n" }], + isError: false, + } as AgentMessage); + + const messages = sm + .getEntries() + .filter((e) => e.type === "message") + .map((e) => (e as { message: AgentMessage }).message); + const serialized = JSON.stringify(messages); + + expect(serialized).not.toContain("the email is peter@dc.io"); + expect(serialized).not.toContain("contact peter@dc.io"); + expect(serialized).not.toContain("peter@dc.io\\n"); + expect(serialized).toContain('"thinking":"the email is peter@d***.io"'); + expect(serialized).toContain('"text":"contact peter@d***.io"'); + expect(serialized).toContain('"text":"peter@d***.io\\n"'); + expect(serialized).toContain('"/tmp/peter@dc.io"'); + }); }); diff --git a/src/agents/session-tool-result-guard-wrapper.ts b/src/agents/session-tool-result-guard-wrapper.ts index 79939e7ab96..cbbcb512f20 100644 --- a/src/agents/session-tool-result-guard-wrapper.ts +++ b/src/agents/session-tool-result-guard-wrapper.ts @@ -1,6 +1,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { SessionManager } from "@mariozechner/pi-coding-agent"; import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { redactSensitiveText } from "../logging/redact.js"; import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; import { applyInputProvenanceToUserMessage, @@ -16,6 +17,71 @@ export type GuardedSessionManager = SessionManager & { clearPendingToolResults?: () => void; }; +function redactTranscriptText(value: string, cfg?: OpenClawConfig): string { + if (cfg?.logging?.redactSensitive === "off") { + return value; + } + return redactSensitiveText(value, { + mode: cfg?.logging?.redactSensitive, + patterns: cfg?.logging?.redactPatterns, + }); +} + +function redactTranscriptContentBlock(block: unknown, cfg?: OpenClawConfig): unknown { + if (!block || typeof block !== "object" || Array.isArray(block)) { + return block; + } + const source = block as Record; + let next: Record | null = null; + const assign = (key: string, value: string) => { + const redacted = redactTranscriptText(value, cfg); + if (redacted === value) { + return; + } + next ??= { ...source }; + next[key] = redacted; + }; + + if (typeof source.text === "string") { + assign("text", source.text); + } + if (typeof source.thinking === "string") { + assign("thinking", source.thinking); + } + if (typeof source.partialJson === "string") { + assign("partialJson", source.partialJson); + } + return next ?? block; +} + +function redactTranscriptContent(content: unknown, cfg?: OpenClawConfig): unknown { + if (typeof content === "string") { + return redactTranscriptText(content, cfg); + } + if (!Array.isArray(content)) { + return content; + } + let changed = false; + const redacted = content.map((block) => { + const next = redactTranscriptContentBlock(block, cfg); + changed ||= next !== block; + return next; + }); + return changed ? redacted : content; +} + +function redactTranscriptMessage(message: AgentMessage, cfg?: OpenClawConfig): AgentMessage { + const source = message as unknown as Record; + const redactedContent = redactTranscriptContent(source.content, cfg); + if (redactedContent === source.content) { + return message; + } + return { + ...source, + content: redactedContent, + } as unknown as AgentMessage; +} + /** * Apply the tool-result guard to a SessionManager exactly once and expose * a flush method on the instance for easy teardown handling. @@ -38,14 +104,31 @@ export function guardSessionManager( } const hookRunner = getGlobalHookRunner(); - const beforeMessageWrite = hookRunner?.hasHooks("before_message_write") - ? (event: { message: import("@mariozechner/pi-agent-core").AgentMessage }) => { - return hookRunner.runBeforeMessageWrite(event, { - agentId: opts?.agentId, - sessionKey: opts?.sessionKey, - }); + const beforeMessageWrite = (event: { + message: import("@mariozechner/pi-agent-core").AgentMessage; + }) => { + let message = event.message; + let changed = false; + if (hookRunner?.hasHooks("before_message_write")) { + const result = hookRunner.runBeforeMessageWrite(event, { + agentId: opts?.agentId, + sessionKey: opts?.sessionKey, + }); + if (result?.block) { + return result; } - : undefined; + if (result?.message) { + message = result.message; + changed = true; + } + } + const redacted = redactTranscriptMessage(message, opts?.config); + if (redacted !== message) { + message = redacted; + changed = true; + } + return changed ? { message } : undefined; + }; const transform = hookRunner?.hasHooks("tool_result_persist") ? ( diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index bc3103395d6..ce47712f9ef 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -466,7 +466,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { ], title: "Sensitive Data Redaction Mode", description: - 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.', + 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.', }, redactPatterns: { type: "array", @@ -475,7 +475,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, title: "Custom Redaction Patterns", description: - "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", + "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", }, }, additionalProperties: false, @@ -23982,12 +23982,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, "logging.redactSensitive": { label: "Sensitive Data Redaction Mode", - help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.', + help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.', tags: ["privacy", "observability"], }, "logging.redactPatterns": { label: "Custom Redaction Patterns", - help: "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", + help: "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", tags: ["privacy", "observability"], }, "cli.banner": { diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 0db68fec08d..e56682f1eb4 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -43,9 +43,9 @@ export const FIELD_HELP: Record = { "logging.consoleStyle": 'Console output format style: "pretty", "compact", or "json" based on operator and ingestion needs. Use json for machine parsing pipelines and pretty/compact for human-first terminal workflows.', "logging.redactSensitive": - 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.', + 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.', "logging.redactPatterns": - "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", + "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", cli: "CLI presentation controls for local command output behavior such as banner and tagline style. Use this section to keep startup output aligned with operator preference without changing runtime behavior.", "cli.banner": "CLI startup banner controls for title/version line and tagline style behavior. Keep banner enabled for fast version/context checks, then tune tagline mode to your preferred noise level.", diff --git a/src/config/types.base.ts b/src/config/types.base.ts index 8b3a9cf82dd..8c5bcf945be 100644 --- a/src/config/types.base.ts +++ b/src/config/types.base.ts @@ -225,9 +225,9 @@ export type LoggingConfig = { maxFileBytes?: number; consoleLevel?: "silent" | "fatal" | "error" | "warn" | "info" | "debug" | "trace"; consoleStyle?: "pretty" | "compact" | "json"; - /** Redact sensitive tokens in tool summaries. Default: "tools". */ + /** Redact sensitive tokens in log sinks and persisted transcript text. Default: "tools". */ redactSensitive?: "off" | "tools"; - /** Regex patterns used to redact sensitive tokens (defaults apply when unset). */ + /** Regex patterns used to redact sensitive tokens from logs and transcripts. */ redactPatterns?: string[]; }; diff --git a/src/logging/redact.test.ts b/src/logging/redact.test.ts index ebaf0319376..dfa4f5e6e0f 100644 --- a/src/logging/redact.test.ts +++ b/src/logging/redact.test.ts @@ -132,6 +132,16 @@ describe("redactSensitiveText", () => { expect(output).toBe("token=abcdef…ghij"); }); + it("honors escaped character classes in custom patterns", () => { + const input = "contact peter@dc.io"; + const output = redactSensitiveText(input, { + mode: "tools", + patterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`], + }); + expect(output).toBe("contact peter@d***.io"); + expect(output).not.toContain("peter@dc.io"); + }); + it("ignores unsafe nested-repetition custom patterns", () => { const input = `${"a".repeat(28)}!`; const output = redactSensitiveText(input, { diff --git a/src/security/safe-regex.test.ts b/src/security/safe-regex.test.ts index 439b56091e7..1de9f51d977 100644 --- a/src/security/safe-regex.test.ts +++ b/src/security/safe-regex.test.ts @@ -12,6 +12,7 @@ describe("safe regex", () => { ["(a|aa)+$", true], ["^(?:foo|bar)$", false], ["^(ab|cd)+$", false], + [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`, false], ] as const)("classifies nested repetition for %s", (pattern, expected) => { expect(hasNestedRepetition(pattern)).toBe(expected); }); diff --git a/src/security/safe-regex.ts b/src/security/safe-regex.ts index e197929c4a4..7b10e21582c 100644 --- a/src/security/safe-regex.ts +++ b/src/security/safe-regex.ts @@ -140,19 +140,23 @@ function tokenizePattern(source: string): PatternToken[] { for (let i = 0; i < source.length; i += 1) { const ch = source[i]; - if (ch === "\\") { - i += 1; - tokens.push({ kind: "simple-token" }); - continue; - } - if (inCharClass) { + if (ch === "\\") { + i += 1; + continue; + } if (ch === "]") { inCharClass = false; } continue; } + if (ch === "\\") { + i += 1; + tokens.push({ kind: "simple-token" }); + continue; + } + if (ch === "[") { inCharClass = true; tokens.push({ kind: "simple-token" });