diff --git a/CHANGELOG.md b/CHANGELOG.md index af137d95977..4cc1f276167 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000. - Auto-reply: poison inbound message dedupe after replay-unsafe provider/runtime failures so retries stay safe before visible progress but cannot duplicate messages after block output, tool side effects, or session progress. Fixes #69303; keeps #58549 and #64606 as duplicate validation. Thanks @martingarramon, @NikolaFC, and @zeroth-blip. - Agents/model fallback: jump directly to a known later live-session model redirect instead of walking unrelated fallback candidates, while preserving the already-landed live-session/fallback loop guard. Fixes #57471; related loop family already closed via #58496. Thanks @yuxiaoyang2007-prog. - Gateway/Bonjour: keep @homebridge/ciao cancellation handlers registered across advertiser restarts so late probing cancellations cannot crash Linux and other mDNS-churned gateways. Thanks @codex. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 6ea2680b50f..2d0e0e2b4d3 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -7fa6e35bb9f9d3096d6281f141488be0dcfe15de40dc4f5c0305eb1ff2bc60b6 config-baseline.json -5f5fb87fd46f9cbb84d8af17e00ae3c4b74062e8ad517bc2260ba83da2e9014f config-baseline.core.json +4d1995e41b659e484afb5a48d6fca0558337123200a4a537f556ca38e8e829e7 config-baseline.json +3245c9a013c55ee8a24db52d5e88c42bc86e26f822d4a144fc7f37fc71e05fa8 config-baseline.core.json 7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3 config-baseline.channel.json f9e0174988718959fe1923a54496ec5b9262721fe1e7306f32ccb1316d9d9c3f config-baseline.plugin.json diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index 5458564605b..f88afcff490 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -859,6 +859,7 @@ Notes: - Set `logging.file` for a stable path. - `consoleLevel` bumps to `debug` when `--verbose`. - `maxFileBytes`: maximum active log file size in bytes before rotation (positive integer; default: `104857600` = 100 MB). OpenClaw keeps up to five numbered archives beside the active file. +- `redactSensitive` / `redactPatterns`: best-effort masking for console output, file logs, OTLP log records, and persisted session transcript text. --- diff --git a/docs/gateway/logging.md b/docs/gateway/logging.md index a3b47cfca8b..c4c3d9d883b 100644 --- a/docs/gateway/logging.md +++ b/docs/gateway/logging.md @@ -54,9 +54,10 @@ You can tune console verbosity independently via: ## Redaction -OpenClaw can mask sensitive tokens before log output leaves the process. The -same redaction policy is applied at console and file-log sinks, so matching -secret values are masked before JSONL lines are written to disk. +OpenClaw can mask sensitive tokens before log or transcript output leaves the +process. The same redaction policy is applied at console, file-log, OTLP +log-record, and session transcript text sinks, so matching secret values are +masked before JSONL lines or messages are written to disk. - `logging.redactSensitive`: `off` | `tools` (default: `tools`) - `logging.redactPatterns`: array of regex strings (overrides defaults) diff --git a/docs/gateway/security/index.md b/docs/gateway/security/index.md index 55ac84121ab..6f70c5c39d1 100644 --- a/docs/gateway/security/index.md +++ b/docs/gateway/security/index.md @@ -999,7 +999,7 @@ Logs and transcripts can leak sensitive info even when access controls are corre Recommendations: -- Keep tool summary redaction on (`logging.redactSensitive: "tools"`; default). +- Keep log and transcript redaction on (`logging.redactSensitive: "tools"`; default). - Add custom patterns for your environment via `logging.redactPatterns` (tokens, hostnames, internal URLs). - When sharing diagnostics, prefer `openclaw status --all` (pasteable, secrets redacted) over raw logs. - Prune old session transcripts and log files if you don’t need long retention. diff --git a/docs/logging.md b/docs/logging.md index 8fb1489439c..c73aff54b82 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -167,14 +167,16 @@ file log levels. ### Redaction -Tool summaries can redact sensitive tokens before they hit the console: +OpenClaw can redact sensitive tokens before they hit console output, file logs, +OTLP log records, or persisted session transcript text: - `logging.redactSensitive`: `off` | `tools` (default: `tools`) - `logging.redactPatterns`: list of regex strings to override the default set -Redaction applies at the logging sinks for **console output**, **stderr-routed -console diagnostics**, and **file logs**. File logs stay JSONL, but matching -secret values are masked before the line is written to disk. +File logs and session transcripts stay JSONL, but matching secret values are +masked before the line or message is written to disk. Redaction is best-effort: +it applies to text-bearing message content and log strings, not every +identifier or binary payload field. ## Diagnostics and OpenTelemetry diff --git a/src/agents/pi-embedded-runner.guard.test.ts b/src/agents/pi-embedded-runner.guard.test.ts index d93a3520325..fbc1df5bb6f 100644 --- a/src/agents/pi-embedded-runner.guard.test.ts +++ b/src/agents/pi-embedded-runner.guard.test.ts @@ -1,6 +1,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { SessionManager } from "@mariozechner/pi-coding-agent"; import { describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; import { guardSessionManager } from "./session-tool-result-guard-wrapper.js"; import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; @@ -35,4 +36,46 @@ describe("guardSessionManager integration", () => { "assistant", ]); }); + + it("redacts configured text patterns before persisting transcript messages", () => { + const cfg = { + logging: { + redactSensitive: "tools", + redactPatterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`], + }, + } satisfies OpenClawConfig; + const sm = guardSessionManager(SessionManager.inMemory(), { config: cfg }); + const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void; + + appendMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "the email is peter@dc.io", thinkingSignature: "sig" }, + { type: "text", text: "contact peter@dc.io" }, + { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp/peter@dc.io" } }, + ], + stopReason: "toolUse", + } as AgentMessage); + appendMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "peter@dc.io\n" }], + isError: false, + } as AgentMessage); + + const messages = sm + .getEntries() + .filter((e) => e.type === "message") + .map((e) => (e as { message: AgentMessage }).message); + const serialized = JSON.stringify(messages); + + expect(serialized).not.toContain("the email is peter@dc.io"); + expect(serialized).not.toContain("contact peter@dc.io"); + expect(serialized).not.toContain("peter@dc.io\\n"); + expect(serialized).toContain('"thinking":"the email is peter@d***.io"'); + expect(serialized).toContain('"text":"contact peter@d***.io"'); + expect(serialized).toContain('"text":"peter@d***.io\\n"'); + expect(serialized).toContain('"/tmp/peter@dc.io"'); + }); }); diff --git a/src/agents/session-tool-result-guard-wrapper.ts b/src/agents/session-tool-result-guard-wrapper.ts index 79939e7ab96..cbbcb512f20 100644 --- a/src/agents/session-tool-result-guard-wrapper.ts +++ b/src/agents/session-tool-result-guard-wrapper.ts @@ -1,6 +1,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { SessionManager } from "@mariozechner/pi-coding-agent"; import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { redactSensitiveText } from "../logging/redact.js"; import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; import { applyInputProvenanceToUserMessage, @@ -16,6 +17,71 @@ export type GuardedSessionManager = SessionManager & { clearPendingToolResults?: () => void; }; +function redactTranscriptText(value: string, cfg?: OpenClawConfig): string { + if (cfg?.logging?.redactSensitive === "off") { + return value; + } + return redactSensitiveText(value, { + mode: cfg?.logging?.redactSensitive, + patterns: cfg?.logging?.redactPatterns, + }); +} + +function redactTranscriptContentBlock(block: unknown, cfg?: OpenClawConfig): unknown { + if (!block || typeof block !== "object" || Array.isArray(block)) { + return block; + } + const source = block as Record; + let next: Record | null = null; + const assign = (key: string, value: string) => { + const redacted = redactTranscriptText(value, cfg); + if (redacted === value) { + return; + } + next ??= { ...source }; + next[key] = redacted; + }; + + if (typeof source.text === "string") { + assign("text", source.text); + } + if (typeof source.thinking === "string") { + assign("thinking", source.thinking); + } + if (typeof source.partialJson === "string") { + assign("partialJson", source.partialJson); + } + return next ?? block; +} + +function redactTranscriptContent(content: unknown, cfg?: OpenClawConfig): unknown { + if (typeof content === "string") { + return redactTranscriptText(content, cfg); + } + if (!Array.isArray(content)) { + return content; + } + let changed = false; + const redacted = content.map((block) => { + const next = redactTranscriptContentBlock(block, cfg); + changed ||= next !== block; + return next; + }); + return changed ? redacted : content; +} + +function redactTranscriptMessage(message: AgentMessage, cfg?: OpenClawConfig): AgentMessage { + const source = message as unknown as Record; + const redactedContent = redactTranscriptContent(source.content, cfg); + if (redactedContent === source.content) { + return message; + } + return { + ...source, + content: redactedContent, + } as unknown as AgentMessage; +} + /** * Apply the tool-result guard to a SessionManager exactly once and expose * a flush method on the instance for easy teardown handling. @@ -38,14 +104,31 @@ export function guardSessionManager( } const hookRunner = getGlobalHookRunner(); - const beforeMessageWrite = hookRunner?.hasHooks("before_message_write") - ? (event: { message: import("@mariozechner/pi-agent-core").AgentMessage }) => { - return hookRunner.runBeforeMessageWrite(event, { - agentId: opts?.agentId, - sessionKey: opts?.sessionKey, - }); + const beforeMessageWrite = (event: { + message: import("@mariozechner/pi-agent-core").AgentMessage; + }) => { + let message = event.message; + let changed = false; + if (hookRunner?.hasHooks("before_message_write")) { + const result = hookRunner.runBeforeMessageWrite(event, { + agentId: opts?.agentId, + sessionKey: opts?.sessionKey, + }); + if (result?.block) { + return result; } - : undefined; + if (result?.message) { + message = result.message; + changed = true; + } + } + const redacted = redactTranscriptMessage(message, opts?.config); + if (redacted !== message) { + message = redacted; + changed = true; + } + return changed ? { message } : undefined; + }; const transform = hookRunner?.hasHooks("tool_result_persist") ? ( diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index fba40897f65..47149742c34 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -466,7 +466,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { ], title: "Sensitive Data Redaction Mode", description: - 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.', + 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.', }, redactPatterns: { type: "array", @@ -475,7 +475,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, title: "Custom Redaction Patterns", description: - "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", + "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", }, }, additionalProperties: false, @@ -23982,12 +23982,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, "logging.redactSensitive": { label: "Sensitive Data Redaction Mode", - help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.', + help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.', tags: ["privacy", "observability"], }, "logging.redactPatterns": { label: "Custom Redaction Patterns", - help: "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", + help: "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", tags: ["privacy", "observability"], }, "cli.banner": { diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 0db68fec08d..e56682f1eb4 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -43,9 +43,9 @@ export const FIELD_HELP: Record = { "logging.consoleStyle": 'Console output format style: "pretty", "compact", or "json" based on operator and ingestion needs. Use json for machine parsing pipelines and pretty/compact for human-first terminal workflows.', "logging.redactSensitive": - 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.', + 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.', "logging.redactPatterns": - "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", + "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.", cli: "CLI presentation controls for local command output behavior such as banner and tagline style. Use this section to keep startup output aligned with operator preference without changing runtime behavior.", "cli.banner": "CLI startup banner controls for title/version line and tagline style behavior. Keep banner enabled for fast version/context checks, then tune tagline mode to your preferred noise level.", diff --git a/src/config/types.base.ts b/src/config/types.base.ts index 8b3a9cf82dd..8c5bcf945be 100644 --- a/src/config/types.base.ts +++ b/src/config/types.base.ts @@ -225,9 +225,9 @@ export type LoggingConfig = { maxFileBytes?: number; consoleLevel?: "silent" | "fatal" | "error" | "warn" | "info" | "debug" | "trace"; consoleStyle?: "pretty" | "compact" | "json"; - /** Redact sensitive tokens in tool summaries. Default: "tools". */ + /** Redact sensitive tokens in log sinks and persisted transcript text. Default: "tools". */ redactSensitive?: "off" | "tools"; - /** Regex patterns used to redact sensitive tokens (defaults apply when unset). */ + /** Regex patterns used to redact sensitive tokens from logs and transcripts. */ redactPatterns?: string[]; }; diff --git a/src/logging/redact.test.ts b/src/logging/redact.test.ts index ebaf0319376..dfa4f5e6e0f 100644 --- a/src/logging/redact.test.ts +++ b/src/logging/redact.test.ts @@ -132,6 +132,16 @@ describe("redactSensitiveText", () => { expect(output).toBe("token=abcdef…ghij"); }); + it("honors escaped character classes in custom patterns", () => { + const input = "contact peter@dc.io"; + const output = redactSensitiveText(input, { + mode: "tools", + patterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`], + }); + expect(output).toBe("contact peter@d***.io"); + expect(output).not.toContain("peter@dc.io"); + }); + it("ignores unsafe nested-repetition custom patterns", () => { const input = `${"a".repeat(28)}!`; const output = redactSensitiveText(input, { diff --git a/src/security/safe-regex.test.ts b/src/security/safe-regex.test.ts index 439b56091e7..1de9f51d977 100644 --- a/src/security/safe-regex.test.ts +++ b/src/security/safe-regex.test.ts @@ -12,6 +12,7 @@ describe("safe regex", () => { ["(a|aa)+$", true], ["^(?:foo|bar)$", false], ["^(ab|cd)+$", false], + [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`, false], ] as const)("classifies nested repetition for %s", (pattern, expected) => { expect(hasNestedRepetition(pattern)).toBe(expected); }); diff --git a/src/security/safe-regex.ts b/src/security/safe-regex.ts index e197929c4a4..7b10e21582c 100644 --- a/src/security/safe-regex.ts +++ b/src/security/safe-regex.ts @@ -140,19 +140,23 @@ function tokenizePattern(source: string): PatternToken[] { for (let i = 0; i < source.length; i += 1) { const ch = source[i]; - if (ch === "\\") { - i += 1; - tokens.push({ kind: "simple-token" }); - continue; - } - if (inCharClass) { + if (ch === "\\") { + i += 1; + continue; + } if (ch === "]") { inCharClass = false; } continue; } + if (ch === "\\") { + i += 1; + tokens.push({ kind: "simple-token" }); + continue; + } + if (ch === "[") { inCharClass = true; tokens.push({ kind: "simple-token" });