fix(logging): redact persisted transcript text

(cherry picked from commit 406ae72fd2)
2026-05-06 07:30:43 +00:00 · 2026-04-26 11:38:38 -07:00
parent c8972376cb
commit 55d1a2e0e0
14 changed files with 178 additions and 31 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -122,6 +122,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes

 - Auto-reply: poison inbound message dedupe after replay-unsafe provider/runtime failures so retries stay safe before visible progress but cannot duplicate messages after block output, tool side effects, or session progress. Fixes #69303; keeps #58549 and #64606 as duplicate validation. Thanks @martingarramon, @NikolaFC, and @zeroth-blip.
+- Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000.
 - Agents/OpenAI: keep Responses web search compatible with minimal thinking by raising `web_search` requests to the lowest supported reasoning effort instead of sending a rejected minimal payload.
 - Agents/tools: honor the `bundle-mcp` allowlist token when deciding whether bundled MCP tools are available, so restricted tool policies can still enable bundled MCP without exposing unrelated tools.
 - Agents/model fallback: jump directly to a known later live-session model redirect instead of walking unrelated fallback candidates, while preserving the already-landed live-session/fallback loop guard. Fixes #57471; related loop family already closed via #58496. Thanks @yuxiaoyang2007-prog.
--- a/docs/.generated/config-baseline.sha256
+++ b/docs/.generated/config-baseline.sha256
@@ -1,4 +1,4 @@
-91a6cec6c5bdf4b0bf955a058955278990a1af31f32f8fcf2ac26d7548fb99e5  config-baseline.json
-5f5fb87fd46f9cbb84d8af17e00ae3c4b74062e8ad517bc2260ba83da2e9014f  config-baseline.core.json
+a62ead999508b18d9ea3e1c129e3cdd44244af0ff0e6f81653dfced9aa52019a  config-baseline.json
+3245c9a013c55ee8a24db52d5e88c42bc86e26f822d4a144fc7f37fc71e05fa8  config-baseline.core.json
 080c0a4f2d4175d6d7ab1e38f76b21de32669055c518d75c96e784865d89bf25  config-baseline.channel.json
 f9e0174988718959fe1923a54496ec5b9262721fe1e7306f32ccb1316d9d9c3f  config-baseline.plugin.json
--- a/docs/gateway/configuration-reference.md
+++ b/docs/gateway/configuration-reference.md
@@ -859,6 +859,7 @@ Notes:
 - Set `logging.file` for a stable path.
 - `consoleLevel` bumps to `debug` when `--verbose`.
 - `maxFileBytes`: maximum active log file size in bytes before rotation (positive integer; default: `104857600` = 100 MB). OpenClaw keeps up to five numbered archives beside the active file.
+- `redactSensitive` / `redactPatterns`: best-effort masking for console output, file logs, OTLP log records, and persisted session transcript text.

 ---

--- a/docs/gateway/logging.md
+++ b/docs/gateway/logging.md
@@ -52,10 +52,12 @@ You can tune console verbosity independently via:
 - `logging.consoleLevel` (default `info`)
 - `logging.consoleStyle` (`pretty` | `compact` | `json`)

-## Tool summary redaction
+## Redaction

-Verbose tool summaries (e.g. `🛠️ Exec: ...`) can mask sensitive tokens before they hit the
-console stream. This is **tools-only** and does not alter file logs.
+OpenClaw can mask sensitive tokens before log or transcript output leaves the
+process. The same best-effort redaction policy is applied at console, file-log,
+OTLP log-record, and session transcript text sinks, so matching secret values in
+log strings and message text are masked before they are written to disk.

 - `logging.redactSensitive`: `off` | `tools` (default: `tools`)
 - `logging.redactPatterns`: array of regex strings (overrides defaults)
--- a/docs/gateway/security/index.md
+++ b/docs/gateway/security/index.md
@@ -999,7 +999,7 @@ Logs and transcripts can leak sensitive info even when access controls are corre

 Recommendations:

- Keep tool summary redaction on (`logging.redactSensitive: "tools"`; default).
+- Keep log and transcript redaction on (`logging.redactSensitive: "tools"`; default).
 - Add custom patterns for your environment via `logging.redactPatterns` (tokens, hostnames, internal URLs).
 - When sharing diagnostics, prefer `openclaw status --all` (pasteable, secrets redacted) over raw logs.
 - Prune old session transcripts and log files if you don’t need long retention.
--- a/docs/logging.md
+++ b/docs/logging.md
@@ -167,14 +167,16 @@ file log levels.

 ### Redaction

-Tool summaries can redact sensitive tokens before they hit the console:
+OpenClaw can redact sensitive tokens before they hit console output, file logs,
+OTLP log records, or persisted session transcript text:

 - `logging.redactSensitive`: `off` | `tools` (default: `tools`)
 - `logging.redactPatterns`: list of regex strings to override the default set

-Redaction applies at the logging sinks for **console output**, **stderr-routed
-console diagnostics**, and **file logs**. File logs stay JSONL, but matching
-secret values are masked before the line is written to disk.
+File logs and session transcripts stay JSONL, but matching secret values are
+masked before the line or message is written to disk. Redaction is best-effort:
+it applies to text-bearing message content and log strings, not every
+identifier or binary payload field.

 ## Diagnostics and OpenTelemetry

--- a/src/agents/pi-embedded-runner.guard.test.ts
+++ b/src/agents/pi-embedded-runner.guard.test.ts
@@ -1,6 +1,7 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import { SessionManager } from "@mariozechner/pi-coding-agent";
 import { describe, expect, it } from "vitest";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { guardSessionManager } from "./session-tool-result-guard-wrapper.js";
 import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js";

@@ -35,4 +36,46 @@ describe("guardSessionManager integration", () => {
      "assistant",
    ]);
  });
+
+  it("redacts configured text patterns before persisting transcript messages", () => {
+    const cfg = {
+      logging: {
+        redactSensitive: "tools",
+        redactPatterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`],
+      },
+    } satisfies OpenClawConfig;
+    const sm = guardSessionManager(SessionManager.inMemory(), { config: cfg });
+    const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void;
+
+    appendMessage({
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "the email is peter@dc.io", thinkingSignature: "sig" },
+        { type: "text", text: "contact peter@dc.io" },
+        { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp/peter@dc.io" } },
+      ],
+      stopReason: "toolUse",
+    } as AgentMessage);
+    appendMessage({
+      role: "toolResult",
+      toolCallId: "call_1",
+      toolName: "read",
+      content: [{ type: "text", text: "peter@dc.io\n" }],
+      isError: false,
+    } as AgentMessage);
+
+    const messages = sm
+      .getEntries()
+      .filter((e) => e.type === "message")
+      .map((e) => (e as { message: AgentMessage }).message);
+    const serialized = JSON.stringify(messages);
+
+    expect(serialized).not.toContain("the email is peter@dc.io");
+    expect(serialized).not.toContain("contact peter@dc.io");
+    expect(serialized).not.toContain("peter@dc.io\\n");
+    expect(serialized).toContain('"thinking":"the email is peter@d***.io"');
+    expect(serialized).toContain('"text":"contact peter@d***.io"');
+    expect(serialized).toContain('"text":"peter@d***.io\\n"');
+    expect(serialized).toContain('"/tmp/peter@dc.io"');
+  });
 });
--- a/src/agents/session-tool-result-guard-wrapper.ts
+++ b/src/agents/session-tool-result-guard-wrapper.ts
@@ -1,6 +1,7 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import type { SessionManager } from "@mariozechner/pi-coding-agent";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
+import { redactSensitiveText } from "../logging/redact.js";
 import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
 import {
  applyInputProvenanceToUserMessage,
@@ -16,6 +17,71 @@ export type GuardedSessionManager = SessionManager & {
  clearPendingToolResults?: () => void;
 };

+function redactTranscriptText(value: string, cfg?: OpenClawConfig): string {
+  if (cfg?.logging?.redactSensitive === "off") {
+    return value;
+  }
+  return redactSensitiveText(value, {
+    mode: cfg?.logging?.redactSensitive,
+    patterns: cfg?.logging?.redactPatterns,
+  });
+}
+
+function redactTranscriptContentBlock(block: unknown, cfg?: OpenClawConfig): unknown {
+  if (!block || typeof block !== "object" || Array.isArray(block)) {
+    return block;
+  }
+  const source = block as Record<string, unknown>;
+  let next: Record<string, unknown> | null = null;
+  const assign = (key: string, value: string) => {
+    const redacted = redactTranscriptText(value, cfg);
+    if (redacted === value) {
+      return;
+    }
+    next ??= { ...source };
+    next[key] = redacted;
+  };
+
+  if (typeof source.text === "string") {
+    assign("text", source.text);
+  }
+  if (typeof source.thinking === "string") {
+    assign("thinking", source.thinking);
+  }
+  if (typeof source.partialJson === "string") {
+    assign("partialJson", source.partialJson);
+  }
+  return next ?? block;
+}
+
+function redactTranscriptContent(content: unknown, cfg?: OpenClawConfig): unknown {
+  if (typeof content === "string") {
+    return redactTranscriptText(content, cfg);
+  }
+  if (!Array.isArray(content)) {
+    return content;
+  }
+  let changed = false;
+  const redacted = content.map((block) => {
+    const next = redactTranscriptContentBlock(block, cfg);
+    changed ||= next !== block;
+    return next;
+  });
+  return changed ? redacted : content;
+}
+
+function redactTranscriptMessage(message: AgentMessage, cfg?: OpenClawConfig): AgentMessage {
+  const source = message as unknown as Record<string, unknown>;
+  const redactedContent = redactTranscriptContent(source.content, cfg);
+  if (redactedContent === source.content) {
+    return message;
+  }
+  return {
+    ...source,
+    content: redactedContent,
+  } as unknown as AgentMessage;
+}
+
 /**
 * Apply the tool-result guard to a SessionManager exactly once and expose
 * a flush method on the instance for easy teardown handling.
@@ -38,14 +104,31 @@ export function guardSessionManager(
  }

  const hookRunner = getGlobalHookRunner();
-  const beforeMessageWrite = hookRunner?.hasHooks("before_message_write")
-    ? (event: { message: import("@mariozechner/pi-agent-core").AgentMessage }) => {
-        return hookRunner.runBeforeMessageWrite(event, {
-          agentId: opts?.agentId,
-          sessionKey: opts?.sessionKey,
-        });
+  const beforeMessageWrite = (event: {
+    message: import("@mariozechner/pi-agent-core").AgentMessage;
+  }) => {
+    let message = event.message;
+    let changed = false;
+    if (hookRunner?.hasHooks("before_message_write")) {
+      const result = hookRunner.runBeforeMessageWrite(event, {
+        agentId: opts?.agentId,
+        sessionKey: opts?.sessionKey,
+      });
+      if (result?.block) {
+        return result;
      }
-    : undefined;
+      if (result?.message) {
+        message = result.message;
+        changed = true;
+      }
+    }
+    const redacted = redactTranscriptMessage(message, opts?.config);
+    if (redacted !== message) {
+      message = redacted;
+      changed = true;
+    }
+    return changed ? { message } : undefined;
+  };

  const transform = hookRunner?.hasHooks("tool_result_persist")
    ? (
--- a/src/config/schema.base.generated.ts
+++ b/src/config/schema.base.generated.ts
@@ -466,7 +466,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
            ],
            title: "Sensitive Data Redaction Mode",
            description:
-              'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.',
+              'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.',
          },
          redactPatterns: {
            type: "array",
@@ -475,7 +475,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
            },
            title: "Custom Redaction Patterns",
            description:
-              "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
+              "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
          },
        },
        additionalProperties: false,
@@ -23982,12 +23982,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
    },
    "logging.redactSensitive": {
      label: "Sensitive Data Redaction Mode",
-      help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.',
+      help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.',
      tags: ["privacy", "observability"],
    },
    "logging.redactPatterns": {
      label: "Custom Redaction Patterns",
-      help: "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
+      help: "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
      tags: ["privacy", "observability"],
    },
    "cli.banner": {
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@@ -43,9 +43,9 @@ export const FIELD_HELP: Record<string, string> = {
  "logging.consoleStyle":
    'Console output format style: "pretty", "compact", or "json" based on operator and ingestion needs. Use json for machine parsing pipelines and pretty/compact for human-first terminal workflows.',
  "logging.redactSensitive":
-    'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.',
+    'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.',
  "logging.redactPatterns":
-    "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
+    "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
  cli: "CLI presentation controls for local command output behavior such as banner and tagline style. Use this section to keep startup output aligned with operator preference without changing runtime behavior.",
  "cli.banner":
    "CLI startup banner controls for title/version line and tagline style behavior. Keep banner enabled for fast version/context checks, then tune tagline mode to your preferred noise level.",
--- a/src/config/types.base.ts
+++ b/src/config/types.base.ts
@@ -225,9 +225,9 @@ export type LoggingConfig = {
  maxFileBytes?: number;
  consoleLevel?: "silent" | "fatal" | "error" | "warn" | "info" | "debug" | "trace";
  consoleStyle?: "pretty" | "compact" | "json";
-  /** Redact sensitive tokens in tool summaries. Default: "tools". */
+  /** Redact sensitive tokens in log sinks and persisted transcript text. Default: "tools". */
  redactSensitive?: "off" | "tools";
-  /** Regex patterns used to redact sensitive tokens (defaults apply when unset). */
+  /** Regex patterns used to redact sensitive tokens from logs and transcripts (defaults apply when unset). */
  redactPatterns?: string[];
 };

--- a/src/logging/redact.test.ts
+++ b/src/logging/redact.test.ts
@@ -132,6 +132,16 @@ describe("redactSensitiveText", () => {
    expect(output).toBe("token=abcdef…ghij");
  });

+  it("honors escaped character classes in custom patterns", () => {
+    const input = "contact peter@dc.io";
+    const output = redactSensitiveText(input, {
+      mode: "tools",
+      patterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`],
+    });
+    expect(output).toBe("contact peter@d***.io");
+    expect(output).not.toContain("peter@dc.io");
+  });
+
  it("ignores unsafe nested-repetition custom patterns", () => {
    const input = `${"a".repeat(28)}!`;
    const output = redactSensitiveText(input, {
--- a/src/security/safe-regex.test.ts
+++ b/src/security/safe-regex.test.ts
@@ -12,6 +12,7 @@ describe("safe regex", () => {
    ["(a|aa)+$", true],
    ["^(?:foo|bar)$", false],
    ["^(ab|cd)+$", false],
+    [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`, false],
  ] as const)("classifies nested repetition for %s", (pattern, expected) => {
    expect(hasNestedRepetition(pattern)).toBe(expected);
  });
--- a/src/security/safe-regex.ts
+++ b/src/security/safe-regex.ts
@@ -140,19 +140,23 @@ function tokenizePattern(source: string): PatternToken[] {
  for (let i = 0; i < source.length; i += 1) {
    const ch = source[i];

-    if (ch === "\\") {
-      i += 1;
-      tokens.push({ kind: "simple-token" });
-      continue;
-    }
-
    if (inCharClass) {
+      if (ch === "\\") {
+        i += 1;
+        continue;
+      }
      if (ch === "]") {
        inCharClass = false;
      }
      continue;
    }

+    if (ch === "\\") {
+      i += 1;
+      tokens.push({ kind: "simple-token" });
+      continue;
+    }
+
    if (ch === "[") {
      inCharClass = true;
      tokens.push({ kind: "simple-token" });