From 139dfd97bbbd7ae8fd98b06268c5aa238a67bda9 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 24 Apr 2026 14:51:45 -0700 Subject: [PATCH] fix(diagnostics-otel): export logs from diagnostic events Export diagnostics OTEL logs through bounded diagnostic log events while keeping core log records off the public plugin diagnostic stream.\n\nIncludes security hardening for log payload redaction, bounded attributes, prototype-pollution keys, OTEL export failure reporting, and extension SDK seam usage. --- CHANGELOG.md | 2 +- .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- .../diagnostics-otel/src/service.test.ts | 185 ++++++++---- extensions/diagnostics-otel/src/service.ts | 241 ++++++++-------- src/infra/diagnostic-events.test.ts | 22 ++ src/infra/diagnostic-events.ts | 30 +- src/logging/diagnostic-log-events.test.ts | 143 ++++++++++ src/logging/diagnostic-stability.ts | 4 + src/logging/logger.ts | 267 ++++++++++++++++++ src/plugin-sdk/diagnostics-otel.ts | 6 +- 10 files changed, 708 insertions(+), 196 deletions(-) create mode 100644 src/logging/diagnostic-log-events.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 99db8700713..dd57b9a4bfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,6 @@ Docs: https://docs.openclaw.ai - TUI/dependencies: remove direct `cli-highlight` usage from the OpenClaw TUI code-block renderer, keeping themed code coloring without the extra root dependency. Thanks @vincentkoc. - Diagnostics/OTEL: export run, model-call, and tool-execution diagnostic lifecycle events as OTEL spans without retaining live span state. Thanks @vincentkoc. - Providers/Anthropic Vertex: move the Vertex SDK runtime behind the bundled provider plugin so core no longer owns that provider-specific dependency. Thanks @vincentkoc. -- Plugins/web fetch: move local Readability extraction into a bundled plugin so core no longer owns the Readability and DOM parser dependencies. Thanks @vincentkoc. - Plugins/activation: expose activation plan reasons and a richer plan API so callers can inspect why a plugin was selected while preserving existing id-list activation behavior. (#70943) Thanks @vincentkoc. - Plugins/source metadata: expose normalized install-source facts on provider and channel catalogs so onboarding can explain npm pinning, integrity state, and local availability before runtime loads. (#70951) Thanks @vincentkoc. - Plugins/catalog: pin the official external WeCom channel source to an exact npm release plus dist integrity, with a guard that official external sources stay integrity-pinned. (#70997) Thanks @vincentkoc. @@ -69,6 +68,7 @@ Docs: https://docs.openclaw.ai - Config/plugins: accept `plugins.entries.*.hooks.allowConversationAccess` in validation, generated schema metadata, and plugin policy inspection so trusted external plugins can enable conversation-access hooks such as `agent_end` without local schema patches. Fixes #71215. (#71221) Thanks @BillChirico. - Codex harness/models: keep legacy `codex/*` harness shorthand out of model picker and `/models` choice surfaces while migrating primary legacy refs to canonical `openai/*` plus explicit Codex harness config. (#71193) Thanks @vincentkoc. - Plugins/runtime deps: respect explicit plugin and channel disablement when repairing bundled runtime dependencies, so doctor and health checks no longer install deps for disabled configured channels. +- Diagnostics/OTEL: export logs through bounded diagnostic log events instead of a direct logger transport hook. Thanks @vincentkoc. - WhatsApp/plugins: support an explicit opt-in for inbound `message_received` hooks with canonical channel, conversation, session, and sender fields. Thanks @vincentkoc. - Channels/setup: keep bundled setup entries dependency-light and stage WhatsApp runtime dependencies only when login actually needs them, so first-run setup and read-only channel discovery avoid unused SDK imports. - Slack/HTTP: keep webhook handlers in a process-global registry so HTTP mode survives plugin-loader/native-import splits and `/slack/events/` no longer returns 404 after logging as active. Fixes #67955, #46245, and #46246. Thanks @chrisabad and @cesararevalo. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 8487f07dbe5..b6e8b620918 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -a2005473b59e26995f563e8bd3f1c6782bd3ee193a65dd3255160d40e505fa4d plugin-sdk-api-baseline.json -5778c0bb6cfc85c1b0665ec431269bcb43b8891720d0b30467681420ab411721 plugin-sdk-api-baseline.jsonl +c1501890ed21debd56a119381101e6fec9bde0a4deae94bddcb464369cf0e51a plugin-sdk-api-baseline.json +10ee97c1acf0b2a725f0f89b357146ce769ed39f46cccd3e40d0a5d00571c599 plugin-sdk-api-baseline.jsonl diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts index ebae5360b5f..f89ec32b863 100644 --- a/extensions/diagnostics-otel/src/service.test.ts +++ b/extensions/diagnostics-otel/src/service.test.ts @@ -1,7 +1,5 @@ import { beforeEach, describe, expect, test, vi } from "vitest"; -const registerLogTransportMock = vi.hoisted(() => vi.fn()); - const telemetryState = vi.hoisted(() => { const counters = new Map }>(); const histograms = new Map }>(); @@ -113,14 +111,6 @@ vi.mock("@opentelemetry/semantic-conventions", () => ({ ATTR_SERVICE_NAME: "service.name", })); -vi.mock("../api.js", async () => { - const actual = await vi.importActual("../api.js"); - return { - ...actual, - registerLogTransport: registerLogTransportMock, - }; -}); - import type { OpenClawPluginServiceContext } from "../api.js"; import { emitDiagnosticEvent } from "../api.js"; import { createDiagnosticsOtelService } from "./service.js"; @@ -132,6 +122,7 @@ const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736"; const SPAN_ID = "00f067aa0ba902b7"; const CHILD_SPAN_ID = "1111111111111111"; const GRANDCHILD_SPAN_ID = "2222222222222222"; +const PROTO_KEY = "__proto__"; function createLogger() { return { @@ -174,26 +165,17 @@ function createTraceOnlyContext(endpoint: string): OpenClawPluginServiceContext return createOtelContext(endpoint, { traces: true }); } -type RegisteredLogTransport = (logObj: Record) => void; -function setupRegisteredTransports() { - const registeredTransports: RegisteredLogTransport[] = []; - const stopTransports: ReturnType[] = []; - registerLogTransportMock.mockImplementation((transport) => { - registeredTransports.push(transport); - const stopTransport = vi.fn(); - stopTransports.push(stopTransport); - return stopTransport; - }); - return { registeredTransports, stopTransports }; -} - -async function emitAndCaptureLog(logObj: Record) { - const { registeredTransports } = setupRegisteredTransports(); +async function emitAndCaptureLog( + event: Omit[0], { type: "log.record" }>, "type">, +) { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true }); await service.start(ctx); - expect(registeredTransports).toHaveLength(1); - registeredTransports[0]?.(logObj); + emitDiagnosticEvent({ + type: "log.record", + ...event, + }); + await flushDiagnosticEvents(); expect(logEmit).toHaveBeenCalled(); const emitCall = logEmit.mock.calls[0]?.[0]; await service.stop?.(ctx); @@ -215,15 +197,12 @@ describe("diagnostics-otel service", () => { telemetryState.meter.createHistogram.mockClear(); sdkStart.mockClear(); sdkShutdown.mockClear(); - logEmit.mockClear(); + logEmit.mockReset(); logShutdown.mockClear(); traceExporterCtor.mockClear(); - registerLogTransportMock.mockReset(); }); test("records message-flow metrics and spans", async () => { - const { registeredTransports } = setupRegisteredTransports(); - const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true }); await service.start(ctx); @@ -289,29 +268,24 @@ describe("diagnostics-otel service", () => { expect(spanNames).toContain("openclaw.message.processed"); expect(spanNames).toContain("openclaw.session.stuck"); - expect(registerLogTransportMock).toHaveBeenCalledTimes(1); - expect(registeredTransports).toHaveLength(1); - registeredTransports[0]?.({ - 0: '{"subsystem":"diagnostic"}', - 1: "hello", - _meta: { logLevelName: "INFO", date: new Date() }, + emitDiagnosticEvent({ + type: "log.record", + level: "INFO", + message: "hello", + attributes: { subsystem: "diagnostic" }, }); + await flushDiagnosticEvents(); expect(logEmit).toHaveBeenCalled(); await service.stop?.(ctx); }); test("restarts without retaining prior listeners or log transports", async () => { - const { registeredTransports, stopTransports } = setupRegisteredTransports(); - const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true }); await service.start(ctx); await service.start(ctx); - expect(registerLogTransportMock).toHaveBeenCalledTimes(2); - expect(registeredTransports).toHaveLength(2); - expect(stopTransports[0]).toHaveBeenCalledTimes(1); expect(logShutdown).toHaveBeenCalledTimes(1); expect(sdkShutdown).toHaveBeenCalledTimes(1); @@ -325,7 +299,6 @@ describe("diagnostics-otel service", () => { expect(telemetryState.tracer.startSpan).toHaveBeenCalledTimes(1); await service.stop?.(ctx); - expect(stopTransports[1]).toHaveBeenCalledTimes(1); expect(logShutdown).toHaveBeenCalledTimes(2); expect(sdkShutdown).toHaveBeenCalledTimes(2); @@ -340,8 +313,6 @@ describe("diagnostics-otel service", () => { }); test("tears down active handles when restarted with diagnostics disabled", async () => { - const { stopTransports } = setupRegisteredTransports(); - const service = createDiagnosticsOtelService(); const enabledCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, @@ -354,7 +325,6 @@ describe("diagnostics-otel service", () => { config: { diagnostics: { enabled: false } }, }); - expect(stopTransports[0]).toHaveBeenCalledTimes(1); expect(logShutdown).toHaveBeenCalledTimes(1); expect(sdkShutdown).toHaveBeenCalledTimes(1); @@ -410,8 +380,8 @@ describe("diagnostics-otel service", () => { test("redacts sensitive data from log messages before export", async () => { const emitCall = await emitAndCaptureLog({ - 0: "Using API key sk-1234567890abcdef1234567890abcdef", - _meta: { logLevelName: "INFO", date: new Date() }, + level: "INFO", + message: "Using API key sk-1234567890abcdef1234567890abcdef", }); expect(emitCall?.body).not.toContain("sk-1234567890abcdef1234567890abcdef"); @@ -421,9 +391,11 @@ describe("diagnostics-otel service", () => { test("redacts sensitive data from log attributes before export", async () => { const emitCall = await emitAndCaptureLog({ - 0: '{"token":"ghp_abcdefghijklmnopqrstuvwxyz123456"}', // pragma: allowlist secret - 1: "auth configured", - _meta: { logLevelName: "DEBUG", date: new Date() }, + level: "DEBUG", + message: "auth configured", + attributes: { + token: "ghp_abcdefghijklmnopqrstuvwxyz123456", // pragma: allowlist secret + }, }); const tokenAttr = emitCall?.attributes?.["openclaw.token"]; @@ -435,16 +407,16 @@ describe("diagnostics-otel service", () => { test("attaches diagnostic trace context to exported logs", async () => { const emitCall = await emitAndCaptureLog({ - 0: '{"subsystem":"diagnostic"}', - 1: { - trace: { - traceId: TRACE_ID, - spanId: SPAN_ID, - traceFlags: "01", - }, + level: "INFO", + message: "traceable log", + attributes: { + subsystem: "diagnostic", + }, + trace: { + traceId: TRACE_ID, + spanId: SPAN_ID, + traceFlags: "01", }, - 2: "traceable log", - _meta: { logLevelName: "INFO", date: new Date() }, }); expect(emitCall?.attributes).toMatchObject({ @@ -473,6 +445,99 @@ describe("diagnostics-otel service", () => { }); }); + test("bounds plugin-emitted log attributes and omits source paths", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true }); + await service.start(ctx); + + const attributes = Object.create(null) as Record; + attributes.good = "y".repeat(6000); + attributes["bad key"] = "drop-me"; + attributes[PROTO_KEY] = "pollute"; + attributes["constructor"] = "pollute"; + attributes["prototype"] = "pollute"; + attributes["sk-1234567890abcdef1234567890abcdef"] = "secret-key"; // pragma: allowlist secret + + emitDiagnosticEvent({ + type: "log.record", + level: "INFO", + message: "x".repeat(6000), + attributes, + code: { + filepath: "/Users/alice/openclaw/src/private.ts", + line: 42, + functionName: "handler", + location: "/Users/alice/openclaw/src/private.ts:42", + }, + } as Parameters[0]); + await flushDiagnosticEvents(); + + const emitCall = logEmit.mock.calls[0]?.[0]; + expect(emitCall?.body.length).toBeLessThanOrEqual(4200); + expect(emitCall?.attributes).toMatchObject({ + "openclaw.good": expect.stringMatching(/^y+/), + "code.lineno": 42, + "code.function": "handler", + }); + expect(String(emitCall?.attributes?.["openclaw.good"]).length).toBeLessThanOrEqual(4200); + expect(Object.hasOwn(emitCall?.attributes ?? {}, `openclaw.${PROTO_KEY}`)).toBe(false); + expect(Object.hasOwn(emitCall?.attributes ?? {}, "openclaw.constructor")).toBe(false); + expect(Object.hasOwn(emitCall?.attributes ?? {}, "openclaw.prototype")).toBe(false); + expect( + Object.hasOwn( + emitCall?.attributes ?? {}, + "openclaw.sk-1234567890abcdef1234567890abcdef", // pragma: allowlist secret + ), + ).toBe(false); + expect(emitCall?.attributes).toEqual( + expect.not.objectContaining({ + "openclaw.bad key": expect.anything(), + "code.filepath": expect.anything(), + "openclaw.code.location": expect.anything(), + }), + ); + await service.stop?.(ctx); + }); + + test("rate-limits repeated log export failure reports", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true }); + const nowSpy = vi.spyOn(Date, "now").mockReturnValue(1_000); + logEmit.mockImplementation(() => { + throw new Error("export failed"); + }); + try { + await service.start(ctx); + + emitDiagnosticEvent({ + type: "log.record", + level: "ERROR", + message: "first failing log", + }); + emitDiagnosticEvent({ + type: "log.record", + level: "ERROR", + message: "second failing log", + }); + await flushDiagnosticEvents(); + + expect(ctx.logger.error).toHaveBeenCalledTimes(1); + + nowSpy.mockReturnValue(62_000); + emitDiagnosticEvent({ + type: "log.record", + level: "ERROR", + message: "third failing log", + }); + await flushDiagnosticEvents(); + + expect(ctx.logger.error).toHaveBeenCalledTimes(2); + } finally { + nowSpy.mockRestore(); + await service.stop?.(ctx); + } + }); + test("does not parent diagnostic event spans from plugin-emittable trace context", async () => { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index 75dbac94bd4..2c17ac5c92a 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -24,9 +24,8 @@ import { isValidDiagnosticSpanId, isValidDiagnosticTraceFlags, isValidDiagnosticTraceId, - onDiagnosticEvent, + onInternalDiagnosticEvent, redactSensitiveText, - registerLogTransport, } from "../api.js"; const DEFAULT_SERVICE_NAME = "openclaw"; @@ -41,6 +40,13 @@ const DROPPED_OTEL_ATTRIBUTE_KEYS = new Set([ "openclaw.traceId", ]); const LOW_CARDINALITY_VALUE_RE = /^[A-Za-z0-9_.:-]{1,120}$/u; +const MAX_OTEL_LOG_BODY_CHARS = 4 * 1024; +const MAX_OTEL_LOG_ATTRIBUTE_COUNT = 64; +const MAX_OTEL_LOG_ATTRIBUTE_VALUE_CHARS = 4 * 1024; +const LOG_RECORD_EXPORT_FAILURE_REPORT_INTERVAL_MS = 60_000; +const OTEL_LOG_RAW_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,64}$/u; +const OTEL_LOG_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,96}$/u; +const BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS = new Set(["__proto__", "prototype", "constructor"]); function normalizeEndpoint(endpoint?: string): string | undefined { const trimmed = endpoint?.trim(); @@ -105,6 +111,44 @@ function genAiOperationName(api: string | undefined): "chat" | "text_completion" return api === "completions" ? "text_completion" : "chat"; } +function clampOtelLogText(value: string, maxChars: number): string { + return value.length > maxChars ? `${value.slice(0, maxChars)}...(truncated)` : value; +} + +function normalizeOtelLogString(value: string, maxChars: number): string { + return redactSensitiveText(clampOtelLogText(value, maxChars)); +} + +function assignOtelLogAttribute( + attributes: Record, + key: string, + value: string | number | boolean, +): void { + if (Object.keys(attributes).length >= MAX_OTEL_LOG_ATTRIBUTE_COUNT) { + return; + } + if (BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS.has(key)) { + return; + } + if (redactSensitiveText(key) !== key) { + return; + } + if (!OTEL_LOG_ATTRIBUTE_KEY_RE.test(key)) { + return; + } + if (typeof value === "string") { + attributes[key] = normalizeOtelLogString(value, MAX_OTEL_LOG_ATTRIBUTE_VALUE_CHARS); + return; + } + if (typeof value === "number" && Number.isFinite(value)) { + attributes[key] = value; + return; + } + if (typeof value === "boolean") { + attributes[key] = value; + } +} + function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined { if (!value || typeof value !== "object" || Array.isArray(value)) { return undefined; @@ -130,32 +174,32 @@ function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefin }; } -function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined { - const direct = normalizeTraceContext(value); - if (direct) { - return direct; +function assignOtelLogEventAttributes( + attributes: Record, + eventAttributes: Record | undefined, +): void { + if (!eventAttributes) { + return; } - if (!value || typeof value !== "object" || Array.isArray(value)) { - return undefined; - } - return normalizeTraceContext((value as { trace?: unknown }).trace); -} - -function findLogTraceContext( - bindings: Record | undefined, - numericArgs: unknown[], -): DiagnosticTraceContext | undefined { - const fromBindings = extractTraceContext(bindings); - if (fromBindings) { - return fromBindings; - } - for (const arg of numericArgs) { - const fromArg = extractTraceContext(arg); - if (fromArg) { - return fromArg; + for (const rawKey in eventAttributes) { + if (Object.keys(attributes).length >= MAX_OTEL_LOG_ATTRIBUTE_COUNT) { + break; } + if (!Object.hasOwn(eventAttributes, rawKey)) { + continue; + } + const key = rawKey.trim(); + if (BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS.has(key)) { + continue; + } + if (redactSensitiveText(key) !== key) { + continue; + } + if (!OTEL_LOG_RAW_ATTRIBUTE_KEY_RE.test(key)) { + continue; + } + assignOtelLogAttribute(attributes, `openclaw.${key}`, eventAttributes[rawKey]); } - return undefined; } function traceFlagsToOtel(traceFlags: string | undefined): TraceFlags { @@ -199,22 +243,18 @@ function addTraceAttributes( export function createDiagnosticsOtelService(): OpenClawPluginService { let sdk: NodeSDK | null = null; let logProvider: LoggerProvider | null = null; - let stopLogTransport: (() => void) | null = null; let unsubscribe: (() => void) | null = null; const stopStarted = async () => { const currentUnsubscribe = unsubscribe; - const currentStopLogTransport = stopLogTransport; const currentLogProvider = logProvider; const currentSdk = sdk; unsubscribe = null; - stopLogTransport = null; logProvider = null; sdk = null; currentUnsubscribe?.(); - currentStopLogTransport?.(); if (currentLogProvider) { await currentLogProvider.shutdown().catch(() => undefined); } @@ -402,7 +442,11 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { }, ); + let recordLogRecord: + | ((evt: Extract) => void) + | undefined; if (logsEnabled) { + let logRecordExportFailureLastReportedAt = Number.NEGATIVE_INFINITY; const logExporter = new OTLPLogExporter({ ...(logUrl ? { url: logUrl } : {}), ...(headers ? { headers } : {}), @@ -418,120 +462,54 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { processors: [logProcessor], }); const otelLogger = logProvider.getLogger("openclaw"); - - stopLogTransport = registerLogTransport((logObj) => { + recordLogRecord = (evt) => { try { - const safeStringify = (value: unknown) => { - try { - return JSON.stringify(value); - } catch { - return String(value); - } - }; - const meta = (logObj as Record)._meta as - | { - logLevelName?: string; - date?: Date; - name?: string; - parentNames?: string[]; - path?: { - filePath?: string; - fileLine?: string; - fileColumn?: string; - filePathWithLine?: string; - method?: string; - }; - } - | undefined; - const logLevelName = meta?.logLevelName ?? "INFO"; + const logLevelName = evt.level || "INFO"; const severityNumber = logSeverityMap[logLevelName] ?? (9 as SeverityNumber); + const attributes = Object.create(null) as Record; + assignOtelLogAttribute(attributes, "openclaw.log.level", logLevelName); + if (evt.loggerName) { + assignOtelLogAttribute(attributes, "openclaw.logger", evt.loggerName); + } + if (evt.loggerParents?.length) { + assignOtelLogAttribute( + attributes, + "openclaw.logger.parents", + evt.loggerParents.join("."), + ); + } + assignOtelLogEventAttributes(attributes, evt.attributes); + if (evt.code?.line) { + assignOtelLogAttribute(attributes, "code.lineno", evt.code.line); + } + if (evt.code?.functionName) { + assignOtelLogAttribute(attributes, "code.function", evt.code.functionName); + } + addTraceAttributes(attributes, evt.trace); - const numericArgs = Object.entries(logObj) - .filter(([key]) => /^\d+$/.test(key)) - .toSorted((a, b) => Number(a[0]) - Number(b[0])) - .map(([, value]) => value); - - let bindings: Record | undefined; - if (typeof numericArgs[0] === "string" && numericArgs[0].trim().startsWith("{")) { - try { - const parsed = JSON.parse(numericArgs[0]); - if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { - bindings = parsed as Record; - numericArgs.shift(); - } - } catch { - // ignore malformed json bindings - } - } - const traceContext = findLogTraceContext(bindings, numericArgs); - - let message = ""; - if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") { - message = String(numericArgs.pop()); - } else if (numericArgs.length === 1) { - message = safeStringify(numericArgs[0]); - numericArgs.length = 0; - } - if (!message) { - message = "log"; - } - - const attributes: Record = { - "openclaw.log.level": logLevelName, - }; - if (meta?.name) { - attributes["openclaw.logger"] = meta.name; - } - if (meta?.parentNames?.length) { - attributes["openclaw.logger.parents"] = meta.parentNames.join("."); - } - if (bindings) { - for (const [key, value] of Object.entries(bindings)) { - if ( - typeof value === "string" || - typeof value === "number" || - typeof value === "boolean" - ) { - attributes[`openclaw.${key}`] = value; - } else if (value != null) { - attributes[`openclaw.${key}`] = safeStringify(value); - } - } - } - if (numericArgs.length > 0) { - attributes["openclaw.log.args"] = safeStringify(numericArgs); - } - if (meta?.path?.filePath) { - attributes["code.filepath"] = meta.path.filePath; - } - if (meta?.path?.fileLine) { - attributes["code.lineno"] = Number(meta.path.fileLine); - } - if (meta?.path?.method) { - attributes["code.function"] = meta.path.method; - } - if (meta?.path?.filePathWithLine) { - attributes["openclaw.code.location"] = meta.path.filePathWithLine; - } - addTraceAttributes(attributes, traceContext); - - // OTLP can leave the host boundary, so redact string fields before export. const logRecord: LogRecord = { - body: redactSensitiveText(message), + body: normalizeOtelLogString(evt.message || "log", MAX_OTEL_LOG_BODY_CHARS), severityText: logLevelName, severityNumber, attributes: redactOtelAttributes(attributes), - timestamp: meta?.date ?? new Date(), + timestamp: evt.ts, }; - const logContext = contextForTraceContext(traceContext); + const logContext = contextForTraceContext(evt.trace); if (logContext) { logRecord.context = logContext; } otelLogger.emit(logRecord); } catch (err) { - ctx.logger.error(`diagnostics-otel: log transport failed: ${formatError(err)}`); + const now = Date.now(); + if ( + now - logRecordExportFailureLastReportedAt >= + LOG_RECORD_EXPORT_FAILURE_REPORT_INTERVAL_MS + ) { + logRecordExportFailureLastReportedAt = now; + ctx.logger.error(`diagnostics-otel: log record export failed: ${formatError(err)}`); + } } - }); + }; } const spanWithDuration = ( @@ -979,7 +957,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { queueDepthHistogram.record(evt.queued, { "openclaw.channel": "heartbeat" }); }; - unsubscribe = onDiagnosticEvent((evt: DiagnosticEventPayload) => { + unsubscribe = onInternalDiagnosticEvent((evt: DiagnosticEventPayload) => { try { switch (evt.type) { case "model.usage": @@ -1033,6 +1011,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { case "tool.execution.error": recordToolExecutionError(evt); return; + case "log.record": + recordLogRecord?.(evt); + return; case "tool.loop": case "tool.execution.started": case "run.started": diff --git a/src/infra/diagnostic-events.test.ts b/src/infra/diagnostic-events.test.ts index ca8829b62ca..dfdd50624b4 100644 --- a/src/infra/diagnostic-events.test.ts +++ b/src/infra/diagnostic-events.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { emitDiagnosticEvent, isDiagnosticsEnabled, + onInternalDiagnosticEvent, onDiagnosticEvent, resetDiagnosticEventsForTest, setDiagnosticsEnabledForProcess, @@ -137,6 +138,27 @@ describe("diagnostic-events", () => { expect(events).toEqual(["tool.execution.started", "model.call.started"]); }); + it("keeps log records off the public diagnostic event stream", async () => { + const publicEvents: string[] = []; + const internalEvents: string[] = []; + onDiagnosticEvent((event) => { + publicEvents.push(event.type); + }); + onInternalDiagnosticEvent((event) => { + internalEvents.push(event.type); + }); + + emitDiagnosticEvent({ + type: "log.record", + level: "INFO", + message: "private log", + }); + + await new Promise((resolve) => setImmediate(resolve)); + expect(publicEvents).toEqual([]); + expect(internalEvents).toEqual(["log.record"]); + }); + it("skips event enrichment and subscribers when diagnostics are disabled", () => { const nowSpy = vi.spyOn(Date, "now"); const seen: string[] = []; diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index b3739ba6a55..d1a29993487 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -269,6 +269,19 @@ export type DiagnosticPayloadLargeEvent = DiagnosticBaseEvent & { reason?: string; }; +export type DiagnosticLogRecordEvent = DiagnosticBaseEvent & { + type: "log.record"; + level: string; + message: string; + loggerName?: string; + loggerParents?: string[]; + attributes?: Record; + code?: { + line?: number; + functionName?: string; + }; +}; + export type DiagnosticEventPayload = | DiagnosticUsageEvent | DiagnosticWebhookReceivedEvent @@ -293,7 +306,8 @@ export type DiagnosticEventPayload = | DiagnosticModelCallErrorEvent | DiagnosticMemorySampleEvent | DiagnosticMemoryPressureEvent - | DiagnosticPayloadLargeEvent; + | DiagnosticPayloadLargeEvent + | DiagnosticLogRecordEvent; export type DiagnosticEventInput = DiagnosticEventPayload extends infer Event ? Event extends DiagnosticEventPayload @@ -318,6 +332,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set([ "model.call.started", "model.call.completed", "model.call.error", + "log.record", ]); function getDiagnosticEventsState(): DiagnosticEventsGlobalState { @@ -424,7 +439,9 @@ export function emitDiagnosticEvent(event: DiagnosticEventInput) { dispatchDiagnosticEvent(state, enriched); } -export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => void): () => void { +export function onInternalDiagnosticEvent( + listener: (evt: DiagnosticEventPayload) => void, +): () => void { const state = getDiagnosticEventsState(); state.listeners.add(listener); return () => { @@ -432,6 +449,15 @@ export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => voi }; } +export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => void): () => void { + return onInternalDiagnosticEvent((event) => { + if (event.type === "log.record") { + return; + } + listener(event); + }); +} + export function resetDiagnosticEventsForTest(): void { const state = getDiagnosticEventsState(); state.enabled = true; diff --git a/src/logging/diagnostic-log-events.test.ts b/src/logging/diagnostic-log-events.test.ts new file mode 100644 index 00000000000..080038cfee5 --- /dev/null +++ b/src/logging/diagnostic-log-events.test.ts @@ -0,0 +1,143 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + onInternalDiagnosticEvent, + resetDiagnosticEventsForTest, + type DiagnosticEventPayload, +} from "../infra/diagnostic-events.js"; +import { getChildLogger, resetLogger, setLoggerOverride } from "./logger.js"; + +const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736"; +const SPAN_ID = "00f067aa0ba902b7"; +const PROTO_KEY = "__proto__"; + +function flushDiagnosticEvents() { + return new Promise((resolve) => setImmediate(resolve)); +} + +beforeEach(() => { + resetDiagnosticEventsForTest(); + resetLogger(); + setLoggerOverride({ level: "info" }); +}); + +afterEach(() => { + resetDiagnosticEventsForTest(); + setLoggerOverride(null); + resetLogger(); +}); + +describe("diagnostic log events", () => { + it("emits structured log records through diagnostics", async () => { + const received: Array> = []; + const unsubscribe = onInternalDiagnosticEvent((evt) => { + if (evt.type === "log.record") { + received.push(evt); + } + }); + + const logger = getChildLogger({ + subsystem: "diagnostic", + trace: { traceId: TRACE_ID, spanId: SPAN_ID }, + }); + logger.info({ runId: "run-1" }, "hello diagnostic logs"); + await flushDiagnosticEvents(); + unsubscribe(); + + expect(received).toHaveLength(1); + expect(received[0]).toMatchObject({ + type: "log.record", + level: "INFO", + message: "hello diagnostic logs", + attributes: { + subsystem: "diagnostic", + runId: "run-1", + }, + trace: { + traceId: TRACE_ID, + spanId: SPAN_ID, + }, + }); + }); + + it("redacts and bounds internal log records before diagnostic emission", async () => { + const received: Array> = []; + const unsubscribe = onInternalDiagnosticEvent((evt) => { + if (evt.type === "log.record") { + received.push(evt); + } + }); + + const secret = "ghp_abcdefghijklmnopqrstuvwxyz123456"; // pragma: allowlist secret + const logger = getChildLogger({ + subsystem: "diagnostic", + trace: { traceId: TRACE_ID, spanId: SPAN_ID }, + }); + logger.info( + { + token: secret, + longValue: "x".repeat(5000), + nested: { secret }, + "bad key": "drop-me", + }, + { raw: secret }, + `secret=${secret} ${"y".repeat(5000)}`, + ); + await flushDiagnosticEvents(); + unsubscribe(); + + expect(received).toHaveLength(1); + const [event] = received; + expect(event.message).not.toContain(secret); + expect(event.message.length).toBeLessThanOrEqual(4200); + expect(event.attributes?.token).not.toBe(secret); + expect(String(event.attributes?.token)).toContain("…"); + expect(String(event.attributes?.longValue).length).toBeLessThanOrEqual(2100); + expect(event.attributes).toEqual( + expect.not.objectContaining({ + nested: expect.anything(), + "bad key": expect.anything(), + }), + ); + expect(event).toEqual( + expect.not.objectContaining({ + argsJson: expect.anything(), + }), + ); + }); + + it("drops sensitive, blocked, and excess log attribute keys without copying large objects", async () => { + const received: Array> = []; + const unsubscribe = onInternalDiagnosticEvent((evt) => { + if (evt.type === "log.record") { + received.push(evt); + } + }); + + const structured = Object.create(null) as Record; + structured.safe = "ok"; + structured[PROTO_KEY] = "pollute"; + structured["constructor"] = "pollute"; + structured["prototype"] = "pollute"; + structured["sk-1234567890abcdef1234567890abcdef"] = "secret-key"; // pragma: allowlist secret + for (let index = 0; index < 1000; index += 1) { + structured[`extra${index}`] = index; + } + + const logger = getChildLogger({ + subsystem: "diagnostic", + trace: { traceId: TRACE_ID, spanId: SPAN_ID }, + }); + logger.info(structured, "bounded attrs"); + await flushDiagnosticEvents(); + unsubscribe(); + + expect(received).toHaveLength(1); + expect(received[0].attributes?.safe).toBe("ok"); + expect(Object.keys(received[0].attributes ?? {})).toHaveLength(32); + const attributes = received[0].attributes ?? {}; + expect(Object.hasOwn(attributes, PROTO_KEY)).toBe(false); + expect(Object.hasOwn(attributes, "constructor")).toBe(false); + expect(Object.hasOwn(attributes, "prototype")).toBe(false); + expect(Object.hasOwn(attributes, "sk-1234567890abcdef1234567890abcdef")).toBe(false); // pragma: allowlist secret + }); +}); diff --git a/src/logging/diagnostic-stability.ts b/src/logging/diagnostic-stability.ts index d7e3480e861..40051a6ec9c 100644 --- a/src/logging/diagnostic-stability.ts +++ b/src/logging/diagnostic-stability.ts @@ -275,6 +275,10 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi record.durationMs = event.durationMs; assignReasonCode(record, event.errorCategory); break; + case "log.record": + record.level = event.level; + record.source = event.loggerName; + break; case "diagnostic.memory.sample": record.memory = copyMemory(event.memory); break; diff --git a/src/logging/logger.ts b/src/logging/logger.ts index a6b1f2d5271..7c401324806 100644 --- a/src/logging/logger.ts +++ b/src/logging/logger.ts @@ -2,6 +2,14 @@ import fs from "node:fs"; import path from "node:path"; import { Logger as TsLogger } from "tslog"; import type { OpenClawConfig } from "../config/types.js"; +import { emitDiagnosticEvent } from "../infra/diagnostic-events.js"; +import { + isValidDiagnosticSpanId, + isValidDiagnosticTraceFlags, + isValidDiagnosticTraceId, + type DiagnosticTraceContext, +} from "../infra/diagnostic-trace-context.js"; +import { isBlockedObjectKey } from "../infra/prototype-keys.js"; import { POSIX_OPENCLAW_TMP_DIR, resolvePreferredOpenClawTmpDir, @@ -10,6 +18,7 @@ import { readLoggingConfig, shouldSkipMutatingLoggingConfigRead } from "./config import { resolveEnvLogLevelOverride } from "./env-log-level.js"; import { type LogLevel, levelToMinLevel, normalizeLogLevel } from "./levels.js"; import { resolveNodeRequireFromMeta } from "./node-require.js"; +import { redactSensitiveText } from "./redact.js"; import { loggingState } from "./state.js"; import { formatTimestamp } from "./timestamps.js"; import type { LoggerSettings } from "./types.js"; @@ -64,6 +73,20 @@ export type LogTransport = (logObj: LogTransportRecord) => void; const externalTransports = new Set(); +type DiagnosticLogCode = { + line?: number; + functionName?: string; +}; + +const MAX_DIAGNOSTIC_LOG_BINDINGS_JSON_CHARS = 8 * 1024; +const MAX_DIAGNOSTIC_LOG_MESSAGE_CHARS = 4 * 1024; +const MAX_DIAGNOSTIC_LOG_ATTRIBUTE_COUNT = 32; +const MAX_DIAGNOSTIC_LOG_ATTRIBUTE_VALUE_CHARS = 2 * 1024; +const MAX_DIAGNOSTIC_LOG_NAME_CHARS = 120; +const DIAGNOSTIC_LOG_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,64}$/u; + +type DiagnosticLogAttributes = Record; + function attachExternalTransport(logger: TsLogger, transport: LogTransport): void { logger.attachTransport((logObj: LogObj) => { if (!externalTransports.has(transport)) { @@ -77,6 +100,248 @@ function attachExternalTransport(logger: TsLogger, transport: LogTranspo }); } +function clampDiagnosticLogText(value: string, maxChars: number): string { + return value.length > maxChars ? `${value.slice(0, maxChars)}...(truncated)` : value; +} + +function sanitizeDiagnosticLogText(value: string, maxChars: number): string { + return clampDiagnosticLogText( + redactSensitiveText(clampDiagnosticLogText(value, maxChars)), + maxChars, + ); +} + +function normalizeDiagnosticLogName(value: string | undefined): string | undefined { + if (!value || value.trim().startsWith("{")) { + return undefined; + } + const sanitized = sanitizeDiagnosticLogText(value.trim(), MAX_DIAGNOSTIC_LOG_NAME_CHARS); + return DIAGNOSTIC_LOG_ATTRIBUTE_KEY_RE.test(sanitized) ? sanitized : undefined; +} + +function assignDiagnosticLogAttribute( + attributes: DiagnosticLogAttributes, + state: { count: number }, + key: string, + value: unknown, +): void { + if (state.count >= MAX_DIAGNOSTIC_LOG_ATTRIBUTE_COUNT) { + return; + } + const normalizedKey = key.trim(); + if (isBlockedObjectKey(normalizedKey)) { + return; + } + if (redactSensitiveText(normalizedKey) !== normalizedKey) { + return; + } + if (!DIAGNOSTIC_LOG_ATTRIBUTE_KEY_RE.test(normalizedKey)) { + return; + } + if (typeof value === "string") { + attributes[normalizedKey] = sanitizeDiagnosticLogText( + value, + MAX_DIAGNOSTIC_LOG_ATTRIBUTE_VALUE_CHARS, + ); + state.count += 1; + return; + } + if (typeof value === "number" && Number.isFinite(value)) { + attributes[normalizedKey] = value; + state.count += 1; + return; + } + if (typeof value === "boolean") { + attributes[normalizedKey] = value; + state.count += 1; + } +} + +function addDiagnosticLogAttributesFrom( + attributes: DiagnosticLogAttributes, + state: { count: number }, + source: Record | undefined, +): void { + if (!source) { + return; + } + for (const key in source) { + if (state.count >= MAX_DIAGNOSTIC_LOG_ATTRIBUTE_COUNT) { + break; + } + if (!Object.hasOwn(source, key) || key === "trace") { + continue; + } + assignDiagnosticLogAttribute(attributes, state, key, source[key]); + } +} + +function isPlainLogRecordObject(value: unknown): value is Record { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return false; + } + const prototype = Object.getPrototypeOf(value); + return prototype === Object.prototype || prototype === null; +} + +function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + const candidate = value as Partial; + if (!isValidDiagnosticTraceId(candidate.traceId)) { + return undefined; + } + if (candidate.spanId !== undefined && !isValidDiagnosticSpanId(candidate.spanId)) { + return undefined; + } + if (candidate.parentSpanId !== undefined && !isValidDiagnosticSpanId(candidate.parentSpanId)) { + return undefined; + } + if (candidate.traceFlags !== undefined && !isValidDiagnosticTraceFlags(candidate.traceFlags)) { + return undefined; + } + return { + traceId: candidate.traceId, + ...(candidate.spanId ? { spanId: candidate.spanId } : {}), + ...(candidate.parentSpanId ? { parentSpanId: candidate.parentSpanId } : {}), + ...(candidate.traceFlags ? { traceFlags: candidate.traceFlags } : {}), + }; +} + +function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined { + const direct = normalizeTraceContext(value); + if (direct) { + return direct; + } + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + return normalizeTraceContext((value as { trace?: unknown }).trace); +} + +function findLogTraceContext( + bindings: Record | undefined, + numericArgs: readonly unknown[], +): DiagnosticTraceContext | undefined { + const fromBindings = extractTraceContext(bindings); + if (fromBindings) { + return fromBindings; + } + for (const arg of numericArgs) { + const fromArg = extractTraceContext(arg); + if (fromArg) { + return fromArg; + } + } + return undefined; +} + +function buildDiagnosticLogRecord(logObj: LogTransportRecord) { + const meta = logObj._meta as + | { + logLevelName?: string; + date?: Date; + name?: string; + parentNames?: string[]; + path?: { + filePath?: string; + fileLine?: string; + fileColumn?: string; + filePathWithLine?: string; + method?: string; + }; + } + | undefined; + const numericArgs = Object.entries(logObj) + .filter(([key]) => /^\d+$/.test(key)) + .toSorted((a, b) => Number(a[0]) - Number(b[0])) + .map(([, value]) => value); + + let bindings: Record | undefined; + if ( + typeof numericArgs[0] === "string" && + numericArgs[0].length <= MAX_DIAGNOSTIC_LOG_BINDINGS_JSON_CHARS && + numericArgs[0].trim().startsWith("{") + ) { + try { + const parsed = JSON.parse(numericArgs[0]); + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { + bindings = parsed as Record; + numericArgs.shift(); + } + } catch { + // ignore malformed json bindings + } + } + + const trace = findLogTraceContext(bindings, numericArgs); + const structuredArg = numericArgs[0]; + const structuredBindings = isPlainLogRecordObject(structuredArg) ? structuredArg : undefined; + if (structuredBindings) { + numericArgs.shift(); + } + + let message = ""; + if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") { + message = sanitizeDiagnosticLogText( + String(numericArgs.pop()), + MAX_DIAGNOSTIC_LOG_MESSAGE_CHARS, + ); + } else if ( + numericArgs.length === 1 && + (typeof numericArgs[0] === "number" || typeof numericArgs[0] === "boolean") + ) { + message = String(numericArgs[0]); + numericArgs.length = 0; + } + if (!message) { + message = "log"; + } + + const attributes: DiagnosticLogAttributes = Object.create(null) as DiagnosticLogAttributes; + const attributeState = { count: 0 }; + addDiagnosticLogAttributesFrom(attributes, attributeState, bindings); + addDiagnosticLogAttributesFrom(attributes, attributeState, structuredBindings); + + const code: DiagnosticLogCode = {}; + if (meta?.path?.fileLine) { + const line = Number(meta.path.fileLine); + if (Number.isFinite(line)) { + code.line = line; + } + } + if (meta?.path?.method) { + code.functionName = sanitizeDiagnosticLogText(meta.path.method, MAX_DIAGNOSTIC_LOG_NAME_CHARS); + } + + const loggerName = normalizeDiagnosticLogName(meta?.name); + const loggerParents = meta?.parentNames + ?.map(normalizeDiagnosticLogName) + .filter((name): name is string => Boolean(name)); + + return { + type: "log.record" as const, + level: meta?.logLevelName ?? "INFO", + message, + ...(loggerName ? { loggerName } : {}), + ...(loggerParents?.length ? { loggerParents } : {}), + ...(Object.keys(attributes).length > 0 ? { attributes } : {}), + ...(Object.keys(code).length > 0 ? { code } : {}), + ...(trace ? { trace } : {}), + }; +} + +function attachDiagnosticEventTransport(logger: TsLogger): void { + logger.attachTransport((logObj: LogObj) => { + try { + emitDiagnosticEvent(buildDiagnosticLogRecord(logObj as LogTransportRecord)); + } catch { + // never block on logging failures + } + }); +} + function canUseSilentVitestFileLogFastPath(envLevel: LogLevel | undefined): boolean { return ( process.env.VITEST === "true" && @@ -159,6 +424,7 @@ function buildLogger(settings: ResolvedSettings): TsLogger { // Silent logging does not write files; skip all filesystem setup in this path. if (settings.level === "silent") { + attachDiagnosticEventTransport(logger); for (const transport of externalTransports) { attachExternalTransport(logger, transport); } @@ -203,6 +469,7 @@ function buildLogger(settings: ResolvedSettings): TsLogger { // never block on logging failures } }); + attachDiagnosticEventTransport(logger); for (const transport of externalTransports) { attachExternalTransport(logger, transport); } diff --git a/src/plugin-sdk/diagnostics-otel.ts b/src/plugin-sdk/diagnostics-otel.ts index 6b1e109a96d..684d273cd8c 100644 --- a/src/plugin-sdk/diagnostics-otel.ts +++ b/src/plugin-sdk/diagnostics-otel.ts @@ -3,7 +3,11 @@ export type { DiagnosticEventPayload } from "../infra/diagnostic-events.js"; export type { DiagnosticTraceContext } from "../infra/diagnostic-trace-context.js"; -export { emitDiagnosticEvent, onDiagnosticEvent } from "../infra/diagnostic-events.js"; +export { + emitDiagnosticEvent, + onDiagnosticEvent, + onInternalDiagnosticEvent, +} from "../infra/diagnostic-events.js"; export { createChildDiagnosticTraceContext, createDiagnosticTraceContext,