diff --git a/CHANGELOG.md b/CHANGELOG.md index 99db8700713..dd57b9a4bfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,6 @@ Docs: https://docs.openclaw.ai - TUI/dependencies: remove direct `cli-highlight` usage from the OpenClaw TUI code-block renderer, keeping themed code coloring without the extra root dependency. Thanks @vincentkoc. - Diagnostics/OTEL: export run, model-call, and tool-execution diagnostic lifecycle events as OTEL spans without retaining live span state. Thanks @vincentkoc. - Providers/Anthropic Vertex: move the Vertex SDK runtime behind the bundled provider plugin so core no longer owns that provider-specific dependency. Thanks @vincentkoc. -- Plugins/web fetch: move local Readability extraction into a bundled plugin so core no longer owns the Readability and DOM parser dependencies. Thanks @vincentkoc. - Plugins/activation: expose activation plan reasons and a richer plan API so callers can inspect why a plugin was selected while preserving existing id-list activation behavior. (#70943) Thanks @vincentkoc. - Plugins/source metadata: expose normalized install-source facts on provider and channel catalogs so onboarding can explain npm pinning, integrity state, and local availability before runtime loads. (#70951) Thanks @vincentkoc. - Plugins/catalog: pin the official external WeCom channel source to an exact npm release plus dist integrity, with a guard that official external sources stay integrity-pinned. (#70997) Thanks @vincentkoc. @@ -69,6 +68,7 @@ Docs: https://docs.openclaw.ai - Config/plugins: accept `plugins.entries.*.hooks.allowConversationAccess` in validation, generated schema metadata, and plugin policy inspection so trusted external plugins can enable conversation-access hooks such as `agent_end` without local schema patches. Fixes #71215. (#71221) Thanks @BillChirico. - Codex harness/models: keep legacy `codex/*` harness shorthand out of model picker and `/models` choice surfaces while migrating primary legacy refs to canonical `openai/*` plus explicit Codex harness config. (#71193) Thanks @vincentkoc. - Plugins/runtime deps: respect explicit plugin and channel disablement when repairing bundled runtime dependencies, so doctor and health checks no longer install deps for disabled configured channels. +- Diagnostics/OTEL: export logs through bounded diagnostic log events instead of a direct logger transport hook. Thanks @vincentkoc. - WhatsApp/plugins: support an explicit opt-in for inbound `message_received` hooks with canonical channel, conversation, session, and sender fields. Thanks @vincentkoc. - Channels/setup: keep bundled setup entries dependency-light and stage WhatsApp runtime dependencies only when login actually needs them, so first-run setup and read-only channel discovery avoid unused SDK imports. - Slack/HTTP: keep webhook handlers in a process-global registry so HTTP mode survives plugin-loader/native-import splits and `/slack/events/` no longer returns 404 after logging as active. Fixes #67955, #46245, and #46246. Thanks @chrisabad and @cesararevalo. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 8487f07dbe5..b6e8b620918 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -a2005473b59e26995f563e8bd3f1c6782bd3ee193a65dd3255160d40e505fa4d plugin-sdk-api-baseline.json -5778c0bb6cfc85c1b0665ec431269bcb43b8891720d0b30467681420ab411721 plugin-sdk-api-baseline.jsonl +c1501890ed21debd56a119381101e6fec9bde0a4deae94bddcb464369cf0e51a plugin-sdk-api-baseline.json +10ee97c1acf0b2a725f0f89b357146ce769ed39f46cccd3e40d0a5d00571c599 plugin-sdk-api-baseline.jsonl diff --git a/extensions/diagnostics-otel/src/service.test.ts b/extensions/diagnostics-otel/src/service.test.ts index ebae5360b5f..f89ec32b863 100644 --- a/extensions/diagnostics-otel/src/service.test.ts +++ b/extensions/diagnostics-otel/src/service.test.ts @@ -1,7 +1,5 @@ import { beforeEach, describe, expect, test, vi } from "vitest"; -const registerLogTransportMock = vi.hoisted(() => vi.fn()); - const telemetryState = vi.hoisted(() => { const counters = new Map }>(); const histograms = new Map }>(); @@ -113,14 +111,6 @@ vi.mock("@opentelemetry/semantic-conventions", () => ({ ATTR_SERVICE_NAME: "service.name", })); -vi.mock("../api.js", async () => { - const actual = await vi.importActual("../api.js"); - return { - ...actual, - registerLogTransport: registerLogTransportMock, - }; -}); - import type { OpenClawPluginServiceContext } from "../api.js"; import { emitDiagnosticEvent } from "../api.js"; import { createDiagnosticsOtelService } from "./service.js"; @@ -132,6 +122,7 @@ const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736"; const SPAN_ID = "00f067aa0ba902b7"; const CHILD_SPAN_ID = "1111111111111111"; const GRANDCHILD_SPAN_ID = "2222222222222222"; +const PROTO_KEY = "__proto__"; function createLogger() { return { @@ -174,26 +165,17 @@ function createTraceOnlyContext(endpoint: string): OpenClawPluginServiceContext return createOtelContext(endpoint, { traces: true }); } -type RegisteredLogTransport = (logObj: Record) => void; -function setupRegisteredTransports() { - const registeredTransports: RegisteredLogTransport[] = []; - const stopTransports: ReturnType[] = []; - registerLogTransportMock.mockImplementation((transport) => { - registeredTransports.push(transport); - const stopTransport = vi.fn(); - stopTransports.push(stopTransport); - return stopTransport; - }); - return { registeredTransports, stopTransports }; -} - -async function emitAndCaptureLog(logObj: Record) { - const { registeredTransports } = setupRegisteredTransports(); +async function emitAndCaptureLog( + event: Omit[0], { type: "log.record" }>, "type">, +) { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true }); await service.start(ctx); - expect(registeredTransports).toHaveLength(1); - registeredTransports[0]?.(logObj); + emitDiagnosticEvent({ + type: "log.record", + ...event, + }); + await flushDiagnosticEvents(); expect(logEmit).toHaveBeenCalled(); const emitCall = logEmit.mock.calls[0]?.[0]; await service.stop?.(ctx); @@ -215,15 +197,12 @@ describe("diagnostics-otel service", () => { telemetryState.meter.createHistogram.mockClear(); sdkStart.mockClear(); sdkShutdown.mockClear(); - logEmit.mockClear(); + logEmit.mockReset(); logShutdown.mockClear(); traceExporterCtor.mockClear(); - registerLogTransportMock.mockReset(); }); test("records message-flow metrics and spans", async () => { - const { registeredTransports } = setupRegisteredTransports(); - const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true }); await service.start(ctx); @@ -289,29 +268,24 @@ describe("diagnostics-otel service", () => { expect(spanNames).toContain("openclaw.message.processed"); expect(spanNames).toContain("openclaw.session.stuck"); - expect(registerLogTransportMock).toHaveBeenCalledTimes(1); - expect(registeredTransports).toHaveLength(1); - registeredTransports[0]?.({ - 0: '{"subsystem":"diagnostic"}', - 1: "hello", - _meta: { logLevelName: "INFO", date: new Date() }, + emitDiagnosticEvent({ + type: "log.record", + level: "INFO", + message: "hello", + attributes: { subsystem: "diagnostic" }, }); + await flushDiagnosticEvents(); expect(logEmit).toHaveBeenCalled(); await service.stop?.(ctx); }); test("restarts without retaining prior listeners or log transports", async () => { - const { registeredTransports, stopTransports } = setupRegisteredTransports(); - const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true }); await service.start(ctx); await service.start(ctx); - expect(registerLogTransportMock).toHaveBeenCalledTimes(2); - expect(registeredTransports).toHaveLength(2); - expect(stopTransports[0]).toHaveBeenCalledTimes(1); expect(logShutdown).toHaveBeenCalledTimes(1); expect(sdkShutdown).toHaveBeenCalledTimes(1); @@ -325,7 +299,6 @@ describe("diagnostics-otel service", () => { expect(telemetryState.tracer.startSpan).toHaveBeenCalledTimes(1); await service.stop?.(ctx); - expect(stopTransports[1]).toHaveBeenCalledTimes(1); expect(logShutdown).toHaveBeenCalledTimes(2); expect(sdkShutdown).toHaveBeenCalledTimes(2); @@ -340,8 +313,6 @@ describe("diagnostics-otel service", () => { }); test("tears down active handles when restarted with diagnostics disabled", async () => { - const { stopTransports } = setupRegisteredTransports(); - const service = createDiagnosticsOtelService(); const enabledCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, @@ -354,7 +325,6 @@ describe("diagnostics-otel service", () => { config: { diagnostics: { enabled: false } }, }); - expect(stopTransports[0]).toHaveBeenCalledTimes(1); expect(logShutdown).toHaveBeenCalledTimes(1); expect(sdkShutdown).toHaveBeenCalledTimes(1); @@ -410,8 +380,8 @@ describe("diagnostics-otel service", () => { test("redacts sensitive data from log messages before export", async () => { const emitCall = await emitAndCaptureLog({ - 0: "Using API key sk-1234567890abcdef1234567890abcdef", - _meta: { logLevelName: "INFO", date: new Date() }, + level: "INFO", + message: "Using API key sk-1234567890abcdef1234567890abcdef", }); expect(emitCall?.body).not.toContain("sk-1234567890abcdef1234567890abcdef"); @@ -421,9 +391,11 @@ describe("diagnostics-otel service", () => { test("redacts sensitive data from log attributes before export", async () => { const emitCall = await emitAndCaptureLog({ - 0: '{"token":"ghp_abcdefghijklmnopqrstuvwxyz123456"}', // pragma: allowlist secret - 1: "auth configured", - _meta: { logLevelName: "DEBUG", date: new Date() }, + level: "DEBUG", + message: "auth configured", + attributes: { + token: "ghp_abcdefghijklmnopqrstuvwxyz123456", // pragma: allowlist secret + }, }); const tokenAttr = emitCall?.attributes?.["openclaw.token"]; @@ -435,16 +407,16 @@ describe("diagnostics-otel service", () => { test("attaches diagnostic trace context to exported logs", async () => { const emitCall = await emitAndCaptureLog({ - 0: '{"subsystem":"diagnostic"}', - 1: { - trace: { - traceId: TRACE_ID, - spanId: SPAN_ID, - traceFlags: "01", - }, + level: "INFO", + message: "traceable log", + attributes: { + subsystem: "diagnostic", + }, + trace: { + traceId: TRACE_ID, + spanId: SPAN_ID, + traceFlags: "01", }, - 2: "traceable log", - _meta: { logLevelName: "INFO", date: new Date() }, }); expect(emitCall?.attributes).toMatchObject({ @@ -473,6 +445,99 @@ describe("diagnostics-otel service", () => { }); }); + test("bounds plugin-emitted log attributes and omits source paths", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true }); + await service.start(ctx); + + const attributes = Object.create(null) as Record; + attributes.good = "y".repeat(6000); + attributes["bad key"] = "drop-me"; + attributes[PROTO_KEY] = "pollute"; + attributes["constructor"] = "pollute"; + attributes["prototype"] = "pollute"; + attributes["sk-1234567890abcdef1234567890abcdef"] = "secret-key"; // pragma: allowlist secret + + emitDiagnosticEvent({ + type: "log.record", + level: "INFO", + message: "x".repeat(6000), + attributes, + code: { + filepath: "/Users/alice/openclaw/src/private.ts", + line: 42, + functionName: "handler", + location: "/Users/alice/openclaw/src/private.ts:42", + }, + } as Parameters[0]); + await flushDiagnosticEvents(); + + const emitCall = logEmit.mock.calls[0]?.[0]; + expect(emitCall?.body.length).toBeLessThanOrEqual(4200); + expect(emitCall?.attributes).toMatchObject({ + "openclaw.good": expect.stringMatching(/^y+/), + "code.lineno": 42, + "code.function": "handler", + }); + expect(String(emitCall?.attributes?.["openclaw.good"]).length).toBeLessThanOrEqual(4200); + expect(Object.hasOwn(emitCall?.attributes ?? {}, `openclaw.${PROTO_KEY}`)).toBe(false); + expect(Object.hasOwn(emitCall?.attributes ?? {}, "openclaw.constructor")).toBe(false); + expect(Object.hasOwn(emitCall?.attributes ?? {}, "openclaw.prototype")).toBe(false); + expect( + Object.hasOwn( + emitCall?.attributes ?? {}, + "openclaw.sk-1234567890abcdef1234567890abcdef", // pragma: allowlist secret + ), + ).toBe(false); + expect(emitCall?.attributes).toEqual( + expect.not.objectContaining({ + "openclaw.bad key": expect.anything(), + "code.filepath": expect.anything(), + "openclaw.code.location": expect.anything(), + }), + ); + await service.stop?.(ctx); + }); + + test("rate-limits repeated log export failure reports", async () => { + const service = createDiagnosticsOtelService(); + const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true }); + const nowSpy = vi.spyOn(Date, "now").mockReturnValue(1_000); + logEmit.mockImplementation(() => { + throw new Error("export failed"); + }); + try { + await service.start(ctx); + + emitDiagnosticEvent({ + type: "log.record", + level: "ERROR", + message: "first failing log", + }); + emitDiagnosticEvent({ + type: "log.record", + level: "ERROR", + message: "second failing log", + }); + await flushDiagnosticEvents(); + + expect(ctx.logger.error).toHaveBeenCalledTimes(1); + + nowSpy.mockReturnValue(62_000); + emitDiagnosticEvent({ + type: "log.record", + level: "ERROR", + message: "third failing log", + }); + await flushDiagnosticEvents(); + + expect(ctx.logger.error).toHaveBeenCalledTimes(2); + } finally { + nowSpy.mockRestore(); + await service.stop?.(ctx); + } + }); + test("does not parent diagnostic event spans from plugin-emittable trace context", async () => { const service = createDiagnosticsOtelService(); const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true }); diff --git a/extensions/diagnostics-otel/src/service.ts b/extensions/diagnostics-otel/src/service.ts index 75dbac94bd4..2c17ac5c92a 100644 --- a/extensions/diagnostics-otel/src/service.ts +++ b/extensions/diagnostics-otel/src/service.ts @@ -24,9 +24,8 @@ import { isValidDiagnosticSpanId, isValidDiagnosticTraceFlags, isValidDiagnosticTraceId, - onDiagnosticEvent, + onInternalDiagnosticEvent, redactSensitiveText, - registerLogTransport, } from "../api.js"; const DEFAULT_SERVICE_NAME = "openclaw"; @@ -41,6 +40,13 @@ const DROPPED_OTEL_ATTRIBUTE_KEYS = new Set([ "openclaw.traceId", ]); const LOW_CARDINALITY_VALUE_RE = /^[A-Za-z0-9_.:-]{1,120}$/u; +const MAX_OTEL_LOG_BODY_CHARS = 4 * 1024; +const MAX_OTEL_LOG_ATTRIBUTE_COUNT = 64; +const MAX_OTEL_LOG_ATTRIBUTE_VALUE_CHARS = 4 * 1024; +const LOG_RECORD_EXPORT_FAILURE_REPORT_INTERVAL_MS = 60_000; +const OTEL_LOG_RAW_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,64}$/u; +const OTEL_LOG_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,96}$/u; +const BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS = new Set(["__proto__", "prototype", "constructor"]); function normalizeEndpoint(endpoint?: string): string | undefined { const trimmed = endpoint?.trim(); @@ -105,6 +111,44 @@ function genAiOperationName(api: string | undefined): "chat" | "text_completion" return api === "completions" ? "text_completion" : "chat"; } +function clampOtelLogText(value: string, maxChars: number): string { + return value.length > maxChars ? `${value.slice(0, maxChars)}...(truncated)` : value; +} + +function normalizeOtelLogString(value: string, maxChars: number): string { + return redactSensitiveText(clampOtelLogText(value, maxChars)); +} + +function assignOtelLogAttribute( + attributes: Record, + key: string, + value: string | number | boolean, +): void { + if (Object.keys(attributes).length >= MAX_OTEL_LOG_ATTRIBUTE_COUNT) { + return; + } + if (BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS.has(key)) { + return; + } + if (redactSensitiveText(key) !== key) { + return; + } + if (!OTEL_LOG_ATTRIBUTE_KEY_RE.test(key)) { + return; + } + if (typeof value === "string") { + attributes[key] = normalizeOtelLogString(value, MAX_OTEL_LOG_ATTRIBUTE_VALUE_CHARS); + return; + } + if (typeof value === "number" && Number.isFinite(value)) { + attributes[key] = value; + return; + } + if (typeof value === "boolean") { + attributes[key] = value; + } +} + function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined { if (!value || typeof value !== "object" || Array.isArray(value)) { return undefined; @@ -130,32 +174,32 @@ function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefin }; } -function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined { - const direct = normalizeTraceContext(value); - if (direct) { - return direct; +function assignOtelLogEventAttributes( + attributes: Record, + eventAttributes: Record | undefined, +): void { + if (!eventAttributes) { + return; } - if (!value || typeof value !== "object" || Array.isArray(value)) { - return undefined; - } - return normalizeTraceContext((value as { trace?: unknown }).trace); -} - -function findLogTraceContext( - bindings: Record | undefined, - numericArgs: unknown[], -): DiagnosticTraceContext | undefined { - const fromBindings = extractTraceContext(bindings); - if (fromBindings) { - return fromBindings; - } - for (const arg of numericArgs) { - const fromArg = extractTraceContext(arg); - if (fromArg) { - return fromArg; + for (const rawKey in eventAttributes) { + if (Object.keys(attributes).length >= MAX_OTEL_LOG_ATTRIBUTE_COUNT) { + break; } + if (!Object.hasOwn(eventAttributes, rawKey)) { + continue; + } + const key = rawKey.trim(); + if (BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS.has(key)) { + continue; + } + if (redactSensitiveText(key) !== key) { + continue; + } + if (!OTEL_LOG_RAW_ATTRIBUTE_KEY_RE.test(key)) { + continue; + } + assignOtelLogAttribute(attributes, `openclaw.${key}`, eventAttributes[rawKey]); } - return undefined; } function traceFlagsToOtel(traceFlags: string | undefined): TraceFlags { @@ -199,22 +243,18 @@ function addTraceAttributes( export function createDiagnosticsOtelService(): OpenClawPluginService { let sdk: NodeSDK | null = null; let logProvider: LoggerProvider | null = null; - let stopLogTransport: (() => void) | null = null; let unsubscribe: (() => void) | null = null; const stopStarted = async () => { const currentUnsubscribe = unsubscribe; - const currentStopLogTransport = stopLogTransport; const currentLogProvider = logProvider; const currentSdk = sdk; unsubscribe = null; - stopLogTransport = null; logProvider = null; sdk = null; currentUnsubscribe?.(); - currentStopLogTransport?.(); if (currentLogProvider) { await currentLogProvider.shutdown().catch(() => undefined); } @@ -402,7 +442,11 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { }, ); + let recordLogRecord: + | ((evt: Extract) => void) + | undefined; if (logsEnabled) { + let logRecordExportFailureLastReportedAt = Number.NEGATIVE_INFINITY; const logExporter = new OTLPLogExporter({ ...(logUrl ? { url: logUrl } : {}), ...(headers ? { headers } : {}), @@ -418,120 +462,54 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { processors: [logProcessor], }); const otelLogger = logProvider.getLogger("openclaw"); - - stopLogTransport = registerLogTransport((logObj) => { + recordLogRecord = (evt) => { try { - const safeStringify = (value: unknown) => { - try { - return JSON.stringify(value); - } catch { - return String(value); - } - }; - const meta = (logObj as Record)._meta as - | { - logLevelName?: string; - date?: Date; - name?: string; - parentNames?: string[]; - path?: { - filePath?: string; - fileLine?: string; - fileColumn?: string; - filePathWithLine?: string; - method?: string; - }; - } - | undefined; - const logLevelName = meta?.logLevelName ?? "INFO"; + const logLevelName = evt.level || "INFO"; const severityNumber = logSeverityMap[logLevelName] ?? (9 as SeverityNumber); + const attributes = Object.create(null) as Record; + assignOtelLogAttribute(attributes, "openclaw.log.level", logLevelName); + if (evt.loggerName) { + assignOtelLogAttribute(attributes, "openclaw.logger", evt.loggerName); + } + if (evt.loggerParents?.length) { + assignOtelLogAttribute( + attributes, + "openclaw.logger.parents", + evt.loggerParents.join("."), + ); + } + assignOtelLogEventAttributes(attributes, evt.attributes); + if (evt.code?.line) { + assignOtelLogAttribute(attributes, "code.lineno", evt.code.line); + } + if (evt.code?.functionName) { + assignOtelLogAttribute(attributes, "code.function", evt.code.functionName); + } + addTraceAttributes(attributes, evt.trace); - const numericArgs = Object.entries(logObj) - .filter(([key]) => /^\d+$/.test(key)) - .toSorted((a, b) => Number(a[0]) - Number(b[0])) - .map(([, value]) => value); - - let bindings: Record | undefined; - if (typeof numericArgs[0] === "string" && numericArgs[0].trim().startsWith("{")) { - try { - const parsed = JSON.parse(numericArgs[0]); - if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { - bindings = parsed as Record; - numericArgs.shift(); - } - } catch { - // ignore malformed json bindings - } - } - const traceContext = findLogTraceContext(bindings, numericArgs); - - let message = ""; - if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") { - message = String(numericArgs.pop()); - } else if (numericArgs.length === 1) { - message = safeStringify(numericArgs[0]); - numericArgs.length = 0; - } - if (!message) { - message = "log"; - } - - const attributes: Record = { - "openclaw.log.level": logLevelName, - }; - if (meta?.name) { - attributes["openclaw.logger"] = meta.name; - } - if (meta?.parentNames?.length) { - attributes["openclaw.logger.parents"] = meta.parentNames.join("."); - } - if (bindings) { - for (const [key, value] of Object.entries(bindings)) { - if ( - typeof value === "string" || - typeof value === "number" || - typeof value === "boolean" - ) { - attributes[`openclaw.${key}`] = value; - } else if (value != null) { - attributes[`openclaw.${key}`] = safeStringify(value); - } - } - } - if (numericArgs.length > 0) { - attributes["openclaw.log.args"] = safeStringify(numericArgs); - } - if (meta?.path?.filePath) { - attributes["code.filepath"] = meta.path.filePath; - } - if (meta?.path?.fileLine) { - attributes["code.lineno"] = Number(meta.path.fileLine); - } - if (meta?.path?.method) { - attributes["code.function"] = meta.path.method; - } - if (meta?.path?.filePathWithLine) { - attributes["openclaw.code.location"] = meta.path.filePathWithLine; - } - addTraceAttributes(attributes, traceContext); - - // OTLP can leave the host boundary, so redact string fields before export. const logRecord: LogRecord = { - body: redactSensitiveText(message), + body: normalizeOtelLogString(evt.message || "log", MAX_OTEL_LOG_BODY_CHARS), severityText: logLevelName, severityNumber, attributes: redactOtelAttributes(attributes), - timestamp: meta?.date ?? new Date(), + timestamp: evt.ts, }; - const logContext = contextForTraceContext(traceContext); + const logContext = contextForTraceContext(evt.trace); if (logContext) { logRecord.context = logContext; } otelLogger.emit(logRecord); } catch (err) { - ctx.logger.error(`diagnostics-otel: log transport failed: ${formatError(err)}`); + const now = Date.now(); + if ( + now - logRecordExportFailureLastReportedAt >= + LOG_RECORD_EXPORT_FAILURE_REPORT_INTERVAL_MS + ) { + logRecordExportFailureLastReportedAt = now; + ctx.logger.error(`diagnostics-otel: log record export failed: ${formatError(err)}`); + } } - }); + }; } const spanWithDuration = ( @@ -979,7 +957,7 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { queueDepthHistogram.record(evt.queued, { "openclaw.channel": "heartbeat" }); }; - unsubscribe = onDiagnosticEvent((evt: DiagnosticEventPayload) => { + unsubscribe = onInternalDiagnosticEvent((evt: DiagnosticEventPayload) => { try { switch (evt.type) { case "model.usage": @@ -1033,6 +1011,9 @@ export function createDiagnosticsOtelService(): OpenClawPluginService { case "tool.execution.error": recordToolExecutionError(evt); return; + case "log.record": + recordLogRecord?.(evt); + return; case "tool.loop": case "tool.execution.started": case "run.started": diff --git a/src/infra/diagnostic-events.test.ts b/src/infra/diagnostic-events.test.ts index ca8829b62ca..dfdd50624b4 100644 --- a/src/infra/diagnostic-events.test.ts +++ b/src/infra/diagnostic-events.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { emitDiagnosticEvent, isDiagnosticsEnabled, + onInternalDiagnosticEvent, onDiagnosticEvent, resetDiagnosticEventsForTest, setDiagnosticsEnabledForProcess, @@ -137,6 +138,27 @@ describe("diagnostic-events", () => { expect(events).toEqual(["tool.execution.started", "model.call.started"]); }); + it("keeps log records off the public diagnostic event stream", async () => { + const publicEvents: string[] = []; + const internalEvents: string[] = []; + onDiagnosticEvent((event) => { + publicEvents.push(event.type); + }); + onInternalDiagnosticEvent((event) => { + internalEvents.push(event.type); + }); + + emitDiagnosticEvent({ + type: "log.record", + level: "INFO", + message: "private log", + }); + + await new Promise((resolve) => setImmediate(resolve)); + expect(publicEvents).toEqual([]); + expect(internalEvents).toEqual(["log.record"]); + }); + it("skips event enrichment and subscribers when diagnostics are disabled", () => { const nowSpy = vi.spyOn(Date, "now"); const seen: string[] = []; diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index b3739ba6a55..d1a29993487 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -269,6 +269,19 @@ export type DiagnosticPayloadLargeEvent = DiagnosticBaseEvent & { reason?: string; }; +export type DiagnosticLogRecordEvent = DiagnosticBaseEvent & { + type: "log.record"; + level: string; + message: string; + loggerName?: string; + loggerParents?: string[]; + attributes?: Record; + code?: { + line?: number; + functionName?: string; + }; +}; + export type DiagnosticEventPayload = | DiagnosticUsageEvent | DiagnosticWebhookReceivedEvent @@ -293,7 +306,8 @@ export type DiagnosticEventPayload = | DiagnosticModelCallErrorEvent | DiagnosticMemorySampleEvent | DiagnosticMemoryPressureEvent - | DiagnosticPayloadLargeEvent; + | DiagnosticPayloadLargeEvent + | DiagnosticLogRecordEvent; export type DiagnosticEventInput = DiagnosticEventPayload extends infer Event ? Event extends DiagnosticEventPayload @@ -318,6 +332,7 @@ const ASYNC_DIAGNOSTIC_EVENT_TYPES = new Set([ "model.call.started", "model.call.completed", "model.call.error", + "log.record", ]); function getDiagnosticEventsState(): DiagnosticEventsGlobalState { @@ -424,7 +439,9 @@ export function emitDiagnosticEvent(event: DiagnosticEventInput) { dispatchDiagnosticEvent(state, enriched); } -export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => void): () => void { +export function onInternalDiagnosticEvent( + listener: (evt: DiagnosticEventPayload) => void, +): () => void { const state = getDiagnosticEventsState(); state.listeners.add(listener); return () => { @@ -432,6 +449,15 @@ export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => voi }; } +export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => void): () => void { + return onInternalDiagnosticEvent((event) => { + if (event.type === "log.record") { + return; + } + listener(event); + }); +} + export function resetDiagnosticEventsForTest(): void { const state = getDiagnosticEventsState(); state.enabled = true; diff --git a/src/logging/diagnostic-log-events.test.ts b/src/logging/diagnostic-log-events.test.ts new file mode 100644 index 00000000000..080038cfee5 --- /dev/null +++ b/src/logging/diagnostic-log-events.test.ts @@ -0,0 +1,143 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + onInternalDiagnosticEvent, + resetDiagnosticEventsForTest, + type DiagnosticEventPayload, +} from "../infra/diagnostic-events.js"; +import { getChildLogger, resetLogger, setLoggerOverride } from "./logger.js"; + +const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736"; +const SPAN_ID = "00f067aa0ba902b7"; +const PROTO_KEY = "__proto__"; + +function flushDiagnosticEvents() { + return new Promise((resolve) => setImmediate(resolve)); +} + +beforeEach(() => { + resetDiagnosticEventsForTest(); + resetLogger(); + setLoggerOverride({ level: "info" }); +}); + +afterEach(() => { + resetDiagnosticEventsForTest(); + setLoggerOverride(null); + resetLogger(); +}); + +describe("diagnostic log events", () => { + it("emits structured log records through diagnostics", async () => { + const received: Array> = []; + const unsubscribe = onInternalDiagnosticEvent((evt) => { + if (evt.type === "log.record") { + received.push(evt); + } + }); + + const logger = getChildLogger({ + subsystem: "diagnostic", + trace: { traceId: TRACE_ID, spanId: SPAN_ID }, + }); + logger.info({ runId: "run-1" }, "hello diagnostic logs"); + await flushDiagnosticEvents(); + unsubscribe(); + + expect(received).toHaveLength(1); + expect(received[0]).toMatchObject({ + type: "log.record", + level: "INFO", + message: "hello diagnostic logs", + attributes: { + subsystem: "diagnostic", + runId: "run-1", + }, + trace: { + traceId: TRACE_ID, + spanId: SPAN_ID, + }, + }); + }); + + it("redacts and bounds internal log records before diagnostic emission", async () => { + const received: Array> = []; + const unsubscribe = onInternalDiagnosticEvent((evt) => { + if (evt.type === "log.record") { + received.push(evt); + } + }); + + const secret = "ghp_abcdefghijklmnopqrstuvwxyz123456"; // pragma: allowlist secret + const logger = getChildLogger({ + subsystem: "diagnostic", + trace: { traceId: TRACE_ID, spanId: SPAN_ID }, + }); + logger.info( + { + token: secret, + longValue: "x".repeat(5000), + nested: { secret }, + "bad key": "drop-me", + }, + { raw: secret }, + `secret=${secret} ${"y".repeat(5000)}`, + ); + await flushDiagnosticEvents(); + unsubscribe(); + + expect(received).toHaveLength(1); + const [event] = received; + expect(event.message).not.toContain(secret); + expect(event.message.length).toBeLessThanOrEqual(4200); + expect(event.attributes?.token).not.toBe(secret); + expect(String(event.attributes?.token)).toContain("…"); + expect(String(event.attributes?.longValue).length).toBeLessThanOrEqual(2100); + expect(event.attributes).toEqual( + expect.not.objectContaining({ + nested: expect.anything(), + "bad key": expect.anything(), + }), + ); + expect(event).toEqual( + expect.not.objectContaining({ + argsJson: expect.anything(), + }), + ); + }); + + it("drops sensitive, blocked, and excess log attribute keys without copying large objects", async () => { + const received: Array> = []; + const unsubscribe = onInternalDiagnosticEvent((evt) => { + if (evt.type === "log.record") { + received.push(evt); + } + }); + + const structured = Object.create(null) as Record; + structured.safe = "ok"; + structured[PROTO_KEY] = "pollute"; + structured["constructor"] = "pollute"; + structured["prototype"] = "pollute"; + structured["sk-1234567890abcdef1234567890abcdef"] = "secret-key"; // pragma: allowlist secret + for (let index = 0; index < 1000; index += 1) { + structured[`extra${index}`] = index; + } + + const logger = getChildLogger({ + subsystem: "diagnostic", + trace: { traceId: TRACE_ID, spanId: SPAN_ID }, + }); + logger.info(structured, "bounded attrs"); + await flushDiagnosticEvents(); + unsubscribe(); + + expect(received).toHaveLength(1); + expect(received[0].attributes?.safe).toBe("ok"); + expect(Object.keys(received[0].attributes ?? {})).toHaveLength(32); + const attributes = received[0].attributes ?? {}; + expect(Object.hasOwn(attributes, PROTO_KEY)).toBe(false); + expect(Object.hasOwn(attributes, "constructor")).toBe(false); + expect(Object.hasOwn(attributes, "prototype")).toBe(false); + expect(Object.hasOwn(attributes, "sk-1234567890abcdef1234567890abcdef")).toBe(false); // pragma: allowlist secret + }); +}); diff --git a/src/logging/diagnostic-stability.ts b/src/logging/diagnostic-stability.ts index d7e3480e861..40051a6ec9c 100644 --- a/src/logging/diagnostic-stability.ts +++ b/src/logging/diagnostic-stability.ts @@ -275,6 +275,10 @@ function sanitizeDiagnosticEvent(event: DiagnosticEventPayload): DiagnosticStabi record.durationMs = event.durationMs; assignReasonCode(record, event.errorCategory); break; + case "log.record": + record.level = event.level; + record.source = event.loggerName; + break; case "diagnostic.memory.sample": record.memory = copyMemory(event.memory); break; diff --git a/src/logging/logger.ts b/src/logging/logger.ts index a6b1f2d5271..7c401324806 100644 --- a/src/logging/logger.ts +++ b/src/logging/logger.ts @@ -2,6 +2,14 @@ import fs from "node:fs"; import path from "node:path"; import { Logger as TsLogger } from "tslog"; import type { OpenClawConfig } from "../config/types.js"; +import { emitDiagnosticEvent } from "../infra/diagnostic-events.js"; +import { + isValidDiagnosticSpanId, + isValidDiagnosticTraceFlags, + isValidDiagnosticTraceId, + type DiagnosticTraceContext, +} from "../infra/diagnostic-trace-context.js"; +import { isBlockedObjectKey } from "../infra/prototype-keys.js"; import { POSIX_OPENCLAW_TMP_DIR, resolvePreferredOpenClawTmpDir, @@ -10,6 +18,7 @@ import { readLoggingConfig, shouldSkipMutatingLoggingConfigRead } from "./config import { resolveEnvLogLevelOverride } from "./env-log-level.js"; import { type LogLevel, levelToMinLevel, normalizeLogLevel } from "./levels.js"; import { resolveNodeRequireFromMeta } from "./node-require.js"; +import { redactSensitiveText } from "./redact.js"; import { loggingState } from "./state.js"; import { formatTimestamp } from "./timestamps.js"; import type { LoggerSettings } from "./types.js"; @@ -64,6 +73,20 @@ export type LogTransport = (logObj: LogTransportRecord) => void; const externalTransports = new Set(); +type DiagnosticLogCode = { + line?: number; + functionName?: string; +}; + +const MAX_DIAGNOSTIC_LOG_BINDINGS_JSON_CHARS = 8 * 1024; +const MAX_DIAGNOSTIC_LOG_MESSAGE_CHARS = 4 * 1024; +const MAX_DIAGNOSTIC_LOG_ATTRIBUTE_COUNT = 32; +const MAX_DIAGNOSTIC_LOG_ATTRIBUTE_VALUE_CHARS = 2 * 1024; +const MAX_DIAGNOSTIC_LOG_NAME_CHARS = 120; +const DIAGNOSTIC_LOG_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,64}$/u; + +type DiagnosticLogAttributes = Record; + function attachExternalTransport(logger: TsLogger, transport: LogTransport): void { logger.attachTransport((logObj: LogObj) => { if (!externalTransports.has(transport)) { @@ -77,6 +100,248 @@ function attachExternalTransport(logger: TsLogger, transport: LogTranspo }); } +function clampDiagnosticLogText(value: string, maxChars: number): string { + return value.length > maxChars ? `${value.slice(0, maxChars)}...(truncated)` : value; +} + +function sanitizeDiagnosticLogText(value: string, maxChars: number): string { + return clampDiagnosticLogText( + redactSensitiveText(clampDiagnosticLogText(value, maxChars)), + maxChars, + ); +} + +function normalizeDiagnosticLogName(value: string | undefined): string | undefined { + if (!value || value.trim().startsWith("{")) { + return undefined; + } + const sanitized = sanitizeDiagnosticLogText(value.trim(), MAX_DIAGNOSTIC_LOG_NAME_CHARS); + return DIAGNOSTIC_LOG_ATTRIBUTE_KEY_RE.test(sanitized) ? sanitized : undefined; +} + +function assignDiagnosticLogAttribute( + attributes: DiagnosticLogAttributes, + state: { count: number }, + key: string, + value: unknown, +): void { + if (state.count >= MAX_DIAGNOSTIC_LOG_ATTRIBUTE_COUNT) { + return; + } + const normalizedKey = key.trim(); + if (isBlockedObjectKey(normalizedKey)) { + return; + } + if (redactSensitiveText(normalizedKey) !== normalizedKey) { + return; + } + if (!DIAGNOSTIC_LOG_ATTRIBUTE_KEY_RE.test(normalizedKey)) { + return; + } + if (typeof value === "string") { + attributes[normalizedKey] = sanitizeDiagnosticLogText( + value, + MAX_DIAGNOSTIC_LOG_ATTRIBUTE_VALUE_CHARS, + ); + state.count += 1; + return; + } + if (typeof value === "number" && Number.isFinite(value)) { + attributes[normalizedKey] = value; + state.count += 1; + return; + } + if (typeof value === "boolean") { + attributes[normalizedKey] = value; + state.count += 1; + } +} + +function addDiagnosticLogAttributesFrom( + attributes: DiagnosticLogAttributes, + state: { count: number }, + source: Record | undefined, +): void { + if (!source) { + return; + } + for (const key in source) { + if (state.count >= MAX_DIAGNOSTIC_LOG_ATTRIBUTE_COUNT) { + break; + } + if (!Object.hasOwn(source, key) || key === "trace") { + continue; + } + assignDiagnosticLogAttribute(attributes, state, key, source[key]); + } +} + +function isPlainLogRecordObject(value: unknown): value is Record { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return false; + } + const prototype = Object.getPrototypeOf(value); + return prototype === Object.prototype || prototype === null; +} + +function normalizeTraceContext(value: unknown): DiagnosticTraceContext | undefined { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + const candidate = value as Partial; + if (!isValidDiagnosticTraceId(candidate.traceId)) { + return undefined; + } + if (candidate.spanId !== undefined && !isValidDiagnosticSpanId(candidate.spanId)) { + return undefined; + } + if (candidate.parentSpanId !== undefined && !isValidDiagnosticSpanId(candidate.parentSpanId)) { + return undefined; + } + if (candidate.traceFlags !== undefined && !isValidDiagnosticTraceFlags(candidate.traceFlags)) { + return undefined; + } + return { + traceId: candidate.traceId, + ...(candidate.spanId ? { spanId: candidate.spanId } : {}), + ...(candidate.parentSpanId ? { parentSpanId: candidate.parentSpanId } : {}), + ...(candidate.traceFlags ? { traceFlags: candidate.traceFlags } : {}), + }; +} + +function extractTraceContext(value: unknown): DiagnosticTraceContext | undefined { + const direct = normalizeTraceContext(value); + if (direct) { + return direct; + } + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + return normalizeTraceContext((value as { trace?: unknown }).trace); +} + +function findLogTraceContext( + bindings: Record | undefined, + numericArgs: readonly unknown[], +): DiagnosticTraceContext | undefined { + const fromBindings = extractTraceContext(bindings); + if (fromBindings) { + return fromBindings; + } + for (const arg of numericArgs) { + const fromArg = extractTraceContext(arg); + if (fromArg) { + return fromArg; + } + } + return undefined; +} + +function buildDiagnosticLogRecord(logObj: LogTransportRecord) { + const meta = logObj._meta as + | { + logLevelName?: string; + date?: Date; + name?: string; + parentNames?: string[]; + path?: { + filePath?: string; + fileLine?: string; + fileColumn?: string; + filePathWithLine?: string; + method?: string; + }; + } + | undefined; + const numericArgs = Object.entries(logObj) + .filter(([key]) => /^\d+$/.test(key)) + .toSorted((a, b) => Number(a[0]) - Number(b[0])) + .map(([, value]) => value); + + let bindings: Record | undefined; + if ( + typeof numericArgs[0] === "string" && + numericArgs[0].length <= MAX_DIAGNOSTIC_LOG_BINDINGS_JSON_CHARS && + numericArgs[0].trim().startsWith("{") + ) { + try { + const parsed = JSON.parse(numericArgs[0]); + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { + bindings = parsed as Record; + numericArgs.shift(); + } + } catch { + // ignore malformed json bindings + } + } + + const trace = findLogTraceContext(bindings, numericArgs); + const structuredArg = numericArgs[0]; + const structuredBindings = isPlainLogRecordObject(structuredArg) ? structuredArg : undefined; + if (structuredBindings) { + numericArgs.shift(); + } + + let message = ""; + if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") { + message = sanitizeDiagnosticLogText( + String(numericArgs.pop()), + MAX_DIAGNOSTIC_LOG_MESSAGE_CHARS, + ); + } else if ( + numericArgs.length === 1 && + (typeof numericArgs[0] === "number" || typeof numericArgs[0] === "boolean") + ) { + message = String(numericArgs[0]); + numericArgs.length = 0; + } + if (!message) { + message = "log"; + } + + const attributes: DiagnosticLogAttributes = Object.create(null) as DiagnosticLogAttributes; + const attributeState = { count: 0 }; + addDiagnosticLogAttributesFrom(attributes, attributeState, bindings); + addDiagnosticLogAttributesFrom(attributes, attributeState, structuredBindings); + + const code: DiagnosticLogCode = {}; + if (meta?.path?.fileLine) { + const line = Number(meta.path.fileLine); + if (Number.isFinite(line)) { + code.line = line; + } + } + if (meta?.path?.method) { + code.functionName = sanitizeDiagnosticLogText(meta.path.method, MAX_DIAGNOSTIC_LOG_NAME_CHARS); + } + + const loggerName = normalizeDiagnosticLogName(meta?.name); + const loggerParents = meta?.parentNames + ?.map(normalizeDiagnosticLogName) + .filter((name): name is string => Boolean(name)); + + return { + type: "log.record" as const, + level: meta?.logLevelName ?? "INFO", + message, + ...(loggerName ? { loggerName } : {}), + ...(loggerParents?.length ? { loggerParents } : {}), + ...(Object.keys(attributes).length > 0 ? { attributes } : {}), + ...(Object.keys(code).length > 0 ? { code } : {}), + ...(trace ? { trace } : {}), + }; +} + +function attachDiagnosticEventTransport(logger: TsLogger): void { + logger.attachTransport((logObj: LogObj) => { + try { + emitDiagnosticEvent(buildDiagnosticLogRecord(logObj as LogTransportRecord)); + } catch { + // never block on logging failures + } + }); +} + function canUseSilentVitestFileLogFastPath(envLevel: LogLevel | undefined): boolean { return ( process.env.VITEST === "true" && @@ -159,6 +424,7 @@ function buildLogger(settings: ResolvedSettings): TsLogger { // Silent logging does not write files; skip all filesystem setup in this path. if (settings.level === "silent") { + attachDiagnosticEventTransport(logger); for (const transport of externalTransports) { attachExternalTransport(logger, transport); } @@ -203,6 +469,7 @@ function buildLogger(settings: ResolvedSettings): TsLogger { // never block on logging failures } }); + attachDiagnosticEventTransport(logger); for (const transport of externalTransports) { attachExternalTransport(logger, transport); } diff --git a/src/plugin-sdk/diagnostics-otel.ts b/src/plugin-sdk/diagnostics-otel.ts index 6b1e109a96d..684d273cd8c 100644 --- a/src/plugin-sdk/diagnostics-otel.ts +++ b/src/plugin-sdk/diagnostics-otel.ts @@ -3,7 +3,11 @@ export type { DiagnosticEventPayload } from "../infra/diagnostic-events.js"; export type { DiagnosticTraceContext } from "../infra/diagnostic-trace-context.js"; -export { emitDiagnosticEvent, onDiagnosticEvent } from "../infra/diagnostic-events.js"; +export { + emitDiagnosticEvent, + onDiagnosticEvent, + onInternalDiagnosticEvent, +} from "../infra/diagnostic-events.js"; export { createChildDiagnosticTraceContext, createDiagnosticTraceContext,