fix(diagnostics-otel): support preloaded sdk mode (#71450)

This commit is contained in:
Vincent Koc
2026-04-24 23:55:34 -07:00
committed by GitHub
parent 417b1c5507
commit 56eb1ffabf
5 changed files with 128 additions and 24 deletions

View File

@@ -1,4 +1,4 @@
import { beforeEach, describe, expect, test, vi } from "vitest";
import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
const telemetryState = vi.hoisted(() => {
const counters = new Map<string, { add: ReturnType<typeof vi.fn> }>();
@@ -125,6 +125,7 @@ const GRANDCHILD_SPAN_ID = "2222222222222222";
const PROTO_KEY = "__proto__";
const MAX_TEST_OTEL_CONTENT_ATTRIBUTE_CHARS = 4096;
const OTEL_TRUNCATED_SUFFIX_MAX_CHARS = 20;
// Snapshot of the preloaded-SDK env flag taken when this module loads, so the
// suite's afterEach hook can put the caller's value back after each test.
const ORIGINAL_OPENCLAW_OTEL_PRELOADED = process.env.OPENCLAW_OTEL_PRELOADED;
function createLogger() {
return {
@@ -194,6 +195,7 @@ function flushDiagnosticEvents() {
describe("diagnostics-otel service", () => {
beforeEach(() => {
delete process.env.OPENCLAW_OTEL_PRELOADED;
telemetryState.counters.clear();
telemetryState.histograms.clear();
telemetryState.spans.length = 0;
@@ -208,6 +210,14 @@ describe("diagnostics-otel service", () => {
traceExporterCtor.mockClear();
});
afterEach(() => {
  // Put OPENCLAW_OTEL_PRELOADED back to whatever it was when the module loaded,
  // so tests that set the flag do not leak it into later tests.
  if (ORIGINAL_OPENCLAW_OTEL_PRELOADED !== undefined) {
    process.env.OPENCLAW_OTEL_PRELOADED = ORIGINAL_OPENCLAW_OTEL_PRELOADED;
    return;
  }
  // The variable was originally unset: remove it rather than assigning
  // `undefined`, which Node would coerce to the string "undefined".
  delete process.env.OPENCLAW_OTEL_PRELOADED;
});
test("records message-flow metrics and spans", async () => {
const service = createDiagnosticsOtelService();
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
@@ -318,6 +328,84 @@ describe("diagnostics-otel service", () => {
expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();
});
test("uses a preloaded OpenTelemetry SDK without dropping diagnostic listeners", async () => {
  // Opt into preloaded mode before the service starts.
  process.env.OPENCLAW_OTEL_PRELOADED = "1";

  const otelService = createDiagnosticsOtelService();
  const context = createOtelContext(OTEL_TEST_ENDPOINT, {
    traces: true,
    metrics: true,
    logs: true,
  });
  await otelService.start(context);

  // In preloaded mode the service must not start its own SDK or build a trace exporter.
  expect(sdkStart).not.toHaveBeenCalled();
  expect(traceExporterCtor).not.toHaveBeenCalled();
  expect(context.logger.info).toHaveBeenCalledWith(
    "diagnostics-otel: using preloaded OpenTelemetry SDK",
  );

  // Diagnostic listeners must still be attached: emit one run event and one log record.
  emitDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
  });
  emitDiagnosticEvent({
    type: "log.record",
    level: "INFO",
    message: "preloaded log",
  });
  await flushDiagnosticEvents();

  // Metrics: the run duration histogram received the event.
  const expectedMetricAttributes = expect.objectContaining({
    "openclaw.provider": "openai",
    "openclaw.model": "gpt-5.4",
  });
  const runDurationHistogram = telemetryState.histograms.get("openclaw.run.duration_ms");
  expect(runDurationHistogram?.record).toHaveBeenCalledWith(100, expectedMetricAttributes);

  // Traces: a run span was started with the outcome attribute.
  const expectedSpanOptions = expect.objectContaining({
    attributes: expect.objectContaining({
      "openclaw.outcome": "completed",
    }),
  });
  expect(telemetryState.tracer.startSpan).toHaveBeenCalledWith(
    "openclaw.run",
    expectedSpanOptions,
    undefined,
  );

  // Logs: the log record was forwarded.
  expect(logEmit).toHaveBeenCalled();

  // Stopping shuts down the log pipeline once, but never an SDK the service did not start.
  await otelService.stop?.(context);
  expect(sdkShutdown).not.toHaveBeenCalled();
  expect(logShutdown).toHaveBeenCalledTimes(1);
});
test("honors disabled traces when an OpenTelemetry SDK is preloaded", async () => {
  // Preloaded mode with traces switched off: metrics must still flow, spans must not.
  process.env.OPENCLAW_OTEL_PRELOADED = "1";
  const service = createDiagnosticsOtelService();
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: false, metrics: true });
  await service.start(ctx);

  emitDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
  });
  // Drain pending diagnostic dispatch before asserting, matching the sibling
  // preloaded-SDK test. Without this, the negative startSpan assertion below
  // could pass simply because the listener has not run yet.
  await flushDiagnosticEvents();

  expect(sdkStart).not.toHaveBeenCalled();
  expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
    100,
    expect.objectContaining({
      "openclaw.provider": "openai",
    }),
  );
  expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();

  // The service never started an SDK, so it must not shut one down.
  await service.stop?.(ctx);
  expect(sdkShutdown).not.toHaveBeenCalled();
});
test("tears down active handles when restarted with diagnostics disabled", async () => {
const service = createDiagnosticsOtelService();
const enabledCtx = createOtelContext(OTEL_TEST_ENDPOINT, {

View File

@@ -49,6 +49,7 @@ const LOG_RECORD_EXPORT_FAILURE_REPORT_INTERVAL_MS = 60_000;
// Matches a whole string of 1-64 characters drawn from ASCII letters, digits,
// and `_`, `.`, `:`, `-`.
const OTEL_LOG_RAW_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,64}$/u;
// Same character set as above, with a 96-character upper bound.
// NOTE(review): presumably the longer limit allows for a prefix added to the raw key — confirm at the call site.
const OTEL_LOG_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,96}$/u;
// Key names listed as blocked for log attributes.
// NOTE(review): looks like a prototype-pollution guard — confirm where it is consulted.
const BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
// Name of the environment variable that hasPreloadedOtelSdk() compares against "1".
const PRELOADED_OTEL_SDK_ENV = "OPENCLAW_OTEL_PRELOADED";
type OtelContentCapturePolicy = {
inputMessages: boolean;
@@ -164,6 +165,10 @@ function resolveContentCapturePolicy(value: unknown): OtelContentCapturePolicy {
};
}
/**
 * Reports whether the process has been flagged as running with an
 * OpenTelemetry SDK that was set up outside this service.
 *
 * @returns `true` only when the environment variable named by
 *   `PRELOADED_OTEL_SDK_ENV` is exactly the string `"1"`; any other value,
 *   or an unset variable, yields `false`.
 */
function hasPreloadedOtelSdk(): boolean {
  const preloadedFlag = process.env[PRELOADED_OTEL_SDK_ENV];
  return preloadedFlag === "1";
}
function normalizeOtelContentValue(value: unknown): string | undefined {
if (typeof value === "string") {
return normalizeOtelLogString(value, MAX_OTEL_CONTENT_ATTRIBUTE_CHARS);
@@ -400,38 +405,39 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
if (!tracesEnabled && !metricsEnabled && !logsEnabled) {
return;
}
const sdkPreloaded = hasPreloadedOtelSdk();
const resource = resourceFromAttributes({
[ATTR_SERVICE_NAME]: serviceName,
});
const traceUrl = resolveOtelUrl(endpoint, "v1/traces");
const metricUrl = resolveOtelUrl(endpoint, "v1/metrics");
const logUrl = resolveOtelUrl(endpoint, "v1/logs");
const traceExporter = tracesEnabled
? new OTLPTraceExporter({
...(traceUrl ? { url: traceUrl } : {}),
...(headers ? { headers } : {}),
})
: undefined;
if (!sdkPreloaded && (tracesEnabled || metricsEnabled)) {
const traceUrl = resolveOtelUrl(endpoint, "v1/traces");
const metricUrl = resolveOtelUrl(endpoint, "v1/metrics");
const traceExporter = tracesEnabled
? new OTLPTraceExporter({
...(traceUrl ? { url: traceUrl } : {}),
...(headers ? { headers } : {}),
})
: undefined;
const metricExporter = metricsEnabled
? new OTLPMetricExporter({
...(metricUrl ? { url: metricUrl } : {}),
...(headers ? { headers } : {}),
})
: undefined;
const metricExporter = metricsEnabled
? new OTLPMetricExporter({
...(metricUrl ? { url: metricUrl } : {}),
...(headers ? { headers } : {}),
})
: undefined;
const metricReader = metricExporter
? new PeriodicExportingMetricReader({
exporter: metricExporter,
...(typeof otel.flushIntervalMs === "number"
? { exportIntervalMillis: Math.max(1000, otel.flushIntervalMs) }
: {}),
})
: undefined;
const metricReader = metricExporter
? new PeriodicExportingMetricReader({
exporter: metricExporter,
...(typeof otel.flushIntervalMs === "number"
? { exportIntervalMillis: Math.max(1000, otel.flushIntervalMs) }
: {}),
})
: undefined;
if (tracesEnabled || metricsEnabled) {
sdk = new NodeSDK({
resource,
...(traceExporter ? { traceExporter } : {}),
@@ -452,6 +458,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
ctx.logger.error(`diagnostics-otel: failed to start SDK: ${formatError(err)}`);
throw err;
}
} else if (sdkPreloaded && (tracesEnabled || metricsEnabled)) {
ctx.logger.info("diagnostics-otel: using preloaded OpenTelemetry SDK");
}
const logSeverityMap: Record<string, SeverityNumber> = {