mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 18:40:44 +00:00
fix(diagnostics-otel): support preloaded sdk mode (#71450)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { beforeEach, describe, expect, test, vi } from "vitest";
|
||||
import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
|
||||
|
||||
const telemetryState = vi.hoisted(() => {
|
||||
const counters = new Map<string, { add: ReturnType<typeof vi.fn> }>();
|
||||
@@ -125,6 +125,7 @@ const GRANDCHILD_SPAN_ID = "2222222222222222";
|
||||
const PROTO_KEY = "__proto__";
|
||||
const MAX_TEST_OTEL_CONTENT_ATTRIBUTE_CHARS = 4096;
|
||||
const OTEL_TRUNCATED_SUFFIX_MAX_CHARS = 20;
|
||||
const ORIGINAL_OPENCLAW_OTEL_PRELOADED = process.env.OPENCLAW_OTEL_PRELOADED;
|
||||
|
||||
function createLogger() {
|
||||
return {
|
||||
@@ -194,6 +195,7 @@ function flushDiagnosticEvents() {
|
||||
|
||||
describe("diagnostics-otel service", () => {
|
||||
beforeEach(() => {
|
||||
delete process.env.OPENCLAW_OTEL_PRELOADED;
|
||||
telemetryState.counters.clear();
|
||||
telemetryState.histograms.clear();
|
||||
telemetryState.spans.length = 0;
|
||||
@@ -208,6 +210,14 @@ describe("diagnostics-otel service", () => {
|
||||
traceExporterCtor.mockClear();
|
||||
});
|
||||
|
||||
// Restores OPENCLAW_OTEL_PRELOADED after every test to the value captured in
// ORIGINAL_OPENCLAW_OTEL_PRELOADED, so tests that set the flag do not leak it.
afterEach(() => {
|
||||
// The variable was originally unset: remove it rather than assigning undefined,
// because process.env stores assigned values as strings.
if (ORIGINAL_OPENCLAW_OTEL_PRELOADED === undefined) {
|
||||
delete process.env.OPENCLAW_OTEL_PRELOADED;
|
||||
} else {
|
||||
process.env.OPENCLAW_OTEL_PRELOADED = ORIGINAL_OPENCLAW_OTEL_PRELOADED;
|
||||
}
|
||||
});
|
||||
|
||||
test("records message-flow metrics and spans", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
|
||||
@@ -318,6 +328,84 @@ describe("diagnostics-otel service", () => {
|
||||
expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// With OPENCLAW_OTEL_PRELOADED set to "1", the service must not start or shut
// down its own SDK, yet diagnostic events must still reach the histogram,
// tracer, and log emitter.
test("uses a preloaded OpenTelemetry SDK without dropping diagnostic listeners", async () => {
|
||||
process.env.OPENCLAW_OTEL_PRELOADED = "1";
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
|
||||
await service.start(ctx);
|
||||
|
||||
// Startup in preloaded mode: no SDK start, no trace exporter constructed,
// and an info log announcing the preloaded SDK.
expect(sdkStart).not.toHaveBeenCalled();
|
||||
expect(traceExporterCtor).not.toHaveBeenCalled();
|
||||
expect(ctx.logger.info).toHaveBeenCalledWith(
|
||||
"diagnostics-otel: using preloaded OpenTelemetry SDK",
|
||||
);
|
||||
|
||||
// Emit one run-completion event and one log record, then flush before asserting.
emitDiagnosticEvent({
|
||||
type: "run.completed",
|
||||
runId: "run-1",
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
outcome: "completed",
|
||||
durationMs: 100,
|
||||
});
|
||||
emitDiagnosticEvent({
|
||||
type: "log.record",
|
||||
level: "INFO",
|
||||
message: "preloaded log",
|
||||
});
|
||||
await flushDiagnosticEvents();
|
||||
|
||||
// The run duration is recorded with provider/model attributes, a span named
// "openclaw.run" is started, and the log record is emitted.
expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
|
||||
100,
|
||||
expect.objectContaining({
|
||||
"openclaw.provider": "openai",
|
||||
|
||||
"openclaw.model": "gpt-5.4",
|
||||
}),
|
||||
);
|
||||
expect(telemetryState.tracer.startSpan).toHaveBeenCalledWith(
|
||||
"openclaw.run",
|
||||
expect.objectContaining({
|
||||
attributes: expect.objectContaining({
|
||||
"openclaw.outcome": "completed",
|
||||
}),
|
||||
}),
|
||||
undefined,
|
||||
);
|
||||
expect(logEmit).toHaveBeenCalled();
|
||||
|
||||
// Stopping must not call sdkShutdown, while logShutdown is expected exactly once.
await service.stop?.(ctx);
|
||||
expect(sdkShutdown).not.toHaveBeenCalled();
|
||||
expect(logShutdown).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// With OPENCLAW_OTEL_PRELOADED set to "1" and traces disabled in the context,
// the run-duration histogram must still be recorded but no span may be started.
test("honors disabled traces when an OpenTelemetry SDK is preloaded", async () => {
|
||||
process.env.OPENCLAW_OTEL_PRELOADED = "1";
|
||||
const service = createDiagnosticsOtelService();
|
||||
const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: false, metrics: true });
|
||||
await service.start(ctx);
|
||||
|
||||
emitDiagnosticEvent({
|
||||
type: "run.completed",
|
||||
runId: "run-1",
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
outcome: "completed",
|
||||
durationMs: 100,
|
||||
});
|
||||
|
||||
// NOTE(review): unlike the preceding test, no flushDiagnosticEvents() call
// precedes these assertions — confirm the event is delivered by this point.
expect(sdkStart).not.toHaveBeenCalled();
|
||||
expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
|
||||
100,
|
||||
expect.objectContaining({
|
||||
"openclaw.provider": "openai",
|
||||
}),
|
||||
);
|
||||
// Traces are disabled, so the tracer must stay untouched.
expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();
|
||||
|
||||
await service.stop?.(ctx);
|
||||
expect(sdkShutdown).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("tears down active handles when restarted with diagnostics disabled", async () => {
|
||||
const service = createDiagnosticsOtelService();
|
||||
const enabledCtx = createOtelContext(OTEL_TEST_ENDPOINT, {
|
||||
|
||||
@@ -49,6 +49,7 @@ const LOG_RECORD_EXPORT_FAILURE_REPORT_INTERVAL_MS = 60_000;
|
||||
const OTEL_LOG_RAW_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,64}$/u;
|
||||
const OTEL_LOG_ATTRIBUTE_KEY_RE = /^[A-Za-z0-9_.:-]{1,96}$/u;
|
||||
const BLOCKED_OTEL_LOG_ATTRIBUTE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
|
||||
const PRELOADED_OTEL_SDK_ENV = "OPENCLAW_OTEL_PRELOADED";
|
||||
|
||||
type OtelContentCapturePolicy = {
|
||||
inputMessages: boolean;
|
||||
@@ -164,6 +165,10 @@ function resolveContentCapturePolicy(value: unknown): OtelContentCapturePolicy {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reports whether the environment variable named by PRELOADED_OTEL_SDK_ENV
 * is set to exactly "1".
 *
 * @returns true only when the variable equals the string "1"; false for any
 * other value or when it is unset.
 */
function hasPreloadedOtelSdk(): boolean {
|
||||
// Strict string comparison: values such as "true" do not enable preloaded mode.
return process.env[PRELOADED_OTEL_SDK_ENV] === "1";
|
||||
}
|
||||
|
||||
function normalizeOtelContentValue(value: unknown): string | undefined {
|
||||
if (typeof value === "string") {
|
||||
return normalizeOtelLogString(value, MAX_OTEL_CONTENT_ATTRIBUTE_CHARS);
|
||||
@@ -400,38 +405,39 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
if (!tracesEnabled && !metricsEnabled && !logsEnabled) {
|
||||
return;
|
||||
}
|
||||
const sdkPreloaded = hasPreloadedOtelSdk();
|
||||
|
||||
const resource = resourceFromAttributes({
|
||||
[ATTR_SERVICE_NAME]: serviceName,
|
||||
});
|
||||
|
||||
const traceUrl = resolveOtelUrl(endpoint, "v1/traces");
|
||||
const metricUrl = resolveOtelUrl(endpoint, "v1/metrics");
|
||||
const logUrl = resolveOtelUrl(endpoint, "v1/logs");
|
||||
const traceExporter = tracesEnabled
|
||||
? new OTLPTraceExporter({
|
||||
...(traceUrl ? { url: traceUrl } : {}),
|
||||
...(headers ? { headers } : {}),
|
||||
})
|
||||
: undefined;
|
||||
if (!sdkPreloaded && (tracesEnabled || metricsEnabled)) {
|
||||
const traceUrl = resolveOtelUrl(endpoint, "v1/traces");
|
||||
const metricUrl = resolveOtelUrl(endpoint, "v1/metrics");
|
||||
const traceExporter = tracesEnabled
|
||||
? new OTLPTraceExporter({
|
||||
...(traceUrl ? { url: traceUrl } : {}),
|
||||
...(headers ? { headers } : {}),
|
||||
})
|
||||
: undefined;
|
||||
|
||||
const metricExporter = metricsEnabled
|
||||
? new OTLPMetricExporter({
|
||||
...(metricUrl ? { url: metricUrl } : {}),
|
||||
...(headers ? { headers } : {}),
|
||||
})
|
||||
: undefined;
|
||||
const metricExporter = metricsEnabled
|
||||
? new OTLPMetricExporter({
|
||||
...(metricUrl ? { url: metricUrl } : {}),
|
||||
...(headers ? { headers } : {}),
|
||||
})
|
||||
: undefined;
|
||||
|
||||
const metricReader = metricExporter
|
||||
? new PeriodicExportingMetricReader({
|
||||
exporter: metricExporter,
|
||||
...(typeof otel.flushIntervalMs === "number"
|
||||
? { exportIntervalMillis: Math.max(1000, otel.flushIntervalMs) }
|
||||
: {}),
|
||||
})
|
||||
: undefined;
|
||||
const metricReader = metricExporter
|
||||
? new PeriodicExportingMetricReader({
|
||||
exporter: metricExporter,
|
||||
...(typeof otel.flushIntervalMs === "number"
|
||||
? { exportIntervalMillis: Math.max(1000, otel.flushIntervalMs) }
|
||||
: {}),
|
||||
})
|
||||
: undefined;
|
||||
|
||||
if (tracesEnabled || metricsEnabled) {
|
||||
sdk = new NodeSDK({
|
||||
resource,
|
||||
...(traceExporter ? { traceExporter } : {}),
|
||||
@@ -452,6 +458,8 @@ export function createDiagnosticsOtelService(): OpenClawPluginService {
|
||||
ctx.logger.error(`diagnostics-otel: failed to start SDK: ${formatError(err)}`);
|
||||
throw err;
|
||||
}
|
||||
} else if (sdkPreloaded && (tracesEnabled || metricsEnabled)) {
|
||||
ctx.logger.info("diagnostics-otel: using preloaded OpenTelemetry SDK");
|
||||
}
|
||||
|
||||
const logSeverityMap: Record<string, SeverityNumber> = {
|
||||
|
||||
Reference in New Issue
Block a user