mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:50:43 +00:00
feat(diagnostics-prometheus): add protected metrics exporter
This commit is contained in:
4
.github/labeler.yml
vendored
4
.github/labeler.yml
vendored
@@ -233,6 +233,10 @@
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "extensions/diagnostics-otel/**"
|
||||
"extensions: diagnostics-prometheus":
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "extensions/diagnostics-prometheus/**"
|
||||
"extensions: llm-task":
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
|
||||
@@ -44,6 +44,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Diagnostics/OTEL: emit bounded telemetry exporter health diagnostics for startup and log-export failures without exporting raw error text. Thanks @vincentkoc.
|
||||
- Diagnostics/OTEL: export agent harness lifecycle telemetry as bounded `openclaw.harness.run` spans and `openclaw.harness.duration_ms` metrics so QA-lab, Codex, and future harnesses share one trace shape. Thanks @vincentkoc.
|
||||
- Diagnostics/trace: propagate W3C `traceparent` headers from trusted model-call trace context to provider transports while replacing caller-supplied traceparent values. Thanks @vincentkoc.
|
||||
- Diagnostics/Prometheus: add a bundled `diagnostics-prometheus` plugin with a protected gateway scrape route for low-cardinality diagnostics metrics. Thanks @vincentkoc.
|
||||
- Plugins/CLI: add `openclaw plugins registry` for explicit persisted-registry inspection and `--refresh` repair without making normal startup rescan plugin locations. Thanks @vincentkoc.
|
||||
- Plugins/CLI: make `openclaw plugins list` read the cold persisted registry snapshot by default, leaving module-aware diagnostics to `plugins doctor` and `plugins inspect`. Thanks @vincentkoc.
|
||||
- Plugins/startup: move gateway startup plugin planning onto the versioned cold registry index, with postinstall repair for older registry files that predate startup metadata. Thanks @vincentkoc.
|
||||
|
||||
@@ -1442,6 +1442,7 @@
|
||||
"gateway/doctor",
|
||||
"logging",
|
||||
"gateway/opentelemetry",
|
||||
"gateway/prometheus",
|
||||
"gateway/logging",
|
||||
"gateway/diagnostics",
|
||||
"gateway/troubleshooting"
|
||||
|
||||
89
docs/gateway/prometheus.md
Normal file
89
docs/gateway/prometheus.md
Normal file
@@ -0,0 +1,89 @@
|
||||
---
|
||||
summary: "Expose OpenClaw diagnostics as Prometheus text metrics through the diagnostics-prometheus plugin"
|
||||
title: "Prometheus metrics"
|
||||
read_when:
|
||||
- You want Prometheus, Grafana, VictoriaMetrics, or another scraper to collect OpenClaw Gateway metrics
|
||||
- You need the Prometheus metric names and label policy for dashboards or alerts
|
||||
- You want metrics without running an OpenTelemetry collector
|
||||
---
|
||||
|
||||
OpenClaw can expose diagnostics metrics through the bundled
|
||||
`diagnostics-prometheus` plugin. It listens to trusted internal diagnostics and
|
||||
renders a Prometheus text endpoint at:
|
||||
|
||||
```text
|
||||
/api/diagnostics/prometheus
|
||||
```
|
||||
|
||||
The route uses Gateway authentication. Do not expose it as a public
|
||||
unauthenticated `/metrics` endpoint.
|
||||
|
||||
## Quick start
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
allow: ["diagnostics-prometheus"],
|
||||
entries: {
|
||||
"diagnostics-prometheus": { enabled: true },
|
||||
},
|
||||
},
|
||||
diagnostics: {
|
||||
enabled: true,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
You can also enable the plugin from the CLI:
|
||||
|
||||
```bash
|
||||
openclaw plugins enable diagnostics-prometheus
|
||||
```
|
||||
|
||||
Then scrape the protected Gateway route with the same Gateway authentication you
|
||||
use for operator APIs.
|
||||
|
||||
## Metrics exported
|
||||
|
||||
| Metric | Type | Labels |
|
||||
| --------------------------------------------- | --------- | ----------------------------------------------------------------------------------------- |
|
||||
| `openclaw_run_completed_total` | counter | `channel`, `model`, `outcome`, `provider`, `trigger` |
|
||||
| `openclaw_run_duration_seconds` | histogram | `channel`, `model`, `outcome`, `provider`, `trigger` |
|
||||
| `openclaw_model_call_total` | counter | `api`, `error_category`, `model`, `outcome`, `provider`, `transport` |
|
||||
| `openclaw_model_call_duration_seconds` | histogram | `api`, `error_category`, `model`, `outcome`, `provider`, `transport` |
|
||||
| `openclaw_model_tokens_total` | counter | `agent`, `channel`, `model`, `provider`, `token_type` |
|
||||
| `openclaw_gen_ai_client_token_usage` | histogram | `model`, `provider`, `token_type` |
|
||||
| `openclaw_model_cost_usd_total`               | counter   | `agent`, `channel`, `model`, `provider`                                                   |
| `openclaw_model_usage_duration_seconds`       | histogram | `agent`, `channel`, `model`, `provider`                                                   |
|
||||
| `openclaw_tool_execution_total` | counter | `error_category`, `outcome`, `params_kind`, `tool` |
|
||||
| `openclaw_tool_execution_duration_seconds` | histogram | `error_category`, `outcome`, `params_kind`, `tool` |
|
||||
| `openclaw_harness_run_total` | counter | `channel`, `error_category`, `harness`, `model`, `outcome`, `phase`, `plugin`, `provider` |
|
||||
| `openclaw_harness_run_duration_seconds` | histogram | `channel`, `error_category`, `harness`, `model`, `outcome`, `phase`, `plugin`, `provider` |
|
||||
| `openclaw_message_processed_total` | counter | `channel`, `outcome`, `reason` |
|
||||
| `openclaw_message_processed_duration_seconds` | histogram | `channel`, `outcome`, `reason` |
|
||||
| `openclaw_message_delivery_total` | counter | `channel`, `delivery_kind`, `error_category`, `outcome` |
|
||||
| `openclaw_message_delivery_duration_seconds` | histogram | `channel`, `delivery_kind`, `error_category`, `outcome` |
|
||||
| `openclaw_queue_lane_size` | gauge | `lane` |
|
||||
| `openclaw_queue_lane_wait_seconds` | histogram | `lane` |
|
||||
| `openclaw_session_state_total` | counter | `reason`, `state` |
|
||||
| `openclaw_session_queue_depth` | gauge | `state` |
|
||||
| `openclaw_memory_bytes` | gauge | `kind` |
|
||||
| `openclaw_memory_rss_bytes` | histogram | none |
|
||||
| `openclaw_memory_pressure_total` | counter | `level`, `reason` |
|
||||
| `openclaw_telemetry_exporter_total` | counter | `exporter`, `reason`, `signal`, `status` |
|
||||
| `openclaw_prometheus_series_dropped_total` | counter | none |
|
||||
|
||||
## Label policy
|
||||
|
||||
Prometheus labels stay bounded and low-cardinality. The exporter does not emit
|
||||
raw diagnostic identifiers such as `runId`, `sessionKey`, `sessionId`, `callId`,
|
||||
`toolCallId`, message IDs, chat IDs, or provider request IDs.
|
||||
|
||||
Label values are redacted and must match OpenClaw's low-cardinality character
|
||||
policy. Values that fail the policy are replaced with `unknown`, `other`, or
|
||||
`none`, depending on the metric.
|
||||
|
||||
The exporter caps retained time series in memory. If the cap is reached, new
|
||||
series are dropped and `openclaw_prometheus_series_dropped_total` increments.
|
||||
|
||||
For full traces, logs, OTLP export, and OpenTelemetry GenAI semantic attributes,
|
||||
use [OpenTelemetry export](/gateway/opentelemetry).
|
||||
@@ -420,8 +420,9 @@ The same rule applies to other bundled-helper families such as:
|
||||
`plugin-sdk/nextcloud-talk`, `plugin-sdk/nostr`, `plugin-sdk/tlon`,
|
||||
`plugin-sdk/twitch`,
|
||||
`plugin-sdk/github-copilot-login`, `plugin-sdk/github-copilot-token`,
|
||||
`plugin-sdk/diagnostics-otel`, `plugin-sdk/diffs`, `plugin-sdk/llm-task`,
|
||||
`plugin-sdk/thread-ownership`, and `plugin-sdk/voice-call`
|
||||
`plugin-sdk/diagnostics-otel`, `plugin-sdk/diagnostics-prometheus`,
|
||||
`plugin-sdk/diffs`, `plugin-sdk/llm-task`, `plugin-sdk/thread-ownership`,
|
||||
and `plugin-sdk/voice-call`
|
||||
|
||||
`plugin-sdk/github-copilot-token` currently exposes the narrow token-helper
|
||||
surface `DEFAULT_COPILOT_API_BASE_URL`,
|
||||
|
||||
@@ -271,7 +271,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview)
|
||||
| Line | `plugin-sdk/line`, `plugin-sdk/line-core`, `plugin-sdk/line-runtime`, `plugin-sdk/line-surface` | Bundled LINE helper/runtime surface |
|
||||
| IRC | `plugin-sdk/irc`, `plugin-sdk/irc-surface` | Bundled IRC helper surface |
|
||||
| Channel-specific helpers | `plugin-sdk/googlechat`, `plugin-sdk/zalouser`, `plugin-sdk/bluebubbles`, `plugin-sdk/bluebubbles-policy`, `plugin-sdk/mattermost`, `plugin-sdk/mattermost-policy`, `plugin-sdk/feishu-conversation`, `plugin-sdk/msteams`, `plugin-sdk/nextcloud-talk`, `plugin-sdk/nostr`, `plugin-sdk/tlon`, `plugin-sdk/twitch` | Bundled channel compatibility/helper seams |
|
||||
| Auth/plugin-specific helpers | `plugin-sdk/github-copilot-login`, `plugin-sdk/github-copilot-token`, `plugin-sdk/diagnostics-otel`, `plugin-sdk/diffs`, `plugin-sdk/llm-task`, `plugin-sdk/thread-ownership`, `plugin-sdk/voice-call` | Bundled feature/plugin helper seams; `plugin-sdk/github-copilot-token` currently exports `DEFAULT_COPILOT_API_BASE_URL`, `deriveCopilotApiBaseUrlFromToken`, and `resolveCopilotApiToken` |
|
||||
| Auth/plugin-specific helpers | `plugin-sdk/github-copilot-login`, `plugin-sdk/github-copilot-token`, `plugin-sdk/diagnostics-otel`, `plugin-sdk/diagnostics-prometheus`, `plugin-sdk/diffs`, `plugin-sdk/llm-task`, `plugin-sdk/thread-ownership`, `plugin-sdk/voice-call` | Bundled feature/plugin helper seams; `plugin-sdk/github-copilot-token` currently exports `DEFAULT_COPILOT_API_BASE_URL`, `deriveCopilotApiBaseUrlFromToken`, and `resolveCopilotApiToken` |
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
|
||||
1
extensions/diagnostics-prometheus/api.ts
Normal file
1
extensions/diagnostics-prometheus/api.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "openclaw/plugin-sdk/diagnostics-prometheus";
|
||||
20
extensions/diagnostics-prometheus/index.ts
Normal file
20
extensions/diagnostics-prometheus/index.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { createDiagnosticsPrometheusExporter } from "./src/service.js";
|
||||
|
||||
// A single exporter instance supplies both the service and the HTTP handler registered below.
const exporter = createDiagnosticsPrometheusExporter();

/**
 * Plugin entry for `diagnostics-prometheus`.
 *
 * Registers the exporter service and one exact-match route at
 * `/api/diagnostics/prometheus`, declared with `auth: "gateway"`.
 */
export default definePluginEntry({
  id: "diagnostics-prometheus",
  name: "Diagnostics Prometheus",
  description: "Expose OpenClaw diagnostics metrics in Prometheus text format",
  register(api) {
    const { handler, service } = exporter;

    api.registerService(service);
    api.registerHttpRoute({
      path: "/api/diagnostics/prometheus",
      match: "exact",
      auth: "gateway",
      gatewayRuntimeScopeSurface: "trusted-operator",
      handler,
    });
  },
});
|
||||
8
extensions/diagnostics-prometheus/openclaw.plugin.json
Normal file
8
extensions/diagnostics-prometheus/openclaw.plugin.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"id": "diagnostics-prometheus",
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {}
|
||||
}
|
||||
}
|
||||
24
extensions/diagnostics-prometheus/package.json
Normal file
24
extensions/diagnostics-prometheus/package.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"name": "@openclaw/diagnostics-prometheus",
|
||||
"version": "2026.4.25",
|
||||
"description": "OpenClaw diagnostics Prometheus exporter",
|
||||
"type": "module",
|
||||
"devDependencies": {
|
||||
"@openclaw/plugin-sdk": "workspace:*"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
],
|
||||
"compat": {
|
||||
"pluginApi": ">=2026.4.25"
|
||||
},
|
||||
"build": {
|
||||
"openclawVersion": "2026.4.25"
|
||||
},
|
||||
"release": {
|
||||
"publishToClawHub": true,
|
||||
"publishToNpm": true
|
||||
}
|
||||
}
|
||||
}
|
||||
169
extensions/diagnostics-prometheus/src/service.test.ts
Normal file
169
extensions/diagnostics-prometheus/src/service.test.ts
Normal file
@@ -0,0 +1,169 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { DiagnosticEventMetadata, DiagnosticEventPayload } from "../api.js";
|
||||
import { createDiagnosticsPrometheusExporter, __test__ } from "./service.js";
|
||||
|
||||
// Metadata fixtures; frozen so one test cannot mutate what another test receives.
const trusted: DiagnosticEventMetadata = Object.freeze({ trusted: true });
const untrusted: DiagnosticEventMetadata = Object.freeze({ trusted: false });

/** Returns the `seq`/`ts` pair that every event literal in this suite spreads in. */
function baseEvent(): Pick<DiagnosticEventPayload, "seq" | "ts"> {
  const seq = 1;
  const ts = 1700000000000;
  return { seq, ts };
}
|
||||
|
||||
describe("diagnostics-prometheus service", () => {
  // Internal helpers exposed by the service module for testing.
  const { createPrometheusMetricStore, recordDiagnosticEvent, renderPrometheusMetrics } = __test__;

  it("records trusted run metrics without raw diagnostic identifiers", () => {
    const metrics = createPrometheusMetricStore();

    recordDiagnosticEvent(
      metrics,
      {
        ...baseEvent(),
        type: "run.completed",
        runId: "run-should-not-export",
        sessionKey: "session-should-not-export",
        provider: "openai",
        model: "gpt-5.4",
        channel: "discord",
        trigger: "message",
        durationMs: 1500,
        outcome: "completed",
      },
      trusted,
    );

    const output = renderPrometheusMetrics(metrics);

    expect(output).toContain("# TYPE openclaw_run_completed_total counter");
    expect(output).toContain(
      'openclaw_run_completed_total{channel="discord",model="gpt-5.4",outcome="completed",provider="openai",trigger="message"} 1',
    );
    // 1500 ms must be exported in seconds.
    expect(output).toContain(
      'openclaw_run_duration_seconds_sum{channel="discord",model="gpt-5.4",outcome="completed",provider="openai",trigger="message"} 1.5',
    );
    expect(output).not.toContain("run-should-not-export");
    expect(output).not.toContain("session-should-not-export");
  });

  it("drops untrusted plugin-emitted diagnostic events", () => {
    const metrics = createPrometheusMetricStore();

    recordDiagnosticEvent(
      metrics,
      {
        ...baseEvent(),
        type: "model.call.completed",
        runId: "run-1",
        callId: "call-1",
        provider: "openai",
        model: "gpt-5.4",
        durationMs: 10,
      },
      untrusted,
    );

    // Nothing recorded means the rendered exposition is the empty string.
    expect(renderPrometheusMetrics(metrics)).toBe("");
  });

  it("redacts and bounds label values", () => {
    const metrics = createPrometheusMetricStore();

    recordDiagnosticEvent(
      metrics,
      {
        ...baseEvent(),
        type: "tool.execution.error",
        toolName: "shell\nbad",
        durationMs: 25,
        errorCategory: "Bearer sk-secret-token-value",
      },
      trusted,
    );

    const output = renderPrometheusMetrics(metrics);

    // Both offending values fall back to their per-label defaults.
    expect(output).toContain(
      'openclaw_tool_execution_total{error_category="other",outcome="error",params_kind="unknown",tool="tool"} 1',
    );
    expect(output).not.toContain("Bearer");
    expect(output).not.toContain("sk-secret");
  });

  it("caps metric series growth and reports dropped series", () => {
    const metrics = createPrometheusMetricStore();

    // Every iteration uses a distinct model label, i.e. a distinct series.
    Array.from({ length: 2100 }).forEach((_, index) => {
      recordDiagnosticEvent(
        metrics,
        {
          ...baseEvent(),
          type: "model.call.completed",
          runId: `run-${index}`,
          callId: `call-${index}`,
          provider: "openai",
          model: `model.${index}`,
          durationMs: 10,
        },
        trusted,
      );
    });

    const output = renderPrometheusMetrics(metrics);

    expect(output).toContain("# TYPE openclaw_prometheus_series_dropped_total counter");
    expect(output).toContain("openclaw_prometheus_series_dropped_total ");
  });

  it("subscribes to internal diagnostics and renders scrape text", () => {
    type Listener = (event: DiagnosticEventPayload, metadata: DiagnosticEventMetadata) => void;

    const listeners: Array<Listener> = [];
    const emitted: unknown[] = [];
    const unsubscribe = vi.fn();
    const exporter = createDiagnosticsPrometheusExporter();

    exporter.service.start({
      config: {} as never,
      stateDir: "/tmp/openclaw-prometheus-test",
      logger: {
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
        debug: vi.fn(),
      },
      internalDiagnostics: {
        emit: (event) => emitted.push(event),
        onEvent: (listener) => {
          listeners.push(listener);
          return unsubscribe;
        },
      },
    });

    // Deliver one usage event through the listener the service registered.
    const [subscribed] = listeners;
    subscribed?.(
      {
        ...baseEvent(),
        type: "model.usage",
        provider: "openai",
        model: "gpt-5.4",
        usage: { input: 12, output: 3, total: 15 },
      },
      trusted,
    );

    expect(emitted).toContainEqual(
      expect.objectContaining({
        type: "telemetry.exporter",
        exporter: "diagnostics-prometheus",
        signal: "metrics",
        status: "started",
      }),
    );
    expect(exporter.render()).toContain(
      'openclaw_model_tokens_total{agent="unknown",channel="unknown",model="gpt-5.4",provider="openai",token_type="input"} 12',
    );

    exporter.service.stop?.();

    // Stopping must unsubscribe and clear the retained metrics.
    expect(unsubscribe).toHaveBeenCalledOnce();
    expect(exporter.render()).toBe("");
  });
});
|
||||
684
extensions/diagnostics-prometheus/src/service.ts
Normal file
684
extensions/diagnostics-prometheus/src/service.ts
Normal file
@@ -0,0 +1,684 @@
|
||||
import type { IncomingMessage, ServerResponse } from "node:http";
|
||||
import type {
|
||||
DiagnosticEventMetadata,
|
||||
DiagnosticEventPayload,
|
||||
OpenClawPluginHttpRouteHandler,
|
||||
OpenClawPluginService,
|
||||
} from "../api.js";
|
||||
import { redactSensitiveText } from "../api.js";
|
||||
|
||||
/** Label name to label value for a single series. */
type LabelSet = Record<string, string>;

/** One counter series: HELP text, its labels and the accumulated value. */
type CounterSample = {
  help: string;
  labels: LabelSet;
  value: number;
};

/**
 * One histogram series.
 *
 * `counts[i]` is the number of observations less than or equal to
 * `buckets[i]`; `count` and `sum` cover every observation.
 */
type HistogramSample = {
  buckets: number[];
  counts: number[];
  count: number;
  help: string;
  labels: LabelSet;
  sum: number;
};

/** One gauge series: HELP text, its labels and the most recently set value. */
type GaugeSample = {
  help: string;
  labels: LabelSet;
  value: number;
};

/** Copy of the store's three series maps, keyed by the string `metricKey` builds. */
type MetricSnapshot = {
  counters: Map<string, CounterSample>;
  gauges: Map<string, GaugeSample>;
  histograms: Map<string, HistogramSample>;
};
|
||||
|
||||
type PrometheusMetricStore = ReturnType<typeof createPrometheusMetricStore>;
|
||||
|
||||
/** Histogram upper bounds for durations, in seconds (0.005 s up to 600 s). */
const DURATION_BUCKETS_SECONDS = [
  0.005, 0.01, 0.025, 0.05,
  0.1, 0.25, 0.5, 1,
  2.5, 5, 10, 30,
  60, 120, 300, 600,
];

/** Histogram upper bounds for token counts: 4^0 (1) through 4^10 (1048576). */
const TOKEN_BUCKETS = Array.from({ length: 11 }, (_, exponent) => 4 ** exponent);

/** Histogram upper bounds for byte sizes: 1024 * 4^0 (1 KiB) through 1024 * 4^12 (16 GiB). */
const BYTE_BUCKETS = Array.from({ length: 13 }, (_, exponent) => 1024 * 4 ** exponent);

/** Character allow-list (and 1-120 length limit) a label value must match to be exported. */
const LOW_CARDINALITY_VALUE_RE = /^[A-Za-z0-9_.:-]{1,120}$/u;

/** Maximum number of counter + gauge + histogram series the store retains. */
const MAX_PROMETHEUS_SERIES = 2048;

/** Name of the synthetic counter that reports how many series creations were refused. */
const DROPPED_SERIES_COUNTER_NAME = "openclaw_prometheus_series_dropped_total";
|
||||
|
||||
/**
 * Turns a raw diagnostic field into a label value that is safe to export.
 *
 * The value is trimmed and passed through `redactSensitiveText`; the result is
 * kept only when it matches `LOW_CARDINALITY_VALUE_RE`.
 *
 * @param value Raw field value; may be missing or empty.
 * @param fallback Returned when the value is missing, empty or fails the pattern.
 * @returns The redacted value, or `fallback`.
 */
function lowCardinalityLabel(value: string | undefined, fallback = "unknown"): string {
  if (value) {
    const candidate = redactSensitiveText(value.trim());
    if (LOW_CARDINALITY_VALUE_RE.test(candidate)) {
      return candidate;
    }
  }
  return fallback;
}
|
||||
|
||||
/**
 * Accepts only finite, non-negative numbers.
 *
 * @param value Candidate number; may be undefined.
 * @returns `value` unchanged when it is a finite number >= 0, otherwise `undefined`.
 */
function numericValue(value: number | undefined): number | undefined {
  if (typeof value !== "number") {
    return undefined;
  }
  if (!Number.isFinite(value) || value < 0) {
    return undefined;
  }
  return value;
}
|
||||
|
||||
/**
 * Converts a millisecond duration to seconds.
 *
 * @param ms Duration in milliseconds; may be undefined.
 * @returns `ms / 1000`, or `undefined` when `numericValue` rejects the input.
 */
function seconds(ms: number | undefined): number | undefined {
  const millis = numericValue(ms);
  if (millis === undefined) {
    return undefined;
  }
  return millis / 1000;
}
|
||||
|
||||
/**
 * Lists a label set as `[name, value]` pairs ordered by label name.
 *
 * @param labels Label set to list; it is not modified.
 * @returns A new array of pairs sorted with `localeCompare` on the name.
 */
function sortedLabels(labels: LabelSet): [string, string][] {
  // Object.entries returns a fresh array, so the in-place sort cannot touch caller data.
  const pairs = Object.entries(labels);
  pairs.sort((a, b) => a[0].localeCompare(b[0]));
  return pairs;
}
|
||||
|
||||
/**
 * Builds the map key that identifies one series.
 *
 * The key is the metric name, a `|`, then the JSON form of the name-sorted
 * label pairs, so the same labels in any insertion order give the same key.
 *
 * @param name Metric name.
 * @param labels Labels of the series.
 * @returns The series key.
 */
function metricKey(name: string, labels: LabelSet): string {
  const canonicalLabels = JSON.stringify(sortedLabels(labels));
  return [name, canonicalLabels].join("|");
}
|
||||
|
||||
/**
 * Escapes text for a `# HELP` line.
 *
 * @param value Raw help text.
 * @returns The text with each backslash doubled and each line feed written as `\n`.
 */
function escapeHelp(value: string): string {
  // One pass over both characters; escapes produced here are never re-scanned.
  return value.replace(/[\\\n]/g, (char) => (char === "\n" ? "\\n" : "\\\\"));
}
|
||||
|
||||
/**
 * Escapes text for use inside a double-quoted label value.
 *
 * @param value Raw label value.
 * @returns The value with backslashes doubled, line feeds written as `\n`
 *   and double quotes written as `\"`.
 */
function escapeLabelValue(value: string): string {
  // One pass over all three characters; escapes produced here are never re-scanned.
  return value.replace(/[\\\n"]/g, (char) => {
    switch (char) {
      case "\\":
        return "\\\\";
      case "\n":
        return "\\n";
      default:
        return '\\"';
    }
  });
}
|
||||
|
||||
/**
 * Renders a label set as the `{name="value",...}` suffix of a sample line.
 *
 * @param labels Labels to render; values are escaped with `escapeLabelValue`.
 * @returns The brace-wrapped, name-sorted label list, or `""` when there are no labels.
 */
function formatLabels(labels: LabelSet): string {
  const rendered = sortedLabels(labels).map(
    ([key, value]) => `${key}="${escapeLabelValue(value)}"`,
  );
  return rendered.length === 0 ? "" : `{${rendered.join(",")}}`;
}
|
||||
|
||||
/**
 * Formats a sample value for the text exposition.
 *
 * @param value Number to format.
 * @returns `"0"` for NaN and the infinities; integers as-is; other numbers
 *   rounded to 12 significant digits (which hides float noise such as
 *   `0.30000000000000004`).
 */
function formatPrometheusNumber(value: number): string {
  if (!Number.isFinite(value)) {
    return "0";
  }
  const normalized = Number.isInteger(value) ? value : Number(value.toPrecision(12));
  return String(normalized);
}
|
||||
|
||||
/**
 * Creates an in-memory store for counter, gauge and histogram series.
 *
 * The combined number of series is capped at `MAX_PROMETHEUS_SERIES`. Once the
 * cap is reached, writes to series that do not exist yet are discarded and
 * counted; the count is exposed by `snapshot()` under
 * `DROPPED_SERIES_COUNTER_NAME`.
 *
 * @returns The recorders `counter`, `gauge` and `histogram`, plus `reset` and `snapshot`.
 */
function createPrometheusMetricStore() {
  const counters = new Map<string, CounterSample>();
  const gauges = new Map<string, GaugeSample>();
  const histograms = new Map<string, HistogramSample>();
  // Number of writes refused by canCreateSeries since creation or the last reset().
  let droppedSeries = 0;

  /**
   * Decides whether a write to `key` may proceed. Existing series and the
   * dropped-series counter always may; a new series only while under the cap.
   * A refusal increments `droppedSeries`.
   */
  function canCreateSeries<T>(map: Map<string, T>, key: string, metricName: string): boolean {
    const allowed =
      map.has(key) ||
      metricName === DROPPED_SERIES_COUNTER_NAME ||
      counters.size + gauges.size + histograms.size < MAX_PROMETHEUS_SERIES;
    if (!allowed) {
      droppedSeries += 1;
    }
    return allowed;
  }

  /** Adds `amount` to a counter series; non-finite and non-positive amounts are ignored. */
  function counter(name: string, help: string, labels: LabelSet, amount = 1) {
    if (!(Number.isFinite(amount) && amount > 0)) {
      return;
    }
    const key = metricKey(name, labels);
    if (!canCreateSeries(counters, key, name)) {
      return;
    }
    const sample = counters.get(key);
    if (sample === undefined) {
      counters.set(key, { help, labels, value: amount });
    } else {
      sample.value += amount;
    }
  }

  /** Sets a gauge series to `value`; undefined and non-finite values are ignored. */
  function gauge(name: string, help: string, labels: LabelSet, value: number | undefined) {
    if (value === undefined || !Number.isFinite(value)) {
      return;
    }
    const key = metricKey(name, labels);
    if (canCreateSeries(gauges, key, name)) {
      gauges.set(key, { help, labels, value });
    }
  }

  /**
   * Records one observation in a histogram series; undefined, non-finite and
   * negative values are ignored. `buckets` only applies when the series is
   * first created.
   */
  function histogram(
    name: string,
    help: string,
    labels: LabelSet,
    value: number | undefined,
    buckets = DURATION_BUCKETS_SECONDS,
  ) {
    if (value === undefined || !Number.isFinite(value) || value < 0) {
      return;
    }
    const key = metricKey(name, labels);
    if (!canCreateSeries(histograms, key, name)) {
      return;
    }
    const existing = histograms.get(key);
    const sample: HistogramSample = existing ?? {
      buckets,
      counts: new Array<number>(buckets.length).fill(0),
      count: 0,
      help,
      labels,
      sum: 0,
    };
    if (existing === undefined) {
      histograms.set(key, sample);
    }
    sample.count += 1;
    sample.sum += value;
    // Buckets are cumulative: the observation counts toward every bound it does not exceed.
    for (const [index, upperBound] of sample.buckets.entries()) {
      if (value <= upperBound) {
        sample.counts[index] = (sample.counts[index] ?? 0) + 1;
      }
    }
  }

  /** Returns copies of the three maps; the dropped-series counter is added when non-zero. */
  function snapshot(): MetricSnapshot {
    const counterView = new Map(counters);
    if (droppedSeries > 0) {
      counterView.set(metricKey(DROPPED_SERIES_COUNTER_NAME, {}), {
        help: "Prometheus metric series dropped because the exporter series cap was reached.",
        labels: {},
        value: droppedSeries,
      });
    }
    return {
      counters: counterView,
      gauges: new Map(gauges),
      histograms: new Map(histograms),
    };
  }

  /** Removes every series and zeroes the dropped-series count. */
  function reset() {
    droppedSeries = 0;
    histograms.clear();
    gauges.clear();
    counters.clear();
  }

  return { counter, gauge, histogram, reset, snapshot };
}
|
||||
|
||||
/**
 * Produces a short, single-line, redacted description of a thrown value.
 *
 * @param err Anything that was thrown or rejected.
 * @returns For an `Error`, its message (or its name when the message is
 *   empty); otherwise `String(err)`. The text is passed through
 *   `redactSensitiveText`, NUL / CR / LF / TAB / U+2028 / U+2029 are replaced
 *   by spaces, and the result is truncated to 500 characters.
 */
function safeErrorMessage(err: unknown): string {
  // `Error.message` is a string that defaults to "", never null/undefined, so
  // `??` could not reach the name fallback; `||` also covers the empty message.
  const message = err instanceof Error ? err.message || err.name : String(err);
  return redactSensitiveText(message)
    .replaceAll("\u0000", " ")
    .replace(/[\r\n\t\u2028\u2029]/gu, " ")
    .slice(0, 500);
}
|
||||
|
||||
/**
 * Renders a store snapshot as Prometheus text exposition.
 *
 * Counters are written first, then gauges, then histograms, each group
 * ordered by series key. `# HELP` / `# TYPE` are written once per metric name,
 * before its first sample.
 *
 * @param store Store to render.
 * @returns The exposition text ending in a newline, or `""` when the snapshot is empty.
 */
function renderPrometheusMetrics(store: PrometheusMetricStore): string {
  const { counters, gauges, histograms } = store.snapshot();
  const output: string[] = [];
  const announced = new Set<string>();

  // Series keys are `<metric name>|<labels JSON>`; the name is the part before the first `|`.
  const nameOf = (key: string): string => key.split("|", 1)[0] ?? "";

  // The spread creates a fresh array, so sorting it in place leaves the map untouched.
  const byKey = <T>(series: Map<string, T>): [string, T][] =>
    [...series.entries()].sort((a, b) => a[0].localeCompare(b[0]));

  const announce = (name: string, type: "counter" | "gauge" | "histogram", help: string) => {
    if (announced.has(name)) {
      return;
    }
    announced.add(name);
    output.push(`# HELP ${name} ${escapeHelp(help)}`, `# TYPE ${name} ${type}`);
  };

  // Counters and gauges share one line shape: `name{labels} value`.
  const writeScalars = (
    series: Map<string, CounterSample | GaugeSample>,
    type: "counter" | "gauge",
  ) => {
    for (const [key, sample] of byKey(series)) {
      const name = nameOf(key);
      announce(name, type, sample.help);
      output.push(`${name}${formatLabels(sample.labels)} ${formatPrometheusNumber(sample.value)}`);
    }
  };

  writeScalars(counters, "counter");
  writeScalars(gauges, "gauge");

  for (const [key, sample] of byKey(histograms)) {
    const name = nameOf(key);
    const plainLabels = formatLabels(sample.labels);
    announce(name, "histogram", sample.help);
    for (const [index, upperBound] of sample.buckets.entries()) {
      const bucketLabels = formatLabels({ ...sample.labels, le: String(upperBound) });
      output.push(
        `${name}_bucket${bucketLabels} ${formatPrometheusNumber(sample.counts[index] ?? 0)}`,
      );
    }
    // The `+Inf` bucket holds every observation, i.e. the total count.
    output.push(
      `${name}_bucket${formatLabels({ ...sample.labels, le: "+Inf" })} ${formatPrometheusNumber(sample.count)}`,
      `${name}_sum${plainLabels} ${formatPrometheusNumber(sample.sum)}`,
      `${name}_count${plainLabels} ${formatPrometheusNumber(sample.count)}`,
    );
  }

  // The trailing empty element yields the final newline (and "" when nothing was written).
  output.push("");
  return output.join("\n");
}
|
||||
|
||||
/**
 * Builds the label set for run metrics.
 *
 * @param evt Event fields to read; each is normalized by `lowCardinalityLabel`
 *   with its default `"unknown"` fallback.
 * @returns Labels `channel`, `model`, `outcome`, `provider` and `trigger`.
 */
function runLabels(evt: {
  channel?: string;
  model?: string;
  outcome?: string;
  provider?: string;
  trigger?: string;
}): LabelSet {
  const { channel, model, outcome, provider, trigger } = evt;
  return {
    channel: lowCardinalityLabel(channel),
    model: lowCardinalityLabel(model),
    outcome: lowCardinalityLabel(outcome),
    provider: lowCardinalityLabel(provider),
    trigger: lowCardinalityLabel(trigger),
  };
}
|
||||
|
||||
/**
 * Builds the label set for model-call metrics.
 *
 * @param evt Event fields to read. A `type` of `"model.call.error"` marks a failure.
 * @returns Labels `api`, `error_category`, `model`, `outcome`, `provider` and
 *   `transport`. `outcome` is `"error"` or `"completed"`; `error_category` is
 *   `"none"` unless the call failed, in which case it falls back to `"other"`.
 */
function modelCallLabels(evt: {
  api?: string;
  errorCategory?: string;
  model?: string;
  provider?: string;
  transport?: string;
  type: string;
}): LabelSet {
  const failed = evt.type === "model.call.error";
  return {
    api: lowCardinalityLabel(evt.api),
    error_category: failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none",
    model: lowCardinalityLabel(evt.model),
    outcome: failed ? "error" : "completed",
    provider: lowCardinalityLabel(evt.provider),
    transport: lowCardinalityLabel(evt.transport),
  };
}
|
||||
|
||||
/**
 * Builds the label set for tool-execution metrics.
 *
 * @param evt Event fields to read. A `type` of `"tool.execution.error"` marks a failure.
 * @returns Labels `error_category`, `outcome`, `params_kind` and `tool`.
 *   `tool` falls back to `"tool"`; `error_category` is `"none"` unless the
 *   execution failed, in which case it falls back to `"other"`.
 */
function toolExecutionLabels(evt: {
  errorCategory?: string;
  paramsSummary?: { kind: string };
  toolName: string;
  type: string;
}): LabelSet {
  const failed = evt.type === "tool.execution.error";
  return {
    error_category: failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none",
    outcome: failed ? "error" : "completed",
    params_kind: lowCardinalityLabel(evt.paramsSummary?.kind),
    tool: lowCardinalityLabel(evt.toolName, "tool"),
  };
}
|
||||
|
||||
/**
 * Builds the label set for harness-run metrics.
 *
 * @param evt Event fields to read. A `type` of `"harness.run.error"` marks a failure.
 * @returns Labels `channel`, `error_category`, `harness`, `model`, `outcome`,
 *   `phase`, `plugin` and `provider`. For failures `outcome` is `"error"` and
 *   `phase` comes from the event; otherwise `outcome` comes from the event
 *   and both `phase` and `error_category` are `"none"`.
 */
function harnessLabels(evt: {
  channel?: string;
  errorCategory?: string;
  harnessId: string;
  model?: string;
  outcome?: string;
  phase?: string;
  pluginId?: string;
  provider?: string;
  type: string;
}): LabelSet {
  const failed = evt.type === "harness.run.error";
  return {
    channel: lowCardinalityLabel(evt.channel),
    error_category: failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none",
    harness: lowCardinalityLabel(evt.harnessId),
    model: lowCardinalityLabel(evt.model),
    outcome: failed ? "error" : lowCardinalityLabel(evt.outcome),
    phase: failed ? lowCardinalityLabel(evt.phase) : "none",
    plugin: lowCardinalityLabel(evt.pluginId),
    provider: lowCardinalityLabel(evt.provider),
  };
}
|
||||
|
||||
/**
 * Records the metrics derived from one `model.usage` event.
 *
 * Per token type with a positive count it adds to `openclaw_model_tokens_total`;
 * input and output counts are also observed in
 * `openclaw_gen_ai_client_token_usage`. It then adds the reported cost to
 * `openclaw_model_cost_usd_total` and observes the event duration in
 * `openclaw_model_usage_duration_seconds`.
 *
 * @param store Store that receives the samples.
 * @param evt The usage event.
 */
function recordModelUsage(
  store: PrometheusMetricStore,
  evt: Extract<DiagnosticEventPayload, { type: "model.usage" }>,
) {
  const { usage } = evt;
  const labels = {
    agent: lowCardinalityLabel(evt.agentId),
    channel: lowCardinalityLabel(evt.channel),
    model: lowCardinalityLabel(evt.model),
    provider: lowCardinalityLabel(evt.provider),
  };

  // Token types in recording order, paired with the usage field each one reads.
  const tokenCounts: [string, number | undefined][] = [
    ["input", usage.input],
    ["output", usage.output],
    ["cache_read", usage.cacheRead],
    ["cache_write", usage.cacheWrite],
    ["prompt", usage.promptTokens],
    ["total", usage.total],
  ];

  for (const [tokenType, reported] of tokenCounts) {
    const amount = numericValue(reported);
    if (amount === undefined || amount === 0) {
      continue;
    }
    store.counter(
      "openclaw_model_tokens_total",
      "Model tokens reported by diagnostic usage events.",
      { ...labels, token_type: tokenType },
      amount,
    );
    // Only input and output tokens feed the distribution histogram.
    if (tokenType !== "input" && tokenType !== "output") {
      continue;
    }
    store.histogram(
      "openclaw_gen_ai_client_token_usage",
      "GenAI token usage distribution for input and output tokens.",
      { model: labels.model, provider: labels.provider, token_type: tokenType },
      amount,
      TOKEN_BUCKETS,
    );
  }

  // A missing or invalid cost becomes 0, which the counter ignores.
  const cost = numericValue(evt.costUsd) ?? 0;
  store.counter(
    "openclaw_model_cost_usd_total",
    "Estimated model cost in USD reported by diagnostic usage events.",
    labels,
    cost,
  );

  const elapsedSeconds = seconds(evt.durationMs);
  store.histogram(
    "openclaw_model_usage_duration_seconds",
    "Model usage event duration in seconds.",
    labels,
    elapsedSeconds,
  );
}
|
||||
|
||||
/**
 * Translates one diagnostic event into counter, gauge, and histogram updates
 * on `store`.
 *
 * Events whose metadata is not marked trusted are ignored, as are event types
 * that have no matching case below.
 *
 * @param store - Metric store that receives the updates.
 * @param evt - The diagnostic event to record.
 * @param metadata - Event metadata; only `trusted` is read here.
 */
function recordDiagnosticEvent(
  store: PrometheusMetricStore,
  evt: DiagnosticEventPayload,
  metadata: DiagnosticEventMetadata,
): void {
  if (!metadata.trusted) {
    return;
  }

  switch (evt.type) {
    case "model.usage": {
      recordModelUsage(store, evt);
      return;
    }

    case "run.completed": {
      const labels = runLabels(evt);
      store.histogram(
        "openclaw_run_duration_seconds",
        "Agent run duration in seconds.",
        labels,
        seconds(evt.durationMs),
      );
      store.counter("openclaw_run_completed_total", "Agent runs completed by outcome.", labels);
      return;
    }

    case "model.call.completed":
    case "model.call.error": {
      const labels = modelCallLabels(evt);
      store.histogram(
        "openclaw_model_call_duration_seconds",
        "Provider model call duration in seconds.",
        labels,
        seconds(evt.durationMs),
      );
      store.counter(
        "openclaw_model_call_total",
        "Provider model calls completed by outcome.",
        labels,
      );
      return;
    }

    case "tool.execution.completed":
    case "tool.execution.error": {
      const labels = toolExecutionLabels(evt);
      store.histogram(
        "openclaw_tool_execution_duration_seconds",
        "Tool execution duration in seconds.",
        labels,
        seconds(evt.durationMs),
      );
      store.counter(
        "openclaw_tool_execution_total",
        "Tool executions completed by outcome.",
        labels,
      );
      return;
    }

    case "harness.run.completed":
    case "harness.run.error": {
      const labels = harnessLabels(evt);
      store.histogram(
        "openclaw_harness_run_duration_seconds",
        "Agent harness run duration in seconds.",
        labels,
        seconds(evt.durationMs),
      );
      store.counter(
        "openclaw_harness_run_total",
        "Agent harness runs completed by outcome.",
        labels,
      );
      return;
    }

    case "message.processed": {
      // The counter and the histogram carry the same label set.
      const labels = {
        channel: lowCardinalityLabel(evt.channel),
        outcome: evt.outcome,
        reason: lowCardinalityLabel(evt.reason, "none"),
      };
      store.counter(
        "openclaw_message_processed_total",
        "Inbound messages processed by outcome.",
        labels,
      );
      store.histogram(
        "openclaw_message_processed_duration_seconds",
        "Inbound message processing duration in seconds.",
        labels,
        seconds(evt.durationMs),
      );
      return;
    }

    case "message.delivery.completed":
    case "message.delivery.error": {
      // Only the error variant carries an error category; completions report "none".
      const labels = {
        channel: lowCardinalityLabel(evt.channel),
        delivery_kind: evt.deliveryKind,
        error_category:
          evt.type === "message.delivery.error"
            ? lowCardinalityLabel(evt.errorCategory, "other")
            : "none",
        outcome: evt.type === "message.delivery.error" ? "error" : "completed",
      };
      store.counter(
        "openclaw_message_delivery_total",
        "Outbound message delivery attempts by outcome.",
        labels,
      );
      store.histogram(
        "openclaw_message_delivery_duration_seconds",
        "Outbound message delivery duration in seconds.",
        labels,
        seconds(evt.durationMs),
      );
      return;
    }

    case "queue.lane.enqueue":
    case "queue.lane.dequeue": {
      const lane = lowCardinalityLabel(evt.lane);
      store.gauge(
        "openclaw_queue_lane_size",
        "Current diagnostic queue lane size.",
        { lane },
        numericValue(evt.queueSize),
      );
      // Wait time is read only for dequeue events.
      if (evt.type === "queue.lane.dequeue") {
        store.histogram(
          "openclaw_queue_lane_wait_seconds",
          "Queue lane wait time in seconds.",
          { lane },
          seconds(evt.waitMs),
        );
      }
      return;
    }

    case "session.state": {
      store.counter("openclaw_session_state_total", "Session state observations.", {
        reason: lowCardinalityLabel(evt.reason, "none"),
        state: evt.state,
      });
      if (evt.queueDepth === undefined) {
        return;
      }
      store.gauge(
        "openclaw_session_queue_depth",
        "Latest observed session queue depth.",
        { state: evt.state },
        numericValue(evt.queueDepth),
      );
      return;
    }

    case "diagnostic.memory.sample": {
      const { memory } = evt;
      // One gauge series per memory kind, all under the same metric name.
      const byKind = [
        ["rss", memory.rssBytes],
        ["heap_total", memory.heapTotalBytes],
        ["heap_used", memory.heapUsedBytes],
      ] as const;
      for (const [kind, bytes] of byKind) {
        store.gauge(
          "openclaw_memory_bytes",
          "Latest process memory usage by memory kind.",
          { kind },
          bytes,
        );
      }
      store.histogram(
        "openclaw_memory_rss_bytes",
        "RSS memory sample distribution in bytes.",
        {},
        numericValue(memory.rssBytes),
        BYTE_BUCKETS,
      );
      return;
    }

    case "diagnostic.memory.pressure": {
      store.counter(
        "openclaw_memory_pressure_total",
        "Memory pressure events by level and reason.",
        { level: evt.level, reason: evt.reason },
      );
      return;
    }

    case "telemetry.exporter": {
      store.counter("openclaw_telemetry_exporter_total", "Telemetry exporter lifecycle events.", {
        exporter: lowCardinalityLabel(evt.exporter),
        reason: lowCardinalityLabel(evt.reason, "none"),
        signal: evt.signal,
        status: evt.status,
      });
      return;
    }

    default:
      return;
  }
}
|
||||
|
||||
/**
 * Builds the HTTP route handler that serves the metric store in Prometheus
 * text exposition format.
 *
 * - Methods other than GET/HEAD get `405` with an `Allow: GET, HEAD` header.
 * - GET responds `200` with the rendered metrics as the body.
 * - HEAD responds `200` with the same headers and no body. The metrics are not
 *   rendered for HEAD, since the payload would be discarded anyway.
 *
 * @param store - Metric store to render on each scrape.
 * @returns A handler that always returns `true` after ending the response.
 */
function createMetricsHandler(store: PrometheusMetricStore): OpenClawPluginHttpRouteHandler {
  return (req: IncomingMessage, res: ServerResponse) => {
    if (req.method !== "GET" && req.method !== "HEAD") {
      res.statusCode = 405;
      res.setHeader("Allow", "GET, HEAD");
      res.end("Method Not Allowed");
      return true;
    }

    // Render before touching the response so a render failure leaves status and
    // headers unset. Skip the render for HEAD, which never sends a body.
    const body = req.method === "HEAD" ? undefined : renderPrometheusMetrics(store);

    res.statusCode = 200;
    res.setHeader("Cache-Control", "no-store");
    res.setHeader("Content-Type", "text/plain; version=0.0.4; charset=utf-8");
    if (body === undefined) {
      res.end();
      return true;
    }
    res.end(body);
    return true;
  };
}
|
||||
|
||||
/**
 * Creates the diagnostics Prometheus exporter: a metric store plus the plugin
 * service that fills it and the HTTP handler that serves it.
 *
 * The returned `service`:
 * - `start(ctx)` subscribes to `ctx.internalDiagnostics.onEvent` and records
 *   each event into the store. If that capability is missing it logs an error
 *   and does nothing else. Failures while recording an event are caught and
 *   logged, not propagated to the emitter. After subscribing it emits a
 *   `telemetry.exporter` "started" event.
 * - `stop()` releases the subscription and resets the store.
 *
 * @returns `handler` (scrape route handler), `render` (renders the current
 *   store contents), and `service` (the plugin service registration).
 */
export function createDiagnosticsPrometheusExporter() {
  const store = createPrometheusMetricStore();
  let unsubscribe: (() => void) | undefined;

  const service = {
    id: "diagnostics-prometheus",
    start(ctx) {
      const subscribe = ctx.internalDiagnostics?.onEvent;
      if (!subscribe) {
        ctx.logger.error("diagnostics-prometheus: internal diagnostics capability unavailable");
        return;
      }

      // Release any subscription left from an earlier start() that was never
      // stopped. Otherwise the old listener would leak and every event would be
      // recorded twice.
      unsubscribe?.();
      unsubscribe = undefined;

      unsubscribe = subscribe((event, metadata) => {
        try {
          recordDiagnosticEvent(store, event, metadata);
        } catch (err) {
          ctx.logger.error(
            `diagnostics-prometheus: event handler failed (${event.type}): ${safeErrorMessage(err)}`,
          );
        }
      });

      // Emitted after subscribing, so the listener above is already registered.
      ctx.internalDiagnostics?.emit({
        type: "telemetry.exporter",
        exporter: "diagnostics-prometheus",
        signal: "metrics",
        status: "started",
        reason: "configured",
      });
    },
    stop() {
      unsubscribe?.();
      unsubscribe = undefined;
      store.reset();
    },
  } satisfies OpenClawPluginService;

  return {
    handler: createMetricsHandler(store),
    render: () => renderPrometheusMetrics(store),
    service,
  };
}
|
||||
|
||||
// Bundles three module-level helpers under a single `__test__` export.
// NOTE(review): presumably consumed only by this plugin's unit tests — confirm before relying on it elsewhere.
export const __test__ = {
|
||||
createPrometheusMetricStore,
|
||||
recordDiagnosticEvent,
|
||||
renderPrometheusMetrics,
|
||||
};
|
||||
16
extensions/diagnostics-prometheus/tsconfig.json
Normal file
16
extensions/diagnostics-prometheus/tsconfig.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"extends": "../tsconfig.package-boundary.base.json",
|
||||
"compilerOptions": {
|
||||
"rootDir": "."
|
||||
},
|
||||
"include": ["./*.ts", "./src/**/*.ts"],
|
||||
"exclude": [
|
||||
"./**/*.test.ts",
|
||||
"./dist/**",
|
||||
"./node_modules/**",
|
||||
"./src/test-support/**",
|
||||
"./src/**/*test-helpers.ts",
|
||||
"./src/**/*test-harness.ts",
|
||||
"./src/**/*test-support.ts"
|
||||
]
|
||||
}
|
||||
@@ -596,6 +596,10 @@
|
||||
"types": "./dist/plugin-sdk/diagnostics-otel.d.ts",
|
||||
"default": "./dist/plugin-sdk/diagnostics-otel.js"
|
||||
},
|
||||
"./plugin-sdk/diagnostics-prometheus": {
|
||||
"types": "./dist/plugin-sdk/diagnostics-prometheus.d.ts",
|
||||
"default": "./dist/plugin-sdk/diagnostics-prometheus.js"
|
||||
},
|
||||
"./plugin-sdk/diffs": {
|
||||
"types": "./dist/plugin-sdk/diffs.d.ts",
|
||||
"default": "./dist/plugin-sdk/diffs.js"
|
||||
|
||||
@@ -134,6 +134,7 @@
|
||||
"device-bootstrap",
|
||||
"diagnostic-runtime",
|
||||
"diagnostics-otel",
|
||||
"diagnostics-prometheus",
|
||||
"diffs",
|
||||
"error-runtime",
|
||||
"extension-shared",
|
||||
|
||||
@@ -191,6 +191,7 @@ const LOCAL_EXTENSION_API_BARREL_GUARDS = [
|
||||
"bluebubbles",
|
||||
"device-pair",
|
||||
"diagnostics-otel",
|
||||
"diagnostics-prometheus",
|
||||
"discord",
|
||||
"diffs",
|
||||
"feishu",
|
||||
|
||||
15
src/plugin-sdk/diagnostics-prometheus.ts
Normal file
15
src/plugin-sdk/diagnostics-prometheus.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
// Narrow plugin-sdk surface for the bundled diagnostics-prometheus plugin.
|
||||
// Keep this list additive and scoped to the bundled diagnostics-prometheus surface.
|
||||
|
||||
export type {
|
||||
DiagnosticEventMetadata,
|
||||
DiagnosticEventPayload,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
export { redactSensitiveText } from "../logging/redact.js";
|
||||
export { emptyPluginConfigSchema } from "../plugins/config-schema.js";
|
||||
export type {
|
||||
OpenClawPluginApi,
|
||||
OpenClawPluginHttpRouteHandler,
|
||||
OpenClawPluginService,
|
||||
OpenClawPluginServiceContext,
|
||||
} from "../plugins/types.js";
|
||||
@@ -180,7 +180,7 @@ describe("startPluginServices", () => {
|
||||
expect(stopThrows).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("grants internal diagnostics only to the bundled diagnostics OTEL service", async () => {
|
||||
it("grants internal diagnostics only to bundled diagnostics exporter services", async () => {
|
||||
const contexts: OpenClawPluginServiceContext[] = [];
|
||||
const diagnosticsService = createTrackingService("diagnostics-otel", { contexts });
|
||||
await startPluginServices({
|
||||
@@ -191,6 +191,18 @@ describe("startPluginServices", () => {
|
||||
expect(contexts[0]?.internalDiagnostics?.onEvent).toBeTypeOf("function");
|
||||
expect(contexts[0]?.internalDiagnostics?.emit).toBeTypeOf("function");
|
||||
|
||||
const prometheusContexts: OpenClawPluginServiceContext[] = [];
|
||||
const prometheusService = createTrackingService("diagnostics-prometheus", {
|
||||
contexts: prometheusContexts,
|
||||
});
|
||||
await startPluginServices({
|
||||
registry: createRegistry([prometheusService], "diagnostics-prometheus", "bundled"),
|
||||
config: createServiceConfig(),
|
||||
});
|
||||
|
||||
expect(prometheusContexts[0]?.internalDiagnostics?.onEvent).toBeTypeOf("function");
|
||||
expect(prometheusContexts[0]?.internalDiagnostics?.emit).toBeTypeOf("function");
|
||||
|
||||
const untrustedContexts: OpenClawPluginServiceContext[] = [];
|
||||
const untrustedService = createTrackingService("diagnostics-otel", {
|
||||
contexts: untrustedContexts,
|
||||
|
||||
@@ -24,14 +24,18 @@ function createServiceContext(params: {
|
||||
workspaceDir?: string;
|
||||
service?: PluginServiceRegistration;
|
||||
}): OpenClawPluginServiceContext {
|
||||
const grantsInternalDiagnostics =
|
||||
params.service?.origin === "bundled" &&
|
||||
params.service.pluginId === params.service.service.id &&
|
||||
(params.service.service.id === "diagnostics-otel" ||
|
||||
params.service.service.id === "diagnostics-prometheus");
|
||||
|
||||
return {
|
||||
config: params.config,
|
||||
workspaceDir: params.workspaceDir,
|
||||
stateDir: STATE_DIR,
|
||||
logger: createPluginLogger(),
|
||||
...(params.service?.origin === "bundled" &&
|
||||
params.service.pluginId === "diagnostics-otel" &&
|
||||
params.service.service.id === "diagnostics-otel"
|
||||
...(grantsInternalDiagnostics
|
||||
? {
|
||||
internalDiagnostics: {
|
||||
emit: emitTrustedDiagnosticEvent,
|
||||
|
||||
Reference in New Issue
Block a user