feat(plugins): add sanitized model call hooks

This commit is contained in:
Vincent Koc
2026-04-25 10:56:18 -07:00
parent 9ffe764416
commit 275c128e99
8 changed files with 321 additions and 44 deletions

View File

@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
- Plugins/compat: mark `OPENCLAW_DISABLE_PERSISTED_PLUGIN_REGISTRY` as a deprecated break-glass switch and point operators at registry repair instead. Thanks @vincentkoc.
- Plugins/registry: ignore stale persisted registry reads when plugin policy no longer matches current config, and stamp generated registry files with a do-not-edit warning. Thanks @vincentkoc.
- Diagnostics/OTEL: surface provider request identifiers as bounded hashes on model-call diagnostics and span events, without exporting raw request IDs or metric labels. Thanks @Lidang-Jiang and @vincentkoc.
- Plugins/diagnostics: add metadata-only `model_call_started` and `model_call_ended` hooks for provider/model call telemetry without exposing prompts, responses, headers, request bodies, or raw provider request IDs. Thanks @vincentkoc.
- Diagnostics/OTEL: add bounded outbound message delivery lifecycle diagnostics and export them as low-cardinality delivery spans/metrics without message body, recipient, room, or media-path data. (#71471) Thanks @vincentkoc and @jlapenna.
- Diagnostics/OTEL: emit bounded exec-process diagnostics and export them as `openclaw.exec` spans without exposing command text, working directories, or container identifiers. (#71451) Thanks @vincentkoc and @jlapenna.
- Diagnostics/OTEL: support `OPENCLAW_OTEL_PRELOADED=1` so the plugin can reuse an already-registered OpenTelemetry SDK while keeping OpenClaw diagnostic listeners wired. (#71450) Thanks @vincentkoc and @jlapenna.

View File

@@ -1,2 +1,2 @@
f6d9588737310773031e744b6726ba80a9ca742205db335aae95fbd1e2925dc8 plugin-sdk-api-baseline.json
a4c86fe92b7bea538f33139e9b57cfada766b7d504323c2e20a7ca205994be44 plugin-sdk-api-baseline.jsonl
fae367b052828a57feab3bfcd10a58ebeacd6c858b337d0aab72726863952946 plugin-sdk-api-baseline.json
1bb8995e1486f7d900928aaace87421a8297fe41264197f9bf849f07c65c8f2b plugin-sdk-api-baseline.jsonl

View File

@@ -68,6 +68,7 @@ observation-only.
**Conversation observation**
- `model_call_started` / `model_call_ended` — observe sanitized provider/model call metadata, timing, outcome, and bounded request-id hashes without prompt or response content
- `llm_input` — observe provider input (system prompt, prompt, history)
- `llm_output` — observe provider output
@@ -162,6 +163,13 @@ so your plugin does not depend on a legacy combined phase.
`before_agent_start` and `agent_end` include `event.runId` when OpenClaw can
identify the active run. The same value is also available on `ctx.runId`.
Use `model_call_started` and `model_call_ended` for provider-call telemetry
when your plugin must not receive raw prompts, history, responses, headers,
request bodies, or provider request IDs. These hooks carry stable metadata
such as `runId`, `callId`, `provider`, `model`, optional `api`/`transport`,
terminal `durationMs`/`outcome`, and `upstreamRequestIdHash` whenever
OpenClaw can derive a bounded provider request-id hash.
Non-bundled plugins that need `llm_input`, `llm_output`, or `agent_end` must set:
```json

View File

@@ -1,11 +1,16 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import { beforeEach, describe, expect, it } from "vitest";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
onInternalDiagnosticEvent,
resetDiagnosticEventsForTest,
type DiagnosticEventPayload,
} from "../../../infra/diagnostic-events.js";
import { createDiagnosticTraceContext } from "../../../infra/diagnostic-trace-context.js";
import {
initializeGlobalHookRunner,
resetGlobalHookRunner,
} from "../../../plugins/hook-runner-global.js";
import { createHookRunnerWithRegistry } from "../../../plugins/hooks.test-helpers.js";
import { wrapStreamFnWithDiagnosticModelCallEvents } from "./attempt.model-diagnostic-events.js";
async function collectModelCallEvents(run: () => Promise<void>): Promise<DiagnosticEventPayload[]> {
@@ -33,6 +38,11 @@ async function drain(stream: AsyncIterable<unknown>): Promise<void> {
describe("wrapStreamFnWithDiagnosticModelCallEvents", () => {
beforeEach(() => {
resetDiagnosticEventsForTest();
resetGlobalHookRunner();
});
afterEach(() => {
resetGlobalHookRunner();
});
it("emits started and completed events for async streams", async () => {
@@ -170,6 +180,79 @@ describe("wrapStreamFnWithDiagnosticModelCallEvents", () => {
]);
});
it("fires frozen sanitized model-call plugin hooks", async () => {
const started = vi.fn();
const ended = vi.fn();
const { registry } = createHookRunnerWithRegistry([
{ hookName: "model_call_started", handler: started },
{ hookName: "model_call_ended", handler: ended },
]);
initializeGlobalHookRunner(registry);
const secretChunk = "secret response with Bearer sk-test-secret-value";
async function* stream() {
yield { type: "text", text: secretChunk };
}
const wrapped = wrapStreamFnWithDiagnosticModelCallEvents(
(() => stream()) as unknown as StreamFn,
{
runId: "run-1",
sessionKey: "session-key",
sessionId: "session-id",
provider: "openai",
model: "gpt-5.4",
api: "openai-responses",
transport: "http",
trace: createDiagnosticTraceContext(),
nextCallId: () => "call-hook",
},
);
const events = await collectModelCallEvents(async () => {
await drain(wrapped({} as never, {} as never, {} as never) as AsyncIterable<unknown>);
});
await new Promise<void>((resolve) => setImmediate(resolve));
expect(events.map((event) => event.type)).toEqual([
"model.call.started",
"model.call.completed",
]);
expect(started).toHaveBeenCalledWith(
expect.objectContaining({
runId: "run-1",
callId: "call-hook",
sessionKey: "session-key",
sessionId: "session-id",
provider: "openai",
model: "gpt-5.4",
api: "openai-responses",
transport: "http",
}),
expect.objectContaining({
runId: "run-1",
sessionKey: "session-key",
sessionId: "session-id",
modelProviderId: "openai",
modelId: "gpt-5.4",
}),
);
expect(ended).toHaveBeenCalledWith(
expect.objectContaining({
runId: "run-1",
callId: "call-hook",
outcome: "completed",
durationMs: expect.any(Number),
}),
expect.objectContaining({ runId: "run-1" }),
);
const startedEvent = started.mock.calls[0]?.[0];
const startedCtx = started.mock.calls[0]?.[1];
expect(Object.isFrozen(startedEvent)).toBe(true);
expect(Object.isFrozen(startedCtx)).toBe(true);
expect(Object.isFrozen((startedCtx as { trace?: unknown } | undefined)?.trace)).toBe(true);
expect(JSON.stringify([started.mock.calls, ended.mock.calls])).not.toContain(secretChunk);
});
it("emits error events when stream consumption stops early", async () => {
async function* stream() {
yield { type: "text", text: "first" };

View File

@@ -1,4 +1,5 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import { fireAndForgetBoundedHook } from "../../../hooks/fire-and-forget.js";
import {
diagnosticErrorCategory,
diagnosticProviderRequestIdHash,
@@ -12,6 +13,12 @@ import {
freezeDiagnosticTraceContext,
type DiagnosticTraceContext,
} from "../../../infra/diagnostic-trace-context.js";
import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js";
import type {
PluginHookAgentContext,
PluginHookModelCallEndedEvent,
PluginHookModelCallStartedEvent,
} from "../../../plugins/hook-types.js";
export { diagnosticErrorCategory };
@@ -35,6 +42,10 @@ type ModelCallErrorFields = Pick<
Extract<DiagnosticEventInput, { type: "model.call.error" }>,
"errorCategory" | "upstreamRequestIdHash"
>;
type ModelCallEndedHookFields = Pick<
PluginHookModelCallEndedEvent,
"durationMs" | "outcome" | "errorCategory" | "upstreamRequestIdHash"
>;
const MODEL_CALL_STREAM_RETURN_TIMEOUT_MS = 1000;
@@ -90,6 +101,102 @@ function modelCallErrorFields(err: unknown): ModelCallErrorFields {
};
}
/**
 * Build the sanitized metadata payload shared by the model_call_started and
 * model_call_ended plugin hooks. Copies only allow-listed identifier fields
 * from the diagnostic event base; optional fields are omitted (not set to
 * undefined) when absent so the hook payload never carries empty keys.
 */
function modelCallHookEventBase(eventBase: ModelCallEventBase): PluginHookModelCallStartedEvent {
  const { runId, callId, sessionKey, sessionId, provider, model, api, transport } = eventBase;
  return {
    runId,
    callId,
    ...(sessionKey ? { sessionKey } : {}),
    ...(sessionId ? { sessionId } : {}),
    provider,
    model,
    ...(api ? { api } : {}),
    ...(transport ? { transport } : {}),
  };
}
/**
 * Build the frozen, sanitized agent context handed to model-call plugin
 * hooks. Exposes run/session identity plus provider/model ids under the
 * hook-facing names (modelProviderId / modelId); optional session fields are
 * omitted when absent. Frozen so handlers cannot mutate shared state.
 */
function modelCallHookContext(eventBase: ModelCallEventBase): PluginHookAgentContext {
  const { runId, trace, sessionKey, sessionId, provider, model } = eventBase;
  const ctx = {
    runId,
    trace,
    ...(sessionKey ? { sessionKey } : {}),
    ...(sessionId ? { sessionId } : {}),
    modelProviderId: provider,
    modelId: model,
  };
  return Object.freeze(ctx) as PluginHookAgentContext;
}
/**
 * Fire-and-forget dispatch of the model_call_started plugin hook.
 * Skips all payload construction when no handler is registered; handlers
 * receive frozen objects so they cannot mutate shared telemetry state.
 */
function dispatchModelCallStartedHook(eventBase: ModelCallEventBase): void {
  const runner = getGlobalHookRunner();
  if (runner?.hasHooks("model_call_started")) {
    const frozenEvent = Object.freeze(
      modelCallHookEventBase(eventBase),
    ) as PluginHookModelCallStartedEvent;
    const frozenCtx = modelCallHookContext(eventBase);
    fireAndForgetBoundedHook(
      () => runner.runModelCallStarted(frozenEvent, frozenCtx),
      "model_call_started plugin hook failed",
    );
  }
}
/**
 * Fire-and-forget dispatch of the model_call_ended plugin hook.
 * Merges the shared sanitized base metadata with terminal fields
 * (duration, outcome, optional error category / request-id hash), then
 * freezes the payload before handing it to registered handlers.
 */
function dispatchModelCallEndedHook(
  eventBase: ModelCallEventBase,
  fields: ModelCallEndedHookFields,
): void {
  const runner = getGlobalHookRunner();
  if (!runner || !runner.hasHooks("model_call_ended")) {
    return;
  }
  const endedEvent = {
    ...modelCallHookEventBase(eventBase),
    ...fields,
  };
  const frozenEvent = Object.freeze(endedEvent) as PluginHookModelCallEndedEvent;
  const frozenCtx = modelCallHookContext(eventBase);
  fireAndForgetBoundedHook(
    () => runner.runModelCallEnded(frozenEvent, frozenCtx),
    "model_call_ended plugin hook failed",
  );
}
/**
 * Emit the trusted "model.call.started" diagnostic event, then notify any
 * registered model_call_started plugin hooks with sanitized metadata.
 */
function emitModelCallStarted(eventBase: ModelCallEventBase): void {
  const diagnosticEvent = { type: "model.call.started" as const, ...eventBase };
  emitTrustedDiagnosticEvent(diagnosticEvent);
  dispatchModelCallStartedHook(eventBase);
}
/**
 * Emit the trusted "model.call.completed" diagnostic event with the elapsed
 * duration, then notify model_call_ended plugin hooks with a "completed"
 * outcome. Duration is computed once so both consumers see the same value.
 */
function emitModelCallCompleted(eventBase: ModelCallEventBase, startedAt: number): void {
  const durationMs = Date.now() - startedAt;
  const terminal = { durationMs, outcome: "completed" as const };
  emitTrustedDiagnosticEvent({
    type: "model.call.completed",
    ...eventBase,
    durationMs,
  });
  dispatchModelCallEndedHook(eventBase, terminal);
}
/**
 * Emit the trusted "model.call.error" diagnostic event with the elapsed
 * duration and sanitized error fields (error category, bounded request-id
 * hash), then notify model_call_ended plugin hooks with an "error" outcome.
 */
function emitModelCallError(
  eventBase: ModelCallEventBase,
  startedAt: number,
  fields: ModelCallErrorFields,
): void {
  const durationMs = Date.now() - startedAt;
  const terminal = { durationMs, outcome: "error" as const, ...fields };
  emitTrustedDiagnosticEvent({
    type: "model.call.error",
    ...eventBase,
    durationMs,
    ...fields,
  });
  dispatchModelCallEndedHook(eventBase, terminal);
}
async function safeReturnIterator(iterator: AsyncIterator<unknown>): Promise<void> {
let returnResult: unknown;
try {
@@ -137,29 +244,15 @@ async function* observeModelCallIterator<T>(
yield next.value;
}
terminalEmitted = true;
emitTrustedDiagnosticEvent({
type: "model.call.completed",
...eventBase,
durationMs: Date.now() - startedAt,
});
emitModelCallCompleted(eventBase, startedAt);
} catch (err) {
terminalEmitted = true;
emitTrustedDiagnosticEvent({
type: "model.call.error",
...eventBase,
durationMs: Date.now() - startedAt,
...modelCallErrorFields(err),
});
emitModelCallError(eventBase, startedAt, modelCallErrorFields(err));
throw err;
} finally {
if (!terminalEmitted) {
await safeReturnIterator(iterator);
emitTrustedDiagnosticEvent({
type: "model.call.error",
...eventBase,
durationMs: Date.now() - startedAt,
errorCategory: "StreamAbandoned",
});
emitModelCallError(eventBase, startedAt, { errorCategory: "StreamAbandoned" });
}
}
}
@@ -209,11 +302,7 @@ function observeModelCallResult(
startedAt,
);
}
emitTrustedDiagnosticEvent({
type: "model.call.completed",
...eventBase,
durationMs: Date.now() - startedAt,
});
emitModelCallCompleted(eventBase, startedAt);
return result;
}
@@ -225,10 +314,7 @@ export function wrapStreamFnWithDiagnosticModelCallEvents(
const callId = ctx.nextCallId();
const trace = freezeDiagnosticTraceContext(createChildDiagnosticTraceContext(ctx.trace));
const eventBase = baseModelCallEvent(ctx, callId, trace);
emitTrustedDiagnosticEvent({
type: "model.call.started",
...eventBase,
});
emitModelCallStarted(eventBase);
const startedAt = Date.now();
try {
@@ -237,24 +323,14 @@ export function wrapStreamFnWithDiagnosticModelCallEvents(
return result.then(
(resolved) => observeModelCallResult(resolved, eventBase, startedAt),
(err) => {
emitTrustedDiagnosticEvent({
type: "model.call.error",
...eventBase,
durationMs: Date.now() - startedAt,
...modelCallErrorFields(err),
});
emitModelCallError(eventBase, startedAt, modelCallErrorFields(err));
throw err;
},
);
}
return observeModelCallResult(result, eventBase, startedAt);
} catch (err) {
emitTrustedDiagnosticEvent({
type: "model.call.error",
...eventBase,
durationMs: Date.now() - startedAt,
...modelCallErrorFields(err),
});
emitModelCallError(eventBase, startedAt, modelCallErrorFields(err));
throw err;
}
}) as StreamFn;

View File

@@ -59,6 +59,8 @@ export type PluginHookName =
| "before_prompt_build"
| "before_agent_start"
| "before_agent_reply"
| "model_call_started"
| "model_call_ended"
| "llm_input"
| "llm_output"
| "agent_end"
@@ -90,6 +92,8 @@ export const PLUGIN_HOOK_NAMES = [
"before_prompt_build",
"before_agent_start",
"before_agent_reply",
"model_call_started",
"model_call_ended",
"llm_input",
"llm_output",
"agent_end",
@@ -187,6 +191,26 @@ export type PluginHookLlmInputEvent = {
imagesCount: number;
};
// Sanitized, metadata-only identity fields shared by both model-call hooks.
// Per the docs for these hooks, the payload intentionally excludes prompts,
// responses, headers, request bodies, and raw provider request IDs.
export type PluginHookModelCallBaseEvent = {
runId: string;
callId: string;
sessionKey?: string;
sessionId?: string;
provider: string;
model: string;
api?: string;
transport?: string;
};
// model_call_started carries only the shared identity metadata.
export type PluginHookModelCallStartedEvent = PluginHookModelCallBaseEvent;
// model_call_ended adds terminal fields: timing, outcome, and — when
// derivable — a bounded hash of the provider request ID (never the raw ID).
export type PluginHookModelCallEndedEvent = PluginHookModelCallBaseEvent & {
durationMs: number;
outcome: "completed" | "error";
errorCategory?: string;
upstreamRequestIdHash?: string;
};
export type PluginHookLlmOutputEvent = {
runId: string;
sessionId: string;
@@ -676,6 +700,14 @@ export type PluginHookHandlerMap = {
event: PluginHookBeforeAgentReplyEvent,
ctx: PluginHookAgentContext,
) => Promise<PluginHookBeforeAgentReplyResult | void> | PluginHookBeforeAgentReplyResult | void;
model_call_started: (
event: PluginHookModelCallStartedEvent,
ctx: PluginHookAgentContext,
) => Promise<void> | void;
model_call_ended: (
event: PluginHookModelCallEndedEvent,
ctx: PluginHookAgentContext,
) => Promise<void> | void;
llm_input: (event: PluginHookLlmInputEvent, ctx: PluginHookAgentContext) => Promise<void> | void;
llm_output: (
event: PluginHookLlmOutputEvent,

View File

@@ -29,6 +29,8 @@ import type {
PluginHookBeforePromptBuildEvent,
PluginHookBeforePromptBuildResult,
PluginHookBeforeCompactionEvent,
PluginHookModelCallEndedEvent,
PluginHookModelCallStartedEvent,
PluginHookInboundClaimContext,
PluginHookInboundClaimEvent,
PluginHookInboundClaimResult,
@@ -85,6 +87,8 @@ export type {
PluginHookBeforeModelResolveResult,
PluginHookBeforePromptBuildEvent,
PluginHookBeforePromptBuildResult,
PluginHookModelCallEndedEvent,
PluginHookModelCallStartedEvent,
PluginHookLlmInputEvent,
PluginHookLlmOutputEvent,
PluginHookAgentEndEvent,
@@ -588,6 +592,30 @@ export function createHookRunner(
);
}
/**
 * Run the model_call_started hook.
 * Observation-only: hands plugins the sanitized metadata for a provider
 * call that just began. Handlers run in parallel (fire-and-forget).
 */
async function runModelCallStarted(
  event: PluginHookModelCallStartedEvent,
  ctx: PluginHookAgentContext,
): Promise<void> {
  await runVoidHook("model_call_started", event, ctx);
}
/**
 * Run the model_call_ended hook.
 * Observation-only: hands plugins the sanitized terminal metadata
 * (duration, outcome, error fields) for a finished provider call.
 * Handlers run in parallel (fire-and-forget).
 */
async function runModelCallEnded(
  event: PluginHookModelCallEndedEvent,
  ctx: PluginHookAgentContext,
): Promise<void> {
  await runVoidHook("model_call_ended", event, ctx);
}
/**
* Run agent_end hook.
* Allows plugins to analyze completed conversations.
@@ -1124,6 +1152,8 @@ export function createHookRunner(
runBeforePromptBuild,
runBeforeAgentStart,
runBeforeAgentReply,
runModelCallStarted,
runModelCallEnded,
runLlmInput,
runLlmOutput,
runAgentEnd,

View File

@@ -7,14 +7,24 @@ const hookCtx = {
};
async function expectLlmHookCall(params: {
hookName: "llm_input" | "llm_output";
hookName: "model_call_started" | "model_call_ended" | "llm_input" | "llm_output";
event: Record<string, unknown>;
expectedEvent: Record<string, unknown>;
}) {
const handler = vi.fn();
const { runner } = createHookRunnerWithRegistry([{ hookName: params.hookName, handler }]);
if (params.hookName === "llm_input") {
if (params.hookName === "model_call_started") {
await runner.runModelCallStarted(
params.event as Parameters<typeof runner.runModelCallStarted>[0],
hookCtx,
);
} else if (params.hookName === "model_call_ended") {
await runner.runModelCallEnded(
params.event as Parameters<typeof runner.runModelCallEnded>[0],
hookCtx,
);
} else if (params.hookName === "llm_input") {
await runner.runLlmInput(
{
...params.event,
@@ -40,6 +50,38 @@ async function expectLlmHookCall(params: {
describe("llm hook runner methods", () => {
it.each([
{
name: "runModelCallStarted invokes registered model_call_started hooks",
hookName: "model_call_started" as const,
methodName: "runModelCallStarted" as const,
event: {
runId: "run-1",
callId: "call-1",
sessionId: "session-1",
provider: "openai",
model: "gpt-5",
api: "openai-responses",
transport: "http",
},
expectedEvent: { runId: "run-1", callId: "call-1", provider: "openai" },
},
{
name: "runModelCallEnded invokes registered model_call_ended hooks",
hookName: "model_call_ended" as const,
methodName: "runModelCallEnded" as const,
event: {
runId: "run-1",
callId: "call-1",
sessionId: "session-1",
provider: "openai",
model: "gpt-5",
durationMs: 42,
outcome: "error",
errorCategory: "TimeoutError",
upstreamRequestIdHash: "sha256:abcdef123456",
},
expectedEvent: { runId: "run-1", callId: "call-1", outcome: "error" },
},
{
name: "runLlmInput invokes registered llm_input hooks",
hookName: "llm_input" as const,
@@ -80,8 +122,13 @@ describe("llm hook runner methods", () => {
});
it("hasHooks returns true for registered llm hooks", () => {
const { runner } = createHookRunnerWithRegistry([{ hookName: "llm_input", handler: vi.fn() }]);
const { runner } = createHookRunnerWithRegistry([
{ hookName: "model_call_started", handler: vi.fn() },
{ hookName: "llm_input", handler: vi.fn() },
]);
expect(runner.hasHooks("model_call_started")).toBe(true);
expect(runner.hasHooks("model_call_ended")).toBe(false);
expect(runner.hasHooks("llm_input")).toBe(true);
expect(runner.hasHooks("llm_output")).toBe(false);
});