fix(plugins): time out hanging agent end hooks

This commit is contained in:
Peter Steinberger
2026-04-28 01:18:41 +01:00
parent 067888a608
commit 4d4c7c8ab3
4 changed files with 89 additions and 1 deletions

View File

@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Plugins/hooks: time out never-settling `agent_end` observation hooks after 30 seconds and log the plugin failure, so hung embedding endpoints no longer leave memory capture silently pending forever. Fixes #65544. Thanks @ghoc0099.
- Memory/LanceDB: call OpenAI-compatible embedding endpoints through the raw SDK transport without sending `encoding_format`, then normalize float-array or base64 responses so providers such as ZhiPu and DashScope no longer fail recall with wrong vector dimensions or rejected parameters. Fixes #63655. Thanks @kinthaiofficial.
- Plugins/install: run dependency installs with npm error-level logging instead of silent mode so failed plugin or hook installs surface actionable npm errors such as EUNSUPPORTEDPROTOCOL instead of `npm install failed:` with no detail. (#73093) Thanks @sanctrl.
- Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator.

View File

@@ -202,6 +202,12 @@ Cron-driven runs also expose `ctx.jobId` (the originating cron job id) so
plugin hooks can scope metrics, side effects, or state to a specific scheduled
job.
`agent_end` is an observation hook and runs fire-and-forget after the turn. The
hook runner applies a 30 second timeout so a wedged plugin or embedding
endpoint cannot leave the hook promise pending forever. A timeout is logged and
OpenClaw continues; it does not cancel plugin-owned network work unless the
plugin also uses its own abort signal.
Use `model_call_started` and `model_call_ended` for provider-call telemetry
that should not receive raw prompts, history, responses, headers, request
bodies, or provider request IDs. These hooks include stable metadata such as

View File

@@ -52,4 +52,38 @@ describe("hook correlation fields", () => {
TEST_PLUGIN_AGENT_CTX,
);
});
it("times out never-settling agent_end handlers", async () => {
vi.useFakeTimers();
try {
const handler = vi.fn(() => new Promise<void>(() => {}));
addTestHook({
registry,
pluginId: "plugin-a",
hookName: "agent_end",
handler: handler as PluginHookRegistration["handler"],
});
const logger = {
error: vi.fn(),
warn: vi.fn(),
};
const runner = createHookRunner(registry, {
logger,
voidHookTimeoutMsByHook: { agent_end: 5 },
});
const run = runner.runAgentEnd({ messages: [], success: true }, TEST_PLUGIN_AGENT_CTX);
await vi.advanceTimersByTimeAsync(5);
await expect(run).resolves.toBeUndefined();
expect(logger.error).toHaveBeenCalledWith(
expect.stringContaining(
"[hooks] agent_end handler from plugin-a failed: timed out after 5ms",
),
);
} finally {
vi.useRealTimers();
}
});
});

View File

@@ -155,6 +155,15 @@ export type HookRunnerOptions = {
* Defaults to fail-open unless explicitly overridden for a hook name.
*/
failurePolicyByHook?: Partial<Record<PluginHookName, HookFailurePolicy>>;
/**
* Optional timeout for void/observation hooks. A timed-out hook is logged and
* the runner continues, but the plugin's underlying work is not cancelled.
*/
voidHookTimeoutMsByHook?: Partial<Record<PluginHookName, number>>;
};
const DEFAULT_VOID_HOOK_TIMEOUT_MS_BY_HOOK: Partial<Record<PluginHookName, number>> = {
agent_end: 30_000,
};
type ModifyingHookPolicy<K extends PluginHookName, TResult> = {
@@ -223,6 +232,10 @@ export function createHookRunner(
const logger = options.logger;
const catchErrors = options.catchErrors ?? true;
const failurePolicyByHook = options.failurePolicyByHook ?? {};
const voidHookTimeoutMsByHook = {
...DEFAULT_VOID_HOOK_TIMEOUT_MS_BY_HOOK,
...options.voidHookTimeoutMsByHook,
};
const shouldCatchHookErrors = (hookName: PluginHookName): boolean =>
catchErrors && (failurePolicyByHook[hookName] ?? "fail-open") === "fail-open";
@@ -364,6 +377,32 @@ export function createHookRunner(
return typeof (value as { then?: unknown }).then === "function";
};
const getVoidHookTimeoutMs = (hookName: PluginHookName): number | undefined => {
const timeoutMs = voidHookTimeoutMsByHook[hookName];
if (typeof timeoutMs !== "number" || !Number.isFinite(timeoutMs) || timeoutMs <= 0) {
return undefined;
}
return Math.floor(timeoutMs);
};
const withVoidHookTimeout = async <T>(promise: Promise<T>, timeoutMs: number): Promise<T> => {
let timer: ReturnType<typeof setTimeout> | undefined;
const timeout = new Promise<never>((_, reject) => {
timer = setTimeout(() => {
reject(new Error(`timed out after ${timeoutMs}ms`));
}, timeoutMs);
timer.unref?.();
});
try {
return await Promise.race([promise, timeout]);
} finally {
if (timer) {
clearTimeout(timer);
}
}
};
const runSyncHookHandler = <K extends SyncHookName>(
hook: PluginHookRegistration<K>,
event: SyncHookEvent<K>,
@@ -391,7 +430,15 @@ export function createHookRunner(
const promises = hooks.map(async (hook) => {
try {
await (hook.handler as (event: unknown, ctx: unknown) => Promise<void>)(event, ctx);
const promise = Promise.resolve(
(hook.handler as (event: unknown, ctx: unknown) => Promise<void> | void)(event, ctx),
);
const timeoutMs = getVoidHookTimeoutMs(hookName);
if (timeoutMs) {
await withVoidHookTimeout(promise, timeoutMs);
} else {
await promise;
}
} catch (err) {
handleHookError({ hookName, pluginId: hook.pluginId, error: err });
}