diff --git a/CHANGELOG.md b/CHANGELOG.md index 60b46a90743..e1b3bbc216b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Plugins/hooks: time out never-settling `agent_end` observation hooks after 30 seconds and log the plugin failure, so hung embedding endpoints no longer leave memory capture silently pending forever. Fixes #65544. Thanks @ghoc0099. - Memory/LanceDB: call OpenAI-compatible embedding endpoints through the raw SDK transport without sending `encoding_format`, then normalize float-array or base64 responses so providers such as ZhiPu and DashScope no longer fail recall with wrong vector dimensions or rejected parameters. Fixes #63655. Thanks @kinthaiofficial. - Plugins/install: run dependency installs with npm error-level logging instead of silent mode so failed plugin or hook installs surface actionable npm errors such as EUNSUPPORTEDPROTOCOL instead of `npm install failed:` with no detail. (#73093) Thanks @sanctrl. - Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator. diff --git a/docs/plugins/hooks.md b/docs/plugins/hooks.md index 407c62b9399..d0e76f23941 100644 --- a/docs/plugins/hooks.md +++ b/docs/plugins/hooks.md @@ -202,6 +202,12 @@ Cron-driven runs also expose `ctx.jobId` (the originating cron job id) so plugin hooks can scope metrics, side effects, or state to a specific scheduled job. +`agent_end` is an observation hook and runs fire-and-forget after the turn. The +hook runner applies a 30 second timeout so a wedged plugin or embedding +endpoint cannot leave the hook promise pending forever. A timeout is logged and +OpenClaw continues; it does not cancel plugin-owned network work unless the +plugin also uses its own abort signal. + Use `model_call_started` and `model_call_ended` for provider-call telemetry that should not receive raw prompts, history, responses, headers, request bodies, or provider request IDs. These hooks include stable metadata such as diff --git a/src/plugins/hooks.correlation.test.ts b/src/plugins/hooks.correlation.test.ts index 58754858bf1..9d27cefbc46 100644 --- a/src/plugins/hooks.correlation.test.ts +++ b/src/plugins/hooks.correlation.test.ts @@ -52,4 +52,38 @@ describe("hook correlation fields", () => { TEST_PLUGIN_AGENT_CTX, ); }); + + it("times out never-settling agent_end handlers", async () => { + vi.useFakeTimers(); + try { + const handler = vi.fn(() => new Promise(() => {})); + addTestHook({ + registry, + pluginId: "plugin-a", + hookName: "agent_end", + handler: handler as PluginHookRegistration["handler"], + }); + const logger = { + error: vi.fn(), + warn: vi.fn(), + }; + + const runner = createHookRunner(registry, { + logger, + voidHookTimeoutMsByHook: { agent_end: 5 }, + }); + const run = runner.runAgentEnd({ messages: [], success: true }, TEST_PLUGIN_AGENT_CTX); + + await vi.advanceTimersByTimeAsync(5); + + await expect(run).resolves.toBeUndefined(); + expect(logger.error).toHaveBeenCalledWith( + expect.stringContaining( + "[hooks] agent_end handler from plugin-a failed: timed out after 5ms", + ), + ); + } finally { + vi.useRealTimers(); + } + }); }); diff --git a/src/plugins/hooks.ts b/src/plugins/hooks.ts index eeb9953bf2e..3f78d7193c7 100644 --- a/src/plugins/hooks.ts +++ b/src/plugins/hooks.ts @@ -155,6 +155,15 @@ export type HookRunnerOptions = { * Defaults to fail-open unless explicitly overridden for a hook name. */ failurePolicyByHook?: Partial>; + /** + * Optional timeout for void/observation hooks. A timed-out hook is logged and + * the runner continues, but the plugin's underlying work is not cancelled. + */ + voidHookTimeoutMsByHook?: Partial>; +}; + +const DEFAULT_VOID_HOOK_TIMEOUT_MS_BY_HOOK: Partial> = { + agent_end: 30_000, }; type ModifyingHookPolicy = { @@ -223,6 +232,10 @@ export function createHookRunner( const logger = options.logger; const catchErrors = options.catchErrors ?? true; const failurePolicyByHook = options.failurePolicyByHook ?? {}; + const voidHookTimeoutMsByHook = { + ...DEFAULT_VOID_HOOK_TIMEOUT_MS_BY_HOOK, + ...options.voidHookTimeoutMsByHook, + }; const shouldCatchHookErrors = (hookName: PluginHookName): boolean => catchErrors && (failurePolicyByHook[hookName] ?? "fail-open") === "fail-open"; @@ -364,6 +377,32 @@ export function createHookRunner( return typeof (value as { then?: unknown }).then === "function"; }; + const getVoidHookTimeoutMs = (hookName: PluginHookName): number | undefined => { + const timeoutMs = voidHookTimeoutMsByHook[hookName]; + if (typeof timeoutMs !== "number" || !Number.isFinite(timeoutMs) || timeoutMs <= 0) { + return undefined; + } + return Math.floor(timeoutMs); + }; + + const withVoidHookTimeout = async (promise: Promise, timeoutMs: number): Promise => { + let timer: ReturnType | undefined; + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => { + reject(new Error(`timed out after ${timeoutMs}ms`)); + }, timeoutMs); + timer.unref?.(); + }); + + try { + return await Promise.race([promise, timeout]); + } finally { + if (timer) { + clearTimeout(timer); + } + } + }; + const runSyncHookHandler = ( hook: PluginHookRegistration, event: SyncHookEvent, @@ -391,7 +430,15 @@ export function createHookRunner( const promises = hooks.map(async (hook) => { try { - await (hook.handler as (event: unknown, ctx: unknown) => Promise)(event, ctx); + const promise = Promise.resolve( + (hook.handler as (event: unknown, ctx: unknown) => Promise | void)(event, ctx), + ); + const timeoutMs = getVoidHookTimeoutMs(hookName); + if (timeoutMs) { + await withVoidHookTimeout(promise, timeoutMs); + } else { + await promise; + } } catch (err) { handleHookError({ hookName, pluginId: hook.pluginId, error: err }); }