diff --git a/CHANGELOG.md b/CHANGELOG.md index b29f590f255..d02bf2c95e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -160,6 +160,7 @@ Docs: https://docs.openclaw.ai - Discord/inbound timeout isolation: separate inbound worker timeout tracking from listener timeout budgets so queued Discord replies are no longer dropped when listener watchdog windows expire mid-run. (#36602) Thanks @dutifulbob. - Memory/doctor SecretRef handling: treat SecretRef-backed memory-search API keys as configured, and fail embedding setup with explicit unresolved-secret errors instead of crashing. (#36835) Thanks @joshavant. - Memory/flush default prompt: ban timestamped variant filenames during default memory flush runs so durable notes stay in the canonical daily `memory/YYYY-MM-DD.md` file. (#34951) thanks @zerone0x. +- Agents/reply delivery timing: flush embedded Pi block replies before waiting on compaction retries so already-generated assistant replies reach channels before compaction wait completes. (#35489) thanks @Sid-Qin. ## 2026.3.2 diff --git a/extensions/llm-task/src/llm-task-tool.ts b/extensions/llm-task/src/llm-task-tool.ts index d762ec3e15d..3a2e42c7223 100644 --- a/extensions/llm-task/src/llm-task-tool.ts +++ b/extensions/llm-task/src/llm-task-tool.ts @@ -26,8 +26,8 @@ async function loadRunEmbeddedPiAgent(): Promise { // Bundled install (built) // NOTE: there is no src/ tree in a packaged install. Prefer a stable internal entrypoint. - const distModulePath = "../../../dist/extensionAPI.js"; - const mod = await import(distModulePath); + const distExtensionApi = "../../../dist/extensionAPI.js"; + const mod = (await import(distExtensionApi)) as { runEmbeddedPiAgent?: unknown }; // oxlint-disable-next-line typescript/no-explicit-any const fn = (mod as any).runEmbeddedPiAgent; if (typeof fn !== "function") { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 54ac8b13489..e19dd3cedb2 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1688,6 +1688,14 @@ export async function runEmbeddedAttempt( const preCompactionSessionId = activeSession.sessionId; try { + // Flush buffered block replies before waiting for compaction so the + // user receives the assistant response immediately. Without this, + // coalesced/buffered blocks stay in the pipeline until compaction + // finishes — which can take minutes on large contexts (#35074). + if (params.onBlockReplyFlush) { + await params.onBlockReplyFlush(); + } + await abortable(waitForCompactionRetry()); } catch (err) { if (isRunnerAbortError(err)) { diff --git a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts index 326b51c7266..4c6803e814c 100644 --- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts +++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts @@ -73,6 +73,11 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) { } ctx.flushBlockReplyBuffer(); + // Flush the reply pipeline so the response reaches the channel before + // compaction wait blocks the run. This mirrors the pattern used by + // handleToolExecutionStart and ensures delivery is not held hostage to + // long-running compaction (#35074). + void ctx.params.onBlockReplyFlush?.(); ctx.state.blockState.thinking = false; ctx.state.blockState.final = false;