fix: serialize tool result delivery to preserve message ordering

Tool result callbacks were dispatched as fire-and-forget tasks via
pendingToolTasks, with no ordering guarantee. When multiple tool calls
completed near-simultaneously, their typing signals and message sends
raced through independent async paths, causing out-of-order delivery
to the user — particularly visible on Telegram.

Replace the fire-and-forget Set<Promise> pattern with a serialized
promise chain (toolResultChain). Each tool result now awaits the
previous one before starting its typing signal and delivery, ensuring
messages arrive in the order they were produced by the agent.

The existing pendingToolTasks tracking is preserved so the post-run
Promise.allSettled() drain still works correctly.

Fixes #11044
This commit is contained in:
Berg
2026-02-19 20:15:43 +00:00
committed by joshavant
parent d871ee91d0
commit 80148cd2a2
2 changed files with 56 additions and 23 deletions

View File

@@ -379,29 +379,33 @@ export async function runAgentTurnWithFallback(params: {
shouldEmitToolResult: params.shouldEmitToolResult,
shouldEmitToolOutput: params.shouldEmitToolOutput,
onToolResult: onToolResult
? (payload) => {
// `subscribeEmbeddedPiSession` may invoke tool callbacks without awaiting them.
// If a tool callback starts typing after the run finalized, we can end up with
// a typing loop that never sees a matching markRunComplete(). Track and drain.
const task = (async () => {
const { text, skip } = normalizeStreamingText(payload);
if (skip) {
return;
}
await params.typingSignals.signalTextDelta(text);
await onToolResult({
text,
mediaUrls: payload.mediaUrls,
});
})()
.catch((err) => {
logVerbose(`tool result delivery failed: ${String(err)}`);
})
.finally(() => {
params.pendingToolTasks.delete(task);
});
params.pendingToolTasks.add(task);
}
? (() => {
// Serialize tool result delivery to preserve message ordering.
// Without this, concurrent tool callbacks race through typing signals
// and message sends, causing out-of-order delivery to the user.
// See: https://github.com/openclaw/openclaw/issues/11044
let toolResultChain: Promise<void> = Promise.resolve();
return (payload: ReplyPayload) => {
const task = (toolResultChain = toolResultChain.then(async () => {
const { text, skip } = normalizeStreamingText(payload);
if (skip) {
return;
}
await params.typingSignals.signalTextDelta(text);
await onToolResult({
text,
mediaUrls: payload.mediaUrls,
});
}))
.catch((err) => {
logVerbose(`tool result delivery failed: ${String(err)}`);
})
.finally(() => {
params.pendingToolTasks.delete(task);
});
params.pendingToolTasks.add(task);
};
})()
: undefined,
});
},

View File

@@ -533,6 +533,35 @@ describe("runReplyAgent typing (heartbeat)", () => {
vi.useRealTimers();
});
it("delivers tool results in order even when dispatched concurrently", async () => {
const deliveryOrder: string[] = [];
const onToolResult = vi.fn(async (payload: { text?: string }) => {
// Simulate variable network latency: first result is slower than second
const delay = payload.text === "first" ? 50 : 10;
await new Promise((r) => setTimeout(r, delay));
deliveryOrder.push(payload.text ?? "");
});
state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: AgentRunParams) => {
// Fire two tool results without awaiting — simulates concurrent tool completion
void params.onToolResult?.({ text: "first", mediaUrls: [] });
void params.onToolResult?.({ text: "second", mediaUrls: [] });
// Small delay to let the chain settle before returning
await new Promise((r) => setTimeout(r, 150));
return { payloads: [{ text: "final" }], meta: {} };
});
const { run } = createMinimalRun({
typingMode: "message",
opts: { onToolResult },
});
await run();
expect(onToolResult).toHaveBeenCalledTimes(2);
// Despite "first" having higher latency, it must be delivered before "second"
expect(deliveryOrder).toEqual(["first", "second"]);
});
it("announces auto-compaction in verbose mode and tracks count", async () => {
await withTempStateDir(async (stateDir) => {
const storePath = path.join(stateDir, "sessions", "sessions.json");