From a92e2b13b8b840e6f91d011fcaa989f1e805eec2 Mon Sep 17 00:00:00 2001
From: Alex Knight
Date: Sun, 3 May 2026 21:32:06 +1000
Subject: [PATCH] fix(agents): detect incomplete tool-use turns with pre-tool text (#76477) (#76544)

* fix(agents): detect incomplete tool-use turns with pre-tool text (#76477)

When the last assistant message ended with stopReason=toolUse, pre-tool text alone (payloadCount > 0) was suppressing the incomplete-turn guard. The model expected to continue after tool results but the post-tool response was never produced, silently dropping the final answer.

Fix isIncompleteTerminalAssistantTurn to always flag toolUse stop reason as incomplete regardless of pre-tool text, and update the early-return condition in resolveIncompleteTurnPayloadText to not skip the check when the last assistant ended with a tool call.

* fix(agents): mark tool-use terminal with pre-tool text as abandoned in lifecycle (#76477)

The lifecycle handler's derivedWorkingTerminalState was emitting 'working' for interrupted tool-use turns with pre-tool text because it required !hasAssistantVisibleText for the 'abandoned' state. Update the derivation to also mark as 'abandoned' when incompleteTerminalAssistant is true, so lifecycle consumers see a consistent state with the runner's terminal result.
--- CHANGELOG.md | 1 + .../run.incomplete-turn.test.ts | 131 ++++++++++++++++++ .../pi-embedded-runner/run/incomplete-turn.ts | 16 ++- ...edded-subscribe.handlers.lifecycle.test.ts | 28 ++++ ...i-embedded-subscribe.handlers.lifecycle.ts | 8 +- 5 files changed, 181 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b5dfb1cccd..c112a75ee97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai - Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana. - Docker/Gateway: pass Docker setup `.env` values into gateway and CLI containers and preserve exec SecretRef `passEnv` keys in managed service plans, so 1Password Connect-backed Discord tokens keep resolving after doctor or plugin repair. Thanks @vincentkoc. - Control UI/WebChat: explain compaction boundaries in chat history and link directly to session checkpoint controls so pre-compaction turns no longer look silently lost after refresh. Fixes #76415. Thanks @BunsDev. +- Agents/incomplete-turn: detect and surface a warning when the agent's final text after a tool-call chain is silently dropped because the post-tool assistant response was never produced, instead of completing the turn with only the pre-tool analysis text. Fixes #76477. Thanks @amknight. - Channels/WhatsApp: attach native outbound mention metadata for group text and media captions by resolving `@+` and `@` tokens against WhatsApp participant data, including LID groups. Fixes #39879; carries forward #56863. Thanks @kengi1437, @joe2643, and @fridayck. - Channels/WhatsApp: require outbound mention tokens to end at a word boundary so phone-number prefixes inside longer strings no longer trigger hidden native mentions. 
- Plugins/uninstall: remove empty managed git install parent directories after deleting cloned plugin repos and cover npm/git uninstall residue in Docker plugin lifecycle tests. Thanks @vincentkoc. diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index 868bb8bc77c..b9e413e0f83 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -26,6 +26,7 @@ import { resolveEmptyResponseRetryInstruction, resolvePlanningOnlyRetryLimit, resolvePlanningOnlyRetryInstruction, + isIncompleteTerminalAssistantTurn, resolveIncompleteTurnPayloadText, resolveReasoningOnlyRetryInstruction, STRICT_AGENTIC_BLOCKED_TEXT, @@ -995,6 +996,136 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { ).toBe("abandoned"); }); + it("flags tool-use stop reason as incomplete even when pre-tool text exists (#76477)", () => { + expect( + isIncompleteTerminalAssistantTurn({ + hasAssistantVisibleText: true, + lastAssistant: { stopReason: "toolUse" }, + }), + ).toBe(true); + expect( + isIncompleteTerminalAssistantTurn({ + hasAssistantVisibleText: false, + lastAssistant: { stopReason: "toolUse" }, + }), + ).toBe(true); + expect( + isIncompleteTerminalAssistantTurn({ + hasAssistantVisibleText: true, + lastAssistant: { stopReason: "end_turn" }, + }), + ).toBe(false); + }); + + it("detects tool-use terminal turn with pre-tool text as incomplete (#76477)", () => { + // When the last assistant message ended with stopReason=toolUse, pre-tool + // text alone must not suppress the incomplete-turn guard. The model + // expected to continue after tool results but the post-tool response was + // never produced. 
+ const incompleteTurnText = resolveIncompleteTurnPayloadText({ + payloadCount: 1, + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: ["Initial analysis of the codebase..."], + toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }], + lastAssistant: { + role: "assistant", + stopReason: "toolUse", + provider: "anthropic", + model: "sonnet-4.6", + content: [ + { type: "text", text: "Initial analysis of the codebase..." }, + { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } }, + ], + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + }); + + expect(incompleteTurnText).not.toBeNull(); + expect(incompleteTurnText).toContain("couldn't generate a response"); + }); + + it("surfaces tool-use terminal with pre-tool text and side effects as replay-unsafe (#76477)", () => { + const incompleteTurnText = resolveIncompleteTurnPayloadText({ + payloadCount: 1, + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: ["Let me update the file..."], + toolMetas: [{ toolName: "write" }], + lastAssistant: { + role: "assistant", + stopReason: "toolUse", + provider: "openai", + model: "gpt-5.4", + content: [ + { type: "text", text: "Let me update the file..." }, + { type: "tool_use", id: "tool_1", name: "write", input: {} }, + ], + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + }); + + expect(incompleteTurnText).toContain("verify before retrying"); + }); + + it("does not flag a completed tool-use turn with end_turn as incomplete (#76477)", () => { + // When the model successfully produces post-tool text, lastAssistant has + // stopReason=end_turn. The incomplete-turn guard should not fire. 
+ const incompleteTurnText = resolveIncompleteTurnPayloadText({ + payloadCount: 2, + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: ["Initial analysis...", "Here is the final answer."], + toolMetas: [{ toolName: "read" }], + lastAssistant: { + role: "assistant", + stopReason: "end_turn", + provider: "anthropic", + model: "sonnet-4.6", + content: [{ type: "text", text: "Here is the final answer." }], + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + }); + + expect(incompleteTurnText).toBeNull(); + }); + + it("surfaces an error for tool-use terminal turn with pre-tool text via runEmbeddedPiAgent (#76477)", async () => { + mockedClassifyFailoverReason.mockReturnValue(null); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: ["Initial analysis of the issue..."], + toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }], + lastAssistant: { + stopReason: "toolUse", + provider: "anthropic", + model: "sonnet-4.6", + content: [ + { type: "text", text: "Initial analysis of the issue..." 
}, + { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } }, + ], + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + provider: "anthropic", + model: "sonnet-4.6", + runId: "run-tool-use-dropped-final-text", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("couldn't generate a response"); + expect(mockedLog.warn).toHaveBeenCalledWith( + expect.stringContaining("incomplete turn detected"), + ); + }); + it("treats missing replay metadata as replay-invalid", () => { const attempt = makeAttemptResult(); delete (attempt as Partial).replayMetadata; diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index 1c21cf5e8e2..601a6b68a18 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -90,7 +90,12 @@ export function isIncompleteTerminalAssistantTurn(params: { hasAssistantVisibleText: boolean; lastAssistant?: { stopReason?: string } | null; }): boolean { - return !params.hasAssistantVisibleText && params.lastAssistant?.stopReason === "toolUse"; + // A tool-use stop reason means the model issued a tool call and expected + // to continue after tool results. If the session ended before the + // post-tool assistant message arrived, the turn is incomplete regardless + // of whether pre-tool text exists — that text is preliminary analysis, + // not the final answer. 
(#76477) + return params.lastAssistant?.stopReason === "toolUse"; } const PLANNING_ONLY_PROMISE_RE = @@ -220,8 +225,15 @@ export function resolveIncompleteTurnPayloadText(params: { timedOut: boolean; attempt: IncompleteTurnAttempt; }): string | null { + // Tool-use terminal guard: when the last assistant message ended with a + // tool-call stop reason, the model expected to continue after tool results. + // Pre-tool text alone (payloadCount > 0) must not suppress the incomplete- + // turn check in that case — the final post-tool response was never + // produced. (#76477) + const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse"; + if ( - params.payloadCount !== 0 || + (params.payloadCount !== 0 && !toolUseTerminal) || params.aborted || params.timedOut || params.attempt.clientToolCalls || diff --git a/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts b/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts index a27fde95bfd..1c8a2973af5 100644 --- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts @@ -289,6 +289,34 @@ describe("handleAgentEnd", () => { }); }); + it("marks tool-use terminal with pre-tool text as abandoned (#76477)", async () => { + const onAgentEvent = vi.fn(); + const ctx = createContext( + { + role: "assistant", + stopReason: "toolUse", + content: [ + { type: "text", text: "Initial analysis..." 
}, + { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } }, + ], + }, + { onAgentEvent }, + ); + ctx.state.livenessState = "working"; + ctx.state.assistantTexts = ["Initial analysis..."]; + + await handleAgentEnd(ctx); + + expect(onAgentEvent).toHaveBeenCalledWith({ + stream: "lifecycle", + data: { + phase: "end", + livenessState: "abandoned", + replayInvalid: true, + }, + }); + }); + it("keeps accumulated deterministic side effects from being marked abandoned", async () => { const onAgentEvent = vi.fn(); const ctx = createContext(undefined, { onAgentEvent }); diff --git a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts index 8840754c057..66a9aa643e0 100644 --- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts +++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts @@ -54,9 +54,15 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise< }); const replayInvalid = ctx.state.replayState.replayInvalid || incompleteTerminalAssistant ? true : undefined; + // Tool-use terminal guard: when the last assistant message ended with a + // tool-call stop reason, the turn is incomplete even when pre-tool text + // exists — mark as abandoned so lifecycle consumers do not see a working + // end state for an interrupted tool chain. (#76477) const derivedWorkingTerminalState = isError ? "blocked" - : replayInvalid && !hasAssistantVisibleText && !hadDeterministicSideEffect + : replayInvalid && + !hadDeterministicSideEffect && + (!hasAssistantVisibleText || incompleteTerminalAssistant) ? "abandoned" : ctx.state.livenessState; const livenessState =