fix(agents): detect incomplete tool-use turns with pre-tool text (#76477) (#76544)

* fix(agents): detect incomplete tool-use turns with pre-tool text (#76477)

When the last assistant message ended with stopReason=toolUse, the
presence of pre-tool text alone (payloadCount > 0) suppressed the
incomplete-turn guard. The model expected to continue after the tool
results, but the post-tool response was never produced, so the final
answer was silently dropped.

Fix isIncompleteTerminalAssistantTurn to always flag toolUse stop reason
as incomplete regardless of pre-tool text, and update the early-return
condition in resolveIncompleteTurnPayloadText to not skip the check when
the last assistant ended with a tool call.

* fix(agents): mark tool-use terminal with pre-tool text as abandoned in lifecycle (#76477)

The lifecycle handler's derivedWorkingTerminalState was emitting
'working' for interrupted tool-use turns with pre-tool text because
it required !hasAssistantVisibleText for the 'abandoned' state.

Update the derivation to also mark the turn as 'abandoned' when
incompleteTerminalAssistant is true, so lifecycle consumers see a
state that is consistent with the runner's terminal result.
This commit is contained in:
Alex Knight
2026-05-03 21:32:06 +10:00
committed by GitHub
parent 79f77d877e
commit a92e2b13b8
5 changed files with 181 additions and 3 deletions

View File

@@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai
- Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana.
- Docker/Gateway: pass Docker setup `.env` values into gateway and CLI containers and preserve exec SecretRef `passEnv` keys in managed service plans, so 1Password Connect-backed Discord tokens keep resolving after doctor or plugin repair. Thanks @vincentkoc.
- Control UI/WebChat: explain compaction boundaries in chat history and link directly to session checkpoint controls so pre-compaction turns no longer look silently lost after refresh. Fixes #76415. Thanks @BunsDev.
- Agents/incomplete-turn: detect and surface a warning when the agent's final text after a tool-call chain is silently dropped because the post-tool assistant response was never produced, instead of completing the turn with only the pre-tool analysis text. Fixes #76477. Thanks @amknight.
- Channels/WhatsApp: attach native outbound mention metadata for group text and media captions by resolving `@+<digits>` and `@<digits>` tokens against WhatsApp participant data, including LID groups. Fixes #39879; carries forward #56863. Thanks @kengi1437, @joe2643, and @fridayck.
- Channels/WhatsApp: require outbound mention tokens to end at a word boundary so phone-number prefixes inside longer strings no longer trigger hidden native mentions.
- Plugins/uninstall: remove empty managed git install parent directories after deleting cloned plugin repos and cover npm/git uninstall residue in Docker plugin lifecycle tests. Thanks @vincentkoc.

View File

@@ -26,6 +26,7 @@ import {
resolveEmptyResponseRetryInstruction,
resolvePlanningOnlyRetryLimit,
resolvePlanningOnlyRetryInstruction,
isIncompleteTerminalAssistantTurn,
resolveIncompleteTurnPayloadText,
resolveReasoningOnlyRetryInstruction,
STRICT_AGENTIC_BLOCKED_TEXT,
@@ -995,6 +996,136 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
).toBe("abandoned");
});
// Regression for #76477: only the stop reason decides whether the terminal
// assistant turn is incomplete; visible pre-tool text must not matter.
it("flags tool-use stop reason as incomplete even when pre-tool text exists (#76477)", () => {
  // Each row: the inputs to the predicate and the verdict it must return.
  const cases = [
    { hasAssistantVisibleText: true, stopReason: "toolUse", incomplete: true },
    { hasAssistantVisibleText: false, stopReason: "toolUse", incomplete: true },
    { hasAssistantVisibleText: true, stopReason: "end_turn", incomplete: false },
  ];
  for (const { hasAssistantVisibleText, stopReason, incomplete } of cases) {
    const verdict = isIncompleteTerminalAssistantTurn({
      hasAssistantVisibleText,
      lastAssistant: { stopReason },
    });
    expect(verdict).toBe(incomplete);
  }
});
it("detects tool-use terminal turn with pre-tool text as incomplete (#76477)", () => {
  // The final assistant message stopped on a tool call, so the model still
  // owed a post-tool response. The pre-tool text that was already counted as
  // a payload (payloadCount = 1) must not hide the incomplete turn.
  const preToolText = "Initial analysis of the codebase...";
  const lastAssistant = {
    role: "assistant",
    stopReason: "toolUse",
    provider: "anthropic",
    model: "sonnet-4.6",
    content: [
      { type: "text", text: preToolText },
      { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
    ],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [preToolText],
    toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }],
    lastAssistant,
  });

  const surfacedText = resolveIncompleteTurnPayloadText({
    payloadCount: 1,
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(surfacedText).not.toBeNull();
  expect(surfacedText).toContain("couldn't generate a response");
});
it("surfaces tool-use terminal with pre-tool text and side effects as replay-unsafe (#76477)", () => {
  // The turn stopped on a `write` tool call after emitting pre-tool text; the
  // surfaced message is expected to tell the user to verify before retrying.
  const preToolText = "Let me update the file...";
  const lastAssistant = {
    role: "assistant",
    stopReason: "toolUse",
    provider: "openai",
    model: "gpt-5.4",
    content: [
      { type: "text", text: preToolText },
      { type: "tool_use", id: "tool_1", name: "write", input: {} },
    ],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [preToolText],
    toolMetas: [{ toolName: "write" }],
    lastAssistant,
  });

  const surfacedText = resolveIncompleteTurnPayloadText({
    payloadCount: 1,
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(surfacedText).toContain("verify before retrying");
});
it("does not flag a completed tool-use turn with end_turn as incomplete (#76477)", () => {
  // Happy path: the model produced its post-tool text, so the last assistant
  // message carries stopReason=end_turn and the guard has to stay silent.
  const finalAnswer = "Here is the final answer.";
  const lastAssistant = {
    role: "assistant",
    stopReason: "end_turn",
    provider: "anthropic",
    model: "sonnet-4.6",
    content: [{ type: "text", text: finalAnswer }],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: ["Initial analysis...", finalAnswer],
    toolMetas: [{ toolName: "read" }],
    lastAssistant,
  });

  const surfacedText = resolveIncompleteTurnPayloadText({
    payloadCount: 2,
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(surfacedText).toBeNull();
});
it("surfaces an error for tool-use terminal turn with pre-tool text via runEmbeddedPiAgent (#76477)", async () => {
  // End-to-end variant: the single mocked attempt ends on a tool call after
  // emitting pre-tool text, and the runner must report an error payload.
  mockedClassifyFailoverReason.mockReturnValue(null);

  const preToolText = "Initial analysis of the issue...";
  const droppedFinalAttempt = makeAttemptResult({
    assistantTexts: [preToolText],
    toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }],
    lastAssistant: {
      stopReason: "toolUse",
      provider: "anthropic",
      model: "sonnet-4.6",
      content: [
        { type: "text", text: preToolText },
        { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
      ],
    } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
  });
  mockedRunEmbeddedAttempt.mockResolvedValueOnce(droppedFinalAttempt);

  const result = await runEmbeddedPiAgent({
    ...overflowBaseRunParams,
    provider: "anthropic",
    model: "sonnet-4.6",
    runId: "run-tool-use-dropped-final-text",
  });

  // Exactly one attempt is expected to have been made.
  expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);

  const firstPayload = result.payloads?.[0];
  expect(firstPayload?.isError).toBe(true);
  expect(firstPayload?.text).toContain("couldn't generate a response");

  const incompleteTurnWarning = expect.stringContaining("incomplete turn detected");
  expect(mockedLog.warn).toHaveBeenCalledWith(incompleteTurnWarning);
});
it("treats missing replay metadata as replay-invalid", () => {
const attempt = makeAttemptResult();
delete (attempt as Partial<EmbeddedRunAttemptResult>).replayMetadata;

View File

@@ -90,7 +90,12 @@ export function isIncompleteTerminalAssistantTurn(params: {
hasAssistantVisibleText: boolean;
lastAssistant?: { stopReason?: string } | null;
}): boolean {
return !params.hasAssistantVisibleText && params.lastAssistant?.stopReason === "toolUse";
// A tool-use stop reason means the model issued a tool call and expected
// to continue after tool results. If the session ended before the
// post-tool assistant message arrived, the turn is incomplete regardless
// of whether pre-tool text exists — that text is preliminary analysis,
// not the final answer. (#76477)
return params.lastAssistant?.stopReason === "toolUse";
}
const PLANNING_ONLY_PROMISE_RE =
@@ -220,8 +225,15 @@ export function resolveIncompleteTurnPayloadText(params: {
timedOut: boolean;
attempt: IncompleteTurnAttempt;
}): string | null {
// Tool-use terminal guard: when the last assistant message ended with a
// tool-call stop reason, the model expected to continue after tool results.
// Pre-tool text alone (payloadCount > 0) must not suppress the incomplete-
// turn check in that case — the final post-tool response was never
// produced. (#76477)
const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
if (
params.payloadCount !== 0 ||
(params.payloadCount !== 0 && !toolUseTerminal) ||
params.aborted ||
params.timedOut ||
params.attempt.clientToolCalls ||

View File

@@ -289,6 +289,34 @@ describe("handleAgentEnd", () => {
});
});
it("marks tool-use terminal with pre-tool text as abandoned (#76477)", async () => {
  // The last assistant message carries visible text but stops on a tool
  // call; the lifecycle end event must report "abandoned", not "working".
  const onAgentEvent = vi.fn();
  const preToolText = "Initial analysis...";
  const interruptedAssistant = {
    role: "assistant",
    stopReason: "toolUse",
    content: [
      { type: "text", text: preToolText },
      { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
    ],
  };
  const ctx = createContext(interruptedAssistant, { onAgentEvent });
  ctx.state.livenessState = "working";
  ctx.state.assistantTexts = [preToolText];

  await handleAgentEnd(ctx);

  const expectedEndEvent = {
    stream: "lifecycle",
    data: {
      phase: "end",
      livenessState: "abandoned",
      replayInvalid: true,
    },
  };
  expect(onAgentEvent).toHaveBeenCalledWith(expectedEndEvent);
});
it("keeps accumulated deterministic side effects from being marked abandoned", async () => {
const onAgentEvent = vi.fn();
const ctx = createContext(undefined, { onAgentEvent });

View File

@@ -54,9 +54,15 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise<
});
const replayInvalid =
ctx.state.replayState.replayInvalid || incompleteTerminalAssistant ? true : undefined;
// Tool-use terminal guard: when the last assistant message ended with a
// tool-call stop reason, the turn is incomplete even when pre-tool text
// exists. Unless the run errored (reported as blocked) or a deterministic
// side effect was recorded, mark it as abandoned so lifecycle consumers do
// not see a working end state for an interrupted tool chain. (#76477)
const derivedWorkingTerminalState = isError
? "blocked"
: replayInvalid && !hasAssistantVisibleText && !hadDeterministicSideEffect
: replayInvalid &&
!hadDeterministicSideEffect &&
(!hasAssistantVisibleText || incompleteTerminalAssistant)
? "abandoned"
: ctx.state.livenessState;
const livenessState =