fix(agents): detect incomplete tool-use turns with pre-tool text (#76477) (#76544)

* fix(agents): detect incomplete tool-use turns with pre-tool text (#76477)

  When the last assistant message ended with stopReason=toolUse, pre-tool text alone (payloadCount > 0) was suppressing the incomplete-turn guard. The model expected to continue after tool results, but the post-tool response was never produced, so the final answer was silently dropped.

  Fix isIncompleteTerminalAssistantTurn to always flag a toolUse stop reason as incomplete regardless of pre-tool text, and update the early-return condition in resolveIncompleteTurnPayloadText to not skip the check when the last assistant ended with a tool call.

* fix(agents): mark tool-use terminal with pre-tool text as abandoned in lifecycle (#76477)

  The lifecycle handler's derivedWorkingTerminalState was emitting 'working' for interrupted tool-use turns with pre-tool text because it required !hasAssistantVisibleText for the 'abandoned' state. Update the derivation to also mark as 'abandoned' when incompleteTerminalAssistant is true, so lifecycle consumers see a consistent state with the runner's terminal result.
2026-05-06 07:00:43 +00:00 · 2026-05-03 21:32:06 +10:00
parent 79f77d877e
commit a92e2b13b8
5 changed files with 181 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai
 - Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana.
 - Docker/Gateway: pass Docker setup `.env` values into gateway and CLI containers and preserve exec SecretRef `passEnv` keys in managed service plans, so 1Password Connect-backed Discord tokens keep resolving after doctor or plugin repair. Thanks @vincentkoc.
 - Control UI/WebChat: explain compaction boundaries in chat history and link directly to session checkpoint controls so pre-compaction turns no longer look silently lost after refresh. Fixes #76415. Thanks @BunsDev.
+- Agents/incomplete-turn: detect and surface a warning when the agent's final text after a tool-call chain is silently dropped because the post-tool assistant response was never produced, instead of completing the turn with only the pre-tool analysis text. Fixes #76477. Thanks @amknight.
 - Channels/WhatsApp: attach native outbound mention metadata for group text and media captions by resolving `@+<digits>` and `@<digits>` tokens against WhatsApp participant data, including LID groups. Fixes #39879; carries forward #56863. Thanks @kengi1437, @joe2643, and @fridayck.
 - Channels/WhatsApp: require outbound mention tokens to end at a word boundary so phone-number prefixes inside longer strings no longer trigger hidden native mentions.
 - Plugins/uninstall: remove empty managed git install parent directories after deleting cloned plugin repos and cover npm/git uninstall residue in Docker plugin lifecycle tests. Thanks @vincentkoc.
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -26,6 +26,7 @@ import {
  resolveEmptyResponseRetryInstruction,
  resolvePlanningOnlyRetryLimit,
  resolvePlanningOnlyRetryInstruction,
+  isIncompleteTerminalAssistantTurn,
  resolveIncompleteTurnPayloadText,
  resolveReasoningOnlyRetryInstruction,
  STRICT_AGENTIC_BLOCKED_TEXT,
@@ -995,6 +996,136 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    ).toBe("abandoned");
  });

+  it("flags tool-use stop reason as incomplete even when pre-tool text exists (#76477)", () => {
+    expect(
+      isIncompleteTerminalAssistantTurn({
+        hasAssistantVisibleText: true,
+        lastAssistant: { stopReason: "toolUse" },
+      }),
+    ).toBe(true);
+    expect(
+      isIncompleteTerminalAssistantTurn({
+        hasAssistantVisibleText: false,
+        lastAssistant: { stopReason: "toolUse" },
+      }),
+    ).toBe(true);
+    expect(
+      isIncompleteTerminalAssistantTurn({
+        hasAssistantVisibleText: true,
+        lastAssistant: { stopReason: "end_turn" },
+      }),
+    ).toBe(false);
+  });
+
+  it("detects tool-use terminal turn with pre-tool text as incomplete (#76477)", () => {
+    // When the last assistant message ended with stopReason=toolUse, pre-tool
+    // text alone must not suppress the incomplete-turn guard. The model
+    // expected to continue after tool results but the post-tool response was
+    // never produced.
+    const incompleteTurnText = resolveIncompleteTurnPayloadText({
+      payloadCount: 1,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: ["Initial analysis of the codebase..."],
+        toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "toolUse",
+          provider: "anthropic",
+          model: "sonnet-4.6",
+          content: [
+            { type: "text", text: "Initial analysis of the codebase..." },
+            { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
+          ],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    });
+
+    expect(incompleteTurnText).not.toBeNull();
+    expect(incompleteTurnText).toContain("couldn't generate a response");
+  });
+
+  it("surfaces tool-use terminal with pre-tool text and side effects as replay-unsafe (#76477)", () => {
+    const incompleteTurnText = resolveIncompleteTurnPayloadText({
+      payloadCount: 1,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: ["Let me update the file..."],
+        toolMetas: [{ toolName: "write" }],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "toolUse",
+          provider: "openai",
+          model: "gpt-5.4",
+          content: [
+            { type: "text", text: "Let me update the file..." },
+            { type: "tool_use", id: "tool_1", name: "write", input: {} },
+          ],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    });
+
+    expect(incompleteTurnText).toContain("verify before retrying");
+  });
+
+  it("does not flag a completed tool-use turn with end_turn as incomplete (#76477)", () => {
+    // When the model successfully produces post-tool text, lastAssistant has
+    // stopReason=end_turn. The incomplete-turn guard should not fire.
+    const incompleteTurnText = resolveIncompleteTurnPayloadText({
+      payloadCount: 2,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptResult({
+        assistantTexts: ["Initial analysis...", "Here is the final answer."],
+        toolMetas: [{ toolName: "read" }],
+        lastAssistant: {
+          role: "assistant",
+          stopReason: "end_turn",
+          provider: "anthropic",
+          model: "sonnet-4.6",
+          content: [{ type: "text", text: "Here is the final answer." }],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    });
+
+    expect(incompleteTurnText).toBeNull();
+  });
+
+  it("surfaces an error for tool-use terminal turn with pre-tool text via runEmbeddedPiAgent (#76477)", async () => {
+    mockedClassifyFailoverReason.mockReturnValue(null);
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: ["Initial analysis of the issue..."],
+        toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }],
+        lastAssistant: {
+          stopReason: "toolUse",
+          provider: "anthropic",
+          model: "sonnet-4.6",
+          content: [
+            { type: "text", text: "Initial analysis of the issue..." },
+            { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
+          ],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent({
+      ...overflowBaseRunParams,
+      provider: "anthropic",
+      model: "sonnet-4.6",
+      runId: "run-tool-use-dropped-final-text",
+    });
+
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+    expect(result.payloads?.[0]?.isError).toBe(true);
+    expect(result.payloads?.[0]?.text).toContain("couldn't generate a response");
+    expect(mockedLog.warn).toHaveBeenCalledWith(
+      expect.stringContaining("incomplete turn detected"),
+    );
+  });
+
  it("treats missing replay metadata as replay-invalid", () => {
    const attempt = makeAttemptResult();
    delete (attempt as Partial<EmbeddedRunAttemptResult>).replayMetadata;
--- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts
+++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
@@ -90,7 +90,12 @@ export function isIncompleteTerminalAssistantTurn(params: {
  hasAssistantVisibleText: boolean;
  lastAssistant?: { stopReason?: string } | null;
 }): boolean {
-  return !params.hasAssistantVisibleText && params.lastAssistant?.stopReason === "toolUse";
+  // A tool-use stop reason means the model issued a tool call and expected
+  // to continue after tool results. If the session ended before the
+  // post-tool assistant message arrived, the turn is incomplete regardless
+  // of whether pre-tool text exists — that text is preliminary analysis,
+  // not the final answer. (#76477)
+  return params.lastAssistant?.stopReason === "toolUse";
 }

 const PLANNING_ONLY_PROMISE_RE =
@@ -220,8 +225,15 @@ export function resolveIncompleteTurnPayloadText(params: {
  timedOut: boolean;
  attempt: IncompleteTurnAttempt;
 }): string | null {
+  // Tool-use terminal guard: when the last assistant message ended with a
+  // tool-call stop reason, the model expected to continue after tool results.
+  // Pre-tool text alone (payloadCount > 0) must not suppress the incomplete-
+  // turn check in that case — the final post-tool response was never
+  // produced. (#76477)
+  const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
+
  if (
-    params.payloadCount !== 0 ||
+    (params.payloadCount !== 0 && !toolUseTerminal) ||
    params.aborted ||
    params.timedOut ||
    params.attempt.clientToolCalls ||
--- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts
@@ -289,6 +289,34 @@ describe("handleAgentEnd", () => {
    });
  });

+  it("marks tool-use terminal with pre-tool text as abandoned (#76477)", async () => {
+    const onAgentEvent = vi.fn();
+    const ctx = createContext(
+      {
+        role: "assistant",
+        stopReason: "toolUse",
+        content: [
+          { type: "text", text: "Initial analysis..." },
+          { type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
+        ],
+      },
+      { onAgentEvent },
+    );
+    ctx.state.livenessState = "working";
+    ctx.state.assistantTexts = ["Initial analysis..."];
+
+    await handleAgentEnd(ctx);
+
+    expect(onAgentEvent).toHaveBeenCalledWith({
+      stream: "lifecycle",
+      data: {
+        phase: "end",
+        livenessState: "abandoned",
+        replayInvalid: true,
+      },
+    });
+  });
+
  it("keeps accumulated deterministic side effects from being marked abandoned", async () => {
    const onAgentEvent = vi.fn();
    const ctx = createContext(undefined, { onAgentEvent });
--- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts
@@ -54,9 +54,15 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise<
  });
  const replayInvalid =
    ctx.state.replayState.replayInvalid || incompleteTerminalAssistant ? true : undefined;
+  // Tool-use terminal guard: when the last assistant message ended with a
+  // tool-call stop reason, the turn is incomplete even when pre-tool text
+  // exists — mark as abandoned so lifecycle consumers do not see a working
+  // end state for an interrupted tool chain. (#76477)
  const derivedWorkingTerminalState = isError
    ? "blocked"
-    : replayInvalid && !hasAssistantVisibleText && !hadDeterministicSideEffect
+    : replayInvalid &&
+        !hadDeterministicSideEffect &&
+        (!hasAssistantVisibleText || incompleteTerminalAssistant)
      ? "abandoned"
      : ctx.state.livenessState;
  const livenessState =