mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:00:43 +00:00
* fix(agents): detect incomplete tool-use turns with pre-tool text (#76477)

  When the last assistant message ended with stopReason=toolUse, the presence of pre-tool text alone (payloadCount > 0) suppressed the incomplete-turn guard. The model expected to continue after the tool results, but the post-tool response was never produced, so the final answer was silently dropped.

  Fix isIncompleteTerminalAssistantTurn to always flag a toolUse stop reason as incomplete, regardless of pre-tool text, and update the early-return condition in resolveIncompleteTurnPayloadText so that it no longer skips the check when the last assistant message ended with a tool call.

* fix(agents): mark tool-use terminal with pre-tool text as abandoned in lifecycle (#76477)

  The lifecycle handler's derivedWorkingTerminalState was emitting 'working' for interrupted tool-use turns with pre-tool text, because it required !hasAssistantVisibleText for the 'abandoned' state. Update the derivation to also mark the turn as 'abandoned' when incompleteTerminalAssistant is true, so that lifecycle consumers see a state consistent with the runner's terminal result.
This commit is contained in:
@@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana.
|
||||
- Docker/Gateway: pass Docker setup `.env` values into gateway and CLI containers and preserve exec SecretRef `passEnv` keys in managed service plans, so 1Password Connect-backed Discord tokens keep resolving after doctor or plugin repair. Thanks @vincentkoc.
|
||||
- Control UI/WebChat: explain compaction boundaries in chat history and link directly to session checkpoint controls so pre-compaction turns no longer look silently lost after refresh. Fixes #76415. Thanks @BunsDev.
|
||||
- Agents/incomplete-turn: detect and surface a warning when the agent's final text after a tool-call chain is silently dropped because the post-tool assistant response was never produced, instead of completing the turn with only the pre-tool analysis text. Fixes #76477. Thanks @amknight.
|
||||
- Channels/WhatsApp: attach native outbound mention metadata for group text and media captions by resolving `@+<digits>` and `@<digits>` tokens against WhatsApp participant data, including LID groups. Fixes #39879; carries forward #56863. Thanks @kengi1437, @joe2643, and @fridayck.
|
||||
- Channels/WhatsApp: require outbound mention tokens to end at a word boundary so phone-number prefixes inside longer strings no longer trigger hidden native mentions.
|
||||
- Plugins/uninstall: remove empty managed git install parent directories after deleting cloned plugin repos and cover npm/git uninstall residue in Docker plugin lifecycle tests. Thanks @vincentkoc.
|
||||
|
||||
@@ -26,6 +26,7 @@ import {
|
||||
resolveEmptyResponseRetryInstruction,
|
||||
resolvePlanningOnlyRetryLimit,
|
||||
resolvePlanningOnlyRetryInstruction,
|
||||
isIncompleteTerminalAssistantTurn,
|
||||
resolveIncompleteTurnPayloadText,
|
||||
resolveReasoningOnlyRetryInstruction,
|
||||
STRICT_AGENTIC_BLOCKED_TEXT,
|
||||
@@ -995,6 +996,136 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
|
||||
).toBe("abandoned");
|
||||
});
|
||||
|
||||
it("flags tool-use stop reason as incomplete even when pre-tool text exists (#76477)", () => {
|
||||
expect(
|
||||
isIncompleteTerminalAssistantTurn({
|
||||
hasAssistantVisibleText: true,
|
||||
lastAssistant: { stopReason: "toolUse" },
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
isIncompleteTerminalAssistantTurn({
|
||||
hasAssistantVisibleText: false,
|
||||
lastAssistant: { stopReason: "toolUse" },
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
isIncompleteTerminalAssistantTurn({
|
||||
hasAssistantVisibleText: true,
|
||||
lastAssistant: { stopReason: "end_turn" },
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("detects tool-use terminal turn with pre-tool text as incomplete (#76477)", () => {
|
||||
// When the last assistant message ended with stopReason=toolUse, pre-tool
|
||||
// text alone must not suppress the incomplete-turn guard. The model
|
||||
// expected to continue after tool results but the post-tool response was
|
||||
// never produced.
|
||||
const incompleteTurnText = resolveIncompleteTurnPayloadText({
|
||||
payloadCount: 1,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: ["Initial analysis of the codebase..."],
|
||||
toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "toolUse",
|
||||
provider: "anthropic",
|
||||
model: "sonnet-4.6",
|
||||
content: [
|
||||
{ type: "text", text: "Initial analysis of the codebase..." },
|
||||
{ type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
|
||||
],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(incompleteTurnText).not.toBeNull();
|
||||
expect(incompleteTurnText).toContain("couldn't generate a response");
|
||||
});
|
||||
|
||||
it("surfaces tool-use terminal with pre-tool text and side effects as replay-unsafe (#76477)", () => {
|
||||
const incompleteTurnText = resolveIncompleteTurnPayloadText({
|
||||
payloadCount: 1,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: ["Let me update the file..."],
|
||||
toolMetas: [{ toolName: "write" }],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "toolUse",
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
content: [
|
||||
{ type: "text", text: "Let me update the file..." },
|
||||
{ type: "tool_use", id: "tool_1", name: "write", input: {} },
|
||||
],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(incompleteTurnText).toContain("verify before retrying");
|
||||
});
|
||||
|
||||
it("does not flag a completed tool-use turn with end_turn as incomplete (#76477)", () => {
|
||||
// When the model successfully produces post-tool text, lastAssistant has
|
||||
// stopReason=end_turn. The incomplete-turn guard should not fire.
|
||||
const incompleteTurnText = resolveIncompleteTurnPayloadText({
|
||||
payloadCount: 2,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: ["Initial analysis...", "Here is the final answer."],
|
||||
toolMetas: [{ toolName: "read" }],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "end_turn",
|
||||
provider: "anthropic",
|
||||
model: "sonnet-4.6",
|
||||
content: [{ type: "text", text: "Here is the final answer." }],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(incompleteTurnText).toBeNull();
|
||||
});
|
||||
|
||||
it("surfaces an error for tool-use terminal turn with pre-tool text via runEmbeddedPiAgent (#76477)", async () => {
|
||||
mockedClassifyFailoverReason.mockReturnValue(null);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
assistantTexts: ["Initial analysis of the issue..."],
|
||||
toolMetas: [{ toolName: "read", meta: "path=src/index.ts" }],
|
||||
lastAssistant: {
|
||||
stopReason: "toolUse",
|
||||
provider: "anthropic",
|
||||
model: "sonnet-4.6",
|
||||
content: [
|
||||
{ type: "text", text: "Initial analysis of the issue..." },
|
||||
{ type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
|
||||
],
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "sonnet-4.6",
|
||||
runId: "run-tool-use-dropped-final-text",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
expect(result.payloads?.[0]?.text).toContain("couldn't generate a response");
|
||||
expect(mockedLog.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("incomplete turn detected"),
|
||||
);
|
||||
});
|
||||
|
||||
it("treats missing replay metadata as replay-invalid", () => {
|
||||
const attempt = makeAttemptResult();
|
||||
delete (attempt as Partial<EmbeddedRunAttemptResult>).replayMetadata;
|
||||
|
||||
@@ -90,7 +90,12 @@ export function isIncompleteTerminalAssistantTurn(params: {
|
||||
hasAssistantVisibleText: boolean;
|
||||
lastAssistant?: { stopReason?: string } | null;
|
||||
}): boolean {
|
||||
return !params.hasAssistantVisibleText && params.lastAssistant?.stopReason === "toolUse";
|
||||
// A tool-use stop reason means the model issued a tool call and expected
|
||||
// to continue after tool results. If the session ended before the
|
||||
// post-tool assistant message arrived, the turn is incomplete regardless
|
||||
// of whether pre-tool text exists — that text is preliminary analysis,
|
||||
// not the final answer. (#76477)
|
||||
return params.lastAssistant?.stopReason === "toolUse";
|
||||
}
|
||||
|
||||
const PLANNING_ONLY_PROMISE_RE =
|
||||
@@ -220,8 +225,15 @@ export function resolveIncompleteTurnPayloadText(params: {
|
||||
timedOut: boolean;
|
||||
attempt: IncompleteTurnAttempt;
|
||||
}): string | null {
|
||||
// Tool-use terminal guard: when the last assistant message ended with a
|
||||
// tool-call stop reason, the model expected to continue after tool results.
|
||||
// Pre-tool text alone (payloadCount > 0) must not suppress the incomplete-
|
||||
// turn check in that case — the final post-tool response was never
|
||||
// produced. (#76477)
|
||||
const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
|
||||
|
||||
if (
|
||||
params.payloadCount !== 0 ||
|
||||
(params.payloadCount !== 0 && !toolUseTerminal) ||
|
||||
params.aborted ||
|
||||
params.timedOut ||
|
||||
params.attempt.clientToolCalls ||
|
||||
|
||||
@@ -289,6 +289,34 @@ describe("handleAgentEnd", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("marks tool-use terminal with pre-tool text as abandoned (#76477)", async () => {
|
||||
const onAgentEvent = vi.fn();
|
||||
const ctx = createContext(
|
||||
{
|
||||
role: "assistant",
|
||||
stopReason: "toolUse",
|
||||
content: [
|
||||
{ type: "text", text: "Initial analysis..." },
|
||||
{ type: "tool_use", id: "tool_1", name: "read", input: { path: "src/index.ts" } },
|
||||
],
|
||||
},
|
||||
{ onAgentEvent },
|
||||
);
|
||||
ctx.state.livenessState = "working";
|
||||
ctx.state.assistantTexts = ["Initial analysis..."];
|
||||
|
||||
await handleAgentEnd(ctx);
|
||||
|
||||
expect(onAgentEvent).toHaveBeenCalledWith({
|
||||
stream: "lifecycle",
|
||||
data: {
|
||||
phase: "end",
|
||||
livenessState: "abandoned",
|
||||
replayInvalid: true,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps accumulated deterministic side effects from being marked abandoned", async () => {
|
||||
const onAgentEvent = vi.fn();
|
||||
const ctx = createContext(undefined, { onAgentEvent });
|
||||
|
||||
@@ -54,9 +54,15 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise<
|
||||
});
|
||||
const replayInvalid =
|
||||
ctx.state.replayState.replayInvalid || incompleteTerminalAssistant ? true : undefined;
|
||||
// Tool-use terminal guard: when the last assistant message ended with a
|
||||
// tool-call stop reason, the turn is incomplete even when pre-tool text
|
||||
// exists — mark as abandoned so lifecycle consumers do not see a working
|
||||
// end state for an interrupted tool chain. (#76477)
|
||||
const derivedWorkingTerminalState = isError
|
||||
? "blocked"
|
||||
: replayInvalid && !hasAssistantVisibleText && !hadDeterministicSideEffect
|
||||
: replayInvalid &&
|
||||
!hadDeterministicSideEffect &&
|
||||
(!hasAssistantVisibleText || incompleteTerminalAssistant)
|
||||
? "abandoned"
|
||||
: ctx.state.livenessState;
|
||||
const livenessState =
|
||||
|
||||
Reference in New Issue
Block a user