mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:10:43 +00:00
fix(pi-embedded-runner): retry silent stopReason=error turns (non-frontier models)
ollama/glm-5.1:cloud (and occasionally other models) can end a turn with stopReason="error", usage.output=0, and empty content[] after a successful tool-call sequence. The existing empty-response retry path in src/agents/pi-embedded-runner/run/incomplete-turn.ts is gated on isStrictAgenticSupportedProviderModel (gpt-5 family only), so non-frontier models fall through to "incomplete turn detected" with payloads=0 and no recovery. The user sees no reply and has to nudge.

Add a narrow, model-agnostic resubmission inside the attempt loop, placed before the incompleteTurnText surface-to-user return. It fires only when all of the following hold:
- stopReason === "error"
- usage.output === 0
- content.length === 0 (excludes reasoning-only error turns)
- bounded by MAX_EMPTY_ERROR_RETRIES = 3

No instruction injection, no model gating; same prompt, same session transcript (tool results already captured), just let the loop try again.

New test file run.empty-error-retry.test.ts covers:
1. Retries for ollama/glm-5.1:cloud → succeeds on 2nd attempt.
2. Caps at 3 retries → 4 total attempts → surfaces incomplete-turn error.
3. Does NOT retry when output > 0 (preserve produced text).
4. Does NOT retry when stopReason=stop + output=0 (NO_REPLY path).
5. Retries for anthropic/claude-opus-4-7 too — model-agnostic.

Relates to #68281.
This commit is contained in:
committed by
Peter Steinberger
parent
982b1c9464
commit
5fb302ebf1
156
src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
Normal file
156
src/agents/pi-embedded-runner/run.empty-error-retry.test.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
|
||||
import {
|
||||
loadRunOverflowCompactionHarness,
|
||||
mockedClassifyFailoverReason,
|
||||
mockedGlobalHookRunner,
|
||||
mockedRunEmbeddedAttempt,
|
||||
overflowBaseRunParams,
|
||||
resetRunOverflowCompactionHarnessMocks,
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
|
||||
// Regression coverage for the silent-error retry in runEmbeddedPiAgent.
|
||||
//
|
||||
// Symptom: ollama/glm-5.1 occasionally ends a turn with stopReason="error" and
|
||||
// zero output tokens after a successful tool-call sequence. The user sees no
|
||||
// reply and has to nudge. The existing empty-response retry path is gated on
|
||||
// the strict-agentic contract (gpt-5 only), so non-frontier models fell
|
||||
// through to "incomplete turn detected". This suite locks in a narrower,
|
||||
// model-agnostic resubmission.
|
||||
|
||||
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
|
||||
|
||||
/**
 * Builds a mocked attempt result for a turn that ended with stopReason "error"
 * and produced no assistant text and no content blocks.
 *
 * @param provider - Provider id stamped on the mocked last assistant message.
 * @param model - Model id stamped on the mocked last assistant message.
 * @param outputTokens - Value reported as usage.output (defaults to 0); it is
 *   also added to the fixed input count of 100 to form usage.totalTokens.
 * @returns The attempt result produced by makeAttemptResult for that shape.
 */
function emptyErrorAttempt(
|
||||
provider: string,
|
||||
model: string,
|
||||
outputTokens = 0,
|
||||
): EmbeddedRunAttemptResult {
|
||||
return makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
stopReason: "error",
|
||||
provider,
|
||||
model,
|
||||
content: [],
|
||||
usage: { input: 100, output: outputTokens, totalTokens: 100 + outputTokens },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds a mocked attempt result for a turn that finished normally: stopReason
 * "stop", a single text block "Done.", and 5 output tokens.
 *
 * @param provider - Provider id stamped on the mocked last assistant message.
 * @param model - Model id stamped on the mocked last assistant message.
 * @returns The attempt result produced by makeAttemptResult for that shape.
 */
function successAttempt(provider: string, model: string): EmbeddedRunAttemptResult {
|
||||
return makeAttemptResult({
|
||||
assistantTexts: ["Done."],
|
||||
lastAssistant: {
|
||||
stopReason: "stop",
|
||||
provider,
|
||||
model,
|
||||
content: [{ type: "text", text: "Done." }],
|
||||
usage: { input: 100, output: 5, totalTokens: 105 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
});
|
||||
}
|
||||
|
||||
// Suite overview: each case queues mocked attempt results and then checks how
// many times the attempt runner was invoked. Covered: retry after a silent
// error, the 4-attempt cap, no retry when output tokens > 0, no retry on a
// clean stop with zero output, and retry for an anthropic model as well.
describe("runEmbeddedPiAgent silent-error retry", () => {
|
||||
// Obtain runEmbeddedPiAgent through the shared harness loader once per suite.
beforeAll(async () => {
|
||||
({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness());
|
||||
});
|
||||
|
||||
// Reset the harness mocks before every case, then stub hasHooks to return
// false and classifyFailoverReason to return null.
beforeEach(() => {
|
||||
resetRunOverflowCompactionHarnessMocks();
|
||||
mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
|
||||
mockedClassifyFailoverReason.mockReturnValue(null);
|
||||
});
|
||||
|
||||
it("retries when a turn ends with stopReason=error and zero output tokens", async () => {
|
||||
// First queued attempt is the silent error, the second one succeeds.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(emptyErrorAttempt("ollama", "glm-5.1:cloud"));
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("ollama", "glm-5.1:cloud"));
|
||||
|
||||
const result = await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
runId: "run-empty-error-retry-basic",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(result.payloads?.[0]?.isError).toBeFalsy();
|
||||
});
|
||||
|
||||
it("caps retries at MAX_EMPTY_ERROR_RETRIES and surfaces incomplete-turn error", async () => {
|
||||
// 1 initial + 3 retries = 4 attempts, all returning empty-error.
|
||||
for (let i = 0; i < 4; i += 1) {
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(emptyErrorAttempt("ollama", "glm-5.1:cloud"));
|
||||
}
|
||||
|
||||
const result = await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
runId: "run-empty-error-retry-exhausted",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
});
|
||||
|
||||
it("does not retry when stopReason=error but output tokens > 0", async () => {
|
||||
// Model produced something before erroring; surfacing that text is better
|
||||
// than silent resubmission.
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
emptyErrorAttempt("ollama", "glm-5.1:cloud", 12),
|
||||
);
|
||||
|
||||
await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
runId: "run-empty-error-retry-skip-with-output",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("does not retry when stopReason=stop and output=0 (out of scope)", async () => {
|
||||
// Clean stop with no output is a legitimate silent reply (e.g. NO_REPLY
|
||||
// token path), not a crash. This retry must not trigger there.
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
stopReason: "stop",
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
content: [],
|
||||
usage: { input: 100, output: 0, totalTokens: 100 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
);
|
||||
|
||||
await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "ollama",
|
||||
model: "glm-5.1:cloud",
|
||||
runId: "run-empty-error-retry-skip-clean-stop",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("retries for frontier models too — the fix is model-agnostic", async () => {
|
||||
// Same error-then-success sequence as the first case, with an anthropic model.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
emptyErrorAttempt("anthropic", "claude-opus-4-7"),
|
||||
);
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(successAttempt("anthropic", "claude-opus-4-7"));
|
||||
|
||||
const result = await runEmbeddedPiAgent({
|
||||
...overflowBaseRunParams,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
runId: "run-empty-error-retry-frontier",
|
||||
});
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(result.payloads?.[0]?.isError).toBeFalsy();
|
||||
});
|
||||
});
|
||||
@@ -489,6 +489,13 @@ export async function runEmbeddedPiAgent(
|
||||
});
|
||||
let rateLimitProfileRotations = 0;
|
||||
let timeoutCompactionAttempts = 0;
|
||||
// Silent-error retry: non-strict-agentic models (e.g. ollama/glm-5.1) can
|
||||
// end a turn with stopReason="error" + zero output tokens, producing no
|
||||
// user-visible text. The existing empty-response retry is gated on
|
||||
// isStrictAgenticSupportedProviderModel (gpt-5 only). This is an
|
||||
// orthogonal, model-agnostic resubmission.
|
||||
const MAX_EMPTY_ERROR_RETRIES = 3;
|
||||
let emptyErrorRetries = 0;
|
||||
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
|
||||
const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
|
||||
const rateLimitProfileRotationLimit = resolveRateLimitProfileRotationLimit(params.config);
|
||||
@@ -1911,6 +1918,42 @@ export async function runEmbeddedPiAgent(
|
||||
`provider=${activeErrorContext.provider}/${activeErrorContext.model} attempts=${emptyResponseRetryAttempts}/${maxEmptyResponseRetryAttempts} — surfacing incomplete-turn error`,
|
||||
);
|
||||
}
|
||||
// ── silent-error retry ────────────────────────────────────────────
|
||||
// Observed with ollama/glm-5.1: a turn can end with stopReason="error"
|
||||
// and zero output tokens AND empty content after a successful
|
||||
// tool-call sequence, producing no user-visible text at all. The
|
||||
// existing empty-response retry path (resolveEmptyResponseRetryInstruction)
|
||||
// is gated on the strict-agentic contract (gpt-5 only), so non-frontier
|
||||
// models fall through to "incomplete turn detected" → silent gap
|
||||
// until the user nudges. This is a narrower, model-agnostic
|
||||
// resubmission: same prompt, same session transcript (tool results
|
||||
// already captured), no instruction injection. Placed before the
|
||||
// incompleteTurnText return so it actually gets a chance to fire.
|
||||
//
|
||||
// Content-empty guard: a reasoning-only error (content has thinking
|
||||
// blocks) is a distinct failure mode handled elsewhere; only retry
|
||||
// when the assistant truly produced nothing.
|
||||
const silentErrorContent = sessionLastAssistant?.content as Array<unknown> | undefined;
|
||||
if (
|
||||
incompleteTurnText &&
|
||||
!aborted &&
|
||||
!promptError &&
|
||||
!timedOut &&
|
||||
sessionLastAssistant?.stopReason === "error" &&
|
||||
((sessionLastAssistant?.usage as { output?: number } | undefined)?.output ?? 0) === 0 &&
|
||||
(silentErrorContent?.length ?? 0) === 0 &&
|
||||
emptyErrorRetries < MAX_EMPTY_ERROR_RETRIES
|
||||
) {
|
||||
emptyErrorRetries += 1;
|
||||
log.warn(
|
||||
`[empty-error-retry] stopReason=error output=0; resubmitting ` +
|
||||
`attempt=${emptyErrorRetries}/${MAX_EMPTY_ERROR_RETRIES} ` +
|
||||
`provider=${sessionLastAssistant?.provider ?? provider} ` +
|
||||
`model=${sessionLastAssistant?.model ?? model.id} ` +
|
||||
`sessionKey=${params.sessionKey ?? params.sessionId}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if (incompleteTurnText) {
|
||||
const replayInvalid = resolveReplayInvalidForAttempt(incompleteTurnText);
|
||||
const livenessState = resolveRunLivenessState({
|
||||
|
||||
Reference in New Issue
Block a user