diff --git a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts index b1cab8458e9..64314132d72 100644 --- a/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts +++ b/src/agents/pi-embedded-runner/run.empty-error-retry.test.ts @@ -112,14 +112,15 @@ describe("runEmbeddedPiAgent silent-error retry", () => { it("does not retry when stopReason=stop and output=0 (out of scope)", async () => { // Clean stop with no output is a legitimate silent reply (e.g. NO_REPLY - // token path), not a crash. This retry must not trigger there. + // token path), not a crash. Use a plain provider/model so this test stays + // scoped to the silent-error retry instead of the empty-response retry. mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ assistantTexts: [], lastAssistant: { stopReason: "stop", - provider: "ollama", - model: "glm-5.1:cloud", + provider: "plain-provider", + model: "plain-model", content: [], usage: { input: 100, output: 0, totalTokens: 100 }, } as unknown as EmbeddedRunAttemptResult["lastAssistant"], @@ -128,8 +129,8 @@ describe("runEmbeddedPiAgent silent-error retry", () => { await runEmbeddedPiAgent({ ...overflowBaseRunParams, - provider: "ollama", - model: "glm-5.1:cloud", + provider: "plain-provider", + model: "plain-model", runId: "run-empty-error-retry-skip-clean-stop", }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 43070b8ab14..6094a5ba0aa 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -113,6 +113,7 @@ import { DEFAULT_EMPTY_RESPONSE_RETRY_LIMIT, DEFAULT_REASONING_ONLY_RETRY_LIMIT, resolveAckExecutionFastPathInstruction, + resolveAttemptReplayMetadata, extractPlanningOnlyPlanDetails, resolveEmptyResponseRetryInstruction, resolveIncompleteTurnPayloadText, @@ -151,6 +152,36 @@ import { createUsageAccumulator, mergeUsageIntoAccumulator } from "./usage-accum type 
ApiKeyInfo = ResolvedProviderAuth; const MAX_SAME_MODEL_IDLE_TIMEOUT_RETRIES = 1; +type EmbeddedRunAttemptForRunner = Awaited<ReturnType<typeof runEmbeddedAttemptWithBackend>>; + +function normalizeEmbeddedRunAttemptResult( + attempt: EmbeddedRunAttemptForRunner, +): EmbeddedRunAttemptForRunner { + const raw = attempt as EmbeddedRunAttemptForRunner & { + assistantTexts?: EmbeddedRunAttemptForRunner["assistantTexts"] | null; + toolMetas?: EmbeddedRunAttemptForRunner["toolMetas"] | null; + messagesSnapshot?: EmbeddedRunAttemptForRunner["messagesSnapshot"] | null; + messagingToolSentTexts?: EmbeddedRunAttemptForRunner["messagingToolSentTexts"] | null; + messagingToolSentMediaUrls?: EmbeddedRunAttemptForRunner["messagingToolSentMediaUrls"] | null; + messagingToolSentTargets?: EmbeddedRunAttemptForRunner["messagingToolSentTargets"] | null; + itemLifecycle?: EmbeddedRunAttemptForRunner["itemLifecycle"] | null; + }; + return { + ...attempt, + assistantTexts: raw.assistantTexts ?? [], + toolMetas: raw.toolMetas ?? [], + messagesSnapshot: raw.messagesSnapshot ?? [], + messagingToolSentTexts: raw.messagingToolSentTexts ?? [], + messagingToolSentMediaUrls: raw.messagingToolSentMediaUrls ?? [], + messagingToolSentTargets: raw.messagingToolSentTargets ?? [], + itemLifecycle: raw.itemLifecycle ?? 
{ + startedCount: 0, + completedCount: 0, + activeCount: 0, + }, + replayMetadata: resolveAttemptReplayMetadata(raw), + }; +} function createEmptyAuthProfileStore(): AuthProfileStore { return { @@ -855,7 +886,7 @@ export async function runEmbeddedPiAgent( }, }); - const attempt = await runEmbeddedAttemptWithBackend({ + const rawAttempt = await runEmbeddedAttemptWithBackend({ sessionId: activeSessionId, sessionKey: resolvedSessionKey, sandboxSessionKey: params.sandboxSessionKey, @@ -960,6 +991,7 @@ export async function runEmbeddedPiAgent( bootstrapPromptWarningSignature: bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1], }); + const attempt = normalizeEmbeddedRunAttemptResult(rawAttempt); const { aborted, diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index e43ba2f5cee..81ea308b3a0 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -72,6 +72,11 @@ type RunLivenessAttempt = Pick< "lastAssistant" | "promptErrorSource" | "replayMetadata" | "timedOutDuringCompaction" >; +const REPLAY_UNSAFE_FALLBACK_METADATA: EmbeddedRunAttemptResult["replayMetadata"] = { + hadPotentialSideEffects: true, + replaySafe: false, +}; + export function isIncompleteTerminalAssistantTurn(params: { hasAssistantVisibleText: boolean; lastAssistant?: { stopReason?: string } | null; @@ -211,6 +216,12 @@ export function buildAttemptReplayMetadata( }; } +export function resolveAttemptReplayMetadata(attempt: { + replayMetadata?: EmbeddedRunAttemptResult["replayMetadata"] | null; +}): EmbeddedRunAttemptResult["replayMetadata"] { + return attempt.replayMetadata ?? 
REPLAY_UNSAFE_FALLBACK_METADATA; +} + export function resolveIncompleteTurnPayloadText(params: { payloadCount: number; aborted: boolean; @@ -258,7 +269,7 @@ export function resolveIncompleteTurnPayloadText(params: { return null; } - return params.attempt.replayMetadata.hadPotentialSideEffects + return resolveAttemptReplayMetadata(params.attempt).hadPotentialSideEffects ? "⚠️ Agent couldn't generate a response. Note: some tool actions may have already been executed — please verify before retrying." : "⚠️ Agent couldn't generate a response. Please try again."; } @@ -351,7 +362,7 @@ export function resolveReplayInvalidFlag(params: { incompleteTurnText?: string | null; }): boolean { return ( - !params.attempt.replayMetadata.replaySafe || + !resolveAttemptReplayMetadata(params.attempt).replaySafe || params.attempt.promptErrorSource === "compaction" || params.attempt.timedOutDuringCompaction || Boolean(params.incompleteTurnText) @@ -465,7 +476,7 @@ function shouldSkipPlanningOnlyRetry(params: { params.attempt.yieldDetected || params.attempt.didSendDeterministicApprovalPrompt || params.attempt.lastToolError || - params.attempt.replayMetadata.hadPotentialSideEffects, + resolveAttemptReplayMetadata(params.attempt).hadPotentialSideEffects, ); } @@ -796,7 +807,7 @@ export function resolvePlanningOnlyRetryInstruction(params: { (hasNonPlanToolActivity(params.attempt.toolMetas) && !allowSingleActionRetryBypass) || (params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount && !allowSingleActionRetryBypass) || - params.attempt.replayMetadata.hadPotentialSideEffects + resolveAttemptReplayMetadata(params.attempt).hadPotentialSideEffects ) { return null; }