Files
openclaw/src/agents/pi-embedded-runner/run.ts
2026-04-13 11:49:00 -07:00

1815 lines
77 KiB
TypeScript

import { randomBytes } from "node:crypto";
import fs from "node:fs/promises";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { ensureContextEnginesInitialized } from "../../context-engine/init.js";
import { resolveContextEngine } from "../../context-engine/registry.js";
import { emitAgentPlanEvent } from "../../infra/agent-events.js";
import { sleepWithAbort } from "../../infra/backoff.js";
import { formatErrorMessage } from "../../infra/errors.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { normalizeOptionalString } from "../../shared/string-coerce.js";
import { sanitizeForLog } from "../../terminal/ansi.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import {
hasConfiguredModelFallbacks,
resolveAgentExecutionContract,
resolveSessionAgentIds,
} from "../agent-scope.js";
import {
type AuthProfileFailureReason,
markAuthProfileFailure,
resolveAuthProfileEligibility,
markAuthProfileGood,
markAuthProfileUsed,
} from "../auth-profiles.js";
import {
resolveSessionKeyForRequest,
resolveStoredSessionKeyForSessionId,
} from "../command/session.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { isStrictAgenticExecutionContractActive } from "../execution-contract.js";
import {
coerceToFailoverError,
describeFailoverError,
FailoverError,
resolveFailoverStatus,
} from "../failover-error.js";
import { LiveSessionModelSwitchError } from "../live-model-switch-error.js";
import { shouldSwitchToLiveModel, clearLiveModelSwitchPending } from "../live-model-switch.js";
import {
applyAuthHeaderOverride,
applyLocalNoAuthHeaderOverride,
ensureAuthProfileStore,
type ResolvedProviderAuth,
resolveAuthProfileOrder,
shouldPreferExplicitConfigApiKeyAuth,
} from "../model-auth.js";
import { normalizeProviderId } from "../model-selection.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import { disposeSessionMcpRuntime } from "../pi-bundle-mcp-tools.js";
import {
classifyFailoverReason,
extractObservedOverflowTokenCount,
type FailoverReason,
formatAssistantErrorText,
isAuthAssistantError,
isBillingAssistantError,
isCompactionFailureError,
isFailoverAssistantError,
isFailoverErrorMessage,
isLikelyContextOverflowError,
isRateLimitAssistantError,
parseImageDimensionError,
parseImageSizeError,
pickFallbackThinkingLevel,
} from "../pi-embedded-helpers.js";
import { ensureRuntimePluginsLoaded } from "../runtime-plugins.js";
import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
import { runPostCompactionSideEffects } from "./compaction-hooks.js";
import { buildEmbeddedCompactionRuntimeContext } from "./compaction-runtime-context.js";
import { runContextEngineMaintenance } from "./context-engine-maintenance.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModelAsync } from "./model.js";
import { createEmbeddedRunReplayState, observeReplayMetadata } from "./replay-state.js";
import { handleAssistantFailover } from "./run/assistant-failover.js";
import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
import { runEmbeddedAttemptWithBackend } from "./run/backend.js";
import { createFailoverDecisionLogger } from "./run/failover-observation.js";
import { mergeRetryFailoverReason, resolveRunFailoverDecision } from "./run/failover-policy.js";
import {
buildErrorAgentMeta,
buildUsageAgentMetaFields,
createCompactionDiagId,
resolveActiveErrorContext,
resolveFinalAssistantRawText,
resolveFinalAssistantVisibleText,
resolveMaxRunRetryIterations,
resolveOverloadFailoverBackoffMs,
resolveOverloadProfileRotationLimit,
resolveRateLimitProfileRotationLimit,
type RuntimeAuthState,
scrubAnthropicRefusalMagic,
} from "./run/helpers.js";
import {
resolveAckExecutionFastPathInstruction,
resolveIncompleteTurnPayloadText,
extractPlanningOnlyPlanDetails,
resolvePlanningOnlyRetryLimit,
resolvePlanningOnlyRetryInstruction,
STRICT_AGENTIC_BLOCKED_TEXT,
resolveReplayInvalidFlag,
resolveRunLivenessState,
} from "./run/incomplete-turn.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import { handleRetryLimitExhaustion } from "./run/retry-limit.js";
import { resolveEffectiveRuntimeModel, resolveHookModelSelection } from "./run/setup.js";
import { mergeAttemptToolMediaPayloads } from "./run/tool-media-payloads.js";
import {
sessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession,
} from "./tool-result-truncation.js";
import type {
EmbeddedPiAgentMeta,
EmbeddedPiRunResult,
EmbeddedRunLivenessState,
} from "./types.js";
import { createUsageAccumulator, mergeUsageIntoAccumulator } from "./usage-accumulator.js";
type ApiKeyInfo = ResolvedProviderAuth;
const MAX_SAME_MODEL_IDLE_TIMEOUT_RETRIES = 1;
/**
* Best-effort backfill of sessionKey from sessionId when not explicitly provided.
* The return value is normalized: whitespace-only inputs collapse to undefined, and
* successful resolution returns a trimmed session key. This is a read-only lookup
* with no side effects.
* See: https://github.com/openclaw/openclaw/issues/60552
*/
function backfillSessionKey(params: {
config: RunEmbeddedPiAgentParams["config"];
sessionId: string;
sessionKey?: string;
agentId?: string;
}): string | undefined {
const trimmed = normalizeOptionalString(params.sessionKey);
if (trimmed) {
return trimmed;
}
if (!params.config || !params.sessionId) {
return undefined;
}
try {
const resolved = normalizeOptionalString(params.agentId)
? resolveStoredSessionKeyForSessionId({
cfg: params.config,
sessionId: params.sessionId,
agentId: params.agentId,
})
: resolveSessionKeyForRequest({
cfg: params.config,
sessionId: params.sessionId,
});
return normalizeOptionalString(resolved.sessionKey);
} catch (err) {
log.warn(
`[backfillSessionKey] Failed to resolve sessionKey for sessionId=${redactRunIdentifier(sanitizeForLog(params.sessionId))}: ${formatErrorMessage(err)}`,
);
return undefined;
}
}
export async function runEmbeddedPiAgent(
params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
// Resolve sessionKey early so all downstream consumers (hooks, LCM, compaction)
// receive a non-null key even when callers omit it. See #60552.
const effectiveSessionKey = backfillSessionKey({
config: params.config,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
agentId: params.agentId,
});
if (effectiveSessionKey !== params.sessionKey) {
params = { ...params, sessionKey: effectiveSessionKey };
}
const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
const globalLane = resolveGlobalLane(params.lane);
const enqueueGlobal =
params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
const enqueueSession =
params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
const channelHint = params.messageChannel ?? params.messageProvider;
const resolvedToolResultFormat =
params.toolResultFormat ??
(channelHint
? isMarkdownCapableMessageChannel(channelHint)
? "markdown"
: "plain"
: "markdown");
const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;
const throwIfAborted = () => {
if (!params.abortSignal?.aborted) {
return;
}
const reason = params.abortSignal.reason;
if (reason instanceof Error) {
throw reason;
}
const abortErr =
reason !== undefined
? new Error("Operation aborted", { cause: reason })
: new Error("Operation aborted");
abortErr.name = "AbortError";
throw abortErr;
};
throwIfAborted();
return enqueueSession(() => {
throwIfAborted();
return enqueueGlobal(async () => {
throwIfAborted();
const started = Date.now();
const workspaceResolution = resolveRunWorkspaceDir({
workspaceDir: params.workspaceDir,
sessionKey: params.sessionKey,
agentId: params.agentId,
config: params.config,
});
const resolvedWorkspace = workspaceResolution.workspaceDir;
const redactedSessionId = redactRunIdentifier(params.sessionId);
const redactedSessionKey = redactRunIdentifier(params.sessionKey);
const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
if (workspaceResolution.usedFallback) {
log.warn(
`[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
);
}
ensureRuntimePluginsLoaded({
config: params.config,
workspaceDir: resolvedWorkspace,
allowGatewaySubagentBinding: params.allowGatewaySubagentBinding,
});
let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
const normalizedSessionKey = params.sessionKey?.trim();
const fallbackConfigured = hasConfiguredModelFallbacks({
cfg: params.config,
agentId: params.agentId,
sessionKey: normalizedSessionKey,
});
await ensureOpenClawModelsJson(params.config, agentDir);
const resolvedSessionKey = normalizedSessionKey;
const hookRunner = getGlobalHookRunner();
const hookCtx = {
runId: params.runId,
agentId: workspaceResolution.agentId,
sessionKey: resolvedSessionKey,
sessionId: params.sessionId,
workspaceDir: resolvedWorkspace,
modelProviderId: provider,
modelId,
messageProvider: params.messageProvider ?? undefined,
trigger: params.trigger,
channelId: params.messageChannel ?? params.messageProvider ?? undefined,
};
const hookSelection = await resolveHookModelSelection({
prompt: params.prompt,
provider,
modelId,
hookRunner,
hookContext: hookCtx,
});
provider = hookSelection.provider;
modelId = hookSelection.modelId;
const legacyBeforeAgentStartResult = hookSelection.legacyBeforeAgentStartResult;
const { model, error, authStorage, modelRegistry } = await resolveModelAsync(
provider,
modelId,
agentDir,
params.config,
);
if (!model) {
throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
reason: "model_not_found",
provider,
model: modelId,
});
}
let runtimeModel = model;
const resolvedRuntimeModel = resolveEffectiveRuntimeModel({
cfg: params.config,
provider,
modelId,
runtimeModel,
});
const ctxInfo = resolvedRuntimeModel.ctxInfo;
let effectiveModel = resolvedRuntimeModel.effectiveModel;
const authStore = ensureAuthProfileStore(agentDir, {
allowKeychainPrompt: false,
});
const preferredProfileId = params.authProfileId?.trim();
let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
if (lockedProfileId) {
const lockedProfile = authStore.profiles[lockedProfileId];
if (
!lockedProfile ||
normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
) {
lockedProfileId = undefined;
}
}
if (lockedProfileId) {
const eligibility = resolveAuthProfileEligibility({
cfg: params.config,
store: authStore,
provider,
profileId: lockedProfileId,
});
if (!eligibility.eligible) {
throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
}
}
const profileOrder = shouldPreferExplicitConfigApiKeyAuth(params.config, provider)
? []
: resolveAuthProfileOrder({
cfg: params.config,
store: authStore,
provider,
preferredProfile: preferredProfileId,
});
const profileCandidates = lockedProfileId
? [lockedProfileId]
: profileOrder.length > 0
? profileOrder
: [undefined];
let profileIndex = 0;
const initialThinkLevel = params.thinkLevel ?? "off";
let thinkLevel = initialThinkLevel;
const attemptedThinking = new Set<ThinkLevel>();
let apiKeyInfo: ApiKeyInfo | null = null;
let lastProfileId: string | undefined;
let runtimeAuthState: RuntimeAuthState | null = null;
let runtimeAuthRefreshCancelled = false;
const {
advanceAuthProfile,
initializeAuthProfile,
maybeRefreshRuntimeAuthForAuthError,
stopRuntimeAuthRefreshTimer,
} = createEmbeddedRunAuthController({
config: params.config,
agentDir,
workspaceDir: resolvedWorkspace,
authStore,
authStorage,
profileCandidates,
lockedProfileId,
initialThinkLevel,
attemptedThinking,
fallbackConfigured,
allowTransientCooldownProbe: params.allowTransientCooldownProbe === true,
getProvider: () => provider,
getModelId: () => modelId,
getRuntimeModel: () => runtimeModel,
setRuntimeModel: (next) => {
runtimeModel = next;
},
getEffectiveModel: () => effectiveModel,
setEffectiveModel: (next) => {
effectiveModel = next;
},
getApiKeyInfo: () => apiKeyInfo,
setApiKeyInfo: (next) => {
apiKeyInfo = next;
},
getLastProfileId: () => lastProfileId,
setLastProfileId: (next) => {
lastProfileId = next;
},
getRuntimeAuthState: () => runtimeAuthState,
setRuntimeAuthState: (next) => {
runtimeAuthState = next;
},
getRuntimeAuthRefreshCancelled: () => runtimeAuthRefreshCancelled,
setRuntimeAuthRefreshCancelled: (next) => {
runtimeAuthRefreshCancelled = next;
},
getProfileIndex: () => profileIndex,
setProfileIndex: (next) => {
profileIndex = next;
},
setThinkLevel: (next) => {
thinkLevel = next;
},
log,
});
await initializeAuthProfile();
const { sessionAgentId } = resolveSessionAgentIds({
sessionKey: params.sessionKey,
config: params.config,
agentId: params.agentId,
});
const configuredExecutionContract =
resolveAgentExecutionContract(params.config, sessionAgentId) ?? "default";
const strictAgenticActive = isStrictAgenticExecutionContractActive({
config: params.config,
sessionKey: params.sessionKey,
agentId: params.agentId,
provider,
modelId,
});
const executionContract = strictAgenticActive ? "strict-agentic" : "default";
const maxPlanningOnlyRetryAttempts = resolvePlanningOnlyRetryLimit(executionContract);
const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 2;
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
let overflowCompactionAttempts = 0;
let toolResultTruncationAttempted = false;
let bootstrapPromptWarningSignaturesSeen =
params.bootstrapPromptWarningSignaturesSeen ??
(params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []);
const usageAccumulator = createUsageAccumulator();
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
let autoCompactionCount = 0;
let runLoopIterations = 0;
let overloadProfileRotations = 0;
let planningOnlyRetryAttempts = 0;
let sameModelIdleTimeoutRetries = 0;
let lastRetryFailoverReason: FailoverReason | null = null;
let planningOnlyRetryInstruction: string | null = null;
const ackExecutionFastPathInstruction = resolveAckExecutionFastPathInstruction({
provider,
modelId,
prompt: params.prompt,
});
let rateLimitProfileRotations = 0;
let timeoutCompactionAttempts = 0;
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
const rateLimitProfileRotationLimit = resolveRateLimitProfileRotationLimit(params.config);
const maybeEscalateRateLimitProfileFallback = (params: {
failoverProvider: string;
failoverModel: string;
logFallbackDecision: (decision: "fallback_model", extra?: { status?: number }) => void;
}) => {
rateLimitProfileRotations += 1;
if (rateLimitProfileRotations <= rateLimitProfileRotationLimit || !fallbackConfigured) {
return;
}
const status = resolveFailoverStatus("rate_limit");
log.warn(
`rate-limit profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${rateLimitProfileRotations} rotations; escalating to model fallback`,
);
params.logFallbackDecision("fallback_model", { status });
throw new FailoverError(
"The AI service is temporarily rate-limited. Please try again in a moment.",
{
reason: "rate_limit",
provider: params.failoverProvider,
model: params.failoverModel,
profileId: lastProfileId,
status,
},
);
};
const maybeMarkAuthProfileFailure = async (failure: {
profileId?: string;
reason?: AuthProfileFailureReason | null;
config?: RunEmbeddedPiAgentParams["config"];
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
modelId?: string;
}) => {
const { profileId, reason } = failure;
if (!profileId || !reason || reason === "timeout") {
return;
}
await markAuthProfileFailure({
store: authStore,
profileId,
reason,
cfg: params.config,
agentDir,
runId: params.runId,
modelId: failure.modelId,
});
};
const resolveAuthProfileFailureReason = (
failoverReason: FailoverReason | null,
): AuthProfileFailureReason | null => {
// Timeouts are transport/model-path failures, not auth health signals,
// so they should not persist auth-profile failure state.
if (!failoverReason || failoverReason === "timeout") {
return null;
}
return failoverReason;
};
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) {
return;
}
log.warn(
`overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`,
);
try {
await sleepWithAbort(overloadFailoverBackoffMs, params.abortSignal);
} catch (err) {
if (params.abortSignal?.aborted) {
const abortErr = new Error("Operation aborted", { cause: err });
abortErr.name = "AbortError";
throw abortErr;
}
throw err;
}
};
// Resolve the context engine once and reuse across retries to avoid
// repeated initialization/connection overhead per attempt.
ensureContextEnginesInitialized();
const contextEngine = await resolveContextEngine(params.config);
try {
// When the engine owns compaction, compactEmbeddedPiSessionDirect is
// bypassed. Fire lifecycle hooks here so recovery paths still notify
// subscribers like memory extensions and usage trackers.
const runOwnsCompactionBeforeHook = async (reason: string) => {
if (
contextEngine.info.ownsCompaction !== true ||
!hookRunner?.hasHooks("before_compaction")
) {
return;
}
try {
await hookRunner.runBeforeCompaction(
{ messageCount: -1, sessionFile: params.sessionFile },
hookCtx,
);
} catch (hookErr) {
log.warn(`before_compaction hook failed during ${reason}: ${String(hookErr)}`);
}
};
const runOwnsCompactionAfterHook = async (
reason: string,
compactResult: Awaited<ReturnType<typeof contextEngine.compact>>,
) => {
if (
contextEngine.info.ownsCompaction !== true ||
!compactResult.ok ||
!compactResult.compacted ||
!hookRunner?.hasHooks("after_compaction")
) {
return;
}
try {
await hookRunner.runAfterCompaction(
{
messageCount: -1,
compactedCount: -1,
tokenCount: compactResult.result?.tokensAfter,
sessionFile: params.sessionFile,
},
hookCtx,
);
} catch (hookErr) {
log.warn(`after_compaction hook failed during ${reason}: ${String(hookErr)}`);
}
};
let authRetryPending = false;
let accumulatedReplayState = createEmbeddedRunReplayState();
// Hoisted so the retry-limit error path can use the most recent API total.
let lastTurnTotal: number | undefined;
while (true) {
if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) {
const message =
`Exceeded retry limit after ${runLoopIterations} attempts ` +
`(max=${MAX_RUN_LOOP_ITERATIONS}).`;
log.error(
`[run-retry-limit] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
`maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
);
const retryLimitDecision = resolveRunFailoverDecision({
stage: "retry_limit",
fallbackConfigured,
failoverReason: lastRetryFailoverReason,
});
return handleRetryLimitExhaustion({
message,
decision: retryLimitDecision,
provider,
model: modelId,
profileId: lastProfileId,
durationMs: Date.now() - started,
agentMeta: buildErrorAgentMeta({
sessionId: params.sessionId,
provider,
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastTurnTotal,
}),
replayInvalid: accumulatedReplayState.replayInvalid ? true : undefined,
livenessState: "blocked",
});
}
runLoopIterations += 1;
const runtimeAuthRetry = authRetryPending;
authRetryPending = false;
attemptedThinking.add(thinkLevel);
await fs.mkdir(resolvedWorkspace, { recursive: true });
const basePrompt =
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
const promptAdditions = [
ackExecutionFastPathInstruction,
planningOnlyRetryInstruction,
].filter(
(value): value is string => typeof value === "string" && value.trim().length > 0,
);
const prompt =
promptAdditions.length > 0
? `${basePrompt}\n\n${promptAdditions.join("\n\n")}`
: basePrompt;
let resolvedStreamApiKey: string | undefined;
if (!runtimeAuthState && apiKeyInfo) {
resolvedStreamApiKey = (apiKeyInfo as ApiKeyInfo).apiKey;
}
const attempt = await runEmbeddedAttemptWithBackend({
sessionId: params.sessionId,
sessionKey: resolvedSessionKey,
trigger: params.trigger,
memoryFlushWritePath: params.memoryFlushWritePath,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
messageTo: params.messageTo,
messageThreadId: params.messageThreadId,
groupId: params.groupId,
groupChannel: params.groupChannel,
groupSpace: params.groupSpace,
spawnedBy: params.spawnedBy,
senderId: params.senderId,
senderName: params.senderName,
senderUsername: params.senderUsername,
senderE164: params.senderE164,
senderIsOwner: params.senderIsOwner,
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
currentMessageId: params.currentMessageId,
replyToMode: params.replyToMode,
hasRepliedRef: params.hasRepliedRef,
sessionFile: params.sessionFile,
workspaceDir: resolvedWorkspace,
agentDir,
config: params.config,
allowGatewaySubagentBinding: params.allowGatewaySubagentBinding,
contextEngine,
contextTokenBudget: ctxInfo.tokens,
skillsSnapshot: params.skillsSnapshot,
prompt,
images: params.images,
imageOrder: params.imageOrder,
clientTools: params.clientTools,
disableTools: params.disableTools,
provider,
modelId,
model: applyAuthHeaderOverride(
applyLocalNoAuthHeaderOverride(effectiveModel, apiKeyInfo),
// When runtime auth exchange produced a different credential
// (runtimeAuthState is set), the exchanged token lives in
// authStorage and the SDK will pick it up automatically.
// Skip header injection to avoid leaking the pre-exchange key.
runtimeAuthState ? null : apiKeyInfo,
params.config,
),
resolvedApiKey: resolvedStreamApiKey,
authProfileId: lastProfileId,
authProfileIdSource: lockedProfileId ? "user" : "auto",
initialReplayState: accumulatedReplayState,
authStorage,
modelRegistry,
agentId: workspaceResolution.agentId,
legacyBeforeAgentStartResult,
thinkLevel,
fastMode: params.fastMode,
verboseLevel: params.verboseLevel,
reasoningLevel: params.reasoningLevel,
toolResultFormat: resolvedToolResultFormat,
execOverrides: params.execOverrides,
bashElevated: params.bashElevated,
timeoutMs: params.timeoutMs,
runId: params.runId,
abortSignal: params.abortSignal,
replyOperation: params.replyOperation,
shouldEmitToolResult: params.shouldEmitToolResult,
shouldEmitToolOutput: params.shouldEmitToolOutput,
onPartialReply: params.onPartialReply,
onAssistantMessageStart: params.onAssistantMessageStart,
onBlockReply: params.onBlockReply,
onBlockReplyFlush: params.onBlockReplyFlush,
blockReplyBreak: params.blockReplyBreak,
blockReplyChunking: params.blockReplyChunking,
onReasoningStream: params.onReasoningStream,
onReasoningEnd: params.onReasoningEnd,
onToolResult: params.onToolResult,
onAgentEvent: params.onAgentEvent,
extraSystemPrompt: params.extraSystemPrompt,
inputProvenance: params.inputProvenance,
streamParams: params.streamParams,
ownerNumbers: params.ownerNumbers,
enforceFinalTag: params.enforceFinalTag,
silentExpected: params.silentExpected,
bootstrapContextMode: params.bootstrapContextMode,
bootstrapContextRunKind: params.bootstrapContextRunKind,
bootstrapPromptWarningSignaturesSeen,
bootstrapPromptWarningSignature:
bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1],
});
const {
aborted,
externalAbort,
promptError,
promptErrorSource,
preflightRecovery,
timedOut,
idleTimedOut,
timedOutDuringCompaction,
sessionIdUsed,
lastAssistant: sessionLastAssistant,
currentAttemptAssistant,
} = attempt;
bootstrapPromptWarningSignaturesSeen =
attempt.bootstrapPromptWarningSignaturesSeen ??
(attempt.bootstrapPromptWarningSignature
? Array.from(
new Set([
...bootstrapPromptWarningSignaturesSeen,
attempt.bootstrapPromptWarningSignature,
]),
)
: bootstrapPromptWarningSignaturesSeen);
const lastAssistantUsage = normalizeUsage(sessionLastAssistant?.usage as UsageLike);
const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
// Keep prompt size from the latest model call so session totalTokens
// reflects current context usage, not accumulated tool-loop usage.
lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
autoCompactionCount += attemptCompactionCount;
const activeErrorContext = resolveActiveErrorContext({
provider,
model: modelId,
assistant: currentAttemptAssistant ?? sessionLastAssistant,
});
const resolveReplayInvalidForAttempt = (incompleteTurnText?: string | null) =>
accumulatedReplayState.replayInvalid ||
resolveReplayInvalidFlag({
attempt,
incompleteTurnText,
});
if (resolveReplayInvalidForAttempt(null)) {
accumulatedReplayState.replayInvalid = true;
}
accumulatedReplayState = observeReplayMetadata(
accumulatedReplayState,
attempt.replayMetadata,
);
const formattedAssistantErrorText = sessionLastAssistant
? formatAssistantErrorText(sessionLastAssistant, {
cfg: params.config,
sessionKey: resolvedSessionKey ?? params.sessionId,
provider: activeErrorContext.provider,
model: activeErrorContext.model,
})
: undefined;
const assistantErrorText =
sessionLastAssistant?.stopReason === "error"
? sessionLastAssistant.errorMessage?.trim() || formattedAssistantErrorText
: undefined;
const canRestartForLiveSwitch =
!attempt.didSendViaMessagingTool &&
!attempt.didSendDeterministicApprovalPrompt &&
!attempt.lastToolError &&
attempt.toolMetas.length === 0 &&
attempt.assistantTexts.length === 0;
if (preflightRecovery?.handled) {
log.info(
`[context-overflow-precheck] early recovery route=${preflightRecovery.route} ` +
`completed for ${provider}/${modelId}; retrying prompt`,
);
continue;
}
const requestedSelection = shouldSwitchToLiveModel({
cfg: params.config,
sessionKey: resolvedSessionKey,
agentId: params.agentId,
defaultProvider: DEFAULT_PROVIDER,
defaultModel: DEFAULT_MODEL,
currentProvider: provider,
currentModel: modelId,
currentAuthProfileId: preferredProfileId,
currentAuthProfileIdSource: params.authProfileIdSource,
});
if (requestedSelection && canRestartForLiveSwitch) {
await clearLiveModelSwitchPending({
cfg: params.config,
sessionKey: resolvedSessionKey,
agentId: params.agentId,
});
log.info(
`live session model switch requested during active attempt for ${params.sessionId}: ${provider}/${modelId} -> ${requestedSelection.provider}/${requestedSelection.model}`,
);
throw new LiveSessionModelSwitchError(requestedSelection);
}
// ── Timeout-triggered compaction ──────────────────────────────────
// When the LLM times out with high context usage, compact before
// retrying to break the death spiral of repeated timeouts.
if (timedOut && !timedOutDuringCompaction) {
// Only consider prompt-side tokens here. API totals include output
// tokens, which can make a long generation look like high context
// pressure even when the prompt itself was small.
const lastTurnPromptTokens = derivePromptTokens(lastRunPromptUsage);
const tokenUsedRatio =
lastTurnPromptTokens != null && ctxInfo.tokens > 0
? lastTurnPromptTokens / ctxInfo.tokens
: 0;
if (timeoutCompactionAttempts >= MAX_TIMEOUT_COMPACTION_ATTEMPTS) {
log.warn(
`[timeout-compaction] already attempted timeout compaction ${timeoutCompactionAttempts} time(s); falling through to failover rotation`,
);
} else if (tokenUsedRatio > 0.65) {
const timeoutDiagId = createCompactionDiagId();
timeoutCompactionAttempts++;
log.warn(
`[timeout-compaction] LLM timed out with high prompt token usage (${Math.round(tokenUsedRatio * 100)}%); ` +
`attempting compaction before retry (attempt ${timeoutCompactionAttempts}/${MAX_TIMEOUT_COMPACTION_ATTEMPTS}) diagId=${timeoutDiagId}`,
);
let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>;
await runOwnsCompactionBeforeHook("timeout recovery");
try {
const timeoutCompactionRuntimeContext = {
...buildEmbeddedCompactionRuntimeContext({
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
currentMessageId: params.currentMessageId,
authProfileId: lastProfileId,
workspaceDir: resolvedWorkspace,
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
senderIsOwner: params.senderIsOwner,
senderId: params.senderId,
provider,
modelId,
thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
}),
...(attempt.promptCache ? { promptCache: attempt.promptCache } : {}),
runId: params.runId,
trigger: "timeout_recovery",
diagId: timeoutDiagId,
attempt: timeoutCompactionAttempts,
maxAttempts: MAX_TIMEOUT_COMPACTION_ATTEMPTS,
};
timeoutCompactResult = await contextEngine.compact({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
sessionFile: params.sessionFile,
tokenBudget: ctxInfo.tokens,
force: true,
compactionTarget: "budget",
runtimeContext: timeoutCompactionRuntimeContext,
});
} catch (compactErr) {
log.warn(
`[timeout-compaction] contextEngine.compact() threw during timeout recovery for ${provider}/${modelId}: ${String(compactErr)}`,
);
timeoutCompactResult = {
ok: false,
compacted: false,
reason: String(compactErr),
};
}
await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult);
if (timeoutCompactResult.compacted) {
autoCompactionCount += 1;
if (contextEngine.info.ownsCompaction === true) {
await runPostCompactionSideEffects({
config: params.config,
sessionKey: params.sessionKey,
sessionFile: params.sessionFile,
});
}
log.info(
`[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`,
);
continue;
} else {
log.warn(
`[timeout-compaction] compaction did not reduce context for ${provider}/${modelId}; falling through to normal handling`,
);
}
}
}
const contextOverflowError = !aborted
? (() => {
if (promptError) {
const errorText = formatErrorMessage(promptError);
if (isLikelyContextOverflowError(errorText)) {
return { text: errorText, source: "promptError" as const };
}
// Prompt submission failed with a non-overflow error. Do not
// inspect prior assistant errors from history for this attempt.
return null;
}
if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
return {
text: assistantErrorText,
source: "assistantError" as const,
};
}
return null;
})()
: null;
if (contextOverflowError) {
const overflowDiagId = createCompactionDiagId();
const errorText = contextOverflowError.text;
const msgCount = attempt.messagesSnapshot?.length ?? 0;
const observedOverflowTokens = extractObservedOverflowTokenCount(errorText);
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
`observedTokens=${observedOverflowTokens ?? "unknown"} ` +
`error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
const hadAttemptLevelCompaction = attemptCompactionCount > 0;
// If this attempt already compacted (SDK auto-compaction), avoid immediately
// running another explicit compaction for the same overflow trigger.
if (
!isCompactionFailure &&
hadAttemptLevelCompaction &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
log.warn(
`context overflow persisted after in-attempt compaction (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); retrying prompt without additional compaction for ${provider}/${modelId}`,
);
continue;
}
// Attempt explicit overflow compaction only when this attempt did not
// already auto-compact.
if (
!isCompactionFailure &&
!hadAttemptLevelCompaction &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
if (log.isEnabled("debug")) {
log.debug(
`[compaction-diag] decision diagId=${overflowDiagId} branch=compact ` +
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
`attempt=${overflowCompactionAttempts + 1} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
);
}
overflowCompactionAttempts++;
log.warn(
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
);
let compactResult: Awaited<ReturnType<typeof contextEngine.compact>>;
await runOwnsCompactionBeforeHook("overflow recovery");
try {
const overflowCompactionRuntimeContext = {
...buildEmbeddedCompactionRuntimeContext({
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
currentMessageId: params.currentMessageId,
authProfileId: lastProfileId,
workspaceDir: resolvedWorkspace,
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
senderIsOwner: params.senderIsOwner,
senderId: params.senderId,
provider,
modelId,
thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
}),
...(attempt.promptCache ? { promptCache: attempt.promptCache } : {}),
runId: params.runId,
trigger: "overflow",
...(observedOverflowTokens !== undefined
? { currentTokenCount: observedOverflowTokens }
: {}),
diagId: overflowDiagId,
attempt: overflowCompactionAttempts,
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
};
compactResult = await contextEngine.compact({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
sessionFile: params.sessionFile,
tokenBudget: ctxInfo.tokens,
...(observedOverflowTokens !== undefined
? { currentTokenCount: observedOverflowTokens }
: {}),
force: true,
compactionTarget: "budget",
runtimeContext: overflowCompactionRuntimeContext,
});
if (compactResult.ok && compactResult.compacted) {
await runContextEngineMaintenance({
contextEngine,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
sessionFile: params.sessionFile,
reason: "compaction",
runtimeContext: overflowCompactionRuntimeContext,
});
}
} catch (compactErr) {
log.warn(
`contextEngine.compact() threw during overflow recovery for ${provider}/${modelId}: ${String(compactErr)}`,
);
compactResult = {
ok: false,
compacted: false,
reason: String(compactErr),
};
}
await runOwnsCompactionAfterHook("overflow recovery", compactResult);
if (compactResult.compacted) {
if (preflightRecovery?.route === "compact_then_truncate") {
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens: ctxInfo.tokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-precheck] post-compaction tool-result truncation succeeded for ` +
`${provider}/${modelId}; truncated ${truncResult.truncatedCount} tool result(s)`,
);
} else {
log.warn(
`[context-overflow-precheck] post-compaction tool-result truncation did not help for ` +
`${provider}/${modelId}: ${truncResult.reason ?? "unknown"}`,
);
}
}
autoCompactionCount += 1;
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
continue;
}
log.warn(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
);
}
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
toolResultTruncationAttempted = true;
log.warn(
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
);
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
continue;
}
log.warn(
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
);
}
}
if (
(isCompactionFailure ||
overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) &&
log.isEnabled("debug")
) {
log.debug(
`[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
);
}
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
attempt.setTerminalLifecycleMeta?.({
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
});
return {
payloads: [
{
text:
"Context overflow: prompt too large for the model. " +
"Try /reset (or /new) to start a fresh session, or use a larger-context model.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: buildErrorAgentMeta({
sessionId: sessionIdUsed,
provider,
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastAssistant: sessionLastAssistant,
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
error: { kind, message: errorText },
},
};
}
if (promptError && !aborted && promptErrorSource !== "compaction") {
// Normalize wrapped errors (e.g. abort-wrapped RESOURCE_EXHAUSTED) into
// FailoverError so rate-limit classification works even for nested shapes.
//
// promptErrorSource === "compaction" means the model call already completed and the
// abort happened only while waiting for compaction/retry cleanup. Retrying from here
// would replay that completed tool turn as a fresh prompt attempt.
const normalizedPromptFailover = coerceToFailoverError(promptError, {
provider: activeErrorContext.provider,
model: activeErrorContext.model,
profileId: lastProfileId,
});
const promptErrorDetails = normalizedPromptFailover
? describeFailoverError(normalizedPromptFailover)
: describeFailoverError(promptError);
const errorText = promptErrorDetails.message || formatErrorMessage(promptError);
if (await maybeRefreshRuntimeAuthForAuthError(errorText, runtimeAuthRetry)) {
authRetryPending = true;
continue;
}
// Handle role ordering errors with a user-friendly message
if (/incorrect role information|roles must alternate/i.test(errorText)) {
attempt.setTerminalLifecycleMeta?.({
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
});
return {
payloads: [
{
text:
"Message ordering conflict - please try again. " +
"If this persists, use /new to start a fresh session.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: buildErrorAgentMeta({
sessionId: sessionIdUsed,
provider,
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastAssistant: sessionLastAssistant,
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
error: { kind: "role_ordering", message: errorText },
},
};
}
// Handle image size errors with a user-friendly message (no retry needed)
const imageSizeError = parseImageSizeError(errorText);
if (imageSizeError) {
const maxMb = imageSizeError.maxMb;
const maxMbLabel =
typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
attempt.setTerminalLifecycleMeta?.({
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
});
return {
payloads: [
{
text:
`Image too large for the model${maxBytesHint}. ` +
"Please compress or resize the image and try again.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: buildErrorAgentMeta({
sessionId: sessionIdUsed,
provider,
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastAssistant: sessionLastAssistant,
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
error: { kind: "image_size", message: errorText },
},
};
}
const promptFailoverReason =
promptErrorDetails.reason ?? classifyFailoverReason(errorText, { provider });
const promptProfileFailureReason =
resolveAuthProfileFailureReason(promptFailoverReason);
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: promptProfileFailureReason,
modelId,
});
const promptFailoverFailure =
promptFailoverReason !== null || isFailoverErrorMessage(errorText, { provider });
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
const failedPromptProfileId = lastProfileId;
const logPromptFailoverDecision = createFailoverDecisionLogger({
stage: "prompt",
runId: params.runId,
rawError: errorText,
failoverReason: promptFailoverReason,
profileFailureReason: promptProfileFailureReason,
provider,
model: modelId,
sourceProvider: provider,
sourceModel: modelId,
profileId: failedPromptProfileId,
fallbackConfigured,
aborted,
});
if (promptFailoverReason === "rate_limit") {
maybeEscalateRateLimitProfileFallback({
failoverProvider: provider,
failoverModel: modelId,
logFallbackDecision: logPromptFailoverDecision,
});
}
let promptFailoverDecision = resolveRunFailoverDecision({
stage: "prompt",
aborted,
externalAbort,
fallbackConfigured,
failoverFailure: promptFailoverFailure,
failoverReason: promptFailoverReason,
profileRotated: false,
});
if (
promptFailoverDecision.action === "rotate_profile" &&
(await advanceAuthProfile())
) {
lastRetryFailoverReason = mergeRetryFailoverReason({
previous: lastRetryFailoverReason,
failoverReason: promptFailoverReason,
});
logPromptFailoverDecision("rotate_profile");
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
continue;
}
if (promptFailoverDecision.action === "rotate_profile") {
promptFailoverDecision = resolveRunFailoverDecision({
stage: "prompt",
aborted,
externalAbort,
fallbackConfigured,
failoverFailure: promptFailoverFailure,
failoverReason: promptFailoverReason,
profileRotated: true,
});
}
const fallbackThinking = pickFallbackThinkingLevel({
message: errorText,
attempted: attemptedThinking,
});
if (fallbackThinking) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
// Throw FailoverError for prompt-side failover reasons when fallbacks
// are configured so outer model fallback can continue on overload,
// rate-limit, auth, or billing failures.
if (promptFailoverDecision.action === "fallback_model") {
const fallbackReason = promptFailoverDecision.reason ?? "unknown";
const status = resolveFailoverStatus(fallbackReason);
logPromptFailoverDecision("fallback_model", { status });
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
throw (
normalizedPromptFailover ??
new FailoverError(errorText, {
reason: fallbackReason,
provider,
model: modelId,
profileId: lastProfileId,
status,
})
);
}
if (promptFailoverDecision.action === "surface_error") {
logPromptFailoverDecision("surface_error");
}
throw promptError;
}
const assistantForFailover = currentAttemptAssistant ?? sessionLastAssistant;
const fallbackThinking = pickFallbackThinkingLevel({
message: assistantForFailover?.errorMessage,
attempted: attemptedThinking,
});
if (fallbackThinking && !aborted) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
const authFailure = isAuthAssistantError(assistantForFailover);
const rateLimitFailure = isRateLimitAssistantError(assistantForFailover);
const billingFailure = isBillingAssistantError(assistantForFailover);
const failoverFailure = isFailoverAssistantError(assistantForFailover);
const assistantFailoverReason = classifyFailoverReason(
assistantForFailover?.errorMessage ?? "",
{
provider: assistantForFailover?.provider,
},
);
const assistantProfileFailureReason =
resolveAuthProfileFailureReason(assistantFailoverReason);
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
const imageDimensionError = parseImageDimensionError(
assistantForFailover?.errorMessage ?? "",
);
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
const failedAssistantProfileId = lastProfileId;
const logAssistantFailoverDecision = createFailoverDecisionLogger({
stage: "assistant",
runId: params.runId,
rawError: assistantForFailover?.errorMessage?.trim(),
failoverReason: assistantFailoverReason,
profileFailureReason: assistantProfileFailureReason,
provider: activeErrorContext.provider,
model: activeErrorContext.model,
sourceProvider: assistantForFailover?.provider ?? provider,
sourceModel: assistantForFailover?.model ?? modelId,
profileId: failedAssistantProfileId,
fallbackConfigured,
timedOut,
aborted,
});
if (
authFailure &&
(await maybeRefreshRuntimeAuthForAuthError(
assistantForFailover?.errorMessage ?? "",
runtimeAuthRetry,
))
) {
authRetryPending = true;
continue;
}
if (imageDimensionError && lastProfileId) {
const details = [
imageDimensionError.messageIndex !== undefined
? `message=${imageDimensionError.messageIndex}`
: null,
imageDimensionError.contentIndex !== undefined
? `content=${imageDimensionError.contentIndex}`
: null,
imageDimensionError.maxDimensionPx !== undefined
? `limit=${imageDimensionError.maxDimensionPx}px`
: null,
]
.filter(Boolean)
.join(" ");
log.warn(
`Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
);
}
const assistantFailoverDecision = resolveRunFailoverDecision({
stage: "assistant",
aborted,
externalAbort,
fallbackConfigured,
failoverFailure,
failoverReason: assistantFailoverReason,
timedOut,
timedOutDuringCompaction,
profileRotated: false,
});
const assistantFailoverOutcome = await handleAssistantFailover({
initialDecision: assistantFailoverDecision,
aborted,
externalAbort,
fallbackConfigured,
failoverFailure,
failoverReason: assistantFailoverReason,
timedOut,
idleTimedOut,
timedOutDuringCompaction,
allowSameModelIdleTimeoutRetry:
timedOut &&
idleTimedOut &&
!timedOutDuringCompaction &&
!fallbackConfigured &&
canRestartForLiveSwitch &&
sameModelIdleTimeoutRetries < MAX_SAME_MODEL_IDLE_TIMEOUT_RETRIES,
assistantProfileFailureReason,
lastProfileId,
modelId,
provider,
activeErrorContext,
lastAssistant: assistantForFailover,
config: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
authFailure,
rateLimitFailure,
billingFailure,
cloudCodeAssistFormatError,
isProbeSession,
overloadProfileRotations,
overloadProfileRotationLimit,
previousRetryFailoverReason: lastRetryFailoverReason,
logAssistantFailoverDecision,
warn: (message) => log.warn(message),
maybeMarkAuthProfileFailure,
maybeEscalateRateLimitProfileFallback,
maybeBackoffBeforeOverloadFailover,
advanceAuthProfile,
});
overloadProfileRotations = assistantFailoverOutcome.overloadProfileRotations;
if (assistantFailoverOutcome.action === "retry") {
if (assistantFailoverOutcome.retryKind === "same_model_idle_timeout") {
sameModelIdleTimeoutRetries += 1;
}
lastRetryFailoverReason = assistantFailoverOutcome.lastRetryFailoverReason;
continue;
}
if (assistantFailoverOutcome.action === "throw") {
throw assistantFailoverOutcome.error;
}
const usageMeta = buildUsageAgentMetaFields({
usageAccumulator,
lastAssistantUsage: sessionLastAssistant?.usage as UsageLike | undefined,
lastRunPromptUsage,
lastTurnTotal,
});
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: sessionLastAssistant?.provider ?? provider,
model: sessionLastAssistant?.model ?? model.id,
usage: usageMeta.usage,
lastCallUsage: usageMeta.lastCallUsage,
promptTokens: usageMeta.promptTokens,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
};
const finalAssistantVisibleText = resolveFinalAssistantVisibleText(sessionLastAssistant);
const finalAssistantRawText = resolveFinalAssistantRawText(sessionLastAssistant);
const payloads = buildEmbeddedRunPayloads({
assistantTexts: attempt.assistantTexts,
toolMetas: attempt.toolMetas,
lastAssistant: attempt.lastAssistant,
lastToolError: attempt.lastToolError,
config: params.config,
isCronTrigger: params.trigger === "cron",
sessionKey: params.sessionKey ?? params.sessionId,
provider: activeErrorContext.provider,
model: activeErrorContext.model,
verboseLevel: params.verboseLevel,
reasoningLevel: params.reasoningLevel,
toolResultFormat: resolvedToolResultFormat,
suppressToolErrorWarnings: params.suppressToolErrorWarnings,
inlineToolResultsAllowed: false,
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
});
const payloadsWithToolMedia = mergeAttemptToolMediaPayloads({
payloads,
toolMediaUrls: attempt.toolMediaUrls,
toolAudioAsVoice: attempt.toolAudioAsVoice,
});
// Timeout aborts can leave the run without any assistant payloads.
// Emit an explicit timeout error instead of silently completing, so
// callers do not lose the turn as an orphaned user message.
if (timedOut && !timedOutDuringCompaction && !payloadsWithToolMedia?.length) {
const timeoutText = idleTimedOut
? "The model did not produce a response before the LLM idle timeout. " +
"Please try again, or increase `agents.defaults.llm.idleTimeoutSeconds` in your config (set to 0 to disable)."
: "Request timed out before a response was generated. " +
"Please try again, or increase `agents.defaults.timeoutSeconds` in your config.";
const replayInvalid = resolveReplayInvalidForAttempt(null);
const livenessState = resolveRunLivenessState({
payloadCount: payloads.length,
aborted,
timedOut,
attempt,
incompleteTurnText: null,
});
attempt.setTerminalLifecycleMeta?.({
replayInvalid,
livenessState,
});
return {
payloads: [
{
text: timeoutText,
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
finalAssistantVisibleText,
finalAssistantRawText,
replayInvalid,
livenessState,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
// Detect incomplete turns where prompt() resolved prematurely and the
// runner would otherwise drop an empty reply.
const incompleteTurnText = resolveIncompleteTurnPayloadText({
payloadCount: payloadsWithToolMedia?.length ?? 0,
aborted,
timedOut,
attempt,
});
const nextPlanningOnlyRetryInstruction = resolvePlanningOnlyRetryInstruction({
provider,
modelId,
prompt: params.prompt,
aborted,
timedOut,
attempt,
});
if (
!incompleteTurnText &&
nextPlanningOnlyRetryInstruction &&
planningOnlyRetryAttempts < maxPlanningOnlyRetryAttempts
) {
const planningOnlyText = attempt.assistantTexts.join("\n\n").trim();
const planDetails = extractPlanningOnlyPlanDetails(planningOnlyText);
if (planDetails) {
emitAgentPlanEvent({
runId: params.runId,
...(params.sessionKey ? { sessionKey: params.sessionKey } : {}),
data: {
phase: "update",
title: "Assistant proposed a plan",
explanation: planDetails.explanation,
steps: planDetails.steps,
source: "planning_only_retry",
},
});
void params.onAgentEvent?.({
stream: "plan",
data: {
phase: "update",
title: "Assistant proposed a plan",
explanation: planDetails.explanation,
steps: planDetails.steps,
source: "planning_only_retry",
},
});
}
planningOnlyRetryAttempts += 1;
planningOnlyRetryInstruction = nextPlanningOnlyRetryInstruction;
log.warn(
`planning-only turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContract} — retrying ` +
`${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} with act-now steer`,
);
continue;
}
if (!incompleteTurnText && nextPlanningOnlyRetryInstruction && strictAgenticActive) {
log.warn(
`strict-agentic run exhausted planning-only retries: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${provider}/${modelId} configured=${configuredExecutionContract} — surfacing blocked state`,
);
// Criterion 4 of the GPT-5.4 parity gate requires every terminal
// exit path to emit an explicit livenessState + replayInvalid so
// downstream observers never see "silent disappearance". Every
// other hard-error terminal branch in this file uses "blocked"
// for its livenessState (role ordering, image size, schema
// error, compaction timeout, aborted-with-no-payloads). Match
// that convention here so lifecycle consumers treat an
// isError:true strict-agentic-blocked payload the same way they
// treat any other error-terminal payload. Replay validity is
// delegated to the shared resolver because the plan-only
// transcript itself is replay-safe even though the run is
// terminal.
const replayInvalid = resolveReplayInvalidForAttempt(null);
const livenessState: EmbeddedRunLivenessState = "blocked";
attempt.setTerminalLifecycleMeta?.({
replayInvalid,
livenessState,
});
return {
payloads: [
{
text: STRICT_AGENTIC_BLOCKED_TEXT,
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
finalAssistantVisibleText,
finalAssistantRawText,
replayInvalid,
livenessState,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
if (incompleteTurnText) {
const replayInvalid = resolveReplayInvalidForAttempt(incompleteTurnText);
const livenessState = resolveRunLivenessState({
payloadCount: payloads.length,
aborted,
timedOut,
attempt,
incompleteTurnText,
});
attempt.setTerminalLifecycleMeta?.({
replayInvalid,
livenessState,
});
const incompleteStopReason = attempt.lastAssistant?.stopReason;
log.warn(
`incomplete turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
`stopReason=${incompleteStopReason} payloads=0 — surfacing error to user`,
);
// Mark the failing profile for cooldown so multi-profile setups
// rotate away from the exhausted credential on the next turn.
if (lastProfileId) {
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: resolveAuthProfileFailureReason(assistantFailoverReason),
});
}
return {
payloads: [
{
text: incompleteTurnText,
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
finalAssistantVisibleText,
finalAssistantRawText,
replayInvalid,
livenessState,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
log.debug(
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
);
if (lastProfileId) {
await markAuthProfileGood({
store: authStore,
provider,
profileId: lastProfileId,
agentDir: params.agentDir,
});
await markAuthProfileUsed({
store: authStore,
profileId: lastProfileId,
agentDir: params.agentDir,
});
}
const replayInvalid = resolveReplayInvalidForAttempt(null);
const livenessState = resolveRunLivenessState({
payloadCount: payloads.length,
aborted,
timedOut,
attempt,
incompleteTurnText: null,
});
attempt.setTerminalLifecycleMeta?.({
replayInvalid,
livenessState,
});
return {
payloads: payloadsWithToolMedia?.length ? payloadsWithToolMedia : undefined,
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
finalAssistantVisibleText,
finalAssistantRawText,
replayInvalid,
livenessState,
// Handle client tool calls (OpenResponses hosted tools)
// Propagate the LLM stop reason so callers (lifecycle events,
// ACP bridge) can distinguish end_turn from max_tokens.
stopReason: attempt.clientToolCall
? "tool_calls"
: attempt.yieldDetected
? "end_turn"
: (sessionLastAssistant?.stopReason as string | undefined),
pendingToolCalls: attempt.clientToolCall
? [
{
id: randomBytes(5).toString("hex").slice(0, 9),
name: attempt.clientToolCall.name,
arguments: JSON.stringify(attempt.clientToolCall.params),
},
]
: undefined,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
} finally {
await contextEngine.dispose?.();
stopRuntimeAuthRefreshTimer();
if (params.cleanupBundleMcpOnRunEnd === true) {
await disposeSessionMcpRuntime(params.sessionId).catch((error) => {
log.warn(
`bundle-mcp cleanup failed after run for ${params.sessionId}: ${formatErrorMessage(error)}`,
);
});
}
}
});
});
}