mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:00:43 +00:00
fix: record model fallback steps in trajectories
This commit is contained in:
@@ -44,6 +44,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868.
|
||||
- Discord: own the Carbon interaction listener and hand off Discord slash/component handling asynchronously, so compaction or long session locks no longer trip `InteractionEventListener` listener timeouts. Fixes #73204. Thanks @slideshow-dingo.
|
||||
- Compaction/diagnostics: keep unknown compaction failure classifications stable while logging sanitized detail for unclassified provider errors such as missing Ollama provider adapters. Thanks @gzsiang.
|
||||
- Models/fallbacks: record first-class `model.fallback_step` trajectory events with from/to models, failure detail, chain position, and final outcome so support exports preserve the primary model failure even when a later fallback also fails. Fixes #71744. Thanks @nikolaykazakovvs-ux.
|
||||
- Gateway/startup: keep value-option foreground starts on the gateway fast path and skip proxy bootstrap unless proxy env is configured, reducing normal gateway startup RSS and avoiding full CLI graph loading. Thanks @vincentkoc.
|
||||
- Heartbeat/models: show heartbeat model bleed guidance on context-overflow resets when the last runtime model matches configured `heartbeat.model`, so smaller local heartbeat models point users to `isolatedSession` or `lightContext` instead of only compaction-buffer tuning. Fixes #67314. Thanks @Knightmare6890.
|
||||
- Subagents/models: persist `sessions_spawn.model` and configured subagent models as child-session model overrides before the first turn, so spawned subagents actually run on the requested provider/model instead of reverting to the target agent default. Fixes #73180. Thanks @danielzinhu99.
|
||||
|
||||
@@ -64,6 +64,7 @@ Runtime events include:
|
||||
- `trace.metadata`
|
||||
- `context.compiled`
|
||||
- `prompt.submitted`
|
||||
- `model.fallback_step`, including the source model, next model, failure reason/detail, chain position, and whether fallback advanced, succeeded, or exhausted the chain
|
||||
- `model.completed`
|
||||
- `trace.artifacts`
|
||||
- `session.ended`
|
||||
|
||||
@@ -19,6 +19,8 @@ const state = vi.hoisted(() => ({
|
||||
clearAgentRunContextMock: vi.fn(),
|
||||
updateSessionStoreAfterAgentRunMock: vi.fn(),
|
||||
deliverAgentCommandResultMock: vi.fn(),
|
||||
trajectoryRecordEventMock: vi.fn(),
|
||||
trajectoryFlushMock: vi.fn(async () => undefined),
|
||||
clearSessionAuthProfileOverrideMock: vi.fn(),
|
||||
authProfileStoreMock: { profiles: {} } as { profiles: Record<string, unknown> },
|
||||
sessionEntryMock: undefined as unknown,
|
||||
@@ -249,6 +251,15 @@ vi.mock("../terminal/ansi.js", () => ({
|
||||
sanitizeForLog: (s: string) => s,
|
||||
}));
|
||||
|
||||
// Replace the trajectory runtime module with a stub recorder so tests can
// observe recorded events and flush calls through the hoisted `state` mocks.
vi.mock("../trajectory/runtime.js", () => ({
|
||||
createTrajectoryRuntimeRecorder: () => ({
|
||||
enabled: true,
|
||||
filePath: "/tmp/session.trajectory.jsonl",
|
||||
// Forward every argument unchanged so assertions can match on both the
// event name and its payload.
recordEvent: (...args: unknown[]) => state.trajectoryRecordEventMock(...args),
|
||||
flush: () => state.trajectoryFlushMock(),
|
||||
}),
|
||||
}));
|
||||
|
||||
vi.mock("../utils/message-channel.js", () => ({
|
||||
resolveMessageChannel: () => "test",
|
||||
}));
|
||||
@@ -356,6 +367,7 @@ type FallbackRunnerParams = {
|
||||
provider: string;
|
||||
model: string;
|
||||
run: (provider: string, model: string) => Promise<unknown>;
|
||||
onFallbackStep?: (step: Record<string, unknown>) => void | Promise<void>;
|
||||
classifyResult?: (params: {
|
||||
provider: string;
|
||||
model: string;
|
||||
@@ -462,6 +474,7 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => {
|
||||
state.sessionStoreMock = undefined;
|
||||
state.deliverAgentCommandResultMock.mockResolvedValue(undefined);
|
||||
state.updateSessionStoreAfterAgentRunMock.mockResolvedValue(undefined);
|
||||
state.trajectoryFlushMock.mockResolvedValue(undefined);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -493,6 +506,42 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => {
|
||||
expect(lifecycleEndCalls.length).toBeGreaterThanOrEqual(1);
|
||||
});
|
||||
|
||||
// Checks that a fallback step reported through `onFallbackStep` is recorded
// as a `model.fallback_step` trajectory event and that the recorder is flushed.
it("records fallback steps to the session trajectory runtime", async () => {
|
||||
// Stub the fallback runner: report a single fallback step, then run the
// attempt once with the requested provider/model and return its result.
state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => {
|
||||
await params.onFallbackStep?.({
|
||||
fallbackStepType: "fallback_step",
|
||||
fallbackStepFromModel: "ollama/llama3",
|
||||
fallbackStepToModel: "openai/gpt-5.4",
|
||||
fallbackStepFromFailureReason: "overloaded",
|
||||
fallbackStepChainPosition: 1,
|
||||
fallbackStepFinalOutcome: "next_fallback",
|
||||
});
|
||||
const result = await params.run(params.provider, params.model);
|
||||
return {
|
||||
result,
|
||||
provider: params.provider,
|
||||
model: params.model,
|
||||
attempts: [],
|
||||
};
|
||||
});
|
||||
state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("openai", "gpt-5.4"));
|
||||
|
||||
await runBasicAgentCommand();
|
||||
|
||||
// The step payload must reach the recorder under the `model.fallback_step`
// event name with the same fields the runner reported.
expect(state.trajectoryRecordEventMock).toHaveBeenCalledWith(
|
||||
"model.fallback_step",
|
||||
expect.objectContaining({
|
||||
fallbackStepType: "fallback_step",
|
||||
fallbackStepFromModel: "ollama/llama3",
|
||||
fallbackStepToModel: "openai/gpt-5.4",
|
||||
fallbackStepFromFailureReason: "overloaded",
|
||||
fallbackStepChainPosition: 1,
|
||||
fallbackStepFinalOutcome: "next_fallback",
|
||||
}),
|
||||
);
|
||||
// The recorder must be flushed at least once by the end of the run.
expect(state.trajectoryFlushMock).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("propagates non-switch errors without retrying and emits lifecycle error", async () => {
|
||||
state.runWithModelFallbackMock.mockRejectedValueOnce(new Error("provider down"));
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ import { applyModelOverrideToSessionEntry } from "../sessions/model-overrides.js
|
||||
import { resolveSendPolicy } from "../sessions/send-policy.js";
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
import { sanitizeForLog } from "../terminal/ansi.js";
|
||||
import { createTrajectoryRuntimeRecorder } from "../trajectory/runtime.js";
|
||||
import { resolveMessageChannel } from "../utils/message-channel.js";
|
||||
import { resolveAgentRuntimeConfig } from "./agent-runtime-config.js";
|
||||
import {
|
||||
@@ -909,6 +910,16 @@ async function agentCommandInternal(
|
||||
let fallbackModel = model;
|
||||
const MAX_LIVE_SWITCH_RETRIES = 5;
|
||||
let liveSwitchRetries = 0;
|
||||
const fallbackTrajectoryRecorder = createTrajectoryRuntimeRecorder({
|
||||
cfg,
|
||||
runId,
|
||||
sessionId,
|
||||
sessionKey,
|
||||
sessionFile,
|
||||
provider,
|
||||
modelId: model,
|
||||
workspaceDir,
|
||||
});
|
||||
for (;;) {
|
||||
try {
|
||||
const spawnedBy = normalizedSpawned.spawnedBy ?? sessionEntry?.spawnedBy;
|
||||
@@ -928,6 +939,9 @@ async function agentCommandInternal(
|
||||
runId,
|
||||
agentDir,
|
||||
fallbacksOverride: effectiveFallbacksOverride,
|
||||
onFallbackStep: (step) => {
|
||||
fallbackTrajectoryRecorder?.recordEvent("model.fallback_step", step);
|
||||
},
|
||||
classifyResult: ({ provider, model, result }) =>
|
||||
classifyEmbeddedPiRunResultForModelFallback({
|
||||
provider,
|
||||
@@ -1024,6 +1038,7 @@ async function agentCommandInternal(
|
||||
},
|
||||
});
|
||||
}
|
||||
await fallbackTrajectoryRecorder?.flush();
|
||||
throw new Error(
|
||||
`Exceeded maximum live model switch retries (${MAX_LIVE_SWITCH_RETRIES})`,
|
||||
{ cause: err },
|
||||
@@ -1048,6 +1063,7 @@ async function agentCommandInternal(
|
||||
},
|
||||
});
|
||||
}
|
||||
await fallbackTrajectoryRecorder?.flush();
|
||||
throw new Error(
|
||||
`Live model switch rejected: ${sanitizeForLog(err.provider)}/${sanitizeForLog(err.model)} is not in the agent allowlist`,
|
||||
{ cause: err },
|
||||
@@ -1094,9 +1110,11 @@ async function agentCommandInternal(
|
||||
},
|
||||
});
|
||||
}
|
||||
await fallbackTrajectoryRecorder?.flush();
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
await fallbackTrajectoryRecorder?.flush();
|
||||
|
||||
// Update token+model fields in the session store.
|
||||
if (sessionStore && sessionKey) {
|
||||
|
||||
@@ -29,6 +29,41 @@ function buildErrorObservationFields(error?: string): {
|
||||
|
||||
/**
 * Final outcome attached to a fallback step.
 * NOTE(review): presumably "next_fallback" means the chain advanced to another
 * candidate, "succeeded" that a candidate completed, and "chain_exhausted" that
 * no candidate remained — confirm against `buildFallbackStepFields`.
 */
type FallbackStepOutcome = "next_fallback" | "succeeded" | "chain_exhausted";
|
||||
|
||||
/**
 * Structured fields describing one model fallback step. This is the return
 * type of `buildFallbackStepFields` and `logModelFallbackDecision`.
 */
export type ModelFallbackStepFields = {
|
||||
// Literal tag; always "fallback_step".
fallbackStepType: "fallback_step";
|
||||
// NOTE(review): tests in this change use `provider/model` strings here
// (e.g. "openai/gpt-4.1-mini") — confirm that is the only format produced.
fallbackStepFromModel: string;
|
||||
fallbackStepToModel?: string;
|
||||
fallbackStepFromFailureReason?: FailoverReason;
|
||||
fallbackStepFromFailureDetail?: string;
|
||||
// NOTE(review): the tests assert positions 1 and 2, so this looks
// 1-based — confirm.
fallbackStepChainPosition?: number;
|
||||
fallbackStepFinalOutcome: FallbackStepOutcome;
|
||||
};
|
||||
|
||||
/**
 * Parameters accepted by `logModelFallbackDecision`, describing one decision
 * taken for a fallback candidate.
 */
export type ModelFallbackDecisionParams = {
|
||||
// Kind of decision being reported for `candidate`.
decision:
|
||||
| "skip_candidate"
|
||||
| "probe_cooldown_candidate"
|
||||
| "candidate_failed"
|
||||
| "candidate_succeeded";
|
||||
runId?: string;
|
||||
requestedProvider: string;
|
||||
requestedModel: string;
|
||||
candidate: ModelCandidate;
|
||||
attempt?: number;
|
||||
total?: number;
|
||||
reason?: FailoverReason | null;
|
||||
status?: number;
|
||||
code?: string;
|
||||
error?: string;
|
||||
// When absent, the decision log line reports `next=none`.
nextCandidate?: ModelCandidate;
|
||||
isPrimary?: boolean;
|
||||
requestedModelMatched?: boolean;
|
||||
fallbackConfigured?: boolean;
|
||||
allowTransientCooldownProbe?: boolean;
|
||||
profileCount?: number;
|
||||
// `buildFallbackStepFields` reads the last entry of this list.
previousAttempts?: FallbackAttempt[];
|
||||
};
|
||||
|
||||
/**
 * Formats a candidate as a `provider/model` reference string.
 * Both parts are interpolated verbatim; no sanitizing is applied here.
 *
 * @param candidate - Candidate whose `provider` and `model` are joined.
 * @returns The string `<provider>/<model>`.
 */
function formatModelRef(candidate: ModelCandidate): string {
|
||||
return `${candidate.provider}/${candidate.model}`;
|
||||
}
|
||||
@@ -41,17 +76,7 @@ function buildFallbackStepFields(params: {
|
||||
nextCandidate?: ModelCandidate;
|
||||
attempt?: number;
|
||||
previousAttempts?: FallbackAttempt[];
|
||||
}):
|
||||
| {
|
||||
fallbackStepType: "fallback_step";
|
||||
fallbackStepFromModel: string;
|
||||
fallbackStepToModel?: string;
|
||||
fallbackStepFromFailureReason?: FailoverReason;
|
||||
fallbackStepFromFailureDetail?: string;
|
||||
fallbackStepChainPosition?: number;
|
||||
fallbackStepFinalOutcome: FallbackStepOutcome;
|
||||
}
|
||||
| undefined {
|
||||
}): ModelFallbackStepFields | undefined {
|
||||
const lastPreviousAttempt = params.previousAttempts?.at(-1);
|
||||
if (params.decision === "candidate_succeeded") {
|
||||
if (!lastPreviousAttempt) {
|
||||
@@ -89,30 +114,9 @@ function buildFallbackStepFields(params: {
|
||||
};
|
||||
}
|
||||
|
||||
export function logModelFallbackDecision(params: {
|
||||
decision:
|
||||
| "skip_candidate"
|
||||
| "probe_cooldown_candidate"
|
||||
| "candidate_failed"
|
||||
| "candidate_succeeded";
|
||||
runId?: string;
|
||||
requestedProvider: string;
|
||||
requestedModel: string;
|
||||
candidate: ModelCandidate;
|
||||
attempt?: number;
|
||||
total?: number;
|
||||
reason?: FailoverReason | null;
|
||||
status?: number;
|
||||
code?: string;
|
||||
error?: string;
|
||||
nextCandidate?: ModelCandidate;
|
||||
isPrimary?: boolean;
|
||||
requestedModelMatched?: boolean;
|
||||
fallbackConfigured?: boolean;
|
||||
allowTransientCooldownProbe?: boolean;
|
||||
profileCount?: number;
|
||||
previousAttempts?: FallbackAttempt[];
|
||||
}): void {
|
||||
export function logModelFallbackDecision(
|
||||
params: ModelFallbackDecisionParams,
|
||||
): ModelFallbackStepFields | undefined {
|
||||
const nextText = params.nextCandidate
|
||||
? `${sanitizeForLog(params.nextCandidate.provider)}/${sanitizeForLog(params.nextCandidate.model)}`
|
||||
: "none";
|
||||
@@ -172,4 +176,5 @@ export function logModelFallbackDecision(params: {
|
||||
`model fallback decision: decision=${params.decision} requested=${sanitizeForLog(params.requestedProvider)}/${sanitizeForLog(params.requestedModel)} ` +
|
||||
`candidate=${sanitizeForLog(params.candidate.provider)}/${sanitizeForLog(params.candidate.model)} reason=${reasonText}${providerErrorTypeSuffix} next=${nextText}${detailSuffix}`,
|
||||
});
|
||||
return fallbackStepFields;
|
||||
}
|
||||
|
||||
@@ -306,8 +306,15 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
.fn()
|
||||
.mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
|
||||
.mockResolvedValueOnce("fallback-ok");
|
||||
const onFallbackStep = vi.fn();
|
||||
|
||||
const fallbackResult = await runPrimaryCandidate(fallbackCfg, fallbackRun);
|
||||
const fallbackResult = await runWithModelFallback({
|
||||
cfg: fallbackCfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run: fallbackRun,
|
||||
onFallbackStep,
|
||||
});
|
||||
await logCapture.flush();
|
||||
|
||||
expect(fallbackResult.result).toBe("fallback-ok");
|
||||
@@ -369,6 +376,26 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
}),
|
||||
]),
|
||||
);
|
||||
expect(onFallbackStep).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
fallbackStepType: "fallback_step",
|
||||
fallbackStepFromModel: "openai/gpt-4.1-mini",
|
||||
fallbackStepToModel: "anthropic/claude-haiku-3-5",
|
||||
fallbackStepFromFailureReason: "rate_limit",
|
||||
fallbackStepChainPosition: 1,
|
||||
fallbackStepFinalOutcome: "next_fallback",
|
||||
}),
|
||||
);
|
||||
expect(onFallbackStep).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
fallbackStepType: "fallback_step",
|
||||
fallbackStepFromModel: "openai/gpt-4.1-mini",
|
||||
fallbackStepToModel: "anthropic/claude-haiku-3-5",
|
||||
fallbackStepFromFailureReason: "rate_limit",
|
||||
fallbackStepChainPosition: 2,
|
||||
fallbackStepFinalOutcome: "succeeded",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("probes primary model when cooldown already expired", async () => {
|
||||
|
||||
@@ -23,7 +23,11 @@ import {
|
||||
shouldUseTransientCooldownProbeSlot,
|
||||
} from "./failover-policy.js";
|
||||
import { LiveSessionModelSwitchError } from "./live-model-switch-error.js";
|
||||
import { logModelFallbackDecision } from "./model-fallback-observation.js";
|
||||
import {
|
||||
logModelFallbackDecision,
|
||||
type ModelFallbackDecisionParams,
|
||||
type ModelFallbackStepFields,
|
||||
} from "./model-fallback-observation.js";
|
||||
import type { FallbackAttempt, ModelCandidate } from "./model-fallback.types.js";
|
||||
import { modelKey, normalizeModelRef } from "./model-selection-normalize.js";
|
||||
import {
|
||||
@@ -133,6 +137,8 @@ type ModelFallbackErrorHandler = (attempt: {
|
||||
total: number;
|
||||
}) => void | Promise<void>;
|
||||
|
||||
/**
 * Callback type for the optional `onFallbackStep` parameter of
 * `runWithModelFallback`. It receives one fallback step and may be sync or
 * async.
 */
type ModelFallbackStepHandler = (step: ModelFallbackStepFields) => void | Promise<void>;
|
||||
|
||||
export type ModelFallbackResultClassification =
|
||||
| {
|
||||
message: string;
|
||||
@@ -297,7 +303,7 @@ function recordFailedCandidateAttempt(params: {
|
||||
isPrimary: boolean;
|
||||
requestedModelMatched: boolean;
|
||||
fallbackConfigured: boolean;
|
||||
}) {
|
||||
}): ModelFallbackStepFields | undefined {
|
||||
const described = describeFailoverError(params.error);
|
||||
params.attempts.push({
|
||||
provider: params.candidate.provider,
|
||||
@@ -307,7 +313,7 @@ function recordFailedCandidateAttempt(params: {
|
||||
status: described.status,
|
||||
code: described.code,
|
||||
});
|
||||
logModelFallbackDecision({
|
||||
return logModelFallbackDecision({
|
||||
decision: "candidate_failed",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.requestedProvider ?? params.candidate.provider,
|
||||
@@ -737,6 +743,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
fallbacksOverride?: string[];
|
||||
run: ModelFallbackRunFn<T>;
|
||||
onError?: ModelFallbackErrorHandler;
|
||||
onFallbackStep?: ModelFallbackStepHandler;
|
||||
classifyResult?: ModelFallbackResultClassifier<T>;
|
||||
}): Promise<ModelFallbackRunResult<T>> {
|
||||
const candidates = resolveFallbackCandidates({
|
||||
@@ -755,6 +762,20 @@ export async function runWithModelFallback<T>(params: {
|
||||
const attempts: FallbackAttempt[] = [];
|
||||
let lastError: unknown;
|
||||
const cooldownProbeUsedProviders = new Set<string>();
|
||||
// Logs a fallback decision and, when the logger returns fallback-step fields,
// forwards them to the optional `onFallbackStep` handler. The handler is
// awaited without a try/catch, so a rejection from it propagates to the caller.
const observeDecision = async (decision: ModelFallbackDecisionParams) => {
|
||||
const fallbackStep = logModelFallbackDecision(decision);
|
||||
// Only decisions that yield step fields are reported to the handler.
if (fallbackStep) {
|
||||
await params.onFallbackStep?.(fallbackStep);
|
||||
}
|
||||
};
|
||||
// Same forwarding as `observeDecision`, but for failed candidates: the step
// fields come from `recordFailedCandidateAttempt`, which also appends the
// failure to the attempts list it is given.
const observeFailedCandidate = async (
|
||||
failedAttempt: Parameters<typeof recordFailedCandidateAttempt>[0],
|
||||
) => {
|
||||
const fallbackStep = recordFailedCandidateAttempt(failedAttempt);
|
||||
if (fallbackStep) {
|
||||
await params.onFallbackStep?.(fallbackStep);
|
||||
}
|
||||
};
|
||||
|
||||
const hasFallbackCandidates = candidates.length > 1;
|
||||
|
||||
@@ -799,7 +820,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
error: decision.error,
|
||||
reason: decision.reason,
|
||||
});
|
||||
logModelFallbackDecision({
|
||||
await observeDecision({
|
||||
decision: "skip_candidate",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.provider,
|
||||
@@ -834,7 +855,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
error,
|
||||
reason: decision.reason,
|
||||
});
|
||||
logModelFallbackDecision({
|
||||
await observeDecision({
|
||||
decision: "skip_candidate",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.provider,
|
||||
@@ -858,7 +879,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
}
|
||||
}
|
||||
attemptedDuringCooldown = true;
|
||||
logModelFallbackDecision({
|
||||
await observeDecision({
|
||||
decision: "probe_cooldown_candidate",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.provider,
|
||||
@@ -888,7 +909,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
});
|
||||
if ("success" in attemptRun) {
|
||||
if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) {
|
||||
logModelFallbackDecision({
|
||||
await observeDecision({
|
||||
decision: "candidate_succeeded",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.provider,
|
||||
@@ -956,7 +977,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
model: candidate.model,
|
||||
});
|
||||
lastError = switchNormalized;
|
||||
recordFailedCandidateAttempt({
|
||||
await observeFailedCandidate({
|
||||
attempts,
|
||||
candidate,
|
||||
error: switchNormalized,
|
||||
@@ -982,7 +1003,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
}
|
||||
|
||||
lastError = isKnownFailover ? normalized : err;
|
||||
recordFailedCandidateAttempt({
|
||||
await observeFailedCandidate({
|
||||
attempts,
|
||||
candidate,
|
||||
error: normalized,
|
||||
|
||||
Reference in New Issue
Block a user