fix(agents): classify terminal results for fallback

This commit is contained in:
Peter Steinberger
2026-04-28 02:35:46 +01:00
parent 82ca94fdd7
commit 13ff3142bd
3 changed files with 75 additions and 2 deletions

View File

@@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai
- CLI/status: show skipped fast-path memory checks as `not checked` and report active custom memory plugin runtime status from `status --json --all` without requiring built-in `agents.defaults.memorySearch`, so plugins such as memory-lancedb-pro and memory-cms no longer look unavailable when their own runtime is healthy. Fixes #56968. Thanks @Tony-ooo and @aderius.
- Gateway/channels: record and log unexpected clean channel monitor exits so channels that return without throwing no longer appear stopped with no error. Fixes #73099. Thanks @balaji1968-kingler.
- Channels/Telegram: centralize polling update tracking so accepted offsets remain durable across restarts, same-process handler failures can still retry, and slow offset writes cannot overwrite newer accepted watermarks. Refs #73115. Thanks @vdruts.
- Agents/models: classify empty, reasoning-only, and planning-only terminal agent runs before accepting a model fallback candidate, so runs against invalid or incompatible models can advance to the next configured fallback instead of ending in a 30-second terminal failure. Fixes #73115. Thanks @vdruts.
- Memory/LanceDB: let embedding config use provider-backed auth profiles, environment credentials, or provider config without a separate plugin `embedding.apiKey`, so OAuth-capable embedding providers can power auto-recall/capture. Fixes #68950. Thanks @malshaalan-ai.
- Plugins/hooks: time out never-settling `agent_end` observation hooks after 30 seconds and log the plugin failure, so hung embedding endpoints no longer leave memory capture silently pending forever. Fixes #65544. Thanks @ghoc0099.
- Gateway/config: serve runtime config schemas from the current plugin metadata snapshot and generated bundled channel schema metadata instead of rebuilding plugin channel config modules on every `config.get`/`config.schema`, preventing idle plugin-discovery CPU churn after upgrades. Fixes #73088. Thanks @sleitor and @geovansb.

View File

@@ -356,6 +356,13 @@ type FallbackRunnerParams = {
provider: string;
model: string;
run: (provider: string, model: string) => Promise<unknown>;
classifyResult?: (params: {
provider: string;
model: string;
result: unknown;
attempt: number;
total: number;
}) => unknown;
};
type ModelSwitchOptions = ConstructorParameters<typeof LiveSessionModelSwitchError>[0];
@@ -372,6 +379,19 @@ function makeSuccessResult(provider: string, model: string) {
};
}
// Builds a terminal agent-attempt result that ended cleanly ("end_turn",
// not aborted) but produced zero payloads, pre-tagged with the "empty"
// harness classification so fallback-classification tests can feed it in.
function makeEmptyResult(provider: string, model: string) {
  const meta = {
    durationMs: 30_000,
    aborted: false,
    stopReason: "end_turn",
    agentHarnessResultClassification: "empty",
    agentMeta: { provider, model },
  };
  return { payloads: [], meta };
}
function setupModelSwitchRetry(switchOptions: ModelSwitchOptions) {
let invocation = 0;
state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => {
@@ -556,6 +576,50 @@ describe("agentCommand LiveSessionModelSwitchError retry", () => {
expect(state.clearSessionAuthProfileOverrideMock).not.toHaveBeenCalled();
});
// Verifies that the fallback runner's classifyResult hook sees an "empty"
// terminal primary run (reason "format" / code "empty_result") and that the
// second attempt is dispatched as a fallback retry against the next model.
it("classifies empty embedded run results before model fallback accepts them", async () => {
  let seenClassification: unknown;
  state.runWithModelFallbackMock.mockImplementation(
    async (params: FallbackRunnerParams) => {
      // Run the primary attempt, then ask the hook how it classifies it.
      const firstResult = await params.run(params.provider, params.model);
      seenClassification = await params.classifyResult?.({
        provider: params.provider,
        model: params.model,
        result: firstResult,
        attempt: 1,
        total: 2,
      });
      // Simulate advancing to the configured fallback model.
      const retryResult = await params.run("openai", "gpt-5.4");
      const failedPrimaryAttempt = {
        provider: params.provider,
        model: params.model,
        reason: "format",
        code: "empty_result",
      };
      return {
        result: retryResult,
        provider: "openai",
        model: "gpt-5.4",
        attempts: [failedPrimaryAttempt],
      };
    },
  );
  state.runAgentAttemptMock
    .mockResolvedValueOnce(makeEmptyResult("anthropic", "claude"))
    .mockResolvedValueOnce(makeSuccessResult("openai", "gpt-5.4"));
  await runBasicAgentCommand();
  expect(seenClassification).toMatchObject({
    reason: "format",
    code: "empty_result",
  });
  expect(state.runAgentAttemptMock).toHaveBeenCalledTimes(2);
  const secondAttemptArgs = state.runAgentAttemptMock.mock.calls[1]?.[0];
  expect(secondAttemptArgs).toMatchObject({
    providerOverride: "openai",
    modelOverride: "gpt-5.4",
    isFallbackRetry: true,
  });
});
it("updates hasSessionModelOverride for fallback resolution after switch", async () => {
setupModelSwitchRetry({
provider: "openai",

View File

@@ -60,6 +60,7 @@ import {
resolveDefaultModelForAgent,
resolveThinkingDefault,
} from "./model-selection.js";
import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js";
import { resolveProviderIdForAuth } from "./provider-auth-aliases.js";
import { normalizeSpawnedRunMetadata } from "./spawned-context.js";
import { resolveAgentTimeoutMs } from "./timeout.js";
@@ -67,6 +68,7 @@ import { ensureAgentWorkspace } from "./workspace.js";
const log = createSubsystemLogger("agents/agent-command");
type AttemptExecutionRuntime = typeof import("./command/attempt-execution.runtime.js");
type AgentAttemptResult = Awaited<ReturnType<AttemptExecutionRuntime["runAgentAttempt"]>>;
type AcpManagerRuntime = typeof import("../acp/control-plane/manager.js");
type AcpPolicyRuntime = typeof import("../acp/policy.js");
type AcpRuntimeErrorsRuntime = typeof import("../acp/runtime/errors.js");
@@ -902,7 +904,7 @@ async function agentCommandInternal(
opts.replyChannel ?? opts.channel,
);
let result: Awaited<ReturnType<AttemptExecutionRuntime["runAgentAttempt"]>>;
let result: AgentAttemptResult;
let fallbackProvider = provider;
let fallbackModel = model;
const MAX_LIVE_SWITCH_RETRIES = 5;
@@ -919,13 +921,19 @@ async function agentCommandInternal(
});
let fallbackAttemptIndex = 0;
const fallbackResult = await runWithModelFallback({
const fallbackResult = await runWithModelFallback<AgentAttemptResult>({
cfg,
provider,
model,
runId,
agentDir,
fallbacksOverride: effectiveFallbacksOverride,
classifyResult: ({ provider, model, result }) =>
classifyEmbeddedPiRunResultForModelFallback({
provider,
model,
result,
}),
run: async (providerOverride, modelOverride, runOptions) => {
const isFallbackRetry = fallbackAttemptIndex > 0;
fallbackAttemptIndex += 1;