fix(agents): classify terminal results for fallback

This commit is contained in:
Peter Steinberger
2026-04-28 02:35:46 +01:00
parent 82ca94fdd7
commit 13ff3142bd
3 changed files with 75 additions and 2 deletions

View File

@@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai
- CLI/status: show skipped fast-path memory checks as `not checked` and report active custom memory plugin runtime status from `status --json --all` without requiring built-in `agents.defaults.memorySearch`, so plugins such as memory-lancedb-pro and memory-cms no longer look unavailable when their own runtime is healthy. Fixes #56968. Thanks @Tony-ooo and @aderius.
- Gateway/channels: record and log unexpected clean channel monitor exits so channels that return without throwing no longer appear stopped with no error. Fixes #73099. Thanks @balaji1968-kingler.
- Channels/Telegram: centralize polling update tracking so accepted offsets remain durable across restarts, same-process handler failures can still retry, and slow offset writes cannot overwrite newer accepted watermarks. Refs #73115. Thanks @vdruts.
- Agents/models: classify empty, reasoning-only, and planning-only terminal agent runs before accepting a model fallback candidate, so runs against invalid or incompatible models can advance to the next configured fallback instead of ending in a 30-second terminal failure. Fixes #73115. Thanks @vdruts.
- Memory/LanceDB: let embedding config use provider-backed auth profiles, environment credentials, or provider config without a separate plugin `embedding.apiKey`, so OAuth-capable embedding providers can power auto-recall/capture. Fixes #68950. Thanks @malshaalan-ai.
- Plugins/hooks: time out never-settling `agent_end` observation hooks after 30 seconds and log the plugin failure, so hung embedding endpoints no longer leave memory capture silently pending forever. Fixes #65544. Thanks @ghoc0099.
- Gateway/config: serve runtime config schemas from the current plugin metadata snapshot and generated bundled channel schema metadata instead of rebuilding plugin channel config modules on every `config.get`/`config.schema`, preventing idle plugin-discovery CPU churn after upgrades. Fixes #73088. Thanks @sleitor and @geovansb.

View File

@@ -356,6 +356,13 @@ type FallbackRunnerParams = {
provider: string;
model: string;
run: (provider: string, model: string) => Promise<unknown>;
classifyResult?: (params: {
provider: string;
model: string;
result: unknown;
attempt: number;
total: number;
}) => unknown;
};
type ModelSwitchOptions = ConstructorParameters<typeof LiveSessionModelSwitchError>[0];
@@ -372,6 +379,19 @@ function makeSuccessResult(provider: string, model: string) {
};
}
// Builds a terminal agent-attempt result that ended cleanly ("end_turn",
// not aborted) but produced zero payloads, pre-tagged with the "empty"
// harness classification so fallback-classification tests can feed it in.
function makeEmptyResult(provider: string, model: string) {
  const meta = {
    durationMs: 30_000,
    aborted: false,
    stopReason: "end_turn",
    agentHarnessResultClassification: "empty",
    agentMeta: { provider, model },
  };
  return { payloads: [], meta };
}
function setupModelSwitchRetry(switchOptions: ModelSwitchOptions) {
let invocation = 0;
state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => {
@@ -556,6 +576,50 @@ describe("agentCommand LiveSessionModelSwitchError retry", () => {
expect(state.clearSessionAuthProfileOverrideMock).not.toHaveBeenCalled();
});
// Verifies that the fallback runner's classifyResult hook sees an "empty"
// terminal primary run (reason "format" / code "empty_result") and that the
// second attempt is dispatched as a fallback retry against the next model.
it("classifies empty embedded run results before model fallback accepts them", async () => {
  let seenClassification: unknown;
  state.runWithModelFallbackMock.mockImplementation(
    async (params: FallbackRunnerParams) => {
      // Run the primary attempt, then ask the hook how it classifies it.
      const firstResult = await params.run(params.provider, params.model);
      seenClassification = await params.classifyResult?.({
        provider: params.provider,
        model: params.model,
        result: firstResult,
        attempt: 1,
        total: 2,
      });
      // Simulate advancing to the configured fallback model.
      const retryResult = await params.run("openai", "gpt-5.4");
      const failedPrimaryAttempt = {
        provider: params.provider,
        model: params.model,
        reason: "format",
        code: "empty_result",
      };
      return {
        result: retryResult,
        provider: "openai",
        model: "gpt-5.4",
        attempts: [failedPrimaryAttempt],
      };
    },
  );
  state.runAgentAttemptMock
    .mockResolvedValueOnce(makeEmptyResult("anthropic", "claude"))
    .mockResolvedValueOnce(makeSuccessResult("openai", "gpt-5.4"));
  await runBasicAgentCommand();
  expect(seenClassification).toMatchObject({
    reason: "format",
    code: "empty_result",
  });
  expect(state.runAgentAttemptMock).toHaveBeenCalledTimes(2);
  const secondAttemptArgs = state.runAgentAttemptMock.mock.calls[1]?.[0];
  expect(secondAttemptArgs).toMatchObject({
    providerOverride: "openai",
    modelOverride: "gpt-5.4",
    isFallbackRetry: true,
  });
});
it("updates hasSessionModelOverride for fallback resolution after switch", async () => {
setupModelSwitchRetry({
provider: "openai",

View File

@@ -60,6 +60,7 @@ import {
resolveDefaultModelForAgent,
resolveThinkingDefault,
} from "./model-selection.js";
import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js";
import { resolveProviderIdForAuth } from "./provider-auth-aliases.js";
import { normalizeSpawnedRunMetadata } from "./spawned-context.js";
import { resolveAgentTimeoutMs } from "./timeout.js";
@@ -67,6 +68,7 @@ import { ensureAgentWorkspace } from "./workspace.js";
const log = createSubsystemLogger("agents/agent-command");
type AttemptExecutionRuntime = typeof import("./command/attempt-execution.runtime.js");
type AgentAttemptResult = Awaited<ReturnType<AttemptExecutionRuntime["runAgentAttempt"]>>;
type AcpManagerRuntime = typeof import("../acp/control-plane/manager.js");
type AcpPolicyRuntime = typeof import("../acp/policy.js");
type AcpRuntimeErrorsRuntime = typeof import("../acp/runtime/errors.js");
@@ -902,7 +904,7 @@ async function agentCommandInternal(
opts.replyChannel ?? opts.channel,
);
let result: Awaited<ReturnType<AttemptExecutionRuntime["runAgentAttempt"]>>;
let result: AgentAttemptResult;
let fallbackProvider = provider;
let fallbackModel = model;
const MAX_LIVE_SWITCH_RETRIES = 5;
@@ -919,13 +921,19 @@ async function agentCommandInternal(
});
let fallbackAttemptIndex = 0;
const fallbackResult = await runWithModelFallback({
const fallbackResult = await runWithModelFallback<AgentAttemptResult>({
cfg,
provider,
model,
runId,
agentDir,
fallbacksOverride: effectiveFallbacksOverride,
classifyResult: ({ provider, model, result }) =>
classifyEmbeddedPiRunResultForModelFallback({
provider,
model,
result,
}),
run: async (providerOverride, modelOverride, runOptions) => {
const isFallbackRetry = fallbackAttemptIndex > 0;
fallbackAttemptIndex += 1;