fix(thinking): default implicit reasoning models to medium (#70601)

* fix(thinking): default implicit reasoning models to medium

* fix(thinking): preserve reasoning metadata during default resolution
This commit is contained in:
Tak Hoffman
2026-04-23 07:55:47 -05:00
committed by GitHub
parent bc01cbb8a2
commit 87eee6e640
10 changed files with 282 additions and 21 deletions

View File

@@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Thinking defaults/status: raise the implicit default thinking level for reasoning-capable models from the legacy `off`/`low` fallback to a safe, provider-supported `medium` equivalent when no explicit config default is set; preserve configured-model reasoning metadata when the runtime catalog loads empty; and make `/status` report the same resolved default as the runtime.
- Gateway/model pricing: fetch OpenRouter and LiteLLM pricing asynchronously at startup and extend catalog fetch timeouts to 30 seconds, reducing noisy timeout warnings during slow upstream responses.
- Status: show `Fast` in `/status` when fast mode is enabled, including config/default-derived fast mode, and omit it when disabled.
- OpenAI/image generation: detect Azure OpenAI-style image endpoints, use Azure `api-key` auth plus deployment-scoped image URLs, and honor `AZURE_OPENAI_API_VERSION` so image generation and edits work against Azure-hosted OpenAI resources. (#70570) Thanks @zhanggpcsu.

View File

@@ -1363,10 +1363,10 @@ describe("model-selection", () => {
expect(resolveAnthropicOpus47Thinking(cfg)).toBe("off");
});
it("falls back to low when no provider thinking hook is active", () => {
it("falls back to medium when no provider thinking hook is active", () => {
const cfg = {} as OpenClawConfig;
expect(resolveAnthropicOpusThinking(cfg)).toBe("low");
expect(resolveAnthropicOpusThinking(cfg)).toBe("medium");
expect(
resolveThinkingDefault({
@@ -1382,7 +1382,7 @@ describe("model-selection", () => {
},
],
}),
).toBe("low");
).toBe("medium");
});
});
});

View File

@@ -1,4 +1,4 @@
import { resolveThinkingDefaultForModel } from "../auto-reply/thinking.shared.js";
import { resolveThinkingDefaultForModel } from "../auto-reply/thinking.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import {
normalizeLowercaseStringOrEmpty,

View File

@@ -141,6 +141,7 @@ function createModelCatalogModuleMock() {
provider: "openai",
id: "gpt-5.4",
name: "GPT-5.4",
reasoning: true,
contextWindow: 400000,
},
],
@@ -940,6 +941,104 @@ describe("session_status tool", () => {
}
});
it("uses the implicit model thinking default when no config default is set", async () => {
  // Seed exactly one session entry for the agent under test.
  resetSessionStore({
    "agent:kira:main": {
      sessionId: "agent-thinking-implicit",
      updatedAt: 10,
    },
  });
  const previousConfig = mockConfig;
  try {
    // No thinkingDefault anywhere in config: the reasoning-capable model's
    // implicit default ("medium") should surface in the status payload.
    mockConfig = {
      session: { mainKey: "main", scope: "per-sender" },
      agents: {
        defaults: {
          model: { primary: "openai/gpt-5.4" },
          models: {},
        },
        list: [
          {
            id: "kira",
            model: "openai/gpt-5.4",
          },
        ],
      },
      tools: {
        agentToAgent: { enabled: false },
      },
    };
    const statusTool = getSessionStatusTool("agent:kira:main");
    await statusTool.execute("call-agent-thinking-implicit", {});
    expect(buildStatusMessageMock).toHaveBeenCalledWith(
      expect.objectContaining({
        agentId: "kira",
        agent: expect.objectContaining({
          thinkingDefault: "medium",
        }),
      }),
    );
  } finally {
    // Restore the shared module-level config for subsequent tests.
    mockConfig = previousConfig;
  }
});
it("hydrates runtime catalog metadata for status when configured model metadata omits reasoning", async () => {
  // Seed exactly one session entry for the agent under test.
  resetSessionStore({
    "agent:kira:main": {
      sessionId: "agent-thinking-runtime-hydration",
      updatedAt: 10,
    },
  });
  const previousConfig = mockConfig;
  try {
    // The configured provider entry carries no `reasoning` flag, so the
    // status path must fall back to the runtime catalog's metadata
    // (mocked to mark gpt-5.4 as reasoning-capable) to resolve "medium".
    mockConfig = {
      session: { mainKey: "main", scope: "per-sender" },
      agents: {
        defaults: {
          model: { primary: "openai/gpt-5.4" },
          models: {},
        },
        list: [
          {
            id: "kira",
            model: "openai/gpt-5.4",
          },
        ],
      },
      models: {
        providers: {
          openai: {
            baseUrl: "https://api.openai.com/v1",
            models: [{ id: "gpt-5.4", name: "GPT-5.4" }],
          },
        },
      },
      tools: {
        agentToAgent: { enabled: false },
      },
    };
    const statusTool = getSessionStatusTool("agent:kira:main");
    await statusTool.execute("call-agent-thinking-runtime-hydration", {});
    expect(buildStatusMessageMock).toHaveBeenCalledWith(
      expect.objectContaining({
        agentId: "kira",
        agent: expect.objectContaining({
          thinkingDefault: "medium",
        }),
      }),
    );
  } finally {
    // Restore the shared module-level config for subsequent tests.
    mockConfig = previousConfig;
  }
});
it("falls back to origin.provider when resolving queue settings", async () => {
resetSessionStore({
main: {

View File

@@ -28,10 +28,12 @@ import { formatTaskStatusDetail, formatTaskStatusTitle } from "../../tasks/task-
import { loadModelCatalog } from "../model-catalog.js";
import {
buildAllowedModelSet,
buildConfiguredModelCatalog,
buildModelAliasIndex,
modelKey,
resolveDefaultModelForAgent,
resolveModelRefFromString,
resolveThinkingDefault,
} from "../model-selection.js";
import {
describeSessionStatusTool,
@@ -561,7 +563,32 @@ export function createSessionStatusTool(opts?: {
resolvedVerboseLevel: (statusSessionEntry.verboseLevel ?? "off") as VerboseLevel,
resolvedReasoningLevel: (statusSessionEntry.reasoningLevel ?? "off") as ReasoningLevel,
resolvedElevatedLevel: statusSessionEntry.elevatedLevel as ElevatedLevel | undefined,
resolveDefaultThinkingLevel: async () => cfg.agents?.defaults?.thinkingDefault,
resolveDefaultThinkingLevel: async () => {
const configuredCatalog = buildConfiguredModelCatalog({ cfg });
const configuredSelectedEntry = configuredCatalog.find(
(entry) => entry.provider === providerForCard && entry.id === defaultModelForCard,
);
const shouldHydrateRuntimeCatalog =
configuredCatalog.length === 0 ||
!configuredSelectedEntry ||
configuredSelectedEntry.reasoning === undefined;
const runtimeCatalog = shouldHydrateRuntimeCatalog
? await loadModelCatalog({ config: cfg })
: undefined;
const runtimeSelectedEntry = runtimeCatalog?.find(
(entry) => entry.provider === providerForCard && entry.id === defaultModelForCard,
);
const catalog =
runtimeSelectedEntry || configuredCatalog.length === 0
? (runtimeCatalog ?? configuredCatalog)
: configuredCatalog;
return resolveThinkingDefault({
cfg,
provider: providerForCard,
model: defaultModelForCard,
catalog,
});
},
isGroup,
defaultGroupActivation: () => "mention",
taskLineOverride: taskLine,

View File

@@ -75,6 +75,77 @@ describe("createModelSelectionState catalog loading", () => {
expect(loadModelCatalog).not.toHaveBeenCalled();
});
it("uses the implicit model default when no global thinking default is configured", async () => {
  vi.mocked(loadModelCatalog).mockClear();
  // The allowlist carries full metadata for the configured model, and no
  // thinkingDefault is set anywhere — the implicit default should apply.
  const cfg = {
    agents: {
      defaults: {
        models: {
          "openai-codex/gpt-5.4": {},
        },
      },
    },
    models: {
      providers: {
        "openai-codex": {
          baseUrl: "https://api.openai.com/v1",
          models: [makeConfiguredModel()],
        },
      },
    },
  } as OpenClawConfig;
  const selectionState = await createModelSelectionState({
    cfg,
    agentCfg: cfg.agents?.defaults,
    defaultProvider: "openai-codex",
    defaultModel: "gpt-5.4",
    provider: "openai-codex",
    model: "gpt-5.4",
    hasModelDirective: false,
  });
  await expect(selectionState.resolveDefaultThinkingLevel()).resolves.toBe("medium");
  // Configured metadata was sufficient — no runtime catalog fetch expected.
  expect(loadModelCatalog).not.toHaveBeenCalled();
});
it("hydrates runtime catalog metadata when the configured allowlist entry lacks reasoning", async () => {
  vi.mocked(loadModelCatalog).mockClear();
  // The runtime catalog supplies the `reasoning` flag that the configured
  // entry omits below, so one catalog load is expected.
  vi.mocked(loadModelCatalog).mockResolvedValueOnce([
    { provider: "openai-codex", id: "gpt-5.4", name: "GPT-5.4", reasoning: true },
  ]);
  const cfg = {
    agents: {
      defaults: {
        models: {
          "openai-codex/gpt-5.4": {},
        },
      },
    },
    models: {
      providers: {
        "openai-codex": {
          baseUrl: "https://api.openai.com/v1",
          models: [makeConfiguredModel({ reasoning: undefined })],
        },
      },
    },
  } as OpenClawConfig;
  const selectionState = await createModelSelectionState({
    cfg,
    agentCfg: cfg.agents?.defaults,
    defaultProvider: "openai-codex",
    defaultModel: "gpt-5.4",
    provider: "openai-codex",
    model: "gpt-5.4",
    hasModelDirective: false,
  });
  await expect(selectionState.resolveDefaultThinkingLevel()).resolves.toBe("medium");
  expect(loadModelCatalog).toHaveBeenCalledOnce();
});
it("prefers per-agent thinkingDefault over model and global defaults", async () => {
vi.mocked(loadModelCatalog).mockClear();
const cfg = {

View File

@@ -457,11 +457,26 @@ export async function createModelSelectionState(params: {
defaultThinkingLevel = explicitThinkingDefault;
return defaultThinkingLevel;
}
if (!modelCatalog) {
let catalogForThinking =
modelCatalog && modelCatalog.length > 0 ? modelCatalog : allowedModelCatalog;
const selectedCatalogEntry = catalogForThinking?.find(
(entry) => entry.provider === provider && entry.id === model,
);
const shouldHydrateRuntimeCatalog =
!modelCatalog && (!selectedCatalogEntry || selectedCatalogEntry.reasoning === undefined);
if (shouldHydrateRuntimeCatalog) {
modelCatalog = await (await loadModelCatalogRuntime()).loadModelCatalog({ config: cfg });
logStage("catalog-loaded-for-thinking", `entries=${modelCatalog.length}`);
const runtimeSelectedEntry = modelCatalog.find(
(entry) => entry.provider === provider && entry.id === model,
);
catalogForThinking =
runtimeSelectedEntry || !catalogForThinking || catalogForThinking.length === 0
? modelCatalog.length > 0
? modelCatalog
: allowedModelCatalog
: allowedModelCatalog;
}
const catalogForThinking = modelCatalog.length > 0 ? modelCatalog : allowedModelCatalog;
const resolved = resolveThinkingDefault({
cfg,
provider,

View File

@@ -259,16 +259,49 @@ describe("resolveThinkingDefaultForModel", () => {
).toBe("off");
});
it("defaults reasoning-capable catalog models to low", () => {
it("defaults reasoning-capable catalog models to medium", () => {
  // A catalog entry flagged `reasoning: true` with no explicit default
  // should resolve to the implicit "medium" level.
  const resolved = resolveThinkingDefaultForModel({
    provider: "openai",
    model: "gpt-5.4",
    catalog: [{ provider: "openai", id: "gpt-5.4", reasoning: true }],
  });
  expect(resolved).toBe("medium");
});
it("remaps implicit reasoning defaults to the strongest supported level at or below medium", () => {
  // Binary-thinking providers only expose on/off, so the implicit "medium"
  // default must be remapped down to the supported "low" level.
  providerRuntimeMocks.resolveProviderBinaryThinking.mockImplementation(
    (args) => args.provider === "demo-binary",
  );
  const resolved = resolveThinkingDefaultForModel({
    provider: "demo-binary",
    model: "demo-model",
    catalog: [{ provider: "demo-binary", id: "demo-model", reasoning: true }],
  });
  expect(resolved).toBe("low");
});
it("keeps catalog reasoning context when remapping implicit reasoning defaults", () => {
  // The contextual profile only exposes "medium" when the catalog flags
  // reasoning support — the remap step must carry that context through.
  providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation((args) => {
    if (args.provider !== "demo-contextual") {
      return undefined;
    }
    return args.context.reasoning
      ? { levels: [{ id: "off" }, { id: "low" }, { id: "medium" }] }
      : { levels: [{ id: "off" }] };
  });
  const resolved = resolveThinkingDefaultForModel({
    provider: "demo-contextual",
    model: "demo-model",
    catalog: [{ provider: "demo-contextual", id: "demo-model", reasoning: true }],
  });
  expect(resolved).toBe("medium");
});
it("defaults to off when no adaptive or reasoning hint is present", () => {
expect(
resolveThinkingDefaultForModel({

View File

@@ -230,7 +230,11 @@ export function resolveThinkingDefaultForModel(params: {
if (profile.defaultLevel) {
return profile.defaultLevel;
}
return resolveThinkingDefaultForModelFallback(params);
const fallback = resolveThinkingDefaultForModelFallback(params);
if (fallback === "off") {
return "off";
}
return resolveSupportedThinkingLevelFromProfile(profile, "medium");
}
export function resolveLargestSupportedThinkingLevel(
@@ -252,20 +256,27 @@ export function isThinkingLevelSupported(params: {
return supportsThinkingLevel(params.provider, params.model, params.level);
}
/**
 * Maps a requested thinking level onto what the provider profile supports.
 *
 * Resolution order:
 * 1. the requested level itself, when the profile lists it verbatim;
 * 2. the strongest non-"off" level whose rank does not exceed the request;
 * 3. the strongest non-"off" level overall (NOTE(review): this overshoots
 *    the request when only higher levels exist — confirm that is intended
 *    rather than picking the weakest level above the request);
 * 4. "off" when the profile has no non-"off" levels at all.
 */
function resolveSupportedThinkingLevelFromProfile(
  profile: ResolvedThinkingProfile,
  level: ThinkLevel,
): ThinkLevel {
  const supportsRequested = profile.levels.some((candidate) => candidate.id === level);
  if (supportsRequested) {
    return level;
  }
  const targetRank = THINKING_LEVEL_RANKS[level];
  // Sort a copy descending by rank so the first match is the strongest.
  const byRankDescending = [...profile.levels].sort((a, b) => b.rank - a.rank);
  const atOrBelowTarget = byRankDescending.find(
    (candidate) => candidate.id !== "off" && candidate.rank <= targetRank,
  );
  if (atOrBelowTarget) {
    return atOrBelowTarget.id;
  }
  const strongestNonOff = byRankDescending.find((candidate) => candidate.id !== "off");
  return strongestNonOff ? strongestNonOff.id : "off";
}
export function resolveSupportedThinkingLevel(params: {
provider?: string | null;
model?: string | null;
level: ThinkLevel;
}): ThinkLevel {
const profile = resolveThinkingProfile({ provider: params.provider, model: params.model });
if (profile.levels.some((entry) => entry.id === params.level)) {
return params.level;
}
const requestedRank = THINKING_LEVEL_RANKS[params.level];
const ranked = profile.levels.toSorted((a, b) => b.rank - a.rank);
return (
ranked.find((level) => level.id !== "off" && level.rank <= requestedRank)?.id ??
ranked.find((level) => level.id !== "off")?.id ??
"off"
);
return resolveSupportedThinkingLevelFromProfile(profile, params.level);
}

View File

@@ -288,6 +288,9 @@ export async function buildStatusText(params: BuildStatusTextParams): Promise<st
}).enabled;
const agentFallbacksOverride = resolveAgentModelFallbacksOverride(cfg, statusAgentId);
const { buildStatusMessage } = await loadStatusMessageRuntime();
const explicitThinkingDefault =
(agentConfig?.thinkingDefault as ThinkLevel | undefined) ??
(agentDefaults.thinkingDefault as ThinkLevel | undefined);
return buildStatusMessage({
config: cfg,
agent: {
@@ -298,7 +301,7 @@ export async function buildStatusText(params: BuildStatusTextParams): Promise<st
...(agentFallbacksOverride === undefined ? {} : { fallbacks: agentFallbacksOverride }),
},
...(typeof contextTokens === "number" && contextTokens > 0 ? { contextTokens } : {}),
thinkingDefault: agentConfig?.thinkingDefault ?? agentDefaults.thinkingDefault,
thinkingDefault: explicitThinkingDefault,
verboseDefault: agentDefaults.verboseDefault,
elevatedDefault: agentDefaults.elevatedDefault,
},
@@ -313,7 +316,8 @@ export async function buildStatusText(params: BuildStatusTextParams): Promise<st
sessionScope,
sessionStorePath: storePath,
groupActivation,
resolvedThink: resolvedThinkLevel ?? (await resolveDefaultThinkingLevel()),
resolvedThink:
resolvedThinkLevel ?? explicitThinkingDefault ?? (await resolveDefaultThinkingLevel()),
resolvedFast: effectiveFastMode,
resolvedVerbose: resolvedVerboseLevel,
resolvedReasoning: resolvedReasoningLevel,