refactor(reasoning): unify thinking precedence resolution

This commit is contained in:
Peter Steinberger
2026-03-02 04:09:59 +00:00
parent 051fba6995
commit 30ec0139a2
11 changed files with 382 additions and 89 deletions

View File

@@ -0,0 +1,15 @@
import type { ModelCatalogEntry } from "./model-catalog.js";
type RequiredModelCatalogFields = Pick<ModelCatalogEntry, "provider" | "id" | "name">;
/**
 * Builds a `ModelCatalogEntry` fixture from its three identifying fields and
 * layers any remaining catalog fields on top.
 *
 * @param required - The `provider`, `id`, and `name` of the entry.
 * @param optional - Any other catalog fields to include (for example `reasoning`).
 * @returns A new object holding the three required fields plus everything in `optional`.
 */
export function makeModelCatalogEntry(
  required: RequiredModelCatalogFields,
  optional?: Omit<ModelCatalogEntry, keyof RequiredModelCatalogFields>,
): ModelCatalogEntry {
  // Copy only the identifying fields so extra properties on `required` are not carried over.
  const { provider, id, name } = required;
  return { provider, id, name, ...optional };
}

View File

@@ -1,6 +1,7 @@
import { describe, it, expect, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resetLogger, setLoggerOverride } from "../logging/logger.js";
import { makeModelCatalogEntry } from "./model-catalog.test-helpers.js";
import {
buildAllowedModelSet,
inferUniqueProviderFromConfiguredModels,
@@ -11,6 +12,8 @@ import {
modelKey,
resolveAllowedModelRef,
resolveConfiguredModelRef,
resolveModelThinkingDefault,
supportsReasoningModel,
resolveThinkingDefault,
resolveModelRefFromString,
} from "./model-selection.js";
@@ -257,8 +260,16 @@ describe("model-selection", () => {
} as OpenClawConfig;
const catalog = [
{ provider: "anthropic", id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5" },
{ provider: "openai", id: "gpt-5.2", name: "gpt-5.2" },
makeModelCatalogEntry({
provider: "anthropic",
id: "claude-sonnet-4-5",
name: "Claude Sonnet 4.5",
}),
makeModelCatalogEntry({
provider: "openai",
id: "gpt-5.2",
name: "gpt-5.2",
}),
];
const result = buildAllowedModelSet({
@@ -270,7 +281,11 @@ describe("model-selection", () => {
expect(result.allowAny).toBe(false);
expect(result.allowedKeys.has("anthropic/claude-sonnet-4-6")).toBe(true);
expect(result.allowedCatalog).toEqual([
{ provider: "anthropic", id: "claude-sonnet-4-6", name: "claude-sonnet-4-6" },
makeModelCatalogEntry({
provider: "anthropic",
id: "claude-sonnet-4-6",
name: "claude-sonnet-4-6",
}),
]);
});
});
@@ -289,8 +304,16 @@ describe("model-selection", () => {
} as OpenClawConfig;
const catalog = [
{ provider: "anthropic", id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5" },
{ provider: "openai", id: "gpt-5.2", name: "gpt-5.2" },
makeModelCatalogEntry({
provider: "anthropic",
id: "claude-sonnet-4-5",
name: "Claude Sonnet 4.5",
}),
makeModelCatalogEntry({
provider: "openai",
id: "gpt-5.2",
name: "gpt-5.2",
}),
];
const result = resolveAllowedModelRef({
@@ -504,6 +527,75 @@ describe("model-selection", () => {
).toBe("high");
});
});
describe("supportsReasoningModel", () => {
  // Both cases query the same provider/model pair; only the catalog differs.
  const provider = "openrouter";
  const model = "x-ai/grok-4.1-fast";

  it("detects reasoning support from provider/model catalog entry", () => {
    const catalog = [{ provider, id: model, name: "Grok 4.1 Fast", reasoning: true }];

    const supported = supportsReasoningModel({ provider, model, catalog });

    expect(supported).toBe(true);
  });

  it("returns false when model is missing from catalog", () => {
    const supported = supportsReasoningModel({ provider, model, catalog: [] });

    expect(supported).toBe(false);
  });
});
describe("resolveModelThinkingDefault", () => {
  it("returns model-derived low thinking when reasoning is supported", () => {
    const reasoningCatalog = [
      {
        provider: "openrouter",
        id: "x-ai/grok-4.1-fast",
        name: "Grok 4.1 Fast",
        reasoning: true,
      },
    ];

    const thinking = resolveModelThinkingDefault({
      cfg: {} as OpenClawConfig,
      provider: "openrouter",
      model: "x-ai/grok-4.1-fast",
      catalog: reasoningCatalog,
    });

    expect(thinking).toBe("low");
  });

  it("returns undefined when no model-level default applies", () => {
    // A global thinkingDefault is configured here, yet the model-level resolver
    // is expected to report nothing for a non-reasoning model.
    const cfgWithGlobalDefault = {
      agents: { defaults: { thinkingDefault: "high" } },
    } as OpenClawConfig;
    const nonReasoningCatalog = [
      {
        provider: "openai",
        id: "gpt-4o-mini",
        name: "GPT-4o mini",
        reasoning: false,
      },
    ];

    const thinking = resolveModelThinkingDefault({
      cfg: cfgWithGlobalDefault,
      provider: "openai",
      model: "gpt-4o-mini",
      catalog: nonReasoningCatalog,
    });

    expect(thinking).toBeUndefined();
  });
});
});
describe("normalizeModelSelection", () => {

View File

@@ -525,6 +525,24 @@ export function resolveThinkingDefault(params: {
model: string;
catalog?: ModelCatalogEntry[];
}): ThinkLevel {
return (
resolveModelThinkingDefault({
cfg: params.cfg,
provider: params.provider,
model: params.model,
catalog: params.catalog,
}) ??
params.cfg.agents?.defaults?.thinkingDefault ??
"off"
);
}
export function resolveModelThinkingDefault(params: {
cfg: OpenClawConfig;
provider: string;
model: string;
catalog?: ModelCatalogEntry[];
}): ThinkLevel | undefined {
const perModelThinking =
params.cfg.agents?.defaults?.models?.[modelKey(params.provider, params.model)]?.params
?.thinking;
@@ -538,17 +556,30 @@ export function resolveThinkingDefault(params: {
) {
return perModelThinking;
}
const configured = params.cfg.agents?.defaults?.thinkingDefault;
if (configured) {
return configured;
}
const candidate = params.catalog?.find(
(entry) => entry.provider === params.provider && entry.id === params.model,
);
if (candidate?.reasoning) {
if (
supportsReasoningModel({
provider: params.provider,
model: params.model,
catalog: params.catalog,
})
) {
return "low";
}
return "off";
return undefined;
}
/**
 * Reports whether the catalog flags the given provider/model pair as reasoning-capable.
 *
 * An entry matches when its `id` equals `params.model` and its `provider` equals
 * either `params.provider` or the combined `modelKey(provider, model)` string.
 *
 * @param params.provider - Provider identifier to look up.
 * @param params.model - Model identifier to look up.
 * @param params.catalog - Catalog entries to search; may be omitted.
 * @returns `true` only when the first matching entry has `reasoning === true`;
 *   `false` when the catalog is omitted, nothing matches, or the flag is not exactly `true`.
 */
export function supportsReasoningModel(params: {
  provider: string;
  model: string;
  catalog?: ModelCatalogEntry[];
}): boolean {
  const { provider, model, catalog } = params;
  const combinedKey = modelKey(provider, model);

  const matchesRequestedModel = (entry: ModelCatalogEntry): boolean => {
    if (entry.id !== model) {
      return false;
    }
    return entry.provider === provider || entry.provider === combinedKey;
  };

  const match = catalog?.find(matchesRequestedModel);
  return match?.reasoning === true;
}
/** Default reasoning level when session/directive do not set it: "on" if model supports reasoning, else "off". */
@@ -557,13 +588,7 @@ export function resolveReasoningDefault(params: {
model: string;
catalog?: ModelCatalogEntry[];
}): "on" | "off" {
const key = modelKey(params.provider, params.model);
const candidate = params.catalog?.find(
(entry) =>
(entry.provider === params.provider && entry.id === params.model) ||
(entry.provider === key && entry.id === params.model),
);
return candidate?.reasoning === true ? "on" : "off";
return supportsReasoningModel(params) ? "on" : "off";
}
/**

View File

@@ -2,35 +2,49 @@ import { describe, expect, it, vi } from "vitest";
import { resolveCurrentDirectiveLevels } from "./directive-handling.levels.js";
describe("resolveCurrentDirectiveLevels", () => {
it("prefers resolved model default over agent thinkingDefault", async () => {
const resolveDefaultThinkingLevel = vi.fn().mockResolvedValue("high");
const result = await resolveCurrentDirectiveLevels({
it.each([
{
name: "uses session override first",
sessionEntry: { thinkingLevel: "minimal" },
agentCfg: { thinkingDefault: "low" },
modelDefault: "high",
expectedLevel: "minimal",
expectedModelCalls: 0,
},
{
name: "uses model default when no session override",
sessionEntry: {},
agentCfg: {
thinkingDefault: "low",
},
resolveDefaultThinkingLevel,
});
expect(result.currentThinkLevel).toBe("high");
expect(resolveDefaultThinkingLevel).toHaveBeenCalledTimes(1);
});
it("keeps session thinking override without consulting defaults", async () => {
const resolveDefaultThinkingLevel = vi.fn().mockResolvedValue("high");
agentCfg: { thinkingDefault: "low" },
modelDefault: "high",
expectedLevel: "high",
expectedModelCalls: 1,
},
{
name: "falls back to global default when model default missing",
sessionEntry: {},
agentCfg: { thinkingDefault: "low" },
modelDefault: undefined,
expectedLevel: "low",
expectedModelCalls: 1,
},
{
name: "falls back to off when no defaults are set",
sessionEntry: {},
agentCfg: {},
modelDefault: undefined,
expectedLevel: "off",
expectedModelCalls: 1,
},
])("$name", async (testCase) => {
const resolveDefaultThinkingLevel = vi.fn().mockResolvedValue(testCase.modelDefault);
const result = await resolveCurrentDirectiveLevels({
sessionEntry: {
thinkingLevel: "minimal",
},
agentCfg: {
thinkingDefault: "low",
},
sessionEntry: testCase.sessionEntry,
agentCfg: testCase.agentCfg,
resolveDefaultThinkingLevel,
});
expect(result.currentThinkLevel).toBe("minimal");
expect(resolveDefaultThinkingLevel).not.toHaveBeenCalled();
expect(result.currentThinkLevel).toBe(testCase.expectedLevel);
expect(resolveDefaultThinkingLevel).toHaveBeenCalledTimes(testCase.expectedModelCalls);
});
});

View File

@@ -1,3 +1,4 @@
import { resolveThinkingLevelByPrecedence } from "../../sessions/thinking-level.js";
import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js";
export async function resolveCurrentDirectiveLevels(params: {
@@ -14,16 +15,18 @@ export async function resolveCurrentDirectiveLevels(params: {
};
resolveDefaultThinkingLevel: () => Promise<ThinkLevel | undefined>;
}): Promise<{
currentThinkLevel: ThinkLevel | undefined;
currentThinkLevel: ThinkLevel;
currentVerboseLevel: VerboseLevel | undefined;
currentReasoningLevel: ReasoningLevel;
currentElevatedLevel: ElevatedLevel | undefined;
}> {
const resolvedDefaultThinkLevel =
(params.sessionEntry?.thinkingLevel as ThinkLevel | undefined) ??
(await params.resolveDefaultThinkingLevel()) ??
(params.agentCfg?.thinkingDefault as ThinkLevel | undefined);
const currentThinkLevel = resolvedDefaultThinkLevel;
const currentThinkLevel = (
await resolveThinkingLevelByPrecedence({
sessionThinkLevel: params.sessionEntry?.thinkingLevel as ThinkLevel | undefined,
resolveModelDefaultThinkingLevel: params.resolveDefaultThinkingLevel,
globalDefaultThinkLevel: params.agentCfg?.thinkingDefault as ThinkLevel | undefined,
})
).level;
const currentVerboseLevel =
(params.sessionEntry?.verboseLevel as VerboseLevel | undefined) ??
(params.agentCfg?.verboseDefault as VerboseLevel | undefined);

View File

@@ -4,6 +4,7 @@ import { resolveSandboxRuntimeStatus } from "../../agents/sandbox.js";
import type { SkillCommandSpec } from "../../agents/skills.js";
import type { OpenClawConfig } from "../../config/config.js";
import type { SessionEntry } from "../../config/sessions.js";
import { resolveThinkingLevelByPrecedence } from "../../sessions/thinking-level.js";
import { listChatCommands, shouldHandleTextCommands } from "../commands-registry.js";
import { listSkillCommandsForWorkspace } from "../skill-commands.js";
import type { MsgContext, TemplateContext } from "../templating.js";
@@ -338,9 +339,6 @@ export async function resolveReplyDirectives(params: {
groupResolution,
});
const defaultActivation = defaultGroupActivation(requireMention);
const resolvedThinkLevel =
directives.thinkLevel ?? (sessionEntry?.thinkingLevel as ThinkLevel | undefined);
const resolvedVerboseLevel =
directives.verboseLevel ??
(sessionEntry?.verboseLevel as VerboseLevel | undefined) ??
@@ -388,10 +386,14 @@ export async function resolveReplyDirectives(params: {
});
provider = modelState.provider;
model = modelState.model;
const resolvedThinkLevelWithDefault =
resolvedThinkLevel ??
(await modelState.resolveDefaultThinkingLevel()) ??
(agentCfg?.thinkingDefault as ThinkLevel | undefined);
const resolvedThinkLevelWithDefault = (
await resolveThinkingLevelByPrecedence({
commandThinkLevel: directives.thinkLevel,
sessionThinkLevel: sessionEntry?.thinkingLevel as ThinkLevel | undefined,
resolveModelDefaultThinkingLevel: () => modelState.resolveModelDefaultThinkingLevel(),
globalDefaultThinkLevel: agentCfg?.thinkingDefault as ThinkLevel | undefined,
})
).level;
// When neither directive nor session set reasoning, default to model capability
// (e.g. OpenRouter with reasoning: true). Skip auto-enabling when thinking is

View File

@@ -1,14 +1,23 @@
import { describe, expect, it, vi } from "vitest";
import { makeModelCatalogEntry } from "../../agents/model-catalog.test-helpers.js";
import type { OpenClawConfig } from "../../config/config.js";
import { createModelSelectionState } from "./model-selection.js";
vi.mock("../../agents/model-catalog.js", () => ({
loadModelCatalog: vi.fn(async () => [
{ provider: "anthropic", id: "claude-opus-4-5", name: "Claude Opus 4.5" },
{ provider: "inferencer", id: "deepseek-v3-4bit-mlx", name: "DeepSeek V3" },
{ provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" },
{ provider: "openai", id: "gpt-4o-mini", name: "GPT-4o mini" },
{ provider: "openai", id: "gpt-4o", name: "GPT-4o" },
makeModelCatalogEntry({
provider: "anthropic",
id: "claude-opus-4-5",
name: "Claude Opus 4.5",
}),
makeModelCatalogEntry({
provider: "inferencer",
id: "deepseek-v3-4bit-mlx",
name: "DeepSeek V3",
}),
makeModelCatalogEntry({ provider: "kimi-coding", id: "k2p5", name: "Kimi K2.5" }),
makeModelCatalogEntry({ provider: "openai", id: "gpt-4o-mini", name: "GPT-4o mini" }),
makeModelCatalogEntry({ provider: "openai", id: "gpt-4o", name: "GPT-4o" }),
]),
}));
@@ -269,7 +278,10 @@ describe("createModelSelectionState resolveDefaultReasoningLevel", () => {
it("returns on when catalog model has reasoning true", async () => {
const { loadModelCatalog } = await import("../../agents/model-catalog.js");
vi.mocked(loadModelCatalog).mockResolvedValueOnce([
{ provider: "openrouter", id: "x-ai/grok-4.1-fast", name: "Grok", reasoning: true },
makeModelCatalogEntry(
{ provider: "openrouter", id: "x-ai/grok-4.1-fast", name: "Grok" },
{ reasoning: true },
),
]);
const state = await createModelSelectionState({
cfg: {} as OpenClawConfig,

View File

@@ -7,9 +7,9 @@ import {
type ModelAliasIndex,
modelKey,
normalizeProviderId,
resolveModelThinkingDefault,
resolveModelRefFromString,
resolveReasoningDefault,
resolveThinkingDefault,
} from "../../agents/model-selection.js";
import type { OpenClawConfig } from "../../config/config.js";
import { type SessionEntry, updateSessionStore } from "../../config/sessions.js";
@@ -32,6 +32,7 @@ type ModelSelectionState = {
allowedModelKeys: Set<string>;
allowedModelCatalog: ModelCatalog;
resetModelOverride: boolean;
resolveModelDefaultThinkingLevel: () => Promise<ThinkLevel | undefined>;
resolveDefaultThinkingLevel: () => Promise<ThinkLevel>;
/** Default reasoning level from model capability: "on" if model has reasoning, else "off". */
resolveDefaultReasoningLevel: () => Promise<"on" | "off">;
@@ -379,25 +380,30 @@ export async function createModelSelectionState(params: {
}
}
let defaultThinkingLevel: ThinkLevel | undefined;
const resolveDefaultThinkingLevel = async () => {
if (defaultThinkingLevel) {
return defaultThinkingLevel;
let modelDefaultThinkingLevel: ThinkLevel | undefined;
let hasResolvedModelDefaultThinkingLevel = false;
const resolveModelDefaultThinkingLevel = async (): Promise<ThinkLevel | undefined> => {
if (hasResolvedModelDefaultThinkingLevel) {
return modelDefaultThinkingLevel;
}
let catalogForThinking = modelCatalog ?? allowedModelCatalog;
if (!catalogForThinking || catalogForThinking.length === 0) {
modelCatalog = await loadModelCatalog({ config: cfg });
catalogForThinking = modelCatalog;
}
const resolved = resolveThinkingDefault({
modelDefaultThinkingLevel = resolveModelThinkingDefault({
cfg,
provider,
model,
catalog: catalogForThinking,
});
defaultThinkingLevel =
resolved ?? (agentCfg?.thinkingDefault as ThinkLevel | undefined) ?? "off";
return defaultThinkingLevel;
hasResolvedModelDefaultThinkingLevel = true;
return modelDefaultThinkingLevel;
};
const resolveDefaultThinkingLevel = async () => {
const modelDefault = await resolveModelDefaultThinkingLevel();
return modelDefault ?? (agentCfg?.thinkingDefault as ThinkLevel | undefined) ?? "off";
};
const resolveDefaultReasoningLevel = async (): Promise<"on" | "off"> => {
@@ -419,6 +425,7 @@ export async function createModelSelectionState(params: {
allowedModelKeys,
allowedModelCatalog,
resetModelOverride,
resolveModelDefaultThinkingLevel,
resolveDefaultThinkingLevel,
resolveDefaultReasoningLevel,
needsModelCatalog,

View File

@@ -30,7 +30,7 @@ import {
normalizeProviderId,
resolveConfiguredModelRef,
resolveDefaultModelForAgent,
resolveThinkingDefault,
resolveModelThinkingDefault,
} from "../agents/model-selection.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { buildWorkspaceSkillSnapshot } from "../agents/skills.js";
@@ -72,6 +72,7 @@ import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { applyVerboseOverride } from "../sessions/level-overrides.js";
import { applyModelOverrideToSessionEntry } from "../sessions/model-overrides.js";
import { resolveSendPolicy } from "../sessions/send-policy.js";
import { resolveThinkingLevelByPrecedence } from "../sessions/thinking-level.js";
import { resolveMessageChannel } from "../utils/message-channel.js";
import { deliverAgentCommandResult } from "./agent/delivery.js";
import { resolveAgentRunContext } from "./agent/run-context.js";
@@ -588,7 +589,7 @@ export async function agentCommand(
});
}
let resolvedThinkLevel = thinkOnce ?? thinkOverride ?? persistedThinking;
const commandThinkLevel = thinkOnce ?? thinkOverride;
const resolvedVerboseLevel =
verboseOverride ?? persistedVerbose ?? (agentCfg?.verboseDefault as VerboseLevel | undefined);
@@ -744,21 +745,28 @@ export async function agentCommand(
}
}
if (!resolvedThinkLevel) {
let catalogForThinking = modelCatalog ?? allowedModelCatalog;
if (!catalogForThinking || catalogForThinking.length === 0) {
modelCatalog = await loadModelCatalog({ config: cfg });
catalogForThinking = modelCatalog;
}
resolvedThinkLevel = resolveThinkingDefault({
cfg,
provider,
model,
catalog: catalogForThinking,
});
}
let resolvedThinkLevel = (
await resolveThinkingLevelByPrecedence({
commandThinkLevel,
sessionThinkLevel: persistedThinking,
resolveModelDefaultThinkingLevel: async () => {
let catalogForThinking = modelCatalog ?? allowedModelCatalog;
if (!catalogForThinking || catalogForThinking.length === 0) {
modelCatalog = await loadModelCatalog({ config: cfg });
catalogForThinking = modelCatalog;
}
return resolveModelThinkingDefault({
cfg,
provider,
model,
catalog: catalogForThinking,
});
},
globalDefaultThinkLevel: cfg.agents?.defaults?.thinkingDefault as ThinkLevel | undefined,
})
).level;
if (resolvedThinkLevel === "xhigh" && !supportsXHighThinking(provider, model)) {
const explicitThink = Boolean(thinkOnce || thinkOverride);
const explicitThink = Boolean(commandThinkLevel);
if (explicitThink) {
throw new Error(`Thinking level "xhigh" is only supported for ${formatXHighModelHint()}.`);
}

View File

@@ -0,0 +1,82 @@
import { describe, expect, it, vi } from "vitest";
import type { ThinkLevel } from "../auto-reply/thinking.js";
import { resolveThinkingLevelByPrecedence } from "./thinking-level.js";
/**
 * Creates a mocked model-default resolver for precedence tests.
 *
 * @param value - The level the mock resolves with (or `undefined` for "no model default").
 * @returns `fn`, the underlying vitest mock (for call-count assertions), and
 *   `resolve`, a zero-argument wrapper that invokes the mock and returns its promise.
 */
function createModelDefaultResolver(value: ThinkLevel | undefined) {
  const spy = vi.fn().mockResolvedValue(value);
  const resolve = () => spy() as Promise<ThinkLevel | undefined>;
  return { fn: spy, resolve };
}
describe("resolveThinkingLevelByPrecedence", () => {
  // One row per precedence tier: command > session > model default > global default > disabled.
  // `expectedModelCalls` pins that the model resolver is only consulted once the
  // command and session levels are both unset.
  const precedenceCases = [
    {
      name: "command override wins",
      commandThinkLevel: "high" as ThinkLevel,
      sessionThinkLevel: "medium" as ThinkLevel,
      modelDefault: "low" as ThinkLevel,
      globalDefaultThinkLevel: "minimal" as ThinkLevel,
      expected: { level: "high", source: "command" },
      expectedModelCalls: 0,
    },
    {
      name: "session override wins when command unset",
      commandThinkLevel: null,
      sessionThinkLevel: "medium" as ThinkLevel,
      modelDefault: "low" as ThinkLevel,
      globalDefaultThinkLevel: "minimal" as ThinkLevel,
      expected: { level: "medium", source: "session" },
      expectedModelCalls: 0,
    },
    {
      name: "model default wins when command and session unset",
      commandThinkLevel: undefined,
      sessionThinkLevel: null,
      modelDefault: "low" as ThinkLevel,
      globalDefaultThinkLevel: "minimal" as ThinkLevel,
      expected: { level: "low", source: "model_default" },
      expectedModelCalls: 1,
    },
    {
      name: "global default wins when model default missing",
      commandThinkLevel: undefined,
      sessionThinkLevel: undefined,
      modelDefault: undefined,
      globalDefaultThinkLevel: "minimal" as ThinkLevel,
      expected: { level: "minimal", source: "global_default" },
      expectedModelCalls: 1,
    },
    {
      name: "disabled fallback when everything unset",
      commandThinkLevel: undefined,
      sessionThinkLevel: undefined,
      modelDefault: undefined,
      globalDefaultThinkLevel: null,
      expected: { level: "off", source: "disabled" },
      expectedModelCalls: 1,
    },
  ];

  it.each(precedenceCases)(
    "$name",
    async ({
      commandThinkLevel,
      sessionThinkLevel,
      modelDefault,
      globalDefaultThinkLevel,
      expected,
      expectedModelCalls,
    }) => {
      const resolver = createModelDefaultResolver(modelDefault);

      const outcome = await resolveThinkingLevelByPrecedence({
        commandThinkLevel,
        sessionThinkLevel,
        resolveModelDefaultThinkingLevel: resolver.resolve,
        globalDefaultThinkLevel,
      });

      expect(outcome).toEqual(expected);
      expect(resolver.fn).toHaveBeenCalledTimes(expectedModelCalls);
    },
  );

  it("supports custom disabled fallback level", async () => {
    // With no other inputs, the caller-supplied fallback replaces the built-in "off".
    const outcome = await resolveThinkingLevelByPrecedence({
      disabledThinkLevel: "minimal",
    });

    expect(outcome).toEqual({
      level: "minimal",
      source: "disabled",
    });
  });
});

View File

@@ -0,0 +1,33 @@
import type { ThinkLevel } from "../auto-reply/thinking.js";
/**
 * Identifies which input produced a resolved thinking level.
 *
 * - `"command"`: the command-level value was set.
 * - `"session"`: the session-level value was set.
 * - `"model_default"`: the model-default resolver returned a value.
 * - `"global_default"`: the global default value was set.
 * - `"disabled"`: none of the above applied and the fallback level was used.
 */
export type ThinkingLevelSource =
| "command"
| "session"
| "model_default"
| "global_default"
| "disabled";
/**
 * Resolves the effective thinking level by walking a fixed precedence chain:
 * command, then session, then model default, then global default, then the
 * disabled fallback.
 *
 * @param params.commandThinkLevel - Level set for this command; wins when truthy.
 * @param params.sessionThinkLevel - Level stored on the session; used when the command level is unset.
 * @param params.resolveModelDefaultThinkingLevel - Optional async resolver for a model-derived
 *   default. It is only invoked when neither the command nor the session level is set.
 * @param params.globalDefaultThinkLevel - Configured global default, used when no model default resolves.
 * @param params.disabledThinkLevel - Level returned when nothing else applies; defaults to `"off"`.
 * @returns The chosen level together with the tier that supplied it.
 */
export async function resolveThinkingLevelByPrecedence(params: {
  commandThinkLevel?: ThinkLevel | null;
  sessionThinkLevel?: ThinkLevel | null;
  resolveModelDefaultThinkingLevel?: () => Promise<ThinkLevel | undefined>;
  globalDefaultThinkLevel?: ThinkLevel | null;
  disabledThinkLevel?: ThinkLevel;
}): Promise<{ level: ThinkLevel; source: ThinkingLevelSource }> {
  // Explicit overrides, highest priority first; the first set value short-circuits
  // before the (potentially expensive) model-default resolver is touched.
  const explicitOverrides: Array<[ThinkLevel | null | undefined, ThinkingLevelSource]> = [
    [params.commandThinkLevel, "command"],
    [params.sessionThinkLevel, "session"],
  ];
  for (const [level, source] of explicitOverrides) {
    if (level) {
      return { level, source };
    }
  }

  // Only await when a resolver was actually supplied.
  const modelDefault = params.resolveModelDefaultThinkingLevel
    ? await params.resolveModelDefaultThinkingLevel()
    : undefined;
  if (modelDefault) {
    return { level: modelDefault, source: "model_default" };
  }

  const globalDefault = params.globalDefaultThinkLevel;
  return globalDefault
    ? { level: globalDefault, source: "global_default" }
    : { level: params.disabledThinkLevel ?? "off", source: "disabled" };
}