From 54a81e00807ba7c619b526291967b109d5e1dbee Mon Sep 17 00:00:00 2001 From: Alex Knight Date: Sun, 3 May 2026 21:38:32 +1000 Subject: [PATCH] fix: expose session-specific thinking levels (#76548) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: expose session-specific thinking levels (#76482) * fix: preserve lightweight sessions.list contract, fix consumer-side fallbacks only * fix: include thinking levels in lightweight session rows for Control UI (#76482) The Control UI cannot resolve provider-specific thinking levels client-side (ui/src/ui/thinking.ts always returns base 5 levels). The gateway must provide them even in lightweight rows. listThinkingLevelOptions is a cheap in-memory lookup — negligible perf impact vs the transcript/cost/model ops that the lightweight flag still skips. Also update existing test assertions that expected thinkingOptions: [] for lightweight rows (flagged by ClawSweeper review). * test: add e2e regression tests for thinking level pipeline (#76482) --- CHANGELOG.md | 1 + .../server.sessions.list-changed.test.ts | 2 +- .../server.sessions.thinking-e2e.test.ts | 203 ++++++++++++++++++ src/gateway/session-utils.test.ts | 2 +- src/gateway/session-utils.ts | 8 +- src/tui/commands.test.ts | 12 ++ src/tui/commands.ts | 6 +- ui/src/ui/chat/session-controls.ts | 11 +- .../chat/slash-command-executor.node.test.ts | 115 ++++++++++ ui/src/ui/chat/slash-command-executor.ts | 19 +- 10 files changed, 361 insertions(+), 18 deletions(-) create mode 100644 src/gateway/server.sessions.thinking-e2e.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index c112a75ee97..5ebfcb28ce3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ Docs: https://docs.openclaw.ai - CLI/plugins: keep `plugins enable` and `plugins disable` from creating unconfigured channel config sections, so channel plugins with required setup fields no longer fail validation during lifecycle probes. Thanks @vincentkoc. 
- Doctor/config: set `messages.groupChat.visibleReplies: "message_tool"` during compatibility repair for configured-channel configs that omit a visible-reply policy, so upgrades can persist the intended tool-only group/channel reply default. Thanks @kagura-agent. - Agents/sessions: keep delayed `sessions_send` A2A replies alive after soft wait-window timeouts, while preserving terminal run timeouts and avoiding stale target replies in requester sessions. Fixes #76443. Thanks @ryswork1993 and @vincentkoc. +- TUI/Control UI: fix `/think` command showing only base thinking levels when the active session uses a different model from the default, so provider-specific levels like DeepSeek V4 Pro's `xhigh` and `max` are now visible and selectable. Fixes #76482. Thanks @amknight. - CLI/sessions: keep intentional empty agent replies silent after tool-delivered channel output, instead of surfacing a misleading "No reply from agent." fallback. Thanks @vincentkoc. - Config/doctor: cap `.clobbered.*` forensic snapshots per config path and serialize snapshot writes so repeated `doctor --fix` recovery loops cannot flood the config directory. Fixes #76454; carries forward #65649. Thanks @JUSTICEESSIELP, @rsnow, and @vincentkoc. - Feishu: suppress duplicate text when replies send native voice media while preserving captions for ordinary audio files and falling back to text plus attachment links when voice uploads fail. 
diff --git a/src/gateway/server.sessions.list-changed.test.ts b/src/gateway/server.sessions.list-changed.test.ts index 3371e02dbb5..ed935ffc3a9 100644 --- a/src/gateway/server.sessions.list-changed.test.ts +++ b/src/gateway/server.sessions.list-changed.test.ts @@ -153,7 +153,7 @@ test("sessions.list uses the gateway model catalog for effective thinking defaul expect.objectContaining({ key: "agent:main:main", thinkingDefault: undefined, - thinkingOptions: [], + thinkingOptions: ["off", "minimal", "low", "medium", "high"], }), ]), }), diff --git a/src/gateway/server.sessions.thinking-e2e.test.ts b/src/gateway/server.sessions.thinking-e2e.test.ts new file mode 100644 index 00000000000..144f7f065bf --- /dev/null +++ b/src/gateway/server.sessions.thinking-e2e.test.ts @@ -0,0 +1,203 @@ +/** + * E2E regression test for #76482: verifies the full pipeline from gateway + * sessions.list (lightweight rows that still carry thinkingOptions) through + * consumer-side resolution, ensuring: + * 1. Sessions on an extended-reasoning model expose provider-specific levels (e.g. xhigh) + * 2. Anthropic sessions don't leak DeepSeek levels from defaults + * 3. Sessions matching the default model correctly inherit defaults + */ +import { expect, test, vi } from "vitest"; +import { formatThinkingLevels } from "../auto-reply/thinking.js"; +import { testState, writeSessionStore } from "./test-helpers.js"; +import { + setupGatewaySessionsTestHarness, + getGatewayConfigModule, + getSessionsHandlers, + sessionStoreEntry, +} from "./test/server-sessions.test-helpers.js"; + +const { createSessionStoreDir } = setupGatewaySessionsTestHarness(); + +/** + * Simulates the consumer-side resolution from session-controls.ts and + * slash-command-executor.ts — the code path that the PR fixes. 
+ */ +function resolveThinkingLevelsConsumerSide( + session: + | { + modelProvider?: string; + model?: string; + thinkingLevels?: Array<{ label: string }>; + thinkingOptions?: string[]; + } + | undefined, + defaults: + | { + modelProvider?: string; + model?: string; + thinkingLevels?: Array<{ label: string }>; + thinkingOptions?: string[]; + } + | undefined, +): string[] { + if (session?.thinkingLevels?.length) { + return session.thinkingLevels.map((l) => l.label); + } + const sessionModelMatchesDefaults = + (!session?.modelProvider || session.modelProvider === defaults?.modelProvider) && + (!session?.model || session.model === defaults?.model); + if (sessionModelMatchesDefaults && defaults?.thinkingLevels?.length) { + return defaults.thinkingLevels.map((l) => l.label); + } + const labels = + (session?.thinkingOptions?.length ? session.thinkingOptions : null) ?? + (sessionModelMatchesDefaults && defaults?.thinkingOptions?.length + ? defaults.thinkingOptions + : null) ?? + formatThinkingLevels( + session?.modelProvider ?? defaults?.modelProvider, + session?.model ?? 
defaults?.model, + ).split(/\s*,\s*/); + return labels.filter(Boolean); +} + +test("e2e #76482: session with different model gets its own thinking levels through gateway row + consumer fallback", async () => { + await createSessionStoreDir(); + testState.agentConfig = { + model: { primary: "openai/gpt-5.5" }, + }; + await writeSessionStore({ + entries: { + main: sessionStoreEntry("sess-main", { + modelProvider: "test-extended", + model: "extended-reasoner", + }), + }, + }); + + const respond = vi.fn(); + const sessionsHandlers = await getSessionsHandlers(); + const { getRuntimeConfig } = await getGatewayConfigModule(); + await sessionsHandlers["sessions.list"]({ + req: { type: "req", id: "req-e2e-extended", method: "sessions.list", params: {} }, + params: {}, + respond, + client: null, + isWebchatConnect: () => false, + context: { + getRuntimeConfig, + // Provide a catalog with xhigh support — simulates what a real gateway + // resolves for models like DeepSeek V4 Pro + loadGatewayModelCatalog: async () => [ + { + provider: "test-extended", + id: "extended-reasoner", + name: "Extended Reasoner", + reasoning: true, + compat: { supportedReasoningEfforts: ["xhigh"] }, + }, + ], + } as never, + }); + + const result = respond.mock.calls[0]?.[1]; + const session = result?.sessions?.find((s: { key: string }) => s.key === "agent:main:main"); + const defaults = result?.defaults; + + // Gateway includes thinkingOptions for lightweight rows (needed by Control UI) + expect(session?.thinkingOptions?.length).toBeGreaterThan(0); + expect(session?.thinkingOptions).toContain("xhigh"); + + // Session model differs from default + expect(session?.modelProvider).toBe("test-extended"); + expect(defaults?.modelProvider).toBe("openai"); + + // Consumer-side resolution uses session's own thinkingOptions (not defaults) + const resolved = resolveThinkingLevelsConsumerSide(session, defaults); + expect(resolved).toContain("xhigh"); + expect(resolved).toContain("off"); + 
expect(resolved).toContain("high"); +}); + +test("e2e #76482: Anthropic session does not leak DeepSeek thinking levels from defaults", async () => { + await createSessionStoreDir(); + testState.agentConfig = { + model: { primary: "deepseek/deepseek-v4-pro" }, + }; + await writeSessionStore({ + entries: { + main: sessionStoreEntry("sess-main", { + modelProvider: "anthropic", + model: "claude-sonnet-4-6", + }), + }, + }); + + const respond = vi.fn(); + const sessionsHandlers = await getSessionsHandlers(); + const { getRuntimeConfig } = await getGatewayConfigModule(); + await sessionsHandlers["sessions.list"]({ + req: { type: "req", id: "req-e2e-anthropic", method: "sessions.list", params: {} }, + params: {}, + respond, + client: null, + isWebchatConnect: () => false, + context: { getRuntimeConfig, loadGatewayModelCatalog: async () => [] } as never, + }); + + const result = respond.mock.calls[0]?.[1]; + const session = result?.sessions?.find((s: { key: string }) => s.key === "agent:main:main"); + const defaults = result?.defaults; + + // Session model differs from default + expect(session?.modelProvider).toBe("anthropic"); + expect(defaults?.modelProvider).toBe("deepseek"); + + // Consumer-side resolution should NOT include DeepSeek-specific levels + const resolved = resolveThinkingLevelsConsumerSide(session, defaults); + expect(resolved).not.toContain("xhigh"); + expect(resolved).not.toContain("max"); + // Should have base Anthropic levels + expect(resolved).toContain("off"); + expect(resolved).toContain("high"); +}); + +test("e2e #76482: session matching default model inherits default thinking levels", async () => { + await createSessionStoreDir(); + testState.agentConfig = { + model: { primary: "openai/gpt-5.5" }, + }; + await writeSessionStore({ + entries: { + main: sessionStoreEntry("sess-main", { + modelProvider: "openai", + model: "gpt-5.5", + }), + }, + }); + + const respond = vi.fn(); + const sessionsHandlers = await getSessionsHandlers(); + const { 
getRuntimeConfig } = await getGatewayConfigModule(); + await sessionsHandlers["sessions.list"]({ + req: { type: "req", id: "req-e2e-same", method: "sessions.list", params: {} }, + params: {}, + respond, + client: null, + isWebchatConnect: () => false, + context: { getRuntimeConfig, loadGatewayModelCatalog: async () => [] } as never, + }); + + const result = respond.mock.calls[0]?.[1]; + const session = result?.sessions?.find((s: { key: string }) => s.key === "agent:main:main"); + const defaults = result?.defaults; + + // Session matches default → consumer should use defaults + expect(session?.modelProvider).toBe(defaults?.modelProvider); + + const resolved = resolveThinkingLevelsConsumerSide(session, defaults); + expect(resolved.length).toBeGreaterThan(0); + // Should match what defaults provide + expect(resolved).toContain("off"); + expect(resolved).toContain("high"); +}); diff --git a/src/gateway/session-utils.test.ts b/src/gateway/session-utils.test.ts index 8b88909f537..3b69e28a787 100644 --- a/src/gateway/session-utils.test.ts +++ b/src/gateway/session-utils.test.ts @@ -1144,7 +1144,7 @@ describe("listSessionsFromStore selected model display", () => { }), ); expect(listed.sessions[0]?.agentRuntime).toEqual({ id: "pi", source: "implicit" }); - expect(listed.sessions[0]?.thinkingOptions).toEqual([]); + expect(listed.sessions[0]?.thinkingOptions?.length).toBeGreaterThan(0); } finally { fs.rmSync(tmpDir, { recursive: true, force: true }); } diff --git a/src/gateway/session-utils.ts b/src/gateway/session-utils.ts index 564d4dfc631..c71569f44f6 100644 --- a/src/gateway/session-utils.ts +++ b/src/gateway/session-utils.ts @@ -1635,9 +1635,11 @@ export function buildGatewaySessionRow(params: { const thinkingProvider = rowModelProvider ?? DEFAULT_PROVIDER; const thinkingModel = rowModel ?? DEFAULT_MODEL; - const thinkingLevels = lightweight - ? 
[] - : listThinkingLevelOptions(thinkingProvider, thinkingModel, params.modelCatalog); + const thinkingLevels = listThinkingLevelOptions( + thinkingProvider, + thinkingModel, + params.modelCatalog, + ); const pluginExtensions = !lightweight && entry ? projectPluginSessionExtensionsSync({ sessionKey: key, entry }) : []; diff --git a/src/tui/commands.test.ts b/src/tui/commands.test.ts index c9c157530a9..ee64c626fed 100644 --- a/src/tui/commands.test.ts +++ b/src/tui/commands.test.ts @@ -55,6 +55,18 @@ describe("getSlashCommands", () => { { value: "max", label: "max" }, ]); }); + + it("falls back to provider-resolved levels when thinkingLevels is empty (#76482)", () => { + const commands = getSlashCommands({ + provider: "anthropic", + model: "claude-sonnet-4-6", + thinkingLevels: [], // empty from lightweight session row + }); + const think = commands.find((command) => command.name === "think"); + // Should fall back to listThinkingLevelLabels, not return empty completions + const completions = think?.getArgumentCompletions?.(""); + expect(completions?.length).toBeGreaterThan(0); + }); }); describe("helpText", () => { diff --git a/src/tui/commands.ts b/src/tui/commands.ts index 8c07011f4d1..2319c2b86a5 100644 --- a/src/tui/commands.ts +++ b/src/tui/commands.ts @@ -56,9 +56,9 @@ export function parseCommand(input: string): ParsedCommand { } export function getSlashCommands(options: SlashCommandOptions = {}): SlashCommand[] { - const thinkLevels = - options.thinkingLevels?.map((level) => level.label) ?? - listThinkingLevelLabels(options.provider, options.model); + const thinkLevels = options.thinkingLevels?.length + ? 
options.thinkingLevels.map((level) => level.label) + : listThinkingLevelLabels(options.provider, options.model); const verboseCompletions = createLevelCompletion(VERBOSE_LEVELS); const traceCompletions = createLevelCompletion(TRACE_LEVELS); const fastCompletions = createLevelCompletion(FAST_LEVELS); diff --git a/ui/src/ui/chat/session-controls.ts b/ui/src/ui/chat/session-controls.ts index 2cd5179d3b1..05ec7e97713 100644 --- a/ui/src/ui/chat/session-controls.ts +++ b/ui/src/ui/chat/session-controls.ts @@ -182,12 +182,17 @@ function resolveThinkingLevelOptions( if (activeRow?.thinkingLevels?.length) { return activeRow.thinkingLevels; } - if (defaults?.thinkingLevels?.length) { + const sessionModelMatchesDefaults = + (!activeRow?.modelProvider || activeRow.modelProvider === defaults?.modelProvider) && + (!activeRow?.model || activeRow.model === defaults?.model); + if (sessionModelMatchesDefaults && defaults?.thinkingLevels?.length) { return defaults.thinkingLevels; } const labels = - activeRow?.thinkingOptions ?? - defaults?.thinkingOptions ?? + (activeRow?.thinkingOptions?.length ? activeRow.thinkingOptions : null) ?? + (sessionModelMatchesDefaults && defaults?.thinkingOptions?.length + ? defaults.thinkingOptions + : null) ?? (provider && model ? listThinkingLevelLabels(provider, model) : listThinkingLevelLabels()); return labels.map((label) => ({ id: normalizeThinkLevel(label) ?? 
normalizeLowercaseStringOrEmpty(label), diff --git a/ui/src/ui/chat/slash-command-executor.node.test.ts b/ui/src/ui/chat/slash-command-executor.node.test.ts index e72d71c48f1..7b75628f8ff 100644 --- a/ui/src/ui/chat/slash-command-executor.node.test.ts +++ b/ui/src/ui/chat/slash-command-executor.node.test.ts @@ -729,6 +729,121 @@ describe("executeSlashCommand directives", () => { }); }); + it("prefers session model over defaults when models differ (#76482)", async () => { + const request = vi.fn(async (method: string, payload?: unknown) => { + if (method === "sessions.list") { + return { + defaults: { + modelProvider: "anthropic", + model: "claude-sonnet-4-6", + thinkingLevels: [ + { id: "off", label: "off" }, + { id: "minimal", label: "minimal" }, + { id: "low", label: "low" }, + { id: "medium", label: "medium" }, + { id: "high", label: "high" }, + ], + thinkingOptions: ["off", "minimal", "low", "medium", "high"], + thinkingDefault: "off", + }, + sessions: [ + row("agent:main:main", { + modelProvider: "deepseek", + model: "deepseek-v4-pro", + thinkingLevels: [ + { id: "off", label: "off" }, + { id: "minimal", label: "minimal" }, + { id: "low", label: "low" }, + { id: "medium", label: "medium" }, + { id: "high", label: "high" }, + { id: "xhigh", label: "xhigh" }, + { id: "max", label: "max" }, + ], + }), + ], + }; + } + if (method === "models.list") { + return { + models: [{ id: "deepseek-v4-pro", provider: "deepseek", reasoning: true }], + }; + } + if (method === "sessions.patch") { + return { ok: true, ...((payload ?? 
{}) as object) }; + } + throw new Error(`unexpected method: ${method}`); + }); + + const status = await executeSlashCommand( + { request } as unknown as GatewayBrowserClient, + "agent:main:main", + "think", + "", + ); + const setMax = await executeSlashCommand( + { request } as unknown as GatewayBrowserClient, + "agent:main:main", + "think", + "max", + ); + + expect(status.content).toBe( + "Current thinking level: off.\nOptions: off, minimal, low, medium, high, xhigh, max.", + ); + expect(setMax.content).toBe("Thinking level set to **max**."); + }); + + it("does not use extended defaults for session with different model when thinkingLevels is empty (#76482)", async () => { + // Regression: when session model differs from defaults and session has no thinkingLevels, + // we should NOT blindly use defaults (which could have extra levels like xhigh/max + // from a different model). The client-side fallback uses the base thinking levels. + const request = vi.fn(async (method: string, _payload?: unknown) => { + if (method === "sessions.list") { + return { + defaults: { + modelProvider: "deepseek", + model: "deepseek-v4-pro", + thinkingLevels: [ + { id: "off", label: "off" }, + { id: "minimal", label: "minimal" }, + { id: "low", label: "low" }, + { id: "medium", label: "medium" }, + { id: "high", label: "high" }, + { id: "xhigh", label: "xhigh" }, + { id: "max", label: "max" }, + ], + thinkingOptions: ["off", "minimal", "low", "medium", "high", "xhigh", "max"], + thinkingDefault: "high", + }, + sessions: [ + row("agent:main:main", { + modelProvider: "anthropic", + model: "claude-sonnet-4-6", + // thinkingLevels intentionally absent — lightweight row + }), + ], + }; + } + if (method === "models.list") { + return { + models: [{ id: "claude-sonnet-4-6", provider: "anthropic", reasoning: true }], + }; + } + throw new Error(`unexpected method: ${method}`); + }); + + const status = await executeSlashCommand( + { request } as unknown as GatewayBrowserClient, + "agent:main:main", 
+ "think", + "", + ); + + // Should NOT show DeepSeek defaults (xhigh, max) for an Anthropic session + expect(status.content).not.toContain("xhigh"); + expect(status.content).not.toContain("max"); + }); + it("reports the current verbose level for bare /verbose", async () => { const request = vi.fn(async (method: string, _payload?: unknown) => { if (method === "sessions.list") { diff --git a/ui/src/ui/chat/slash-command-executor.ts b/ui/src/ui/chat/slash-command-executor.ts index a2d9f7fc945..7b868555a0a 100644 --- a/ui/src/ui/chat/slash-command-executor.ts +++ b/ui/src/ui/chat/slash-command-executor.ts @@ -655,16 +655,21 @@ function resolveThinkingLevelOptionsForSession( if (session?.thinkingLevels?.length) { return session.thinkingLevels; } - if (defaults?.thinkingLevels?.length) { + const sessionModelMatchesDefaults = + (!session?.modelProvider || session.modelProvider === defaults?.modelProvider) && + (!session?.model || session.model === defaults?.model); + if (sessionModelMatchesDefaults && defaults?.thinkingLevels?.length) { return defaults.thinkingLevels; } const labels = - session?.thinkingOptions?.length || defaults?.thinkingOptions?.length - ? (session?.thinkingOptions ?? defaults?.thinkingOptions ?? []) - : formatThinkingLevels( - session?.modelProvider ?? defaults?.modelProvider, - session?.model ?? defaults?.model, - ).split(/\s*,\s*/); + (session?.thinkingOptions?.length ? session.thinkingOptions : null) ?? + (sessionModelMatchesDefaults && defaults?.thinkingOptions?.length + ? defaults.thinkingOptions + : null) ?? + formatThinkingLevels( + session?.modelProvider ?? defaults?.modelProvider, + session?.model ?? defaults?.model, + ).split(/\s*,\s*/); return labels.filter(Boolean).map((label) => ({ id: normalizeThinkLevel(label) ?? normalizeLowercaseStringOrEmpty(label), label,