From 0c9f84451a9f6074bd9402a18c7dd794f628ce59 Mon Sep 17 00:00:00 2001 From: Deepak Jain Date: Wed, 29 Apr 2026 03:47:18 -0700 Subject: [PATCH] feat(config): add reasoningDefault to agents.defaults Add reasoningDefault support under agents.defaults and preserve the existing per-agent/session/inline override order. Includes authorization gating for configured reasoning state, /status coverage, config schema/docs baseline updates, and regression tests for the reply and status paths. Also carries the related cron startup-run preservation fix and CI test stabilization needed for this PR branch. Validated locally with pnpm check:changed, the focused Vitest bundle for touched gateway/cron/auto-reply/plugin-sdk/tooling tests, pnpm config:docs:check, and git diff --check. GitHub checks are green on the merged head; Greptile latest visible review is 4/5 with no P0/P1 findings. --- docs/.generated/config-baseline.sha256 | 4 +- docs/gateway/config-agents.md | 4 +- docs/gateway/configuration-examples.md | 1 + .../reply/directive-handling.levels.test.ts | 28 ++ .../reply/directive-handling.levels.ts | 2 + ...et-reply-directives.target-session.test.ts | 170 +++++++++++- src/auto-reply/reply/get-reply-directives.ts | 40 ++- src/config/schema.base.generated.ts | 16 ++ src/config/types.agent-defaults.ts | 2 + src/config/zod-schema.agent-defaults.ts | 3 + src/cron/service/ops.ts | 2 +- .../server-methods/agent.create-event.test.ts | 23 +- ...erver.agent.gateway-server-agent-a.test.ts | 43 +-- src/plugin-sdk/command-status.runtime.test.ts | 252 ++++++++++++++++++ src/plugin-sdk/command-status.runtime.ts | 13 +- src/status/status-message.ts | 6 +- src/status/status-text.ts | 1 + test/scripts/docker-build-helper.test.ts | 2 +- 18 files changed, 556 insertions(+), 56 deletions(-) create mode 100644 src/plugin-sdk/command-status.runtime.test.ts diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 7d80e892f2a..f2fa57a9c0e 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -664d715fc9aba21236c9ef31e30a81f7ff96ede9a3b77273af569288ece0e7f7 config-baseline.json -0cc8ae3ae49d324face60240b4d3ed545c9ccec9b333bf1a1d98887151d37b77 config-baseline.core.json +7436d39dbbe5fb2642f9036198572d021e5a56daaecb207e5a1a21838730bd02 config-baseline.json +c481235c42b8845c36eb92923bbd4d00ce9e417955f0a4b40a02f5ba0842a432 config-baseline.core.json 9f5fad66a49fa618d64a963470aa69fed9fe4b4639cc4321f9ec04bfb2f8aa50 config-baseline.channel.json 0dd6583fafae6c9134e46c4cf9bddee9822d6436436dcb1a6dcba6d012962e51 config-baseline.plugin.json diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index f6b730f1b9a..dd0d272396a 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -326,6 +326,7 @@ Time format in system prompt. Default: `auto` (OS preference). pdfMaxPages: 20, thinkingDefault: "low", verboseDefault: "off", + reasoningDefault: "off", elevatedDefault: "on", timeoutSeconds: 600, mediaMaxMb: 5, @@ -365,6 +366,7 @@ Time format in system prompt. Default: `auto` (OS preference). - `pdfMaxBytesMb`: default PDF size limit for the `pdf` tool when `maxBytesMb` is not passed at call time. - `pdfMaxPages`: default maximum pages considered by extraction fallback mode in the `pdf` tool. - `verboseDefault`: default verbose level for agents. Values: `"off"`, `"on"`, `"full"`. Default: `"off"`. +- `reasoningDefault`: default reasoning visibility for agents. Values: `"off"`, `"on"`, `"stream"`. Per-agent `agents.list[].reasoningDefault` overrides this default. Configured reasoning defaults are only applied for owners, authorized senders, or operator-admin gateway contexts when no per-message or session reasoning override is set. - `elevatedDefault`: default elevated-output level for agents. Values: `"off"`, `"on"`, `"ask"`, `"full"`. Default: `"on"`. - `model.primary`: format `provider/model` (e.g. `openai/gpt-5.5` for API-key access or `openai-codex/gpt-5.5` for Codex OAuth). If you omit the provider, OpenClaw tries an alias first, then a unique configured-provider match for that exact model id, and only then falls back to the configured default provider (deprecated compatibility behavior, so prefer explicit `provider/model`). If that provider no longer exposes the configured default model, OpenClaw falls back to the first configured provider/model instead of surfacing a stale removed-provider default. - `models`: the configured model catalog and allowlist for `/model`. Each entry can include `alias` (shortcut) and `params` (provider-specific, for example `temperature`, `maxTokens`, `cacheRetention`, `context1m`, `responsesServerCompaction`, `responsesCompactThreshold`, `chat_template_kwargs`, `extra_body`/`extraBody`). @@ -980,7 +982,7 @@ for provider examples and precedence. - `tts`: optional per-agent text-to-speech overrides. The block deep-merges over `messages.tts`, so keep shared provider credentials and fallback policy in `messages.tts` and set only persona-specific values such as provider, voice, model, style, or auto mode here. - `skills`: optional per-agent skill allowlist. If omitted, the agent inherits `agents.defaults.skills` when set; an explicit list replaces defaults instead of merging, and `[]` means no skills. - `thinkingDefault`: optional per-agent default thinking level (`off | minimal | low | medium | high | xhigh | adaptive | max`). Overrides `agents.defaults.thinkingDefault` for this agent when no per-message or session override is set. The selected provider/model profile controls which values are valid; for Google Gemini, `adaptive` keeps provider-owned dynamic thinking (`thinkingLevel` omitted on Gemini 3/3.1, `thinkingBudget: -1` on Gemini 2.5). -- `reasoningDefault`: optional per-agent default reasoning visibility (`on | off | stream`). Applies when no per-message or session reasoning override is set. +- `reasoningDefault`: optional per-agent default reasoning visibility (`on | off | stream`). Overrides `agents.defaults.reasoningDefault` for this agent when no per-message or session reasoning override is set. - `fastModeDefault`: optional per-agent default for fast mode (`true | false`). Applies when no per-message or session fast-mode override is set. - `agentRuntime`: optional per-agent low-level runtime policy override. Use `{ id: "codex" }` to make one agent Codex-only while other agents keep the default PI fallback in `auto` mode. - `runtime`: optional per-agent runtime descriptor. Use `type: "acp"` with `runtime.acp` defaults (`agent`, `backend`, `mode`, `cwd`) when the agent should default to ACP harness sessions. diff --git a/docs/gateway/configuration-examples.md b/docs/gateway/configuration-examples.md index 94470e66e73..3346c230975 100644 --- a/docs/gateway/configuration-examples.md +++ b/docs/gateway/configuration-examples.md @@ -249,6 +249,7 @@ Save to `~/.openclaw/openclaw.json` and you can DM the bot from that number. skills: ["github", "weather"], // inherited by agents that omit list[].skills thinkingDefault: "low", verboseDefault: "off", + reasoningDefault: "off", elevatedDefault: "on", blockStreamingDefault: "off", blockStreamingBreak: "text_end", diff --git a/src/auto-reply/reply/directive-handling.levels.test.ts b/src/auto-reply/reply/directive-handling.levels.test.ts index be786cfcdae..f4b81c7e1b8 100644 --- a/src/auto-reply/reply/directive-handling.levels.test.ts +++ b/src/auto-reply/reply/directive-handling.levels.test.ts @@ -94,6 +94,20 @@ describe("resolveCurrentDirectiveLevels", () => { expect(result.currentReasoningLevel).toBe("stream"); }); + it("falls back to agentCfg reasoningDefault when agent entry is absent", async () => { + const resolveDefaultThinkingLevel = vi.fn().mockResolvedValue("off"); + + const result = await resolveCurrentDirectiveLevels({ + sessionEntry: {}, + agentCfg: { + reasoningDefault: "stream", + }, + resolveDefaultThinkingLevel, + }); + + expect(result.currentReasoningLevel).toBe("stream"); + }); + it("applies agent reasoningDefault even when thinking is active", async () => { const resolveDefaultThinkingLevel = vi.fn().mockResolvedValue("high"); @@ -136,4 +150,18 @@ describe("resolveCurrentDirectiveLevels", () => { // Agent explicitly setting "off" should be respected, not overridden by model default expect(result.currentReasoningLevel).toBe("off"); }); + + it("respects agentCfg reasoningDefault: off as explicit override", async () => { + const resolveDefaultThinkingLevel = vi.fn().mockResolvedValue("off"); + + const result = await resolveCurrentDirectiveLevels({ + sessionEntry: {}, + agentCfg: { + reasoningDefault: "off", + }, + resolveDefaultThinkingLevel, + }); + + expect(result.currentReasoningLevel).toBe("off"); + }); }); diff --git a/src/auto-reply/reply/directive-handling.levels.ts b/src/auto-reply/reply/directive-handling.levels.ts index df9b252b6d6..0eb6c1ad810 100644 --- a/src/auto-reply/reply/directive-handling.levels.ts +++ b/src/auto-reply/reply/directive-handling.levels.ts @@ -15,6 +15,7 @@ export async function resolveCurrentDirectiveLevels(params: { agentCfg?: { thinkingDefault?: unknown; verboseDefault?: unknown; + reasoningDefault?: unknown; elevatedDefault?: unknown; }; resolveDefaultThinkingLevel: () => Promise; @@ -42,6 +43,7 @@ export async function resolveCurrentDirectiveLevels(params: { const currentReasoningLevel = (params.sessionEntry?.reasoningLevel as ReasoningLevel | undefined) ?? (params.agentEntry?.reasoningDefault as ReasoningLevel | undefined) ?? + (params.agentCfg?.reasoningDefault as ReasoningLevel | undefined) ?? "off"; const currentElevatedLevel = (params.sessionEntry?.elevatedLevel as ElevatedLevel | undefined) ?? diff --git a/src/auto-reply/reply/get-reply-directives.target-session.test.ts b/src/auto-reply/reply/get-reply-directives.target-session.test.ts index 506fbf40a9d..dd5ab485d97 100644 --- a/src/auto-reply/reply/get-reply-directives.target-session.test.ts +++ b/src/auto-reply/reply/get-reply-directives.target-session.test.ts @@ -34,6 +34,33 @@ function makeTypingController() { function parseInlineDirectivesForTest(body: string) { const normalized = body.trim(); + if (normalized === "/reasoning stream") { + return { + cleaned: "", + hasThinkDirective: false, + hasVerboseDirective: false, + hasTraceDirective: false, + traceLevel: undefined, + rawTraceLevel: undefined, + hasFastDirective: false, + hasReasoningDirective: true, + reasoningLevel: "stream", + rawReasoningLevel: "stream", + hasElevatedDirective: false, + hasExecDirective: false, + hasModelDirective: false, + hasQueueDirective: false, + hasStatusDirective: false, + queueReset: false, + thinkLevel: undefined, + verboseLevel: undefined, + fastMode: undefined, + elevatedLevel: undefined, + rawElevatedLevel: undefined, + rawModelDirective: undefined, + execSecurity: undefined, + }; + } if (normalized === "/trace on") { return { cleaned: "", @@ -89,7 +116,11 @@ function parseInlineDirectivesForTest(body: string) { async function resolveHelloWithModelDefaults(params: { defaultThinking: "off" | "low"; defaultReasoning: "on"; + body?: string; sessionEntry?: SessionEntry; + agentCfg?: { reasoningDefault?: "off" | "on" | "stream" }; + commandAuthorized?: boolean; + ctx?: Parameters[0]; }) { const resolveDefaultThinkingLevel = vi.fn(async () => params.defaultThinking); const resolveDefaultReasoningLevel = vi.fn(async () => params.defaultReasoning); @@ -105,19 +136,20 @@ async function resolveHelloWithModelDefaults(params: { const result = await resolveReplyDirectives({ ctx: buildTestCtx({ - Body: "hello", - CommandBody: "hello", + Body: params.body ?? "hello", + CommandBody: params.body ?? "hello", + ...params.ctx, }), cfg: {}, agentId: "main", agentDir: "/tmp/main-agent", workspaceDir: "/tmp", - agentCfg: {}, + agentCfg: params.agentCfg ?? {}, sessionCtx: { - Body: "hello", - BodyStripped: "hello", - BodyForAgent: "hello", - CommandBody: "hello", + Body: params.body ?? "hello", + BodyStripped: params.body ?? "hello", + BodyForAgent: params.body ?? "hello", + CommandBody: params.body ?? "hello", Provider: "whatsapp", } as TemplateContext, sessionEntry: params.sessionEntry ?? makeSessionEntry(), @@ -129,7 +161,7 @@ async function resolveHelloWithModelDefaults(params: { isGroup: false, triggerBodyNormalized: "hello", resetTriggered: false, - commandAuthorized: false, + commandAuthorized: params.commandAuthorized ?? false, defaultProvider: "openai", defaultModel: "gpt-4o-mini", aliasIndex: { byAlias: new Map(), byKey: new Map() }, @@ -169,13 +201,13 @@ vi.mock("../commands-text-routing.js", () => ({ })); vi.mock("./commands-context.js", () => ({ - buildCommandContext: vi.fn(() => ({ + buildCommandContext: vi.fn((params: { commandAuthorized?: boolean }) => ({ surface: "whatsapp", channel: "whatsapp", channelId: "whatsapp", ownerList: [], senderIsOwner: false, - isAuthorizedSender: false, + isAuthorizedSender: params.commandAuthorized === true, senderId: undefined, abortKey: "abort-key", rawBodyNormalized: "hello", @@ -304,7 +336,7 @@ describe("resolveReplyDirectives", () => { isGroup: false, triggerBodyNormalized: "hello", resetTriggered: false, - commandAuthorized: false, + commandAuthorized: true, defaultProvider: "openai", defaultModel: "gpt-4o-mini", aliasIndex: { byAlias: new Map(), byKey: new Map() }, @@ -465,6 +497,122 @@ describe("resolveReplyDirectives", () => { expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); }); + it("does not re-enable model reasoning when agentCfg reasoningDefault is explicitly off", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + defaultThinking: "off", + defaultReasoning: "on", + agentCfg: { reasoningDefault: "off" }, + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedThinkLevel: "off", + resolvedReasoningLevel: "off", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + + it("does not expose configured reasoning defaults to untrusted senders", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + defaultThinking: "off", + defaultReasoning: "on", + agentCfg: { reasoningDefault: "stream" }, + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedReasoningLevel: "off", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + + it("ignores inline reasoning directives from untrusted senders", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + body: "/reasoning stream", + defaultThinking: "off", + defaultReasoning: "on", + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedReasoningLevel: "off", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + + it("does not expose session reasoning state to untrusted senders", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + defaultThinking: "off", + defaultReasoning: "on", + sessionEntry: makeSessionEntry({ reasoningLevel: "stream" }), + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedReasoningLevel: "off", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + + it("allows session reasoning state for authorized senders", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + defaultThinking: "off", + defaultReasoning: "on", + sessionEntry: makeSessionEntry({ reasoningLevel: "stream" }), + commandAuthorized: true, + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedReasoningLevel: "stream", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + + it("allows configured reasoning defaults for operator gateway clients", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + defaultThinking: "off", + defaultReasoning: "on", + agentCfg: { reasoningDefault: "stream" }, + ctx: { GatewayClientScopes: ["operator.admin"] }, + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedReasoningLevel: "stream", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + + it("allows configured reasoning defaults for authorized senders", async () => { + const { result, resolveDefaultReasoningLevel } = await resolveHelloWithModelDefaults({ + defaultThinking: "off", + defaultReasoning: "on", + agentCfg: { reasoningDefault: "stream" }, + commandAuthorized: true, + }); + + expect(result).toEqual({ + kind: "continue", + result: expect.objectContaining({ + resolvedReasoningLevel: "stream", + }), + }); + expect(resolveDefaultReasoningLevel).not.toHaveBeenCalled(); + }); + it("keeps consumed text reset triggers empty after directive cleanup", async () => { const sessionCtx = { Body: "", diff --git a/src/auto-reply/reply/get-reply-directives.ts b/src/auto-reply/reply/get-reply-directives.ts index f8f3e547244..e1d7a783b66 100644 --- a/src/auto-reply/reply/get-reply-directives.ts +++ b/src/auto-reply/reply/get-reply-directives.ts @@ -321,6 +321,8 @@ export async function resolveReplyDirectives(params: { } // Use command.isAuthorizedSender (resolved authorization) instead of raw commandAuthorized // to ensure inline directives work when commands.allowFrom grants access (e.g., LINE). + const unauthorizedReasoningDirectiveAttempt = + !command.isAuthorizedSender && parsedDirectives.hasReasoningDirective; let directives = command.isAuthorizedSender ? parsedDirectives : { @@ -329,6 +331,8 @@ export async function resolveReplyDirectives(params: { hasVerboseDirective: false, hasFastDirective: false, hasReasoningDirective: false, + reasoningLevel: undefined, + rawReasoningLevel: undefined, hasStatusDirective: false, hasModelDirective: false, hasQueueDirective: false, @@ -428,11 +432,31 @@ export async function resolveReplyDirectives(params: { directives.verboseLevel ?? (targetSessionEntry?.verboseLevel as VerboseLevel | undefined) ?? (agentCfg?.verboseDefault as VerboseLevel | undefined); - let resolvedReasoningLevel: ReasoningLevel = - directives.reasoningLevel ?? - (targetSessionEntry?.reasoningLevel as ReasoningLevel | undefined) ?? + const configuredReasoningDefault = (agentEntry?.reasoningDefault as ReasoningLevel | undefined) ?? - "off"; + (agentCfg?.reasoningDefault as ReasoningLevel | undefined); + const canUseReasoningState = + command.isAuthorizedSender || + command.senderIsOwner || + (Array.isArray(ctx.GatewayClientScopes) && ctx.GatewayClientScopes.includes("operator.admin")); + const rawSessionReasoningLevel = targetSessionEntry?.reasoningLevel as + | ReasoningLevel + | null + | undefined; + const sessionReasoningLevel = canUseReasoningState ? rawSessionReasoningLevel : undefined; + const blockedSessionReasoningLevel = + rawSessionReasoningLevel !== undefined && + rawSessionReasoningLevel !== null && + !canUseReasoningState; + const reasoningUsesConfiguredDefault = + directives.reasoningLevel === undefined && + sessionReasoningLevel == null && + configuredReasoningDefault != null; + let resolvedReasoningLevel: ReasoningLevel = + directives.reasoningLevel ?? sessionReasoningLevel ?? configuredReasoningDefault ?? "off"; + if (reasoningUsesConfiguredDefault && !canUseReasoningState) { + resolvedReasoningLevel = "off"; + } const resolvedElevatedLevel = elevatedAllowed ? (directives.elevatedLevel ?? (targetSessionEntry?.elevatedLevel as ElevatedLevel | undefined) ?? @@ -511,11 +535,13 @@ export async function resolveReplyDirectives(params: { // (e.g. OpenRouter with reasoning: true). Skip model default when thinking is active // or when thinking was explicitly disabled. const hasAgentReasoningDefault = - agentEntry?.reasoningDefault !== undefined && agentEntry?.reasoningDefault !== null; + (agentEntry?.reasoningDefault !== undefined && agentEntry?.reasoningDefault !== null) || + (agentCfg?.reasoningDefault !== undefined && agentCfg?.reasoningDefault !== null); const reasoningExplicitlySet = directives.reasoningLevel !== undefined || - (targetSessionEntry?.reasoningLevel !== undefined && - targetSessionEntry?.reasoningLevel !== null) || + unauthorizedReasoningDirectiveAttempt || + blockedSessionReasoningLevel || + (sessionReasoningLevel !== undefined && sessionReasoningLevel !== null) || hasAgentReasoningDefault; const thinkingActive = resolvedThinkLevelWithDefault !== "off"; if ( diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 41ed87587a2..18704ef562d 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -5200,6 +5200,22 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, ], }, + reasoningDefault: { + anyOf: [ + { + type: "string", + const: "off", + }, + { + type: "string", + const: "on", + }, + { + type: "string", + const: "stream", + }, + ], + }, elevatedDefault: { anyOf: [ { diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 9ec543104b0..211d82666f0 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -301,6 +301,8 @@ export type AgentDefaultsConfig = { thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "adaptive" | "max"; /** Default verbose level when no /verbose directive is present. */ verboseDefault?: "off" | "on" | "full"; + /** Default reasoning level when no /reasoning directive is present. */ + reasoningDefault?: "off" | "on" | "stream"; /** Default elevated level when no /elevated directive is present. */ elevatedDefault?: "off" | "on" | "ask" | "full"; /** Default block streaming level when no override is present. */ diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index 0b99b6f8db0..70c55de4b8b 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -225,6 +225,9 @@ export const AgentDefaultsSchema = z ]) .optional(), verboseDefault: z.union([z.literal("off"), z.literal("on"), z.literal("full")]).optional(), + reasoningDefault: z + .union([z.literal("off"), z.literal("on"), z.literal("stream")]) + .optional(), elevatedDefault: z .union([z.literal("off"), z.literal("on"), z.literal("ask"), z.literal("full")]) .optional(), diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 9f813ee3751..9f3fa08d871 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -175,7 +175,7 @@ export async function start(state: CronServiceState) { // this path runs before the scheduler begins servicing regular timer ticks. // Avoid an extra reload/write cycle on startup. await ensureLoaded(state, { skipRecompute: true }); - const changed = recomputeNextRuns(state); + const changed = recomputeNextRunsForMaintenance(state, { recomputeExpired: true }); if (changed) { await persist(state); } diff --git a/src/gateway/server-methods/agent.create-event.test.ts b/src/gateway/server-methods/agent.create-event.test.ts index 95f5a84c742..38f4c2fff60 100644 --- a/src/gateway/server-methods/agent.create-event.test.ts +++ b/src/gateway/server-methods/agent.create-event.test.ts @@ -98,16 +98,19 @@ describe("agent handler session create events", () => { undefined, { runId: "idem-agent-create-event" }, ); - await vi.waitFor(() => - expect(broadcastToConnIds).toHaveBeenCalledWith( - "sessions.changed", - expect.objectContaining({ - sessionKey: "agent:main:subagent:create-test", - reason: "create", - }), - new Set(["conn-1"]), - { dropIfSlow: true }, - ), + await vi.waitFor( + () => { + expect(broadcastToConnIds).toHaveBeenCalledWith( + "sessions.changed", + expect.objectContaining({ + sessionKey: "agent:main:subagent:create-test", + reason: "create", + }), + new Set(["conn-1"]), + { dropIfSlow: true }, + ); + }, + { timeout: 2_000, interval: 5 }, ); }); }); diff --git a/src/gateway/server.agent.gateway-server-agent-a.test.ts b/src/gateway/server.agent.gateway-server-agent-a.test.ts index 696f18b3683..fc96a137813 100644 --- a/src/gateway/server.agent.gateway-server-agent-a.test.ts +++ b/src/gateway/server.agent.gateway-server-agent-a.test.ts @@ -43,6 +43,8 @@ const BASE_IMAGE_PNG = type AgentCommandCall = Record; +const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + function expectChannels(call: Record, channel: string) { expect(call.channel).toBe(channel); expect(call.messageChannel).toBe(channel); @@ -61,18 +63,22 @@ async function setTestSessionStore(params: { }); } -async function waitForAgentCall(runId: string): Promise { - await vi.waitFor(() => - expect( - (vi.mocked(agentCommand).mock.calls as unknown as Array<[AgentCommandCall]>).some( - ([call]) => call.runId === runId, - ), - ).toBe(true), +async function latestAgentCall(runId?: string): Promise { + for (let elapsed = 0; elapsed <= 2_000; elapsed += 5) { + const calls = vi.mocked(agentCommand).mock.calls as unknown as Array<[unknown]>; + const call = runId + ? calls.map((entry) => entry[0] as AgentCommandCall).find((entry) => entry.runId === runId) + : (calls.at(-1)?.[0] as AgentCommandCall | undefined); + if (call) { + return call; + } + await sleep(5); + } + throw new Error( + runId + ? `expected agentCommand to be called for ${runId}` + : "expected agentCommand to be called", ); - const calls = vi.mocked(agentCommand).mock.calls as unknown as Array<[unknown]>; - return calls.find( - ([call]) => (call as AgentCommandCall).runId === runId, - )?.[0] as AgentCommandCall; } async function runMainAgentDeliveryWithSession(params: { @@ -98,7 +104,8 @@ async function runMainAgentDeliveryWithSession(params: { ...params.request, }); expect(res.ok).toBe(true); - return await waitForAgentCall(String(params.request.idempotencyKey)); + const runId = params.request.idempotencyKey; + return await latestAgentCall(typeof runId === "string" ? runId : undefined); } finally { testState.allowFrom = undefined; } @@ -202,7 +209,7 @@ describe("gateway server agent", () => { }); expect(res.ok).toBe(true); - const call = await waitForAgentCall("idem-agent-last-stale"); + const call = await latestAgentCall("idem-agent-last-stale"); expectChannels(call, "whatsapp"); expect(call.to).toBe("+1555"); expect(call.deliveryTargetMode).toBe("implicit"); @@ -226,7 +233,7 @@ describe("gateway server agent", () => { }); expect(res.ok).toBe(true); - const call = await waitForAgentCall("idem-agent-subkey"); + const call = await latestAgentCall("idem-agent-subkey"); expect(call.sessionKey).toBe("agent:main:subagent:abc"); expect(call.sessionId).toBe("sess-sub"); expectChannels(call, "webchat"); @@ -252,7 +259,7 @@ describe("gateway server agent", () => { idempotencyKey: "idem-agent-subdepth", }); expect(res.ok).toBe(true); - await waitForAgentCall("idem-agent-subdepth"); + await latestAgentCall("idem-agent-subdepth"); const raw = await fs.readFile(sharedSessionStorePath, "utf-8"); const persisted = JSON.parse(raw) as Record< @@ -281,7 +288,7 @@ describe("gateway server agent", () => { }); expect(res.ok).toBe(true); - const call = await waitForAgentCall("idem-agent-id"); + const call = await latestAgentCall("idem-agent-id"); expect(call.sessionKey).toBe("agent:ops:main"); expect(call.sessionId).toBe("sess-ops"); }); @@ -428,7 +435,7 @@ describe("gateway server agent", () => { }); expect(res.ok).toBe(true); - const call = await waitForAgentCall("idem-agent-attachments"); + const call = await latestAgentCall("idem-agent-attachments"); expect(call.sessionKey).toBe("agent:main:main"); expectChannels(call, "webchat"); expect(typeof call.message).toBe("string"); @@ -525,7 +532,7 @@ describe("gateway server agent", () => { }); expect(res.ok).toBe(true); - const call = await waitForAgentCall(tc.idempotencyKey); + const call = await latestAgentCall(tc.idempotencyKey); expectChannels(call, tc.lastChannel); expect(call.to).toBe(tc.lastTo); expect(call.deliver).toBe(true); diff --git a/src/plugin-sdk/command-status.runtime.test.ts b/src/plugin-sdk/command-status.runtime.test.ts new file mode 100644 index 00000000000..d8d17968da1 --- /dev/null +++ b/src/plugin-sdk/command-status.runtime.test.ts @@ -0,0 +1,252 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const buildStatusReply = vi.fn(async (params: unknown) => params); +const loadSessionEntry = vi.fn(); +const resolveSessionAgentId = vi.fn(); +const listAgentEntries = vi.fn(); +const resolveDefaultModelForAgent = vi.fn(); +const resolveDefaultModel = vi.fn(); +const createModelSelectionState = vi.fn(); +const resolveCurrentDirectiveLevels = vi.fn(); + +vi.mock("../auto-reply/reply/commands-status.js", () => ({ + buildStatusReply, +})); + +vi.mock("../gateway/session-utils.js", () => ({ + loadSessionEntry, +})); + +vi.mock("../agents/agent-scope.js", () => ({ + listAgentEntries, + resolveSessionAgentId, +})); + +vi.mock("../agents/model-selection.js", () => ({ + resolveDefaultModelForAgent, +})); + +vi.mock("../auto-reply/reply/directive-handling.defaults.js", () => ({ + resolveDefaultModel, +})); + +vi.mock("../auto-reply/reply/model-selection.js", () => ({ + createModelSelectionState, +})); + +vi.mock("../auto-reply/reply/directive-handling.levels.js", () => ({ + resolveCurrentDirectiveLevels, +})); + +const { resolveDirectStatusReplyForSession } = await import("./command-status.runtime.js"); + +describe("resolveDirectStatusReplyForSession", () => { + beforeEach(() => { + buildStatusReply.mockReset(); + loadSessionEntry.mockReset(); + resolveSessionAgentId.mockReset(); + listAgentEntries.mockReset(); + resolveDefaultModelForAgent.mockReset(); + resolveDefaultModel.mockReset(); + createModelSelectionState.mockReset(); + resolveCurrentDirectiveLevels.mockReset(); + + buildStatusReply.mockImplementation(async (params: unknown) => params); + loadSessionEntry.mockReturnValue({ + cfg: { + agents: { + defaults: { + reasoningDefault: "off", + }, + }, + }, + canonicalKey: "main", + entry: { + sessionId: "sess-main", + }, + store: {}, + storePath: "/tmp/sessions.json", + }); + resolveSessionAgentId.mockReturnValue("main"); + listAgentEntries.mockReturnValue([]); + resolveDefaultModelForAgent.mockReturnValue({ provider: "openai", model: "gpt-5.4" }); + resolveDefaultModel.mockReturnValue({ defaultProvider: "openai", defaultModel: "gpt-5.4" }); + createModelSelectionState.mockResolvedValue({ + resolveDefaultThinkingLevel: vi.fn(async () => "off"), + resolveDefaultReasoningLevel: vi.fn(async () => "on"), + }); + resolveCurrentDirectiveLevels.mockResolvedValue({ + currentThinkLevel: "off", + currentFastMode: false, + currentVerboseLevel: "off", + currentReasoningLevel: "off", + currentElevatedLevel: "off", + }); + }); + + it("treats agentCfg reasoningDefault as explicit for direct /status", async () => { + const result = await resolveDirectStatusReplyForSession({ + cfg: {}, + sessionKey: "main", + channel: "cli", + senderIsOwner: true, + isAuthorizedSender: true, + isGroup: false, + defaultGroupActivation: () => "always", + }); + + expect(buildStatusReply).toHaveBeenCalledOnce(); + expect(buildStatusReply.mock.calls[0]?.[0]).toMatchObject({ + resolvedReasoningLevel: "off", + }); + expect(result).toMatchObject({ + resolvedReasoningLevel: "off", + }); + }); + + it("allows configured reasoning defaults for authorized direct /status senders", async () => { + loadSessionEntry.mockReturnValue({ + cfg: { + agents: { + defaults: { + reasoningDefault: "stream", + }, + }, + }, + canonicalKey: "main", + entry: { + sessionId: "sess-main", + }, + store: {}, + storePath: "/tmp/sessions.json", + }); + resolveCurrentDirectiveLevels.mockResolvedValueOnce({ + currentThinkLevel: "off", + currentFastMode: false, + currentVerboseLevel: "off", + currentReasoningLevel: "stream", + currentElevatedLevel: "off", + }); + + const result = await resolveDirectStatusReplyForSession({ + cfg: {}, + sessionKey: "main", + channel: "cli", + senderIsOwner: false, + isAuthorizedSender: true, + isGroup: false, + defaultGroupActivation: () => "always", + }); + + expect(result).toMatchObject({ + resolvedReasoningLevel: "stream", + }); + }); + + it("hides configured reasoning defaults from unauthorized direct /status senders", async () => { + loadSessionEntry.mockReturnValue({ + cfg: { + agents: { + defaults: { + reasoningDefault: "stream", + }, + }, + }, + canonicalKey: "main", + entry: { + sessionId: "sess-main", + }, + store: {}, + storePath: "/tmp/sessions.json", + }); + resolveCurrentDirectiveLevels.mockResolvedValueOnce({ + currentThinkLevel: "off", + currentFastMode: false, + currentVerboseLevel: "off", + currentReasoningLevel: "stream", + currentElevatedLevel: "off", + }); + + const result = await resolveDirectStatusReplyForSession({ + cfg: {}, + sessionKey: "main", + channel: "cli", + senderIsOwner: false, + isAuthorizedSender: false, + isGroup: false, + defaultGroupActivation: () => "always", + }); + + expect(result).toMatchObject({ + resolvedReasoningLevel: "off", + }); + }); + + it("hides session reasoning state from unauthorized direct /status senders", async () => { + loadSessionEntry.mockReturnValue({ + cfg: {}, + canonicalKey: "main", + entry: { + sessionId: "sess-main", + reasoningLevel: "stream", + }, + store: {}, + storePath: "/tmp/sessions.json", + }); + resolveCurrentDirectiveLevels.mockResolvedValueOnce({ + currentThinkLevel: "off", + currentFastMode: false, + currentVerboseLevel: "off", + currentReasoningLevel: "stream", + currentElevatedLevel: "off", + }); + + const result = await resolveDirectStatusReplyForSession({ + cfg: {}, + sessionKey: "main", + channel: "cli", + senderIsOwner: false, + isAuthorizedSender: false, + isGroup: false, + defaultGroupActivation: () => "always", + }); + + expect(result).toMatchObject({ + resolvedReasoningLevel: "off", + }); + }); + + it("allows session reasoning state for authorized direct /status senders", async () => { + loadSessionEntry.mockReturnValue({ + cfg: {}, + canonicalKey: "main", + entry: { + sessionId: "sess-main", + reasoningLevel: "stream", + }, + store: {}, + storePath: "/tmp/sessions.json", + }); + resolveCurrentDirectiveLevels.mockResolvedValueOnce({ + currentThinkLevel: "off", + currentFastMode: false, + currentVerboseLevel: "off", + currentReasoningLevel: "stream", + currentElevatedLevel: "off", + }); + + const result = await resolveDirectStatusReplyForSession({ + cfg: {}, + sessionKey: "main", + channel: "cli", + senderIsOwner: false, + isAuthorizedSender: true, + isGroup: false, + defaultGroupActivation: () => "always", + }); + + expect(result).toMatchObject({ + resolvedReasoningLevel: "stream", + }); + }); +}); diff --git a/src/plugin-sdk/command-status.runtime.ts b/src/plugin-sdk/command-status.runtime.ts index de65e91fca1..6005c81dace 100644 --- a/src/plugin-sdk/command-status.runtime.ts +++ b/src/plugin-sdk/command-status.runtime.ts @@ -83,10 +83,15 @@ export async function resolveDirectStatusReplyForSession( }); let resolvedReasoningLevel = currentReasoningLevel; const hasAgentReasoningDefault = - agentEntry?.reasoningDefault !== undefined && agentEntry.reasoningDefault !== null; - const reasoningExplicitlySet = - (statusEntry?.reasoningLevel !== undefined && statusEntry.reasoningLevel !== null) || - hasAgentReasoningDefault; + (agentEntry?.reasoningDefault !== undefined && agentEntry.reasoningDefault !== null) || + (agentCfg?.reasoningDefault !== undefined && agentCfg.reasoningDefault !== null); + const sessionReasoningExplicitlySet = + statusEntry?.reasoningLevel !== undefined && statusEntry.reasoningLevel !== null; + const canUseReasoningState = params.senderIsOwner || params.isAuthorizedSender; + if (!canUseReasoningState && (sessionReasoningExplicitlySet || hasAgentReasoningDefault)) { + resolvedReasoningLevel = "off"; + } + const reasoningExplicitlySet = sessionReasoningExplicitlySet || hasAgentReasoningDefault; if (!reasoningExplicitlySet && resolvedReasoningLevel === "off" && currentThinkLevel === "off") { resolvedReasoningLevel = await modelState.resolveDefaultReasoningLevel(); } diff --git a/src/status/status-message.ts b/src/status/status-message.ts index 157558fd2b4..064acb44d2f 100644 --- a/src/status/status-message.ts +++ b/src/status/status-message.ts @@ -725,7 +725,11 @@ export function buildStatusMessage(args: StatusArgs): string { const verboseLevel = args.resolvedVerbose ?? args.sessionEntry?.verboseLevel ?? args.agent?.verboseDefault ?? "off"; const fastMode = args.resolvedFast ?? args.sessionEntry?.fastMode ?? false; - const reasoningLevel = args.resolvedReasoning ?? args.sessionEntry?.reasoningLevel ?? "off"; + const reasoningLevel = + args.resolvedReasoning ?? + args.sessionEntry?.reasoningLevel ?? + args.agent?.reasoningDefault ?? + "off"; const elevatedLevel = args.resolvedElevated ?? args.sessionEntry?.elevatedLevel ?? diff --git a/src/status/status-text.ts b/src/status/status-text.ts index 1c385a97d7b..2e81c03c659 100644 --- a/src/status/status-text.ts +++ b/src/status/status-text.ts @@ -306,6 +306,7 @@ export async function buildStatusText(params: BuildStatusTextParams): Promise 0 ? { contextTokens } : {}), thinkingDefault: explicitThinkingDefault, verboseDefault: agentDefaults.verboseDefault, + reasoningDefault: agentConfig?.reasoningDefault ?? agentDefaults.reasoningDefault, elevatedDefault: agentDefaults.elevatedDefault, }, agentId: statusAgentId, diff --git a/test/scripts/docker-build-helper.test.ts b/test/scripts/docker-build-helper.test.ts index 014a416ec25..998740e1003 100644 --- a/test/scripts/docker-build-helper.test.ts +++ b/test/scripts/docker-build-helper.test.ts @@ -146,8 +146,8 @@ describe("docker build helper", () => { const pluginsAssertions = readFileSync(PLUGINS_DOCKER_ASSERTIONS_PATH, "utf8"); const pluginUpdateScenario = readFileSync(PLUGIN_UPDATE_SCENARIO_PATH, "utf8"); const pluginUpdateProbe = readFileSync(PLUGIN_UPDATE_PROBE_PATH, "utf8"); - const packageCompat = readFileSync(PACKAGE_COMPAT_PATH, "utf8"); const updateChannelAssertions = readFileSync(UPDATE_CHANNEL_SWITCH_ASSERTIONS_PATH, "utf8"); + const packageCompat = readFileSync(PACKAGE_COMPAT_PATH, "utf8"); const scripts = [ doctorScenario, updateChannel,