From 2701e75f40aa9a464ded3ba4b7042eff7d7c9b7e Mon Sep 17 00:00:00 2001
From: moktamd
Date: Fri, 27 Mar 2026 13:32:11 +0000
Subject: [PATCH] fix: disable thinking for MiniMax anthropic-messages streaming

MiniMax M2.7 returns reasoning_content in OpenAI-style delta chunks
({delta: {content: "", reasoning_content: "..."}}) when thinking is
active, rather than native Anthropic thinking block SSE events. Pi-ai's
Anthropic provider does not handle this format, causing the model's
internal reasoning to appear as visible chat output.

Add createMinimaxThinkingDisabledWrapper that injects thinking:
{type: "disabled"} into the outgoing payload for any MiniMax
anthropic-messages request where thinking is not already explicitly
configured, preventing the provider from generating reasoning_content
deltas during streaming.

Fixes #55739
---
 .../pi-embedded-runner-extraparams.test.ts    |  24 ++++
 src/agents/pi-embedded-runner/extra-params.ts |   6 +
 .../minimax-stream-wrappers.test.ts           | 124 ++++++++++++++++++
 .../minimax-stream-wrappers.ts                |  40 ++++++
 4 files changed, 194 insertions(+)
 create mode 100644 src/agents/pi-embedded-runner/minimax-stream-wrappers.test.ts

diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts
index 7ec674ed90d..55b99f67fd5 100644
--- a/src/agents/pi-embedded-runner-extraparams.test.ts
+++ b/src/agents/pi-embedded-runner-extraparams.test.ts
@@ -629,6 +629,30 @@ describe("applyExtraParamsToAgent", () => {
     expect(payloads[0]?.reasoning).toEqual({ effort: "low" });
   });
 
+  it("disables thinking for MiniMax anthropic-messages payloads", () => {
+    const payloads: Record<string, unknown>[] = [];
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload: Record<string, unknown> = {};
+      options?.onPayload?.(payload, _model);
+      payloads.push(payload);
+      return {} as ReturnType<StreamFn>;
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(agent, undefined, "minimax", "MiniMax-M2.7");
+
+    const model = {
+      api: "anthropic-messages",
+      provider: "minimax",
+      id: "MiniMax-M2.7",
+    } as Model<"anthropic-messages">;
+    const context: Context = { messages: [] };
+    void agent.streamFn?.(model, context, {});
+
+    expect(payloads).toHaveLength(1);
+    expect(payloads[0]?.thinking).toEqual({ type: "disabled" });
+  });
+
   it("removes legacy reasoning_effort and keeps reasoning unset when thinkingLevel is off", () => {
     const payloads: Record<string, unknown>[] = [];
     const baseStreamFn: StreamFn = (_model, _context, options) => {
diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts
index f44ed001081..12b4e83b5a3 100644
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@@ -12,6 +12,7 @@ import type { ProviderRuntimeModel } from "../../plugins/types.js";
 import { resolveCacheRetention } from "./anthropic-cache-retention.js";
 import { createGoogleThinkingPayloadWrapper } from "./google-stream-wrappers.js";
 import { log } from "./logger.js";
+import { createMinimaxThinkingDisabledWrapper } from "./minimax-stream-wrappers.js";
 import {
   createSiliconFlowThinkingWrapper,
   shouldApplySiliconFlowThinkingOffCompat,
@@ -374,6 +375,11 @@ function applyPostPluginStreamWrappers(
     );
   }
 
+  // MiniMax's Anthropic-compatible stream can leak reasoning_content into the
+  // visible reply path because it does not emit native Anthropic thinking
+  // blocks. Disable thinking unless an earlier wrapper already set it.
+  ctx.agent.streamFn = createMinimaxThinkingDisabledWrapper(ctx.agent.streamFn);
+
   const rawParallelToolCalls = resolveAliasedParamValue(
     [ctx.resolvedExtraParams, ctx.override],
     "parallel_tool_calls",
diff --git a/src/agents/pi-embedded-runner/minimax-stream-wrappers.test.ts b/src/agents/pi-embedded-runner/minimax-stream-wrappers.test.ts
new file mode 100644
index 00000000000..9b2934a43f0
--- /dev/null
+++ b/src/agents/pi-embedded-runner/minimax-stream-wrappers.test.ts
@@ -0,0 +1,124 @@
+import type { StreamFn } from "@mariozechner/pi-agent-core";
+import type { Context, Model } from "@mariozechner/pi-ai";
+import { describe, expect, it } from "vitest";
+import {
+  createMinimaxFastModeWrapper,
+  createMinimaxThinkingDisabledWrapper,
+} from "./minimax-stream-wrappers.js";
+
+function captureThinkingPayload(params: {
+  provider: string;
+  api: string;
+  modelId: string;
+}): unknown {
+  let capturedThinking: unknown = undefined;
+  const baseStreamFn: StreamFn = (_model, _context, options) => {
+    const payload: Record<string, unknown> = {};
+    options?.onPayload?.(payload, _model);
+    capturedThinking = payload.thinking;
+    return {} as ReturnType<StreamFn>;
+  };
+
+  const wrapped = createMinimaxThinkingDisabledWrapper(baseStreamFn);
+  void wrapped(
+    {
+      api: params.api,
+      provider: params.provider,
+      id: params.modelId,
+    } as Model<"anthropic-messages">,
+    { messages: [] } as Context,
+    {},
+  );
+
+  return capturedThinking;
+}
+
+describe("createMinimaxThinkingDisabledWrapper", () => {
+  it("disables thinking for minimax anthropic-messages provider", () => {
+    expect(
+      captureThinkingPayload({
+        provider: "minimax",
+        api: "anthropic-messages",
+        modelId: "MiniMax-M2.7",
+      }),
+    ).toEqual({ type: "disabled" });
+  });
+
+  it("disables thinking for minimax-portal anthropic-messages provider", () => {
+    expect(
+      captureThinkingPayload({
+        provider: "minimax-portal",
+        api: "anthropic-messages",
+        modelId: "MiniMax-M2.7",
+      }),
+    ).toEqual({ type: "disabled" });
+  });
+
+  it("does not affect non-minimax providers", () => {
+    expect(
+      captureThinkingPayload({
+        provider: "anthropic",
+        api: "anthropic-messages",
+        modelId: "claude-sonnet-4-6",
+      }),
+    ).toBeUndefined();
+  });
+
+  it("does not affect minimax with non-anthropic-messages api", () => {
+    expect(
+      captureThinkingPayload({
+        provider: "minimax",
+        api: "openai-completions",
+        modelId: "MiniMax-M2.7",
+      }),
+    ).toBeUndefined();
+  });
+
+  it("preserves an already-set thinking value", () => {
+    let capturedThinking: unknown = undefined;
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload: Record<string, unknown> = {
+        thinking: { type: "enabled", budget_tokens: 1024 },
+      };
+      options?.onPayload?.(payload, _model);
+      capturedThinking = payload.thinking;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrapped = createMinimaxThinkingDisabledWrapper(baseStreamFn);
+    void wrapped(
+      {
+        api: "anthropic-messages",
+        provider: "minimax",
+        id: "MiniMax-M2.7",
+      } as Model<"anthropic-messages">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    expect(capturedThinking).toEqual({ type: "enabled", budget_tokens: 1024 });
+  });
+});
+
+describe("createMinimaxFastModeWrapper", () => {
+  it("rewrites MiniMax-M2.7 to highspeed variant in fast mode", () => {
+    let capturedId = "";
+    const baseStreamFn: StreamFn = (model) => {
+      capturedId = model.id;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrapped = createMinimaxFastModeWrapper(baseStreamFn, true);
+    void wrapped(
+      {
+        api: "anthropic-messages",
+        provider: "minimax",
+        id: "MiniMax-M2.7",
+      } as Model<"anthropic-messages">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    expect(capturedId).toBe("MiniMax-M2.7-highspeed");
+  });
+});
diff --git a/src/agents/pi-embedded-runner/minimax-stream-wrappers.ts b/src/agents/pi-embedded-runner/minimax-stream-wrappers.ts
index e8f83c809fa..6890be2235c 100644
--- a/src/agents/pi-embedded-runner/minimax-stream-wrappers.ts
+++ b/src/agents/pi-embedded-runner/minimax-stream-wrappers.ts
@@ -12,6 +12,13 @@ function resolveMinimaxFastModelId(modelId: unknown): string | undefined {
   return MINIMAX_FAST_MODEL_IDS.get(modelId.trim());
 }
 
+function isMinimaxAnthropicMessagesModel(model: { api?: unknown; provider?: unknown }): boolean {
+  return (
+    model.api === "anthropic-messages" &&
+    (model.provider === "minimax" || model.provider === "minimax-portal")
+  );
+}
+
 export function createMinimaxFastModeWrapper(
   baseStreamFn: StreamFn | undefined,
   fastMode: boolean,
@@ -34,3 +41,36 @@ export function createMinimaxFastModeWrapper(
     return underlying({ ...model, id: fastModelId }, context, options);
   };
 }
+
+/**
+ * MiniMax's Anthropic-compatible streaming endpoint returns reasoning_content
+ * in OpenAI-style delta chunks ({delta: {content: "", reasoning_content: "..."}})
+ * rather than the native Anthropic thinking block format. Pi-ai's Anthropic
+ * provider cannot process this format and leaks the reasoning text as visible
+ * content. Disable thinking in the outgoing payload so MiniMax does not produce
+ * reasoning_content deltas during streaming.
+ */
+export function createMinimaxThinkingDisabledWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
+  const underlying = baseStreamFn ?? streamSimple;
+  return (model, context, options) => {
+    if (!isMinimaxAnthropicMessagesModel(model)) {
+      return underlying(model, context, options);
+    }
+
+    const originalOnPayload = options?.onPayload;
+    return underlying(model, context, {
+      ...options,
+      onPayload: (payload) => {
+        if (payload && typeof payload === "object") {
+          const payloadObj = payload as Record<string, unknown>;
+          // Only inject if thinking is not already explicitly set.
+          // This preserves any intentional override from other wrappers.
+          if (payloadObj.thinking === undefined) {
+            payloadObj.thinking = { type: "disabled" };
+          }
+        }
+        return originalOnPayload?.(payload, model);
+      },
+    });
+  };
+}