fix: disable thinking for MiniMax anthropic-messages streaming

MiniMax-M2.7 returns reasoning_content in OpenAI-style delta chunks
({delta: {content: "", reasoning_content: "..."}}) when thinking is
active, rather than native Anthropic thinking block SSE events. Pi-ai's
Anthropic provider does not handle this format, causing the model's
internal reasoning to appear as visible chat output.

Add createMinimaxThinkingDisabledWrapper that injects
thinking: {type: "disabled"} into the outgoing payload for any MiniMax
anthropic-messages request where thinking is not already explicitly
configured, preventing the provider from generating reasoning_content
deltas during streaming.

Fixes #55739
This commit is contained in:
moktamd
2026-03-27 13:32:11 +00:00
committed by Peter Steinberger
parent 561bacd06a
commit 2701e75f40
4 changed files with 194 additions and 0 deletions

View File

@@ -629,6 +629,30 @@ describe("applyExtraParamsToAgent", () => {
expect(payloads[0]?.reasoning).toEqual({ effort: "low" });
});
it("disables thinking for MiniMax anthropic-messages payloads", () => {
  const captured: Record<string, unknown>[] = [];
  // Stub stream function: surfaces the outgoing payload via onPayload and
  // records whatever the wrapper chain produced.
  const recordingStreamFn: StreamFn = (_model, _context, options) => {
    const outgoing: Record<string, unknown> = {};
    options?.onPayload?.(outgoing, _model);
    captured.push(outgoing);
    return {} as ReturnType<StreamFn>;
  };
  const model = {
    api: "anthropic-messages",
    provider: "minimax",
    id: "MiniMax-M2.7",
  } as Model<"anthropic-messages">;
  const context: Context = { messages: [] };
  const agent = { streamFn: recordingStreamFn };
  applyExtraParamsToAgent(agent, undefined, "minimax", "MiniMax-M2.7");
  void agent.streamFn?.(model, context, {});
  expect(captured).toHaveLength(1);
  expect(captured[0]?.thinking).toEqual({ type: "disabled" });
});
it("removes legacy reasoning_effort and keeps reasoning unset when thinkingLevel is off", () => {
const payloads: Record<string, unknown>[] = [];
const baseStreamFn: StreamFn = (_model, _context, options) => {

View File

@@ -12,6 +12,7 @@ import type { ProviderRuntimeModel } from "../../plugins/types.js";
import { resolveCacheRetention } from "./anthropic-cache-retention.js";
import { createGoogleThinkingPayloadWrapper } from "./google-stream-wrappers.js";
import { log } from "./logger.js";
import { createMinimaxThinkingDisabledWrapper } from "./minimax-stream-wrappers.js";
import {
createSiliconFlowThinkingWrapper,
shouldApplySiliconFlowThinkingOffCompat,
@@ -374,6 +375,11 @@ function applyPostPluginStreamWrappers(
);
}
// MiniMax's Anthropic-compatible stream can leak reasoning_content into the
// visible reply path because it does not emit native Anthropic thinking
// blocks. Disable thinking unless an earlier wrapper already set it.
ctx.agent.streamFn = createMinimaxThinkingDisabledWrapper(ctx.agent.streamFn);
const rawParallelToolCalls = resolveAliasedParamValue(
[ctx.resolvedExtraParams, ctx.override],
"parallel_tool_calls",

View File

@@ -0,0 +1,124 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { Context, Model } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import {
createMinimaxFastModeWrapper,
createMinimaxThinkingDisabledWrapper,
} from "./minimax-stream-wrappers.js";
/**
 * Runs the thinking-disabled wrapper around a stub stream function and
 * reports the `thinking` value observed on the outgoing payload
 * (`undefined` when the wrapper left the payload untouched).
 */
function captureThinkingPayload({ provider, api, modelId }: {
  provider: string;
  api: string;
  modelId: string;
}): unknown {
  let observed: unknown;
  const stubStreamFn: StreamFn = (_model, _context, options) => {
    const outgoing: Record<string, unknown> = {};
    options?.onPayload?.(outgoing, _model);
    observed = outgoing.thinking;
    return {} as ReturnType<StreamFn>;
  };
  const wrapped = createMinimaxThinkingDisabledWrapper(stubStreamFn);
  void wrapped(
    {
      api,
      provider,
      id: modelId,
    } as Model<"anthropic-messages">,
    { messages: [] } as Context,
    {},
  );
  return observed;
}
describe("createMinimaxThinkingDisabledWrapper", () => {
  it("disables thinking for minimax anthropic-messages provider", () => {
    const thinking = captureThinkingPayload({
      api: "anthropic-messages",
      modelId: "MiniMax-M2.7",
      provider: "minimax",
    });
    expect(thinking).toEqual({ type: "disabled" });
  });
  it("disables thinking for minimax-portal anthropic-messages provider", () => {
    const thinking = captureThinkingPayload({
      api: "anthropic-messages",
      modelId: "MiniMax-M2.7",
      provider: "minimax-portal",
    });
    expect(thinking).toEqual({ type: "disabled" });
  });
  it("does not affect non-minimax providers", () => {
    const thinking = captureThinkingPayload({
      api: "anthropic-messages",
      modelId: "claude-sonnet-4-6",
      provider: "anthropic",
    });
    expect(thinking).toBeUndefined();
  });
  it("does not affect minimax with non-anthropic-messages api", () => {
    const thinking = captureThinkingPayload({
      api: "openai-completions",
      modelId: "MiniMax-M2.7",
      provider: "minimax",
    });
    expect(thinking).toBeUndefined();
  });
  it("preserves an already-set thinking value", () => {
    let observed: unknown;
    // Stub stream function whose payload already carries an explicit
    // thinking configuration before the wrapper's onPayload hook runs.
    const stubStreamFn: StreamFn = (_model, _context, options) => {
      const outgoing: Record<string, unknown> = {
        thinking: { type: "enabled", budget_tokens: 1024 },
      };
      options?.onPayload?.(outgoing, _model);
      observed = outgoing.thinking;
      return {} as ReturnType<StreamFn>;
    };
    const wrapped = createMinimaxThinkingDisabledWrapper(stubStreamFn);
    void wrapped(
      {
        api: "anthropic-messages",
        provider: "minimax",
        id: "MiniMax-M2.7",
      } as Model<"anthropic-messages">,
      { messages: [] } as Context,
      {},
    );
    expect(observed).toEqual({ type: "enabled", budget_tokens: 1024 });
  });
});
describe("createMinimaxFastModeWrapper", () => {
  it("rewrites MiniMax-M2.7 to highspeed variant in fast mode", () => {
    let seenModelId = "";
    // Probe stream function: only records which model id reached it.
    const probe: StreamFn = (model) => {
      seenModelId = model.id;
      return {} as ReturnType<StreamFn>;
    };
    const wrapped = createMinimaxFastModeWrapper(probe, true);
    void wrapped(
      {
        api: "anthropic-messages",
        provider: "minimax",
        id: "MiniMax-M2.7",
      } as Model<"anthropic-messages">,
      { messages: [] } as Context,
      {},
    );
    expect(seenModelId).toBe("MiniMax-M2.7-highspeed");
  });
});

View File

@@ -12,6 +12,13 @@ function resolveMinimaxFastModelId(modelId: unknown): string | undefined {
return MINIMAX_FAST_MODEL_IDS.get(modelId.trim());
}
/**
 * True only for models that target MiniMax's Anthropic-compatible messages
 * API — either the direct "minimax" provider or the "minimax-portal" one.
 */
function isMinimaxAnthropicMessagesModel(model: { api?: unknown; provider?: unknown }): boolean {
  if (model.api !== "anthropic-messages") {
    return false;
  }
  switch (model.provider) {
    case "minimax":
    case "minimax-portal":
      return true;
    default:
      return false;
  }
}
export function createMinimaxFastModeWrapper(
baseStreamFn: StreamFn | undefined,
fastMode: boolean,
@@ -34,3 +41,36 @@ export function createMinimaxFastModeWrapper(
return underlying({ ...model, id: fastModelId }, context, options);
};
}
/**
 * MiniMax's Anthropic-compatible streaming endpoint returns reasoning_content
 * in OpenAI-style delta chunks ({delta: {content: "", reasoning_content: "..."}})
 * rather than the native Anthropic thinking block format. Pi-ai's Anthropic
 * provider cannot process this format and leaks the reasoning text as visible
 * content. Disable thinking in the outgoing payload so MiniMax does not produce
 * reasoning_content deltas during streaming.
 */
export function createMinimaxThinkingDisabledWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    // Anything other than a MiniMax anthropic-messages model passes through
    // without payload interception.
    if (!isMinimaxAnthropicMessagesModel(model)) {
      return underlying(model, context, options);
    }
    const chainedOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        const record =
          payload && typeof payload === "object"
            ? (payload as Record<string, unknown>)
            : undefined;
        // Inject only when no earlier wrapper chose a thinking value, so an
        // intentional override is never clobbered.
        if (record !== undefined && record.thinking === undefined) {
          record.thinking = { type: "disabled" };
        }
        return chainedOnPayload?.(payload, model);
      },
    });
  };
}