// Mirror of https://github.com/openclaw/openclaw.git
// Synced 2026-06-25 20:29:33 +00:00
// 331 lines · 9.3 KiB · TypeScript
// vLLM stream plugin tests: cover the Qwen and provider thinking wrappers and provider stream registration.
|
|
import type { StreamFn } from "openclaw/plugin-sdk/agent-core";
|
|
import type { Context, Model } from "openclaw/plugin-sdk/llm";
|
|
import { describe, expect, it } from "vitest";
|
|
import {
|
|
createVllmProviderThinkingWrapper,
|
|
createVllmQwenThinkingWrapper,
|
|
wrapVllmProviderStream,
|
|
} from "./stream.js";
|
|
|
|
/**
 * Drives `createVllmQwenThinkingWrapper` over a stub stream function and
 * returns the payload the stub recorded after invoking any `onPayload` hook
 * it was given.
 *
 * - `format` is forwarded to the wrapper unchanged.
 * - `thinkingLevel` is forwarded to the wrapper, defaulting to "high".
 * - `reasoning`, when not `undefined`, is passed as the per-call `reasoning` option;
 *   otherwise the call options are an empty object.
 * - `initialPayload` is shallow-copied to seed the payload handed to the hook.
 * - `model` overrides fields of the default vLLM "Qwen/Qwen3-8B" model.
 *
 * Returns `{}` if the stub stream function is never invoked.
 */
function capturePayload(params: {
  format: "chat-template" | "top-level";
  thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
  reasoning?: unknown;
  initialPayload?: Record<string, unknown>;
  model?: Partial<Model<"openai-completions">>;
}): Record<string, unknown> {
  let seenPayload: Record<string, unknown> = {};

  // Stub transport: offer a copy of the seed payload to the hook, then record it.
  const stubStream: StreamFn = (model, _context, options) => {
    const outgoing = { ...params.initialPayload };
    options?.onPayload?.(outgoing, model);
    seenPayload = outgoing;
    return {} as ReturnType<StreamFn>;
  };

  const wrapped = createVllmQwenThinkingWrapper({
    baseStreamFn: stubStream,
    format: params.format,
    thinkingLevel: params.thinkingLevel ?? "high",
  });

  // Default model; caller-supplied fields win because they are spread last.
  const qwenModel = {
    api: "openai-completions",
    provider: "vllm",
    id: "Qwen/Qwen3-8B",
    reasoning: true,
    ...params.model,
  } as Model<"openai-completions">;
  const emptyContext = { messages: [] } as Context;

  void wrapped(
    qwenModel,
    emptyContext,
    params.reasoning === undefined ? {} : ({ reasoning: params.reasoning } as never),
  );

  return seenPayload;
}
|
|
|
|
// Payload shaping by the Qwen thinking wrapper, for both supported formats.
describe("createVllmQwenThinkingWrapper", () => {
  it("maps Qwen chat-template thinking off to chat_template_kwargs", () => {
    // The seed carries three spellings of reasoning effort; the expected
    // payload below contains none of them.
    const seededReasoningFields = {
      reasoning_effort: "high",
      reasoning: { effort: "high" },
      reasoningEffort: "high",
    };

    expect(
      capturePayload({
        format: "chat-template",
        reasoning: "none",
        initialPayload: seededReasoningFields,
      }),
    ).toEqual({
      chat_template_kwargs: {
        enable_thinking: false,
        preserve_thinking: true,
      },
    });
  });

  it("maps Qwen chat-template thinking on to chat_template_kwargs", () => {
    const result = capturePayload({ format: "chat-template", reasoning: "medium" });

    expect(result).toEqual({
      chat_template_kwargs: {
        enable_thinking: true,
        preserve_thinking: true,
      },
    });
  });

  it("preserves explicit chat-template kwargs while setting enable_thinking", () => {
    const result = capturePayload({
      format: "chat-template",
      thinkingLevel: "off",
      initialPayload: {
        chat_template_kwargs: {
          preserve_thinking: false,
          force_nonempty_content: true,
        },
      },
    });

    expect(result).toEqual({
      chat_template_kwargs: {
        enable_thinking: false,
        preserve_thinking: false,
        force_nonempty_content: true,
      },
    });
  });

  it("maps Qwen top-level thinking format to enable_thinking", () => {
    const thinkingOff = capturePayload({ format: "top-level", thinkingLevel: "off" });
    expect(thinkingOff).toEqual({
      enable_thinking: false,
    });

    const thinkingHigh = capturePayload({ format: "top-level", thinkingLevel: "high" });
    expect(thinkingHigh).toEqual({
      enable_thinking: true,
    });
  });

  it("patches configured Qwen models unless reasoning is explicitly disabled", () => {
    // `reasoning: undefined` overrides the helper's default of `true`.
    const reasoningUnset = capturePayload({
      format: "chat-template",
      model: { reasoning: undefined },
    });
    expect(reasoningUnset).toEqual({
      chat_template_kwargs: {
        enable_thinking: true,
        preserve_thinking: true,
      },
    });

    const reasoningDisabled = capturePayload({
      format: "chat-template",
      model: { reasoning: false },
    });
    expect(reasoningDisabled).toStrictEqual({});
  });

  it("skips non-completions models", () => {
    const result = capturePayload({
      format: "chat-template",
      model: { api: "openai-responses" as never },
    });

    expect(result).toStrictEqual({});
  });
});
|
|
|
|
// Provider-level thinking wrapper, exercised with a Nemotron 3 model id by default.
describe("createVllmProviderThinkingWrapper", () => {
  /**
   * Drives `createVllmProviderThinkingWrapper` over a stub stream function and
   * returns the payload the stub recorded after invoking any `onPayload` hook
   * it was given.
   *
   * `thinkingLevel` defaults to "high", `initialPayload` is shallow-copied to
   * seed the payload, and `model` overrides fields of the default vLLM
   * "nemotron-3-super" model. Returns `{}` if the stub is never invoked.
   */
  function captureProviderPayload(params: {
    thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
    initialPayload?: Record<string, unknown>;
    model?: Partial<Model<"openai-completions">>;
  }): Record<string, unknown> {
    let seenPayload: Record<string, unknown> = {};

    // Stub transport: offer a copy of the seed payload to the hook, then record it.
    const stubStream: StreamFn = (model, _context, options) => {
      const outgoing = { ...params.initialPayload };
      options?.onPayload?.(outgoing, model);
      seenPayload = outgoing;
      return {} as ReturnType<StreamFn>;
    };

    const wrapped = createVllmProviderThinkingWrapper({
      baseStreamFn: stubStream,
      thinkingLevel: params.thinkingLevel ?? "high",
    });

    // Default model; caller-supplied fields win because they are spread last.
    const nemotronModel = {
      api: "openai-completions",
      provider: "vllm",
      id: "nemotron-3-super",
      reasoning: true,
      ...params.model,
    } as Model<"openai-completions">;
    const emptyContext = { messages: [] } as Context;

    void wrapped(nemotronModel, emptyContext, {});

    return seenPayload;
  }

  it("injects Nemotron 3 chat-template kwargs when thinking is off", () => {
    const result = captureProviderPayload({ thinkingLevel: "off" });

    expect(result).toEqual({
      chat_template_kwargs: {
        enable_thinking: false,
        force_nonempty_content: true,
      },
    });
  });

  it("does not inject Nemotron 3 chat-template kwargs when thinking is enabled", () => {
    const result = captureProviderPayload({ thinkingLevel: "low" });

    expect(result).toStrictEqual({});
  });

  it("preserves existing Nemotron 3 chat-template kwargs over defaults", () => {
    // The seed already sets enable_thinking; the expectation keeps that value
    // and additionally carries force_nonempty_content.
    const result = captureProviderPayload({
      thinkingLevel: "off",
      initialPayload: {
        chat_template_kwargs: {
          enable_thinking: true,
        },
      },
    });

    expect(result).toEqual({
      chat_template_kwargs: {
        enable_thinking: true,
        force_nonempty_content: true,
      },
    });
  });

  it("skips non-Nemotron vLLM models", () => {
    const result = captureProviderPayload({
      thinkingLevel: "off",
      model: { id: "Qwen/Qwen3-8B" },
    });

    expect(result).toStrictEqual({});
  });
});
|
|
|
|
// Registration rules: when wrapVllmProviderStream returns a wrapper vs. undefined.
describe("wrapVllmProviderStream", () => {
  it("registers when vLLM Qwen thinking format compat is configured", () => {
    const compatModel = {
      api: "openai-completions",
      provider: "vllm",
      id: "Qwen/Qwen3-8B",
      reasoning: true,
      compat: { thinkingFormat: "qwen-chat-template" },
    } as Model<"openai-completions">;

    const registered = wrapVllmProviderStream({
      provider: "vllm",
      modelId: "Qwen/Qwen3-8B",
      extraParams: {},
      model: compatModel,
      streamFn: undefined,
    } as never);

    expect(registered).toBeTypeOf("function");
  });

  it("ignores request params when Qwen thinking format compat is not configured", () => {
    // The model has no `compat`; the format appears only in extraParams.
    const plainModel = {
      api: "openai-completions",
      provider: "vllm",
      id: "Qwen/Qwen3-8B",
      reasoning: true,
    } as Model<"openai-completions">;

    const registered = wrapVllmProviderStream({
      provider: "vllm",
      modelId: "Qwen/Qwen3-8B",
      extraParams: { qwenThinkingFormat: "chat-template" },
      model: plainModel,
      streamFn: undefined,
    } as never);

    expect(registered).toBeUndefined();
  });

  it("uses model compat for Qwen thinking format", () => {
    let seenPayload: Record<string, unknown> = {};

    // Stub transport: offer an empty payload to the hook, then record it.
    const stubStream: StreamFn = (streamModel, _context, options) => {
      const outgoing = {};
      options?.onPayload?.(outgoing, streamModel);
      seenPayload = outgoing;
      return {} as ReturnType<StreamFn>;
    };

    const model = {
      api: "openai-completions",
      provider: "vllm",
      id: "Qwen/Qwen3-8B",
      reasoning: true,
      compat: { thinkingFormat: "qwen-chat-template" },
    } as unknown as Model<"openai-completions">;

    const wrapped = wrapVllmProviderStream({
      provider: "vllm",
      modelId: "Qwen/Qwen3-8B",
      extraParams: {},
      thinkingLevel: "off",
      model,
      streamFn: stubStream,
    } as never);

    expect(wrapped).toBeTypeOf("function");

    const emptyContext = { messages: [] } as Context;
    void wrapped?.(model, emptyContext, {});

    expect(seenPayload).toEqual({
      chat_template_kwargs: {
        enable_thinking: false,
        preserve_thinking: true,
      },
    });
  });

  it("skips unconfigured vLLM and non-vLLM providers", () => {
    const unconfiguredVllm = wrapVllmProviderStream({
      provider: "vllm",
      modelId: "Qwen/Qwen3-8B",
      extraParams: {},
      model: {
        api: "openai-completions",
        provider: "vllm",
        id: "Qwen/Qwen3-8B",
      } as Model<"openai-completions">,
      streamFn: undefined,
    } as never);
    expect(unconfiguredVllm).toBeUndefined();

    const otherProvider = wrapVllmProviderStream({
      provider: "openai",
      modelId: "gpt-5.4",
      extraParams: {},
      model: {
        api: "openai-completions",
        provider: "openai",
        id: "gpt-5.4",
      } as Model<"openai-completions">,
      streamFn: undefined,
    } as never);
    expect(otherProvider).toBeUndefined();
  });

  it("registers for vLLM Nemotron when thinking is off", () => {
    // Same Nemotron request each time; only the thinking level varies.
    const registerNemotron = (thinkingLevel: "off" | "low") =>
      wrapVllmProviderStream({
        provider: "vllm",
        modelId: "nemotron-3-super",
        extraParams: {},
        thinkingLevel,
        model: {
          api: "openai-completions",
          provider: "vllm",
          id: "nemotron-3-super",
        } as Model<"openai-completions">,
        streamFn: undefined,
      } as never);

    expect(registerNemotron("off")).toBeTypeOf("function");

    expect(registerNemotron("low")).toBeUndefined();
  });
});
|