Files
openclaw/src/plugins/provider-self-hosted-setup.test.ts
Alix-007 dad5ce64d4 fix(providers): bound self-hosted provider discovery JSON reads (#95244)
* fix(providers): bound self-hosted discovery JSON reads

discoverLlamaCppRuntimeContextTokens and discoverOpenAICompatibleLocalModels
parsed their HTTP responses via an unbounded await response.json(). Self-hosted
provider base URLs are user-supplied and untrusted (an endpoint reachable via
SSRF could stream an unbounded JSON body), so a hostile or buggy endpoint could
drive the setup wizard into OOM.

Route both reads through the shared byte-bounded reader (readResponseWithLimit
from @openclaw/media-core) under a single 4 MiB cap before JSON.parse, mirroring
the bound-stream hardening landed for Anthropic error bodies. Overflow cancels
the stream and is swallowed by the existing discovery error handling, so a
capped endpoint degrades gracefully (returns [] / skips the runtime context
probe) instead of buffering the whole body.

* tune self-hosted discovery cap

Signed-off-by: sallyom <somalley@redhat.com>

---------

Signed-off-by: sallyom <somalley@redhat.com>
Co-authored-by: sallyom <somalley@redhat.com>
2026-06-24 17:51:14 -04:00

640 lines
20 KiB
TypeScript

/** Tests self-hosted provider setup helpers and auth/config defaults. */
import { beforeEach, describe, expect, it, vi } from "vitest";
import {
configureOpenAICompatibleSelfHostedProviderNonInteractive,
discoverOpenAICompatibleLocalModels,
} from "./provider-self-hosted-setup.js";
import type { ProviderAuthMethodNonInteractiveContext } from "./types.js";
// Build the mock functions through vi.hoisted so they are already defined when
// the vi.mock factories in this file (which vitest hoists to the top of the
// module) reference them.
const { fetchWithSsrFGuardMock, upsertAuthProfileWithLock } = vi.hoisted(() => ({
// Bare mock: every test queues the guarded-fetch results it needs.
fetchWithSsrFGuardMock: vi.fn(),
// Async mock that always resolves to null.
upsertAuthProfileWithLock: vi.fn(async () => null),
}));
// Swap the fetch-guard module's fetchWithSsrFGuard export for the hoisted mock;
// tests script its results with mockResolvedValueOnce.
vi.mock("../infra/net/fetch-guard.js", () => ({
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
}));
// Swap the auth-profile upsert export for the hoisted mock so tests can assert
// on the arguments it receives.
vi.mock("../agents/auth-profiles/upsert-with-lock.js", () => ({
upsertAuthProfileWithLock,
}));
// Clear recorded mock calls before every test so call-count and call-argument
// assertions only observe calls made by the current test.
beforeEach(() => {
vi.clearAllMocks();
});
// Test-local mirror of SELF_HOSTED_DISCOVERY_JSON_MAX_BYTES from the source
// under test (16 MiB). It is duplicated on purpose so the regression tests
// assert that the body is capped, not drained.
// NOTE(review): the tests only use this value as an upper bound, so a mirror
// that is larger than the real cap would not fail — confirm it matches the source.
const SELF_HOSTED_DISCOVERY_JSON_MAX_BYTES = 16 * 2 ** 20;
// Size of one chunk produced by the endless test stream (1 MiB).
const CHUNK_BYTES = 2 ** 20;
/**
 * Creates a response body that never ends by itself: every pull enqueues one
 * more CHUNK_BYTES-sized chunk and the stream is never closed. A reader that
 * enforces a byte cap therefore has to cancel it.
 *
 * @returns the stream plus live `cancelCount` / `bytesPulled` counters (exposed
 * as getters, so they always reflect the current totals).
 */
function createUnboundedJsonStream(): {
  body: ReadableStream<Uint8Array>;
  cancelCount: number;
  bytesPulled: number;
} {
  let cancellations = 0;
  let pulledBytes = 0;
  // One reusable buffer of ASCII spaces (0x20), enqueued again on every pull.
  const filler = new Uint8Array(CHUNK_BYTES).fill(0x20);

  const body = new ReadableStream<Uint8Array>({
    pull: (controller) => {
      pulledBytes += filler.byteLength;
      controller.enqueue(filler);
    },
    cancel: () => {
      cancellations += 1;
    },
  });

  return {
    body,
    get cancelCount() {
      return cancellations;
    },
    get bytesPulled() {
      return pulledBytes;
    },
  };
}
/** Builds a runtime stub whose `error`, `exit` and `log` members are fresh vitest mocks. */
function createRuntime() {
  const error = vi.fn();
  const exit = vi.fn();
  const log = vi.fn();
  return { error, exit, log };
}
/**
 * Builds a non-interactive auth context for the provider setup tests.
 *
 * @param params.providerId used as the context's `authChoice`
 * @param params.baseUrl forwarded as `opts.customBaseUrl`
 * @param params.apiKey forwarded as `opts.customApiKey`; also the key returned by
 *   the mocked `resolveApiKey` (falls back to "self-hosted-test-key" when absent)
 * @param params.modelId forwarded as `opts.customModelId`
 * @returns a context whose `resolveApiKey` and `toApiKeyCredential` are vitest mocks
 */
function createContext(params: {
  providerId: string;
  baseUrl?: string;
  apiKey?: string;
  modelId?: string;
}): ProviderAuthMethodNonInteractiveContext {
  const { providerId, baseUrl, apiKey, modelId } = params;

  // Result handed back by the mocked key resolver, reported as coming from a flag.
  const resolvedKey = {
    key: apiKey ?? "self-hosted-test-key",
    source: "flag" as const,
  };
  const resolveApiKey = vi.fn<ProviderAuthMethodNonInteractiveContext["resolveApiKey"]>(
    async () => resolvedKey,
  );
  const toApiKeyCredential = vi.fn<ProviderAuthMethodNonInteractiveContext["toApiKeyCredential"]>(
    ({ provider, resolved }) => ({
      type: "api_key",
      provider,
      key: resolved.key,
    }),
  );

  return {
    authChoice: providerId,
    config: { agents: { defaults: {} } },
    baseConfig: { agents: { defaults: {} } },
    opts: {
      customBaseUrl: baseUrl,
      customApiKey: apiKey,
      customModelId: modelId,
    },
    // The stub only carries error/exit/log, hence the cast.
    runtime: createRuntime() as never,
    agentDir: "/tmp/openclaw-self-hosted-test-agent",
    resolveApiKey,
    toApiKeyCredential,
  };
}
/**
 * Reads `agents.defaults.model.primary` from a configured result.
 *
 * @returns the primary model when `model` is a non-null object, otherwise
 * `undefined` (missing config, missing model, or a non-object model value).
 */
function readPrimaryModel(config: Awaited<ReturnType<typeof configureSelfHostedTestProvider>>) {
  const configuredModel = config?.agents?.defaults?.model;
  if (!configuredModel || typeof configuredModel !== "object") {
    return undefined;
  }
  return configuredModel.primary;
}
/**
 * Runs the non-interactive self-hosted provider configuration with the fixed
 * defaults shared by these tests (default base URL and model placeholder).
 *
 * @param params.ctx non-interactive auth context to configure against
 * @param params.providerId provider id passed through unchanged
 * @param params.providerLabel provider label passed through unchanged
 * @param params.envVar passed as `defaultApiKeyEnvVar`
 * @returns whatever the configuration helper resolves to
 */
async function configureSelfHostedTestProvider(params: {
  ctx: ProviderAuthMethodNonInteractiveContext;
  providerId: string;
  providerLabel: string;
  envVar: string;
}) {
  const { ctx, providerId, providerLabel, envVar } = params;
  const configured = await configureOpenAICompatibleSelfHostedProviderNonInteractive({
    ctx,
    providerId,
    providerLabel,
    defaultBaseUrl: "http://127.0.0.1:8000/v1",
    defaultApiKeyEnvVar: envVar,
    modelPlaceholder: "Qwen/Qwen3-32B",
  });
  return configured;
}
/**
 * Wraps a one-chunk, never-closed stream in a Response and reports whether the
 * stream's cancel hook has run.
 *
 * @param init optional ResponseInit (status, statusText, ...) for the Response
 * @returns the Response and a `wasCanceled` probe
 */
function cancelTrackedResponse(init?: ResponseInit): {
  response: Response;
  wasCanceled: () => boolean;
} {
  const tracker = { canceled: false };
  const payload = new TextEncoder().encode("ignored");
  const body = new ReadableStream<Uint8Array>({
    // Enqueue a single chunk and leave the stream open.
    start: (controller) => {
      controller.enqueue(payload);
    },
    cancel: () => {
      tracker.canceled = true;
    },
  });
  const response = new Response(body, init);
  return {
    response,
    wasCanceled: () => tracker.canceled,
  };
}
describe("discoverOpenAICompatibleLocalModels", () => {
it("uses guarded fetch pinned to the configured self-hosted provider", async () => {
  // Pieces shared by both expected guarded-fetch calls.
  const expectedInit = { headers: { Authorization: "Bearer self-hosted-test-key" } };
  const expectedPolicy = {
    hostnameAllowlist: ["127.0.0.1"],
    allowPrivateNetwork: true,
  };
  const modelsRelease = vi.fn(async () => undefined);
  const propsRelease = vi.fn(async () => undefined);
  // /props answers 404 with a body whose cancellation can be observed.
  const trackedProps = cancelTrackedResponse({ status: 404 });
  const modelsPayload = JSON.stringify({ data: [{ id: "Qwen/Qwen3-32B" }] });

  // First guarded fetch: /v1/models. Second: /props.
  fetchWithSsrFGuardMock
    .mockResolvedValueOnce({
      response: new Response(modelsPayload, { status: 200 }),
      finalUrl: "http://127.0.0.1:8000/v1/models",
      release: modelsRelease,
    })
    .mockResolvedValueOnce({
      response: trackedProps.response,
      finalUrl: "http://127.0.0.1:8000/props",
      release: propsRelease,
    });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8000/v1/",
    apiKey: "self-hosted-test-key",
    label: "vLLM",
    env: {},
  });

  const expectedModel = {
    id: "Qwen/Qwen3-32B",
    name: "Qwen/Qwen3-32B",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128000,
    maxTokens: 8192,
  };
  expect(discovered).toEqual([expectedModel]);
  expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(1, {
    url: "http://127.0.0.1:8000/v1/models",
    init: expectedInit,
    policy: expectedPolicy,
    timeoutMs: 5000,
  });
  expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(2, {
    url: "http://127.0.0.1:8000/props",
    init: expectedInit,
    policy: expectedPolicy,
    timeoutMs: 2500,
  });
  // Both guarded responses are released and the 404 body is cancelled.
  expect(modelsRelease).toHaveBeenCalledOnce();
  expect(propsRelease).toHaveBeenCalledOnce();
  expect(trackedProps.wasCanceled()).toBe(true);
});
it("cancels model discovery error bodies before falling back", async () => {
  const guardRelease = vi.fn(async () => undefined);
  // A 503 reply whose body records whether it was cancelled.
  const { response: unavailable, wasCanceled } = cancelTrackedResponse({
    status: 503,
    statusText: "Service Unavailable",
  });
  fetchWithSsrFGuardMock.mockResolvedValueOnce({
    response: unavailable,
    finalUrl: "http://127.0.0.1:8000/v1/models",
    release: guardRelease,
  });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8000/v1/",
    apiKey: "self-hosted-test-key",
    label: "vLLM",
    env: {},
  });

  // Discovery falls back to an empty list, releases the guard and cancels the body.
  expect(discovered).toEqual([]);
  expect(guardRelease).toHaveBeenCalledOnce();
  expect(wasCanceled()).toBe(true);
});
it("uses llama.cpp nested /props n_ctx as the runtime context cap", async () => {
  const releaseModels = vi.fn(async () => undefined);
  const releaseProps = vi.fn(async () => undefined);
  // /v1/models reports n_ctx_train; /props nests n_ctx under default_generation_settings.
  const modelsPayload = {
    data: [
      {
        id: "qwen3.6-mxfp4-moe",
        meta: { n_ctx_train: 262_144 },
      },
    ],
  };
  const propsPayload = { default_generation_settings: { n_ctx: 65_536 } };

  fetchWithSsrFGuardMock
    .mockResolvedValueOnce({
      response: new Response(JSON.stringify(modelsPayload), { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/v1/models",
      release: releaseModels,
    })
    .mockResolvedValueOnce({
      response: new Response(JSON.stringify(propsPayload), { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/props",
      release: releaseProps,
    });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8080/v1",
    label: "llama.cpp",
    env: {},
  });

  // contextWindow comes from n_ctx_train, contextTokens from the nested n_ctx.
  const expectedModel = {
    id: "qwen3.6-mxfp4-moe",
    name: "qwen3.6-mxfp4-moe",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 262_144,
    contextTokens: 65_536,
    maxTokens: 8192,
  };
  expect(discovered).toEqual([expectedModel]);
  expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(2, {
    url: "http://127.0.0.1:8080/props",
    init: { headers: undefined },
    policy: {
      hostnameAllowlist: ["127.0.0.1"],
      allowPrivateNetwork: true,
    },
    timeoutMs: 2500,
  });
  expect(releaseModels).toHaveBeenCalledOnce();
  expect(releaseProps).toHaveBeenCalledOnce();
});
it("scopes llama.cpp /props runtime caps to each discovered model without autoloading", async () => {
  const releaseModels = vi.fn(async () => undefined);
  const releaseFirstProps = vi.fn(async () => undefined);
  const releaseSecondProps = vi.fn(async () => undefined);
  // Per-model /props URLs: model id is URL-encoded and autoload is disabled.
  const firstPropsUrl = "http://127.0.0.1:8080/props?model=qwen%2Frouter-a&autoload=false";
  const secondPropsUrl = "http://127.0.0.1:8080/props?model=qwen%2Frouter-b&autoload=false";
  // Builds a 200 /props reply carrying the given nested n_ctx.
  const propsReply = (nCtx: number) =>
    new Response(JSON.stringify({ default_generation_settings: { n_ctx: nCtx } }), {
      status: 200,
    });
  // Expected guarded-fetch arguments for one /props probe.
  const expectedPropsCall = (url: string) => ({
    url,
    init: { headers: undefined },
    policy: {
      hostnameAllowlist: ["127.0.0.1"],
      allowPrivateNetwork: true,
    },
    timeoutMs: 2500,
  });
  const modelsPayload = {
    data: [
      {
        id: "qwen/router-a",
        meta: { n_ctx_train: 262_144 },
      },
      {
        id: "qwen/router-b",
        meta: { n_ctx_train: 131_072 },
      },
    ],
  };

  // Queue order: /v1/models, then /props for router-a, then /props for router-b.
  fetchWithSsrFGuardMock
    .mockResolvedValueOnce({
      response: new Response(JSON.stringify(modelsPayload), { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/v1/models",
      release: releaseModels,
    })
    .mockResolvedValueOnce({
      response: propsReply(65_536),
      finalUrl: firstPropsUrl,
      release: releaseFirstProps,
    })
    .mockResolvedValueOnce({
      response: propsReply(32_768),
      finalUrl: secondPropsUrl,
      release: releaseSecondProps,
    });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8080/v1",
    label: "llama.cpp",
    env: {},
  });

  const expectedRouterA = {
    id: "qwen/router-a",
    name: "qwen/router-a",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 262_144,
    contextTokens: 65_536,
    maxTokens: 8192,
  };
  const expectedRouterB = {
    id: "qwen/router-b",
    name: "qwen/router-b",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 131_072,
    contextTokens: 32_768,
    maxTokens: 8192,
  };
  expect(discovered).toEqual([expectedRouterA, expectedRouterB]);
  expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(2, expectedPropsCall(firstPropsUrl));
  expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(3, expectedPropsCall(secondPropsUrl));
  expect(releaseModels).toHaveBeenCalledOnce();
  expect(releaseFirstProps).toHaveBeenCalledOnce();
  expect(releaseSecondProps).toHaveBeenCalledOnce();
});
it("keeps top-level llama.cpp /props n_ctx as a compatibility fallback", async () => {
  const releaseModels = vi.fn(async () => undefined);
  const releaseProps = vi.fn(async () => undefined);
  const modelsPayload = {
    data: [
      {
        id: "qwen3.6-mxfp4-moe",
        meta: { n_ctx_train: 262_144 },
      },
    ],
  };
  // Here n_ctx sits at the top level of /props instead of under default_generation_settings.
  const propsPayload = { n_ctx: 65_536 };

  fetchWithSsrFGuardMock
    .mockResolvedValueOnce({
      response: new Response(JSON.stringify(modelsPayload), { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/v1/models",
      release: releaseModels,
    })
    .mockResolvedValueOnce({
      response: new Response(JSON.stringify(propsPayload), { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/props",
      release: releaseProps,
    });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8080/v1",
    label: "llama.cpp",
    env: {},
  });

  const expectedModel = {
    id: "qwen3.6-mxfp4-moe",
    name: "qwen3.6-mxfp4-moe",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 262_144,
    contextTokens: 65_536,
    maxTokens: 8192,
  };
  expect(discovered).toEqual([expectedModel]);
  expect(releaseModels).toHaveBeenCalledOnce();
  expect(releaseProps).toHaveBeenCalledOnce();
});
it("preserves explicit configured context windows ahead of llama.cpp /props", async () => {
  const releaseModels = vi.fn(async () => undefined);
  const modelsPayload = {
    data: [{ id: "qwen3.6-mxfp4-moe", meta: { n_ctx_train: 262_144 } }],
  };
  // Only the /v1/models reply is queued; the test asserts no second fetch happens.
  fetchWithSsrFGuardMock.mockResolvedValueOnce({
    response: new Response(JSON.stringify(modelsPayload), { status: 200 }),
    finalUrl: "http://127.0.0.1:8080/v1/models",
    release: releaseModels,
  });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8080/v1",
    label: "llama.cpp",
    contextWindow: 65_536,
    env: {},
  });

  // The caller-supplied contextWindow wins over the advertised n_ctx_train.
  const expectedModel = {
    id: "qwen3.6-mxfp4-moe",
    name: "qwen3.6-mxfp4-moe",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 65_536,
    maxTokens: 8192,
  };
  expect(discovered).toEqual([expectedModel]);
  expect(discovered[0]).not.toHaveProperty("contextTokens");
  expect(fetchWithSsrFGuardMock).toHaveBeenCalledTimes(1);
  expect(releaseModels).toHaveBeenCalledOnce();
});
it("does not allowlist always-blocked metadata hostnames", async () => {
  const guardRelease = vi.fn(async () => undefined);
  const modelsPayload = JSON.stringify({ data: [{ id: "metadata-probe" }] });
  fetchWithSsrFGuardMock.mockResolvedValueOnce({
    response: new Response(modelsPayload, { status: 200 }),
    finalUrl: "http://metadata.google.internal/v1/models",
    release: guardRelease,
  });

  await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://metadata.google.internal/v1",
    label: "vLLM",
    env: {},
  });

  // No policy override is passed for the metadata host.
  const expectedCall = {
    url: "http://metadata.google.internal/v1/models",
    init: { headers: undefined },
    policy: undefined,
    timeoutMs: 5000,
  };
  expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith(expectedCall);
  expect(guardRelease).toHaveBeenCalledOnce();
});
it("bounds an unbounded /models discovery stream instead of buffering it", async () => {
  const guardRelease = vi.fn(async () => undefined);
  const endless = createUnboundedJsonStream();
  // Upper bound on bytes the reader may pull: the cap plus two chunks of slack.
  const pullCeiling = SELF_HOSTED_DISCOVERY_JSON_MAX_BYTES + 2 * CHUNK_BYTES;
  fetchWithSsrFGuardMock.mockResolvedValueOnce({
    response: new Response(endless.body, { status: 200 }),
    finalUrl: "http://127.0.0.1:8000/v1/models",
    release: guardRelease,
  });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8000/v1",
    label: "vLLM",
    env: {},
  });

  // The body must be cancelled once the byte cap is exceeded. Without a cap the
  // stream would never end and discovery would keep buffering it.
  expect(discovered).toEqual([]);
  expect(endless.cancelCount).toBe(1);
  expect(endless.bytesPulled).toBeLessThanOrEqual(pullCeiling);
  expect(guardRelease).toHaveBeenCalledOnce();
});
it("bounds an unbounded llama.cpp /props discovery stream instead of buffering it", async () => {
  const releaseModels = vi.fn(async () => undefined);
  const releaseProps = vi.fn(async () => undefined);
  const endless = createUnboundedJsonStream();
  // Upper bound on bytes the reader may pull: the cap plus two chunks of slack.
  const pullCeiling = SELF_HOSTED_DISCOVERY_JSON_MAX_BYTES + 2 * CHUNK_BYTES;
  const modelsPayload = JSON.stringify({ data: [{ id: "qwen3.6-mxfp4-moe" }] });

  // /v1/models is well-formed; /props is the never-ending body.
  fetchWithSsrFGuardMock
    .mockResolvedValueOnce({
      response: new Response(modelsPayload, { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/v1/models",
      release: releaseModels,
    })
    .mockResolvedValueOnce({
      response: new Response(endless.body, { status: 200 }),
      finalUrl: "http://127.0.0.1:8080/props",
      release: releaseProps,
    });

  const discovered = await discoverOpenAICompatibleLocalModels({
    baseUrl: "http://127.0.0.1:8080/v1",
    label: "llama.cpp",
    env: {},
  });

  // The /props overflow is swallowed, so discovery still returns the model, but
  // without contextTokens: the runtime context probe is skipped rather than
  // buffering the whole body.
  const expectedModel = {
    id: "qwen3.6-mxfp4-moe",
    name: "qwen3.6-mxfp4-moe",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128000,
    maxTokens: 8192,
  };
  expect(discovered).toEqual([expectedModel]);
  expect(endless.cancelCount).toBe(1);
  expect(endless.bytesPulled).toBeLessThanOrEqual(pullCeiling);
  expect(releaseModels).toHaveBeenCalledOnce();
  expect(releaseProps).toHaveBeenCalledOnce();
});
});
describe("configureOpenAICompatibleSelfHostedProviderNonInteractive", () => {
// One case per self-hosted provider exercised by the configuration test below.
const providerCases = [
  {
    providerId: "vllm",
    providerLabel: "vLLM",
    envVar: "VLLM_API_KEY",
    baseUrl: "http://127.0.0.1:8100/v1/",
    apiKey: "vllm-test-key",
    modelId: "Qwen/Qwen3-8B",
  },
  {
    providerId: "sglang",
    providerLabel: "SGLang",
    envVar: "SGLANG_API_KEY",
    baseUrl: "http://127.0.0.1:31000/v1",
    apiKey: "sglang-test-key",
    modelId: "Qwen/Qwen3-32B",
  },
];
it.each(providerCases)("configures $providerLabel config and auth profile", async (providerCase) => {
  const { providerId, providerLabel, envVar, baseUrl, apiKey, modelId } = providerCase;
  const ctx = createContext(providerCase);
  const cfg = await configureSelfHostedTestProvider({ ctx, providerId, providerLabel, envVar });
  const profileId = `${providerId}:default`;

  // Auth profile entry is registered in api_key mode.
  expect(cfg?.auth?.profiles?.[profileId]).toEqual({
    provider: providerId,
    mode: "api_key",
  });

  // Provider entry: trailing slashes stripped from the base URL, and the env
  // var name (not the key itself) stored as apiKey.
  const expectedProvider = {
    baseUrl: baseUrl.replace(/\/+$/, ""),
    api: "openai-completions",
    apiKey: envVar,
    models: [
      {
        id: modelId,
        name: modelId,
        reasoning: false,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 128000,
        maxTokens: 8192,
      },
    ],
  };
  expect(cfg?.models?.providers?.[providerId]).toEqual(expectedProvider);
  expect(readPrimaryModel(cfg)).toBe(`${providerId}/${modelId}`);

  expect(ctx.resolveApiKey).toHaveBeenCalledWith({
    provider: providerId,
    flagValue: apiKey,
    flagName: "--custom-api-key",
    envVar,
    envVarName: envVar,
  });
  expect(upsertAuthProfileWithLock).toHaveBeenCalledWith({
    profileId,
    agentDir: ctx.agentDir,
    credential: {
      type: "api_key",
      provider: providerId,
      key: apiKey,
    },
  });
});
it("exits without touching auth when custom model id is missing", async () => {
  // No modelId is supplied, so opts.customModelId is undefined.
  const ctx = createContext({
    providerId: "vllm",
    apiKey: "vllm-test-key",
  });
  const expectedMessage = [
    "Missing --custom-model-id for --auth-choice vllm.",
    "Pass the vLLM model id to use, for example Qwen/Qwen3-32B.",
  ].join("\n");

  const cfg = await configureSelfHostedTestProvider({
    ctx,
    providerId: "vllm",
    providerLabel: "vLLM",
    envVar: "VLLM_API_KEY",
  });

  expect(cfg).toBeNull();
  expect(ctx.runtime.error).toHaveBeenCalledWith(expectedMessage);
  expect(ctx.runtime.exit).toHaveBeenCalledWith(1);
  // Neither key resolution nor the auth profile write may run.
  expect(ctx.resolveApiKey).not.toHaveBeenCalled();
  expect(upsertAuthProfileWithLock).not.toHaveBeenCalled();
});
});