test(plugins): harden kitchen sink live gauntlet

This commit is contained in:
Vincent Koc
2026-05-04 14:01:33 -07:00
parent a71f906837
commit e2eb8e3cfe
10 changed files with 287 additions and 10 deletions

View File

@@ -144,6 +144,12 @@ inside every shard.
`aimock` starts a local AIMock-backed provider server for experimental
fixture and protocol-mock coverage without replacing the scenario-aware
`mock-openai` lane.
- `pnpm test:plugins:kitchen-sink-live`
- Runs the live OpenAI Kitchen Sink plugin gauntlet through QA Lab. It
installs the external Kitchen Sink package, verifies the plugin SDK surface
inventory, probes `/healthz` and `/readyz`, records gateway CPU/RSS
evidence, runs a live OpenAI turn, and checks adversarial diagnostics.
Requires live OpenAI auth such as `OPENAI_API_KEY`.
- `pnpm test:gateway:cpu-scenarios`
- Runs the gateway startup bench plus a small mock QA Lab scenario pack
(`channel-chat-baseline`, `memory-failure-fallback`,

View File

@@ -407,6 +407,44 @@ describe("buildQaRuntimeEnv", () => {
});
});
// Checks that an env-supplied OpenAI key is recorded in the returned config and
// written to the auth-profiles.json store of both the "main" and "qa" agents.
it("stages live env API-key profiles for isolated QA workers", async () => {
  const profileId = "qa-live-openai-env";
  const fakeApiKey = "qa-live-not-a-real-key";
  const stateDir = await mkdtemp(path.join(os.tmpdir(), "qa-live-api-key-state-"));
  cleanups.push(async () => {
    await rm(stateDir, { recursive: true, force: true });
  });

  const stagedCfg = await __testing.stageQaLiveApiKeyProfiles({
    cfg: {},
    stateDir,
    providerIds: ["openai"],
    env: { OPENAI_API_KEY: fakeApiKey },
  });

  // The config carries profile metadata only.
  const stagedProfile = stagedCfg.auth?.profiles?.[profileId];
  expect(stagedProfile).toMatchObject({
    provider: "openai",
    mode: "api_key",
    displayName: "QA live openai env credential",
  });

  // The key itself lives in each agent's on-disk auth store.
  const expectedStore = {
    profiles: {
      [profileId]: {
        type: "api_key",
        provider: "openai",
        key: fakeApiKey,
      },
    },
  };
  for (const agentId of ["main", "qa"]) {
    const storePath = path.join(stateDir, "agents", agentId, "agent", "auth-profiles.json");
    const storeRaw = await readFile(storePath, "utf8");
    expect(JSON.parse(storeRaw)).toMatchObject(expectedStore);
  }
});
it("stages placeholder mock auth profiles per agent dir so mock-openai runs can resolve credentials", async () => {
const stateDir = await mkdtemp(path.join(os.tmpdir(), "qa-mock-auth-"));
cleanups.push(async () => {

View File

@@ -34,6 +34,7 @@ import { DEFAULT_QA_PROVIDER_MODE, getQaProvider } from "./providers/index.js";
import {
QA_LIVE_ANTHROPIC_SETUP_TOKEN_ENV,
QA_LIVE_SETUP_TOKEN_VALUE_ENV,
stageQaLiveApiKeyProfiles,
stageQaLiveAnthropicSetupToken,
} from "./providers/live-frontier/auth.js";
import { stageQaMockAuthProfiles } from "./providers/shared/mock-auth.js";
@@ -314,6 +315,7 @@ export const __testing = {
redactQaGatewayDebugText,
readQaLiveProviderConfigOverrides,
resolveQaGatewayChildProviderMode,
stageQaLiveApiKeyProfiles,
stageQaLiveAnthropicSetupToken,
stageQaMockAuthProfiles,
resolveQaLiveCliAuthEnv,
@@ -573,6 +575,11 @@ export async function startQaGatewayChild(params: {
});
const buildStagedGatewayConfig = async (gatewayPort: number) => {
let cfg = buildGatewayConfig(gatewayPort);
cfg = await stageQaLiveApiKeyProfiles({
cfg,
stateDir,
providerIds: liveProviderIds,
});
cfg = await stageQaLiveAnthropicSetupToken({
cfg,
stateDir,

View File

@@ -1,6 +1,7 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
import {
applyAuthProfileConfig,
resolveEnvApiKey,
validateAnthropicSetupToken,
} from "openclaw/plugin-sdk/provider-auth";
import { resolveQaAgentAuthDir, writeQaAuthProfiles } from "../shared/auth-store.js";
@@ -9,6 +10,11 @@ export const QA_LIVE_ANTHROPIC_SETUP_TOKEN_ENV = "OPENCLAW_QA_LIVE_ANTHROPIC_SET
export const QA_LIVE_SETUP_TOKEN_VALUE_ENV = "OPENCLAW_LIVE_SETUP_TOKEN_VALUE";
const QA_LIVE_ANTHROPIC_SETUP_TOKEN_PROFILE_ENV = "OPENCLAW_QA_LIVE_ANTHROPIC_SETUP_TOKEN_PROFILE";
const QA_LIVE_ANTHROPIC_SETUP_TOKEN_PROFILE_ID = "anthropic:qa-setup-token";
// Agent ids whose auth directories receive the staged live API-key profiles
// when stageQaLiveApiKeyProfiles is called without an explicit `agentIds` list.
const QA_LIVE_API_KEY_AGENT_IDS = Object.freeze(["main", "qa"] as const);
/**
 * Builds the auth-profile id used for an env-sourced live QA API key.
 *
 * Every character of `provider` outside `[a-z0-9_-]` (matched case-insensitively)
 * is replaced with `-`, and the result is wrapped as `qa-live-<provider>-env`.
 */
function buildQaLiveApiKeyProfileId(provider: string): string {
  const sanitizedProvider = provider.replace(/[^a-z0-9_-]/giu, "-");
  return ["qa-live", sanitizedProvider, "env"].join("-");
}
function resolveQaLiveAnthropicSetupToken(env: NodeJS.ProcessEnv = process.env) {
const token = (
@@ -55,3 +61,59 @@ export async function stageQaLiveAnthropicSetupToken(params: {
displayName: "QA setup-token",
});
}
/**
 * Stages env-resolved provider API keys as auth profiles for a live QA run.
 *
 * Provider ids are trimmed, de-duplicated, stripped of empty entries, and
 * processed in sorted order. For each provider whose key `resolveEnvApiKey`
 * can resolve, an `api_key` profile is registered on the config via
 * `applyAuthProfileConfig` and collected for the on-disk auth store. If at
 * least one profile was collected, the full set is written to every target
 * agent's auth directory under `stateDir`.
 *
 * @param params.cfg Config to extend; the updated config is returned.
 * @param params.stateDir State root used to resolve each agent's auth directory.
 * @param params.providerIds Provider ids to look up credentials for.
 * @param params.env Environment to read keys from; defaults to `process.env`.
 * @param params.agentIds Agents to write stores for; defaults to `QA_LIVE_API_KEY_AGENT_IDS`.
 * @returns The config with one auth profile entry per provider that had a key.
 */
export async function stageQaLiveApiKeyProfiles(params: {
  cfg: OpenClawConfig;
  stateDir: string;
  providerIds: readonly string[];
  env?: NodeJS.ProcessEnv;
  agentIds?: readonly string[];
}): Promise<OpenClawConfig> {
  const env = params.env ?? process.env;

  // Normalize the requested providers: trim, drop blanks, de-duplicate, sort.
  const uniqueProviderIds = new Set<string>();
  for (const rawProviderId of params.providerIds) {
    const trimmedProviderId = rawProviderId.trim();
    if (trimmedProviderId.length > 0) {
      uniqueProviderIds.add(trimmedProviderId);
    }
  }
  const orderedProviderIds = [...uniqueProviderIds].toSorted();

  const stagedProfiles: Record<
    string,
    {
      type: "api_key";
      provider: string;
      key: string;
      displayName: string;
    }
  > = {};
  let stagedCfg = params.cfg;
  for (const providerId of orderedProviderIds) {
    // Resolution sees the config as staged so far, including earlier providers.
    const apiKey = resolveEnvApiKey(providerId, env, { config: stagedCfg })?.apiKey;
    if (!apiKey) {
      continue;
    }
    const profileId = buildQaLiveApiKeyProfileId(providerId);
    const displayName = `QA live ${providerId} env credential`;
    stagedProfiles[profileId] = {
      type: "api_key",
      provider: providerId,
      key: apiKey,
      displayName,
    };
    stagedCfg = applyAuthProfileConfig(stagedCfg, {
      profileId,
      provider: providerId,
      mode: "api_key",
      displayName,
    });
  }

  // Only touch the agent auth stores when at least one key was resolved.
  if (Object.keys(stagedProfiles).length > 0) {
    const targetAgentIds = [...new Set(params.agentIds ?? QA_LIVE_API_KEY_AGENT_IDS)];
    const pendingWrites = targetAgentIds.map((agentId) =>
      writeQaAuthProfiles({
        agentDir: resolveQaAgentAuthDir({ stateDir: params.stateDir, agentId }),
        profiles: stagedProfiles,
      }),
    );
    await Promise.all(pendingWrites);
  }
  return stagedCfg;
}

View File

@@ -22,10 +22,15 @@ export async function writeQaAuthProfiles(params: {
agentDir: string;
profiles: Record<string, QaAuthProfileCredential>;
}): Promise<void> {
const authPath = path.join(params.agentDir, "auth-profiles.json");
const existing = await fs
.readFile(authPath, "utf8")
.then((raw) => JSON.parse(raw) as { profiles?: Record<string, QaAuthProfileCredential> })
.catch(() => ({ profiles: {} }));
await fs.mkdir(params.agentDir, { recursive: true });
await fs.writeFile(
path.join(params.agentDir, "auth-profiles.json"),
`${JSON.stringify({ version: 1, profiles: params.profiles }, null, 2)}\n`,
authPath,
`${JSON.stringify({ version: 1, profiles: { ...existing.profiles, ...params.profiles } }, null, 2)}\n`,
"utf8",
);
}

View File

@@ -187,6 +187,7 @@ describe("qa scenario catalog", () => {
pluginId?: string;
pluginPersonality?: string;
adversarialPersonality?: string;
expectedSurfaceIds?: Record<string, string[]>;
expectedAdversarialDiagnostics?: string[];
}
| undefined;
@@ -198,9 +199,22 @@ describe("qa scenario catalog", () => {
expect(config?.pluginId).toBe("openclaw-kitchen-sink-fixture");
expect(config?.pluginPersonality).toBe("conformance");
expect(config?.adversarialPersonality).toBe("adversarial");
expect(config?.expectedSurfaceIds?.webSearchProviderIds).toContain(
"kitchen-sink-web-search-provider",
);
expect(config?.expectedSurfaceIds?.realtimeVoiceProviderIds).toContain(
"kitchen-sink-realtime-voice-provider",
);
expect(config?.expectedAdversarialDiagnostics).toContain(
"only bundled plugins can register agent tool result middleware",
);
expect(config?.expectedAdversarialDiagnostics).toContain(
"control UI descriptor registration requires id, surface, label, and valid optional fields",
);
expect(
config?.expectedAdversarialDiagnostics?.every((entry) => typeof entry === "string"),
).toBe(true);
expect(JSON.stringify(scenario.execution.flow)).toContain("--runtime");
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"installs and inspects the Kitchen Sink plugin",
"restarts gateway with Kitchen Sink configured",

View File

@@ -51,6 +51,8 @@ import {
import { createTempDirHarness } from "./temp-dir.test-helper.js";
const { cleanup, makeTempDir } = createTempDirHarness();
const repoRoot = "/repo/openclaw";
const gatewayTempRoot = "/tmp/openclaw-qa-runtime";
afterEach(cleanup);
@@ -111,12 +113,14 @@ describe("qa suite runtime agent tools helpers", () => {
callPluginToolsMcp({
env: {
gateway: {
tempRoot: gatewayTempRoot,
runtimeEnv: {
PATH: "/usr/bin",
OPENCLAW_KEY: "1",
EMPTY: undefined,
},
},
repoRoot,
} as never,
toolName: "plugin.echo",
args: { text: "hello" },
@@ -127,8 +131,13 @@ describe("qa suite runtime agent tools helpers", () => {
expect(stdioTransportMock).toHaveBeenCalledWith({
command: "/usr/bin/node",
args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"],
args: [
"--import",
expect.stringContaining(path.join("node_modules", "tsx")),
path.join(repoRoot, "src", "mcp", "plugin-tools-serve.ts"),
],
stderr: "pipe",
cwd: gatewayTempRoot,
env: {
PATH: "/usr/bin",
OPENCLAW_KEY: "1",
@@ -140,4 +149,31 @@ describe("qa suite runtime agent tools helpers", () => {
});
expect(closeMock).toHaveBeenCalled();
});
// When the requested tool is not advertised, the rejection message must list
// the advertised tool names in sorted order, no tool call may be issued, and
// the client must still be closed.
it("reports available plugin-tools MCP names when the requested tool is missing", async () => {
  const advertisedTools = [{ name: "plugin.beta" }, { name: "plugin.alpha" }] as never[];
  listToolsMock.mockResolvedValueOnce({
    tools: advertisedTools,
  });

  const suiteEnv = {
    gateway: {
      tempRoot: gatewayTempRoot,
      runtimeEnv: {
        PATH: "/usr/bin",
      },
    },
    repoRoot,
  } as never;
  const pendingCall = callPluginToolsMcp({
    env: suiteEnv,
    toolName: "plugin.missing",
    args: {},
  });

  await expect(pendingCall).rejects.toThrow(
    "MCP tool missing: plugin.missing; available tools: plugin.alpha, plugin.beta",
  );
  expect(callToolMock).not.toHaveBeenCalled();
  expect(closeMock).toHaveBeenCalled();
});
});

View File

@@ -1,4 +1,5 @@
import fs from "node:fs/promises";
import { createRequire } from "node:module";
import path from "node:path";
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
@@ -11,6 +12,8 @@ import type {
QaTransportActionName,
} from "./suite-runtime-types.js";
// CommonJS-style require bound to this module's URL; callPluginToolsMcp uses its
// `resolve` to locate the `tsx` package relative to this file.
const requireFromHere = createRequire(import.meta.url);
/**
 * Returns the first skill entry whose `name` equals the requested name,
 * or `undefined` when no entry matches.
 */
function findSkill(skills: QaSkillStatusEntry[], name: string) {
  for (const candidate of skills) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  return undefined;
}
@@ -28,7 +31,7 @@ async function writeWorkspaceSkill(params: {
}
async function callPluginToolsMcp(params: {
env: Pick<QaSuiteRuntimeEnv, "gateway">;
env: Pick<QaSuiteRuntimeEnv, "gateway" | "repoRoot">;
toolName: string;
args: Record<string, unknown>;
}) {
@@ -40,8 +43,13 @@ async function callPluginToolsMcp(params: {
const nodeExecPath = await resolveQaNodeExecPath();
const transport = new StdioClientTransport({
command: nodeExecPath,
args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"],
args: [
"--import",
requireFromHere.resolve("tsx"),
path.join(params.env.repoRoot, "src/mcp/plugin-tools-serve.ts"),
],
stderr: "pipe",
cwd: params.env.gateway.tempRoot,
env: transportEnv,
});
const client = new Client({ name: "openclaw-qa-suite", version: "0.0.0" }, {});
@@ -50,7 +58,13 @@ async function callPluginToolsMcp(params: {
const listed = await client.listTools();
const tool = listed.tools.find((entry) => entry.name === params.toolName);
if (!tool) {
throw new Error(`MCP tool missing: ${params.toolName}`);
const availableTools = listed.tools
.map((entry) => entry.name)
.filter((name): name is string => typeof name === "string" && name.length > 0)
.toSorted();
throw new Error(
`MCP tool missing: ${params.toolName}; available tools: ${availableTools.join(", ") || "<none>"}`,
);
}
return await client.callTool({
name: params.toolName,

View File

@@ -1614,6 +1614,7 @@
"test:perf:profile:main": "node scripts/run-vitest-profile.mjs main",
"test:perf:profile:runner": "node scripts/run-vitest-profile.mjs runner",
"test:plugins:gateway-gauntlet": "node scripts/check-plugin-gateway-gauntlet.mjs",
"test:plugins:kitchen-sink-live": "pnpm openclaw qa suite --provider-mode live-frontier --scenario kitchen-sink-live-openai",
"test:sectriage": "OPENCLAW_GATEWAY_PROJECT_SHARDS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.gateway.config.ts && node scripts/run-vitest.mjs run --config test/vitest/vitest.unit.config.ts --exclude src/daemon/launchd.integration.test.ts --exclude src/process/exec.test.ts",
"test:serial": "OPENCLAW_TEST_PROJECTS_SERIAL=1 OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/test-projects.mjs",
"test:stability:gateway": "OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.gateway.config.ts src/gateway/gateway-stability.test.ts && OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.logging.config.ts src/logging/diagnostic-stability-bundle.test.ts && OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.infra.config.ts src/infra/fatal-error-hooks.test.ts",

View File

@@ -49,12 +49,44 @@ execution:
- kitchen_sink_text
- kitchen_sink_search
- kitchen_sink_image_job
expectedSurfaceIds:
speechProviderIds:
- kitchen-sink-speech
- kitchen-sink-speech-provider
realtimeTranscriptionProviderIds:
- kitchen-sink-realtime-transcription
- kitchen-sink-realtime-transcription-provider
realtimeVoiceProviderIds:
- kitchen-sink-realtime-voice
- kitchen-sink-realtime-voice-provider
mediaUnderstandingProviderIds:
- kitchen-sink-media
- kitchen-sink-media-understanding-provider
imageGenerationProviderIds:
- kitchen-sink-image
- kitchen-sink-image-generation-provider
videoGenerationProviderIds:
- kitchen-sink-video
- kitchen-sink-video-generation-provider
musicGenerationProviderIds:
- kitchen-sink-music
- kitchen-sink-music-generation-provider
webFetchProviderIds:
- kitchen-sink-fetch
- kitchen-sink-web-fetch-provider
webSearchProviderIds:
- kitchen-sink-search
- kitchen-sink-web-search-provider
migrationProviderIds:
- kitchen-sink-migration-providers
- kitchen-sink-migration-provider
maxGatewayCpuCoreRatio: 1.5
maxGatewayRssMiB: 2048
agentTurnTimeoutMs: 120000
outboundTimeoutMs: 60000
livePrompt: "Kitchen Sink OpenAI marker. Reply exactly: KITCHEN-SINK-OPENAI-OK"
expectedAdversarialDiagnostics:
- agent event subscription registration requires id and handle
- only bundled plugins can register agent tool result middleware
- agent harness "kitchen-sink-agent-harness" registration missing required runtime methods
- channel "kitchen-sink-channel-probe" registration missing required config helpers
@@ -62,9 +94,16 @@ execution:
- only bundled plugins can register Codex app-server extension factories
- compaction provider "kitchen-sink-compaction-provider" registration missing summarize
- context engine registration missing id
- http route registration missing or invalid auth: /kitchen-sink/http-route
- control UI descriptor registration requires id, surface, label, and valid optional fields
- "http route registration missing or invalid auth: /kitchen-sink/http-route"
- "plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider"
- memory prompt supplement registration missing builder
- node invoke policy registration missing commands
- session extension registration requires namespace and description
- session scheduler job registration requires unique id, sessionKey, and kind
- "plugin must declare contracts.tools for: kitchen-sink-tool"
- tool metadata registration missing toolName
- only bundled plugins can register trusted tool policies
```
```yaml qa-flow
@@ -110,6 +149,10 @@ steps:
...(cfg.channels || {}),
[config.channelId]: { enabled: true, token: "kitchen-sink-qa" },
};
cfg.tools = {
...(cfg.tools || {}),
alsoAllow: [...new Set([...(cfg.tools?.alsoAllow || []), ...config.expectedToolAny])],
};
await fs.writeFile(env.gateway.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
return env.gateway.configPath;
})()
@@ -129,6 +172,7 @@ steps:
- - plugins
- inspect
- expr: config.pluginId
- --runtime
- --json
- json: true
timeoutMs: 60000
@@ -148,9 +192,22 @@ steps:
channels: [...new Set([...(plugin.channelIds ?? []), ...(plugin.channels ?? [])])],
providers: [...new Set([...(plugin.providerIds ?? []), ...(plugin.providers ?? [])])],
tools: [...new Set([...namesFromTools, ...(contracts.tools ?? [])])],
commands: inspect.commands ?? [],
services: inspect.services ?? [],
typedHookCount: Array.isArray(inspect.typedHooks) ? inspect.typedHooks.length : 0,
hookCount: plugin.hookCount ?? 0,
surfaceIds: Object.fromEntries(
Object.keys(config.expectedSurfaceIds ?? {})
.map((field) => [field, Array.isArray(plugin[field]) ? plugin[field] : []])
),
agentHarnessIds: plugin.agentHarnessIds ?? [],
diagnostics: [...(pluginList.diagnostics ?? []), ...(inspect.diagnostics ?? [])]
.filter((entry) => entry?.level === "error")
.map((entry) => String(entry.message ?? "")),
unexpectedDiagnostics: [...new Set([...(pluginList.diagnostics ?? []), ...(inspect.diagnostics ?? [])]
.filter((entry) => entry?.level === "error")
.map((entry) => String(entry.message ?? ""))
.filter((message) => !config.expectedAdversarialDiagnostics.includes(message)))],
};
})()
- assert:
@@ -170,9 +227,25 @@ steps:
message:
expr: "`Kitchen Sink tools missing from inspect output: ${JSON.stringify(inspectFacts.tools)}`"
- assert:
expr: "inspectFacts.diagnostics.length === 0"
expr: "Object.entries(config.expectedSurfaceIds).every(([field, expected]) => expected.some((id) => (inspectFacts.surfaceIds[field] ?? []).includes(id)))"
message:
expr: "`Kitchen Sink conformance personality emitted diagnostics: ${JSON.stringify(inspectFacts.diagnostics)}`"
expr: "`Kitchen Sink SDK provider surface missing from inspect output: ${JSON.stringify(inspectFacts.surfaceIds)}`"
- assert:
expr: "inspectFacts.commands.includes('kitchen') && inspectFacts.services.includes('kitchen-sink-service')"
message:
expr: "`Kitchen Sink command/service surfaces missing: ${JSON.stringify({ commands: inspectFacts.commands, services: inspectFacts.services })}`"
- assert:
expr: "inspectFacts.hookCount >= 30 && inspectFacts.typedHookCount >= 30"
message:
expr: "`Kitchen Sink hook surfaces missing: ${JSON.stringify({ hookCount: inspectFacts.hookCount, typedHookCount: inspectFacts.typedHookCount })}`"
- assert:
expr: "!inspectFacts.agentHarnessIds.includes('kitchen-sink-agent-harness')"
message:
expr: "`External Kitchen Sink plugin unexpectedly registered bundled-only agent harness: ${JSON.stringify(inspectFacts.agentHarnessIds)}`"
- assert:
expr: "inspectFacts.unexpectedDiagnostics.length === 0"
message:
expr: "`Kitchen Sink conformance personality emitted unexpected diagnostics: ${JSON.stringify(inspectFacts.unexpectedDiagnostics)}`"
detailsExpr: inspectFacts
- name: restarts gateway with Kitchen Sink configured
@@ -208,12 +281,32 @@ steps:
...(cfg.channels || {}),
[config.channelId]: { enabled: true, token: "kitchen-sink-qa" },
};
cfg.tools = {
...(cfg.tools || {}),
alsoAllow: [...new Set([...(cfg.tools?.alsoAllow || []), ...config.expectedToolAny])],
};
await fs.writeFile(ctx.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
})()
- call: waitForGatewayHealthy
args:
- ref: env
- 120000
- call: fetchJson
saveAs: healthz
args:
- expr: "`${env.gateway.baseUrl}/healthz`"
- call: fetchJson
saveAs: readyz
args:
- expr: "`${env.gateway.baseUrl}/readyz`"
- assert:
expr: "healthz?.ok === true && healthz?.status === 'live'"
message:
expr: "`/healthz did not report live: ${JSON.stringify(healthz)}`"
- assert:
expr: "readyz?.ready === true"
message:
expr: "`/readyz did not report ready: ${JSON.stringify(readyz)}`"
- call: waitForQaChannelReady
args:
- ref: env
@@ -241,7 +334,7 @@ steps:
expr: "kitchenChannelAccount?.running === true && kitchenChannelAccount?.configured === true"
message:
expr: "`Kitchen Sink channel did not report running+configured: ${JSON.stringify(kitchenChannelAccount)}`"
detailsExpr: kitchenChannelAccount
detailsExpr: "{ healthz, readyz, kitchenChannelAccount }"
- name: exercises command inventory and MCP tool surfaces
actions:
@@ -390,6 +483,7 @@ steps:
- - plugins
- inspect
- expr: config.pluginId
- --runtime
- --json
- json: true
timeoutMs: 60000