test(qa): add kitchen sink plugin gauntlet

This commit is contained in:
Vincent Koc
2026-04-28 21:24:05 -07:00
parent e4b09e1bf3
commit abed3a056d
10 changed files with 464 additions and 3 deletions

View File

@@ -39,6 +39,9 @@ Docs: https://docs.openclaw.ai
- Control UI: keep Agents Overview and config-form select dropdowns on their configured value after options render while preserving inherited agent model placeholders. Fixes #40352; carries forward #52948. Thanks @xiaoquanidea.
- Agents/exec: launch zsh, bash, and fish host exec shells with startup files suppressed while preserving existing PATH fallbacks, so daemon env is not overridden by shell startup files. Carries forward #40200; fixes #40179. Thanks @NewdlDewdl.
- Plugins/QA: prebuild the private QA channel runtime before plugin gauntlet source runs so wrapper CPU/RSS measurements are not polluted by private QA dist rebuild work. Thanks @vincentkoc.
- Plugins/QA: add a Kitchen Sink plugin gauntlet that installs the external package and checks command inventory, MCP tools, channel status, provider turns, gateway RSS and CPU, and fatal log anomalies. Thanks @vincentkoc.
- Plugins/channels: reject malformed runtime channel registrations that omit required config helpers before they can poison channel status. Thanks @vincentkoc.
- MCP/plugins: serialize raw plugin tool return values through the plugin-tools MCP bridge so Kitchen Sink-style tools no longer surface `undefined` content. Thanks @vincentkoc.
- Gateway/reload: bound default restart deferral and SIGUSR1 restart drain to five minutes while preserving explicit `deferralTimeoutMs: 0` indefinite waits, so stale active work accounting cannot block config reloads forever. Thanks @vincentkoc.
- Active Memory: register the prompt-build hook with the configured recall timeout plus setup grace instead of the 150s maximum budget, so default memory recall cannot delay turn startup for multiple minutes. Thanks @vincentkoc.
- Gateway/readiness: include an `eventLoop` diagnostic block in local or authenticated `/readyz` responses with event-loop delay (p99 and max), event-loop utilization, CPU core ratio, and a `degraded` flag, so operators can see when slow startups or runaway turns stall the event loop. Thanks @vincentkoc.

View File

@@ -22,7 +22,7 @@ import { formatQaGatewayLogsForError, redactQaGatewayDebugText } from "./gateway
import { startQaGatewayRpcClient } from "./gateway-rpc-client.js";
import { splitQaModelRef, type QaProviderMode } from "./model-selection.js";
import { resolveQaNodeExecPath } from "./node-exec.js";
import { readProcessTreeCpuMs } from "./process-tree-cpu.js";
import { readProcessTreeCpuMs, readProcessTreeRssBytes } from "./process-tree-cpu.js";
import {
normalizeQaProviderModeEnv,
QA_LIVE_PROVIDER_CONFIG_PATH_ENV,
@@ -828,6 +828,7 @@ export async function startQaGatewayChild(params: {
wsUrl,
pid: child.pid ?? null,
getProcessCpuMs: () => readProcessTreeCpuMs(activeChild.pid ?? null),
getProcessRssBytes: () => readProcessTreeRssBytes(activeChild.pid ?? null),
token: gatewayToken,
workspaceDir,
tempRoot,

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { parsePsCpuTimeMs } from "./process-tree-cpu.js";
import { parsePsCpuTimeMs, parsePsRssBytes } from "./process-tree-cpu.js";
describe("process tree CPU helpers", () => {
it("parses ps CPU time strings", () => {
@@ -13,4 +13,15 @@ describe("process tree CPU helpers", () => {
expect(parsePsCpuTimeMs("nope")).toBeNull();
expect(parsePsCpuTimeMs("1:2:3:4")).toBeNull();
});
// The parser treats its input as KiB and scales by 1024, rounding to whole bytes,
// so a fractional KiB value such as "1.5" still yields an integer byte count.
it("parses ps RSS KiB values as bytes", () => {
expect(parsePsRssBytes("1024")).toBe(1_048_576);
expect(parsePsRssBytes("1.5")).toBe(1_536);
});
// Empty, non-numeric, and negative inputs must all come back as null rather than
// a byte count.
it("rejects malformed ps RSS values", () => {
expect(parsePsRssBytes("")).toBeNull();
expect(parsePsRssBytes("nope")).toBeNull();
expect(parsePsRssBytes("-1")).toBeNull();
});
});

View File

@@ -14,6 +14,18 @@ export function parsePsCpuTimeMs(raw: string): number | null {
return null;
}
/**
 * Converts one `rss` column value from `ps` (expressed in KiB) into bytes.
 *
 * Only plain, non-negative decimal text is accepted. `Number()` on its own would
 * also parse hexadecimal/binary/octal literals ("0x10"), exponent notation
 * ("1e3"), and explicitly signed values ("+5"); none of those are a valid RSS
 * column, so the textual shape is checked before converting.
 *
 * @param raw - Raw column text; surrounding whitespace is ignored.
 * @returns RSS in bytes rounded to the nearest integer, or `null` when `raw` is
 *   empty, is not a plain decimal number, is negative, or is too large to
 *   represent as a finite number.
 */
export function parsePsRssBytes(raw: string): number | null {
  const trimmed = raw.trim();
  // Digits with an optional fractional part; this also rejects the empty string
  // and anything carrying a sign, prefix, or exponent.
  if (!/^\d+(?:\.\d+)?$/u.test(trimmed)) {
    return null;
  }
  const rssBytes = Math.round(Number(trimmed) * 1024);
  // An absurdly long digit run can overflow to Infinity; treat that as malformed.
  return Number.isFinite(rssBytes) ? rssBytes : null;
}
export function readProcessTreeCpuMs(rootPid: number | null | undefined): number | null {
if (
typeof rootPid !== "number" ||
@@ -70,3 +82,60 @@ export function readProcessTreeCpuMs(rootPid: number | null | undefined): number
}
return totalCpuMs;
}
/**
 * Reports the combined resident set size of `rootPid` and all of its descendants.
 *
 * Takes a single `ps -eo pid=,ppid=,rss=` snapshot, indexes every parseable row
 * by pid and by parent pid, then walks the tree starting at `rootPid`, adding up
 * the RSS of each process it visits.
 *
 * @param rootPid - Pid of the process at the top of the tree.
 * @returns Total RSS in bytes, or `null` when `rootPid` is not a positive
 *   integer, the platform is win32, `ps` does not exit with status 0, or
 *   `rootPid` does not appear in the snapshot.
 */
export function readProcessTreeRssBytes(rootPid: number | null | undefined): number | null {
  if (typeof rootPid !== "number") {
    return null;
  }
  if (!Number.isInteger(rootPid) || rootPid <= 0) {
    return null;
  }
  if (process.platform === "win32") {
    return null;
  }

  const psSnapshot = spawnSync("ps", ["-eo", "pid=,ppid=,rss="], {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"],
  });
  if (psSnapshot.status !== 0) {
    return null;
  }

  // One row per process: "<pid> <ppid> <rss>"; rows of any other shape are skipped.
  const rowPattern = /^(\d+)\s+(\d+)\s+(\S+)$/u;
  const rssBytesByPid = new Map<number, number>();
  const childPidsByParent = new Map<number, number[]>();
  for (const rawRow of psSnapshot.stdout.split("\n")) {
    const fields = rowPattern.exec(rawRow.trim());
    if (fields === null) {
      continue;
    }
    const pid = Number(fields[1]);
    const parentPid = Number(fields[2]);
    const rowRssBytes = parsePsRssBytes(fields[3] ?? "");
    if (rowRssBytes === null || !Number.isInteger(pid) || !Number.isInteger(parentPid)) {
      continue;
    }
    rssBytesByPid.set(pid, rowRssBytes);
    const knownChildren = childPidsByParent.get(parentPid);
    if (knownChildren === undefined) {
      childPidsByParent.set(parentPid, [pid]);
    } else {
      knownChildren.push(pid);
    }
  }

  // Without a row for the root there is nothing meaningful to report.
  if (!rssBytesByPid.has(rootPid)) {
    return null;
  }

  let totalRssBytes = 0;
  const visited = new Set<number>();
  const pending: number[] = [rootPid];
  // Depth-first walk; `visited` ensures each pid is counted at most once.
  for (let pid = pending.pop(); pid !== undefined; pid = pending.pop()) {
    if (visited.has(pid)) {
      continue;
    }
    visited.add(pid);
    totalRssBytes += rssBytesByPid.get(pid) ?? 0;
    for (const childPid of childPidsByParent.get(pid) ?? []) {
      pending.push(childPid);
    }
  }
  return totalRssBytes;
}

View File

@@ -179,6 +179,31 @@ describe("qa scenario catalog", () => {
]);
});
it("includes the Kitchen Sink live OpenAI plugin gauntlet", () => {
  // Only the execution-config fields this test pins.
  type KitchenSinkExecutionConfig = {
    requiredProviderMode?: string;
    requiredProvider?: string;
    pluginSpec?: string;
    pluginId?: string;
  };
  // Flow step names, in the order the scenario is expected to declare them.
  const expectedStepNames = [
    "installs and inspects the Kitchen Sink plugin",
    "restarts gateway with Kitchen Sink configured",
    "exercises command inventory and MCP tool surfaces",
    "runs live OpenAI turn with Kitchen Sink loaded",
    "records gateway CPU RSS and log anomaly evidence",
  ];

  const kitchenSinkScenario = readQaScenarioById("kitchen-sink-live-openai");
  const executionConfig = readQaScenarioExecutionConfig("kitchen-sink-live-openai") as
    | KitchenSinkExecutionConfig
    | undefined;

  expect(kitchenSinkScenario.sourcePath).toBe("qa/scenarios/plugins/kitchen-sink-live-openai.md");
  expect(executionConfig?.requiredProviderMode).toBe("live-frontier");
  expect(executionConfig?.requiredProvider).toBe("openai");
  expect(executionConfig?.pluginSpec).toBe("npm:@openclaw/kitchen-sink@latest");
  expect(executionConfig?.pluginId).toBe("openclaw-kitchen-sink-fixture");

  const actualStepNames = kitchenSinkScenario.execution.flow?.steps.map((step) => step.name);
  expect(actualStepNames).toEqual(expectedStepNames);
});
it("includes the thinking slash model remap scenario", () => {
const scenario = readQaScenarioById("thinking-slash-model-remap");
const config = readQaScenarioExecutionConfig("thinking-slash-model-remap") as

View File

@@ -8,6 +8,8 @@ export type QaRuntimeGatewayClient = {
workspaceDir: string;
runtimeEnv: NodeJS.ProcessEnv;
getProcessCpuMs?: () => number | null;
getProcessRssBytes?: () => number | null;
logs?: () => string;
restartAfterStateMutation?: (
mutateState: (context: {
configPath: string;

View File

@@ -18,6 +18,9 @@ export type QaSuiteSummaryJson = {
wallMs: number;
gatewayProcessCpuMs?: number | null;
gatewayCpuCoreRatio?: number | null;
gatewayProcessRssStartBytes?: number | null;
gatewayProcessRssEndBytes?: number | null;
gatewayProcessRssDeltaBytes?: number | null;
};
run: {
startedAt: string;

View File

@@ -106,12 +106,18 @@ describe("buildQaSuiteSummaryJson", () => {
wallMs: 12_000,
gatewayProcessCpuMs: 3_400,
gatewayCpuCoreRatio: 0.283,
gatewayProcessRssStartBytes: 100_000_000,
gatewayProcessRssEndBytes: 125_000_000,
gatewayProcessRssDeltaBytes: 25_000_000,
},
});
expect(json.metrics).toEqual({
wallMs: 12_000,
gatewayProcessCpuMs: 3_400,
gatewayCpuCoreRatio: 0.283,
gatewayProcessRssStartBytes: 100_000_000,
gatewayProcessRssEndBytes: 125_000_000,
gatewayProcessRssDeltaBytes: 25_000_000,
});
});
});

View File

@@ -390,10 +390,21 @@ function buildQaSuiteRuntimeMetrics(params: {
finishedAt: Date;
gatewayProcessCpuStartMs: number | null;
gatewayProcessCpuEndMs: number | null;
gatewayProcessRssStartBytes: number | null;
gatewayProcessRssEndBytes: number | null;
}): QaSuiteSummaryJson["metrics"] {
const wallMs = Math.max(1, params.finishedAt.getTime() - params.startedAt.getTime());
const rssMetrics =
params.gatewayProcessRssStartBytes === null || params.gatewayProcessRssEndBytes === null
? {}
: {
gatewayProcessRssStartBytes: params.gatewayProcessRssStartBytes,
gatewayProcessRssEndBytes: params.gatewayProcessRssEndBytes,
gatewayProcessRssDeltaBytes:
params.gatewayProcessRssEndBytes - params.gatewayProcessRssStartBytes,
};
if (params.gatewayProcessCpuStartMs === null || params.gatewayProcessCpuEndMs === null) {
return { wallMs };
return { wallMs, ...rssMetrics };
}
const gatewayProcessCpuMs = Math.max(
0,
@@ -403,6 +414,7 @@ function buildQaSuiteRuntimeMetrics(params: {
wallMs,
gatewayProcessCpuMs,
gatewayCpuCoreRatio: Math.round((gatewayProcessCpuMs / wallMs) * 1000) / 1000,
...rssMetrics,
};
}
@@ -773,6 +785,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
});
const gatewayProcessCpuStartMs = gateway.getProcessCpuMs?.() ?? null;
const gatewayProcessRssStartBytes = gateway.getProcessRssBytes?.() ?? null;
for (const [index, scenario] of selectedCatalogScenarios.entries()) {
const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
writeQaSuiteProgress(
@@ -821,6 +834,8 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
finishedAt,
gatewayProcessCpuStartMs,
gatewayProcessCpuEndMs: gateway.getProcessCpuMs?.() ?? null,
gatewayProcessRssStartBytes,
gatewayProcessRssEndBytes: gateway.getProcessRssBytes?.() ?? null,
});
const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
if (scenarios.some((scenario) => scenario.status === "fail")) {

View File

@@ -0,0 +1,326 @@
# Kitchen Sink live OpenAI plugin gauntlet
```yaml qa-scenario
id: kitchen-sink-live-openai
title: Kitchen Sink live OpenAI plugin gauntlet
surface: plugins
category: pre-release
coverage:
primary:
- plugins.kitchen-sink
secondary:
- plugins.lifecycle
- plugins.plugin-tools
- models.live-openai
- gateway.performance
risk: high
objective: Verify the external Kitchen Sink plugin can be installed into a qa-lab gateway, expose its major runtime surfaces, and coexist with a live OpenAI provider turn.
successCriteria:
- The npm Kitchen Sink package installs, enables, and inspects as loaded.
- Kitchen Sink command inventory, MCP tool, and channel status work after gateway restart.
- A live OpenAI turn still completes while the Kitchen Sink plugin is installed.
- Gateway logs and process metrics are captured and stay under broad anomaly thresholds.
docsRefs:
- docs/concepts/qa-e2e-automation.md
- docs/channels/qa-channel.md
- docs/plugins/manifest.md
codeRefs:
- extensions/qa-lab/src/suite.ts
- extensions/qa-lab/src/gateway-child.ts
- scripts/e2e/kitchen-sink-plugin-docker.sh
execution:
kind: flow
summary: Install @openclaw/kitchen-sink, restart the gateway, exercise command inventory/tool/channel/OpenAI paths, and record CPU/RSS/log evidence.
config:
requiredProviderMode: live-frontier
requiredProvider: openai
pluginSpec: npm:@openclaw/kitchen-sink@latest
pluginId: openclaw-kitchen-sink-fixture
channelId: kitchen-sink-channel
channelAccountId: local
textProviderId: kitchen-sink-llm
textModel: kitchen-sink-text-v1
expectedProviderAny:
- kitchen-sink-provider
- kitchen-sink-llm
expectedToolAny:
- kitchen_sink_text
- kitchen_sink_search
- kitchen_sink_image_job
maxGatewayCpuCoreRatio: 1.5
maxGatewayRssMiB: 2048
livePrompt: "Kitchen Sink OpenAI marker. Reply exactly: KITCHEN-SINK-OPENAI-OK"
```
```yaml qa-flow
steps:
- name: installs and inspects the Kitchen Sink plugin
actions:
- call: runQaCli
args:
- ref: env
- - plugins
- install
- expr: config.pluginSpec
- timeoutMs: 180000
- call: runQaCli
args:
- ref: env
- - plugins
- enable
- expr: config.pluginId
- timeoutMs: 60000
- set: configuredPluginPath
value:
expr: |-
(async () => {
const raw = await fs.readFile(env.gateway.configPath, "utf8").catch(() => "{}");
const cfg = JSON.parse(raw || "{}");
cfg.plugins = cfg.plugins || {};
cfg.plugins.allow = [...new Set([...(cfg.plugins.allow || []), config.pluginId])];
cfg.plugins.entries = cfg.plugins.entries || {};
cfg.plugins.entries[config.pluginId] = {
...(cfg.plugins.entries[config.pluginId] || {}),
enabled: true,
hooks: {
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
allowConversationAccess: true,
},
};
cfg.channels = {
...(cfg.channels || {}),
[config.channelId]: { enabled: true, token: "kitchen-sink-qa" },
};
await fs.writeFile(env.gateway.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
return env.gateway.configPath;
})()
- call: runQaCli
saveAs: pluginList
args:
- ref: env
- - plugins
- list
- --json
- json: true
timeoutMs: 60000
- call: runQaCli
saveAs: inspect
args:
- ref: env
- - plugins
- inspect
- expr: config.pluginId
- --json
- json: true
timeoutMs: 60000
- set: inspectFacts
value:
expr: |-
(() => {
const plugin = inspect.plugin ?? {};
const namesFromTools = Array.isArray(inspect.tools)
? inspect.tools.flatMap((entry) => Array.isArray(entry?.names) ? entry.names : [entry?.name]).filter(Boolean)
: [];
const contracts = plugin.contracts && typeof plugin.contracts === "object" ? plugin.contracts : {};
return {
id: plugin.id,
enabled: plugin.enabled,
status: plugin.status,
channels: [...new Set([...(plugin.channelIds ?? []), ...(plugin.channels ?? [])])],
providers: [...new Set([...(plugin.providerIds ?? []), ...(plugin.providers ?? [])])],
tools: [...new Set([...namesFromTools, ...(contracts.tools ?? [])])],
diagnostics: [...(pluginList.diagnostics ?? []), ...(inspect.diagnostics ?? [])]
.filter((entry) => entry?.level === "error")
.map((entry) => String(entry.message ?? "")),
};
})()
- assert:
expr: "inspectFacts.id === config.pluginId && inspectFacts.enabled === true && inspectFacts.status === 'loaded'"
message:
expr: "`Kitchen Sink plugin did not inspect as enabled+loaded: ${JSON.stringify(inspectFacts)}`"
- assert:
expr: "inspectFacts.channels.includes(config.channelId)"
message:
expr: "`Kitchen Sink channel missing from inspect output: ${JSON.stringify(inspectFacts.channels)}`"
- assert:
expr: "config.expectedProviderAny.some((provider) => inspectFacts.providers.includes(provider))"
message:
expr: "`Kitchen Sink providers missing from inspect output: ${JSON.stringify(inspectFacts.providers)}`"
- assert:
expr: "config.expectedToolAny.some((tool) => inspectFacts.tools.includes(tool))"
message:
expr: "`Kitchen Sink tools missing from inspect output: ${JSON.stringify(inspectFacts.tools)}`"
detailsExpr: inspectFacts
- name: restarts gateway with Kitchen Sink configured
actions:
- assert:
expr: "typeof env.gateway.restartAfterStateMutation === 'function'"
message: "qa gateway child does not expose restartAfterStateMutation"
- call: env.gateway.restartAfterStateMutation
args:
- lambda:
async: true
params: [ctx]
expr: |-
(async () => {
const raw = await fs.readFile(ctx.configPath, "utf8").catch(() => "{}");
const cfg = JSON.parse(raw || "{}");
cfg.plugins = cfg.plugins || {};
cfg.plugins.allow = [...new Set([...(cfg.plugins.allow || []), config.pluginId])];
cfg.plugins.entries = cfg.plugins.entries || {};
cfg.plugins.entries[config.pluginId] = {
...(cfg.plugins.entries[config.pluginId] || {}),
enabled: true,
hooks: {
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
allowConversationAccess: true,
},
};
cfg.channels = {
...(cfg.channels || {}),
[config.channelId]: { enabled: true, token: "kitchen-sink-qa" },
};
await fs.writeFile(ctx.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
})()
- call: waitForGatewayHealthy
args:
- ref: env
- 120000
- call: waitForQaChannelReady
args:
- ref: env
- 120000
- set: perfStartedAtMs
value:
expr: "Date.now()"
- set: cpuStartMs
value:
expr: "env.gateway.getProcessCpuMs?.() ?? null"
- set: rssStartBytes
value:
expr: "env.gateway.getProcessRssBytes?.() ?? null"
- call: env.gateway.call
saveAs: channelStatus
args:
- channels.status
- probe: true
timeoutMs: 10000
- timeoutMs: 15000
- set: kitchenChannelAccount
value:
expr: "(channelStatus.channelAccounts?.[config.channelId] ?? []).find((entry) => entry.accountId === config.channelAccountId) ?? null"
- assert:
expr: "kitchenChannelAccount?.running === true && kitchenChannelAccount?.configured === true"
message:
expr: "`Kitchen Sink channel did not report running+configured: ${JSON.stringify(kitchenChannelAccount)}`"
detailsExpr: kitchenChannelAccount
- name: exercises command inventory and MCP tool surfaces
actions:
- call: env.gateway.call
saveAs: commandList
args:
- commands.list
- agentId: qa
scope: text
- timeoutMs: 15000
- set: pluginCommandNames
value:
expr: "(commandList.commands ?? []).filter((entry) => entry.source === 'plugin').map((entry) => entry.name).sort()"
- assert:
expr: "pluginCommandNames.includes('kitchen') && pluginCommandNames.includes('kitchen-sink')"
message:
expr: "`Kitchen Sink plugin commands missing from commands.list: ${JSON.stringify(pluginCommandNames)}`"
- call: callPluginToolsMcp
saveAs: mcpTool
args:
- env:
ref: env
toolName: kitchen_sink_search
args:
query: "kitchen sink qa live openai"
- set: mcpToolText
value:
expr: "JSON.stringify(mcpTool.content ?? mcpTool)"
- assert:
expr: "mcpToolText.includes('Kitchen Sink image fixture')"
message:
expr: "`Kitchen Sink MCP tool output missed expected fixture: ${mcpToolText.slice(0, 500)}`"
detailsExpr: "{ pluginCommandNames, mcpToolText: mcpToolText.slice(0, 500) }"
- name: runs live OpenAI turn with Kitchen Sink loaded
actions:
- call: reset
- call: runAgentPrompt
args:
- ref: env
- sessionKey:
expr: "`agent:qa:kitchen-sink-openai:${randomUUID().slice(0, 8)}`"
message:
expr: config.livePrompt
timeoutMs:
expr: liveTurnTimeoutMs(env, 60000)
- call: waitForOutboundMessage
saveAs: openaiReply
args:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes('KITCHEN-SINK-OPENAI-OK')"
- expr: liveTurnTimeoutMs(env, 30000)
detailsExpr: "{ openaiReply: openaiReply.text }"
- name: records gateway CPU RSS and log anomaly evidence
actions:
- set: perfEvidence
value:
expr: |-
(() => {
const cpuStart = typeof vars.cpuStartMs === "number" ? vars.cpuStartMs : null;
const cpuEnd = env.gateway.getProcessCpuMs?.() ?? null;
const rssStart = typeof vars.rssStartBytes === "number" ? vars.rssStartBytes : null;
const rssEnd = env.gateway.getProcessRssBytes?.() ?? null;
const logs = env.gateway.logs?.() ?? "";
const deny = [
/\buncaught exception\b/iu,
/\bunhandled rejection\b/iu,
/\bfatal\b/iu,
/\bpanic\b/iu,
];
const findings = logs
.split(/\r?\n/u)
.filter((line) => deny.some((pattern) => pattern.test(line)))
.slice(0, 10)
.map((line) => line.replaceAll(env.repoRoot, "<repo>").slice(0, 500));
const wallMs = Date.now() - Number(vars.perfStartedAtMs ?? Date.now());
const cpuDeltaMs = cpuStart === null || cpuEnd === null ? null : Math.max(0, cpuEnd - cpuStart);
const cpuCoreRatio = cpuDeltaMs === null || wallMs <= 0 ? null : Math.round((cpuDeltaMs / wallMs) * 1000) / 1000;
const rssMiB = rssEnd === null ? null : Math.round((rssEnd / 1024 / 1024) * 10) / 10;
return {
wallMs,
cpuStart,
cpuEnd,
cpuDeltaMs,
cpuCoreRatio,
rssStartBytes: rssStart,
rssEndBytes: rssEnd,
rssMiB,
logBytes: logs.length,
findings,
};
})()
- assert:
expr: "perfEvidence.findings.length === 0"
message:
expr: "`Gateway logs contain fatal runtime lines: ${JSON.stringify(perfEvidence.findings)}`"
- assert:
expr: "perfEvidence.cpuCoreRatio === null || perfEvidence.cpuCoreRatio <= config.maxGatewayCpuCoreRatio"
message:
expr: "`Gateway CPU ratio exceeded Kitchen Sink anomaly threshold: ${JSON.stringify(perfEvidence)}`"
- assert:
expr: "perfEvidence.rssMiB === null || perfEvidence.rssMiB <= config.maxGatewayRssMiB"
message:
expr: "`Gateway RSS exceeded Kitchen Sink anomaly threshold: ${JSON.stringify(perfEvidence)}`"
detailsExpr: perfEvidence
```