mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 11:40:42 +00:00
test(qa): add kitchen sink plugin gauntlet
This commit is contained in:
@@ -39,6 +39,9 @@ Docs: https://docs.openclaw.ai
|
||||
- Control UI: keep Agents Overview and config-form select dropdowns on their configured value after options render while preserving inherited agent model placeholders. Fixes #40352; carries forward #52948. Thanks @xiaoquanidea.
|
||||
- Agents/exec: launch zsh, bash, and fish host exec shells with startup files suppressed while preserving existing PATH fallbacks, so daemon env is not overridden by shell startup files. Carries forward #40200; fixes #40179. Thanks @NewdlDewdl.
|
||||
- Plugins/QA: prebuild the private QA channel runtime before plugin gauntlet source runs so wrapper CPU/RSS measurements are not polluted by private QA dist rebuild work. Thanks @vincentkoc.
|
||||
- Plugins/QA: add a Kitchen Sink plugin gauntlet that installs the external package, checks command inventory, MCP tools, channel status, provider turns, gateway RSS, CPU, and fatal log anomalies. Thanks @vincentkoc.
|
||||
- Plugins/channels: reject malformed runtime channel registrations that omit required config helpers before they can poison channel status. Thanks @vincentkoc.
|
||||
- MCP/plugins: serialize raw plugin tool return values through the plugin-tools MCP bridge so Kitchen Sink-style tools no longer surface `undefined` content. Thanks @vincentkoc.
|
||||
- Gateway/reload: bound default restart deferral and SIGUSR1 restart drain to five minutes while preserving explicit `deferralTimeoutMs: 0` indefinite waits, so stale active work accounting cannot block config reloads forever. Thanks @vincentkoc.
|
||||
- Active Memory: register the prompt-build hook with the configured recall timeout plus setup grace instead of the 150s maximum budget, so default memory recall cannot delay turn startup for multiple minutes. Thanks @vincentkoc.
|
||||
- Gateway/readiness: include an `eventLoop` diagnostic block in local or authenticated `/readyz` responses with event-loop delay (p99 and max), event-loop utilization, CPU core ratio, and a `degraded` flag, so operators can see when slow startups or runaway turns stall the event loop. Thanks @vincentkoc.
|
||||
|
||||
@@ -22,7 +22,7 @@ import { formatQaGatewayLogsForError, redactQaGatewayDebugText } from "./gateway
|
||||
import { startQaGatewayRpcClient } from "./gateway-rpc-client.js";
|
||||
import { splitQaModelRef, type QaProviderMode } from "./model-selection.js";
|
||||
import { resolveQaNodeExecPath } from "./node-exec.js";
|
||||
import { readProcessTreeCpuMs } from "./process-tree-cpu.js";
|
||||
import { readProcessTreeCpuMs, readProcessTreeRssBytes } from "./process-tree-cpu.js";
|
||||
import {
|
||||
normalizeQaProviderModeEnv,
|
||||
QA_LIVE_PROVIDER_CONFIG_PATH_ENV,
|
||||
@@ -828,6 +828,7 @@ export async function startQaGatewayChild(params: {
|
||||
wsUrl,
|
||||
pid: child.pid ?? null,
|
||||
getProcessCpuMs: () => readProcessTreeCpuMs(activeChild.pid ?? null),
|
||||
getProcessRssBytes: () => readProcessTreeRssBytes(activeChild.pid ?? null),
|
||||
token: gatewayToken,
|
||||
workspaceDir,
|
||||
tempRoot,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { parsePsCpuTimeMs } from "./process-tree-cpu.js";
|
||||
import { parsePsCpuTimeMs, parsePsRssBytes } from "./process-tree-cpu.js";
|
||||
|
||||
describe("process tree CPU helpers", () => {
|
||||
it("parses ps CPU time strings", () => {
|
||||
@@ -13,4 +13,15 @@ describe("process tree CPU helpers", () => {
|
||||
expect(parsePsCpuTimeMs("nope")).toBeNull();
|
||||
expect(parsePsCpuTimeMs("1:2:3:4")).toBeNull();
|
||||
});
|
||||
|
||||
it("parses ps RSS KiB values as bytes", () => {
|
||||
expect(parsePsRssBytes("1024")).toBe(1_048_576);
|
||||
expect(parsePsRssBytes("1.5")).toBe(1_536);
|
||||
});
|
||||
|
||||
it("rejects malformed ps RSS values", () => {
|
||||
expect(parsePsRssBytes("")).toBeNull();
|
||||
expect(parsePsRssBytes("nope")).toBeNull();
|
||||
expect(parsePsRssBytes("-1")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -14,6 +14,18 @@ export function parsePsCpuTimeMs(raw: string): number | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Converts one `rss` column value printed by `ps` (a size in KiB) into bytes.
 *
 * Only plain, non-negative decimal text such as `"1024"` or `"1.5"` is
 * accepted. Anything else is treated as malformed.
 *
 * @param raw - Raw column text; surrounding whitespace is ignored.
 * @returns The size in bytes, rounded to the nearest integer, or `null` when
 *   the text is empty, negative, non-numeric, or not a plain decimal number.
 */
export function parsePsRssBytes(raw: string): number | null {
  const trimmed = raw.trim();
  // `Number()` on its own also accepts hex/binary/octal literals ("0x10"),
  // exponent notation ("1e3"), and signed forms ("+5"). None of those are
  // valid RSS columns, so validate the shape before converting. The pattern
  // also rejects the empty string and negative values.
  if (!/^\d+(?:\.\d+)?$/u.test(trimmed)) {
    return null;
  }
  const rssBytes = Math.round(Number(trimmed) * 1024);
  // An absurdly long digit string overflows to Infinity; report it as malformed.
  return Number.isFinite(rssBytes) ? rssBytes : null;
}
|
||||
|
||||
export function readProcessTreeCpuMs(rootPid: number | null | undefined): number | null {
|
||||
if (
|
||||
typeof rootPid !== "number" ||
|
||||
@@ -70,3 +82,60 @@ export function readProcessTreeCpuMs(rootPid: number | null | undefined): number
|
||||
}
|
||||
return totalCpuMs;
|
||||
}
|
||||
|
||||
/**
 * Sums the resident set size of a process and all of its descendants.
 *
 * Takes a single `ps -eo pid=,ppid=,rss=` snapshot, rebuilds the parent/child
 * relationships from the `ppid` column, and walks the tree starting at
 * `rootPid`, adding up the RSS of every process reached.
 *
 * @param rootPid - PID of the tree root. Anything other than a positive
 *   integer yields `null`.
 * @returns Total RSS in bytes, or `null` when running on Windows, when `ps`
 *   does not exit with status 0, or when the root PID is not in the snapshot.
 */
export function readProcessTreeRssBytes(rootPid: number | null | undefined): number | null {
  if (process.platform === "win32") {
    return null;
  }
  if (typeof rootPid !== "number" || !Number.isInteger(rootPid) || rootPid <= 0) {
    return null;
  }

  const snapshot = spawnSync("ps", ["-eo", "pid=,ppid=,rss="], {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"],
  });
  if (snapshot.status !== 0) {
    return null;
  }

  // One row per process: "<pid> <ppid> <rss>".
  const rowPattern = /^(\d+)\s+(\d+)\s+(\S+)$/u;
  const rssByPid = new Map<number, number>();
  const childrenByParent = new Map<number, number[]>();
  for (const rawLine of snapshot.stdout.split("\n")) {
    const columns = rowPattern.exec(rawLine.trim());
    if (columns === null) {
      continue;
    }
    const pid = Number(columns[1]);
    const ppid = Number(columns[2]);
    const rssBytes = parsePsRssBytes(columns[3] ?? "");
    // Skip rows whose ids or RSS column could not be interpreted.
    if (rssBytes === null || !Number.isInteger(pid) || !Number.isInteger(ppid)) {
      continue;
    }
    rssByPid.set(pid, rssBytes);
    const siblings = childrenByParent.get(ppid);
    if (siblings === undefined) {
      childrenByParent.set(ppid, [pid]);
    } else {
      siblings.push(pid);
    }
  }

  // The root may have exited before the snapshot was taken.
  if (!rssByPid.has(rootPid)) {
    return null;
  }

  const visited = new Set<number>();
  const pending: number[] = [rootPid];
  let totalRssBytes = 0;
  for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
    // The visited set guards against a pid being counted twice.
    if (visited.has(current)) {
      continue;
    }
    visited.add(current);
    totalRssBytes += rssByPid.get(current) ?? 0;
    const descendants = childrenByParent.get(current);
    if (descendants !== undefined) {
      for (const childPid of descendants) {
        pending.push(childPid);
      }
    }
  }
  return totalRssBytes;
}
|
||||
|
||||
@@ -179,6 +179,31 @@ describe("qa scenario catalog", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("includes the Kitchen Sink live OpenAI plugin gauntlet", () => {
|
||||
const scenario = readQaScenarioById("kitchen-sink-live-openai");
|
||||
const config = readQaScenarioExecutionConfig("kitchen-sink-live-openai") as
|
||||
| {
|
||||
requiredProviderMode?: string;
|
||||
requiredProvider?: string;
|
||||
pluginSpec?: string;
|
||||
pluginId?: string;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
expect(scenario.sourcePath).toBe("qa/scenarios/plugins/kitchen-sink-live-openai.md");
|
||||
expect(config?.requiredProviderMode).toBe("live-frontier");
|
||||
expect(config?.requiredProvider).toBe("openai");
|
||||
expect(config?.pluginSpec).toBe("npm:@openclaw/kitchen-sink@latest");
|
||||
expect(config?.pluginId).toBe("openclaw-kitchen-sink-fixture");
|
||||
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
|
||||
"installs and inspects the Kitchen Sink plugin",
|
||||
"restarts gateway with Kitchen Sink configured",
|
||||
"exercises command inventory and MCP tool surfaces",
|
||||
"runs live OpenAI turn with Kitchen Sink loaded",
|
||||
"records gateway CPU RSS and log anomaly evidence",
|
||||
]);
|
||||
});
|
||||
|
||||
it("includes the thinking slash model remap scenario", () => {
|
||||
const scenario = readQaScenarioById("thinking-slash-model-remap");
|
||||
const config = readQaScenarioExecutionConfig("thinking-slash-model-remap") as
|
||||
|
||||
@@ -8,6 +8,8 @@ export type QaRuntimeGatewayClient = {
|
||||
workspaceDir: string;
|
||||
runtimeEnv: NodeJS.ProcessEnv;
|
||||
getProcessCpuMs?: () => number | null;
|
||||
getProcessRssBytes?: () => number | null;
|
||||
logs?: () => string;
|
||||
restartAfterStateMutation?: (
|
||||
mutateState: (context: {
|
||||
configPath: string;
|
||||
|
||||
@@ -18,6 +18,9 @@ export type QaSuiteSummaryJson = {
|
||||
wallMs: number;
|
||||
gatewayProcessCpuMs?: number | null;
|
||||
gatewayCpuCoreRatio?: number | null;
|
||||
gatewayProcessRssStartBytes?: number | null;
|
||||
gatewayProcessRssEndBytes?: number | null;
|
||||
gatewayProcessRssDeltaBytes?: number | null;
|
||||
};
|
||||
run: {
|
||||
startedAt: string;
|
||||
|
||||
@@ -106,12 +106,18 @@ describe("buildQaSuiteSummaryJson", () => {
|
||||
wallMs: 12_000,
|
||||
gatewayProcessCpuMs: 3_400,
|
||||
gatewayCpuCoreRatio: 0.283,
|
||||
gatewayProcessRssStartBytes: 100_000_000,
|
||||
gatewayProcessRssEndBytes: 125_000_000,
|
||||
gatewayProcessRssDeltaBytes: 25_000_000,
|
||||
},
|
||||
});
|
||||
expect(json.metrics).toEqual({
|
||||
wallMs: 12_000,
|
||||
gatewayProcessCpuMs: 3_400,
|
||||
gatewayCpuCoreRatio: 0.283,
|
||||
gatewayProcessRssStartBytes: 100_000_000,
|
||||
gatewayProcessRssEndBytes: 125_000_000,
|
||||
gatewayProcessRssDeltaBytes: 25_000_000,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -390,10 +390,21 @@ function buildQaSuiteRuntimeMetrics(params: {
|
||||
finishedAt: Date;
|
||||
gatewayProcessCpuStartMs: number | null;
|
||||
gatewayProcessCpuEndMs: number | null;
|
||||
gatewayProcessRssStartBytes: number | null;
|
||||
gatewayProcessRssEndBytes: number | null;
|
||||
}): QaSuiteSummaryJson["metrics"] {
|
||||
const wallMs = Math.max(1, params.finishedAt.getTime() - params.startedAt.getTime());
|
||||
const rssMetrics =
|
||||
params.gatewayProcessRssStartBytes === null || params.gatewayProcessRssEndBytes === null
|
||||
? {}
|
||||
: {
|
||||
gatewayProcessRssStartBytes: params.gatewayProcessRssStartBytes,
|
||||
gatewayProcessRssEndBytes: params.gatewayProcessRssEndBytes,
|
||||
gatewayProcessRssDeltaBytes:
|
||||
params.gatewayProcessRssEndBytes - params.gatewayProcessRssStartBytes,
|
||||
};
|
||||
if (params.gatewayProcessCpuStartMs === null || params.gatewayProcessCpuEndMs === null) {
|
||||
return { wallMs };
|
||||
return { wallMs, ...rssMetrics };
|
||||
}
|
||||
const gatewayProcessCpuMs = Math.max(
|
||||
0,
|
||||
@@ -403,6 +414,7 @@ function buildQaSuiteRuntimeMetrics(params: {
|
||||
wallMs,
|
||||
gatewayProcessCpuMs,
|
||||
gatewayCpuCoreRatio: Math.round((gatewayProcessCpuMs / wallMs) * 1000) / 1000,
|
||||
...rssMetrics,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -773,6 +785,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
});
|
||||
|
||||
const gatewayProcessCpuStartMs = gateway.getProcessCpuMs?.() ?? null;
|
||||
const gatewayProcessRssStartBytes = gateway.getProcessRssBytes?.() ?? null;
|
||||
for (const [index, scenario] of selectedCatalogScenarios.entries()) {
|
||||
const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
|
||||
writeQaSuiteProgress(
|
||||
@@ -821,6 +834,8 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
finishedAt,
|
||||
gatewayProcessCpuStartMs,
|
||||
gatewayProcessCpuEndMs: gateway.getProcessCpuMs?.() ?? null,
|
||||
gatewayProcessRssStartBytes,
|
||||
gatewayProcessRssEndBytes: gateway.getProcessRssBytes?.() ?? null,
|
||||
});
|
||||
const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
|
||||
if (scenarios.some((scenario) => scenario.status === "fail")) {
|
||||
|
||||
326
qa/scenarios/plugins/kitchen-sink-live-openai.md
Normal file
326
qa/scenarios/plugins/kitchen-sink-live-openai.md
Normal file
@@ -0,0 +1,326 @@
|
||||
# Kitchen Sink live OpenAI plugin gauntlet
|
||||
|
||||
```yaml qa-scenario
|
||||
id: kitchen-sink-live-openai
|
||||
title: Kitchen Sink live OpenAI plugin gauntlet
|
||||
surface: plugins
|
||||
category: pre-release
|
||||
coverage:
|
||||
primary:
|
||||
- plugins.kitchen-sink
|
||||
secondary:
|
||||
- plugins.lifecycle
|
||||
- plugins.plugin-tools
|
||||
- models.live-openai
|
||||
- gateway.performance
|
||||
risk: high
|
||||
objective: Verify the external Kitchen Sink plugin can be installed into a qa-lab gateway, expose its major runtime surfaces, and coexist with a live OpenAI provider turn.
|
||||
successCriteria:
|
||||
- The npm Kitchen Sink package installs, enables, and inspects as loaded.
|
||||
- Kitchen Sink command inventory, MCP tool, and channel status work after gateway restart.
|
||||
- A live OpenAI turn still completes while the Kitchen Sink plugin is installed.
|
||||
- Gateway logs and process metrics are captured and stay under broad anomaly thresholds.
|
||||
docsRefs:
|
||||
- docs/concepts/qa-e2e-automation.md
|
||||
- docs/channels/qa-channel.md
|
||||
- docs/plugins/manifest.md
|
||||
codeRefs:
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
- extensions/qa-lab/src/gateway-child.ts
|
||||
- scripts/e2e/kitchen-sink-plugin-docker.sh
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Install @openclaw/kitchen-sink, restart the gateway, exercise command inventory/tool/channel/OpenAI paths, and record CPU/RSS/log evidence.
|
||||
config:
|
||||
requiredProviderMode: live-frontier
|
||||
requiredProvider: openai
|
||||
pluginSpec: npm:@openclaw/kitchen-sink@latest
|
||||
pluginId: openclaw-kitchen-sink-fixture
|
||||
channelId: kitchen-sink-channel
|
||||
channelAccountId: local
|
||||
textProviderId: kitchen-sink-llm
|
||||
textModel: kitchen-sink-text-v1
|
||||
expectedProviderAny:
|
||||
- kitchen-sink-provider
|
||||
- kitchen-sink-llm
|
||||
expectedToolAny:
|
||||
- kitchen_sink_text
|
||||
- kitchen_sink_search
|
||||
- kitchen_sink_image_job
|
||||
maxGatewayCpuCoreRatio: 1.5
|
||||
maxGatewayRssMiB: 2048
|
||||
livePrompt: "Kitchen Sink OpenAI marker. Reply exactly: KITCHEN-SINK-OPENAI-OK"
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: installs and inspects the Kitchen Sink plugin
|
||||
actions:
|
||||
- call: runQaCli
|
||||
args:
|
||||
- ref: env
|
||||
- - plugins
|
||||
- install
|
||||
- expr: config.pluginSpec
|
||||
- timeoutMs: 180000
|
||||
- call: runQaCli
|
||||
args:
|
||||
- ref: env
|
||||
- - plugins
|
||||
- enable
|
||||
- expr: config.pluginId
|
||||
- timeoutMs: 60000
|
||||
- set: configuredPluginPath
|
||||
value:
|
||||
expr: |-
|
||||
(async () => {
|
||||
const raw = await fs.readFile(env.gateway.configPath, "utf8").catch(() => "{}");
|
||||
const cfg = JSON.parse(raw || "{}");
|
||||
cfg.plugins = cfg.plugins || {};
|
||||
cfg.plugins.allow = [...new Set([...(cfg.plugins.allow || []), config.pluginId])];
|
||||
cfg.plugins.entries = cfg.plugins.entries || {};
|
||||
cfg.plugins.entries[config.pluginId] = {
|
||||
...(cfg.plugins.entries[config.pluginId] || {}),
|
||||
enabled: true,
|
||||
hooks: {
|
||||
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
|
||||
allowConversationAccess: true,
|
||||
},
|
||||
};
|
||||
cfg.channels = {
|
||||
...(cfg.channels || {}),
|
||||
[config.channelId]: { enabled: true, token: "kitchen-sink-qa" },
|
||||
};
|
||||
await fs.writeFile(env.gateway.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
|
||||
return env.gateway.configPath;
|
||||
})()
|
||||
- call: runQaCli
|
||||
saveAs: pluginList
|
||||
args:
|
||||
- ref: env
|
||||
- - plugins
|
||||
- list
|
||||
- --json
|
||||
- json: true
|
||||
timeoutMs: 60000
|
||||
- call: runQaCli
|
||||
saveAs: inspect
|
||||
args:
|
||||
- ref: env
|
||||
- - plugins
|
||||
- inspect
|
||||
- expr: config.pluginId
|
||||
- --json
|
||||
- json: true
|
||||
timeoutMs: 60000
|
||||
- set: inspectFacts
|
||||
value:
|
||||
expr: |-
|
||||
(() => {
|
||||
const plugin = inspect.plugin ?? {};
|
||||
const namesFromTools = Array.isArray(inspect.tools)
|
||||
? inspect.tools.flatMap((entry) => Array.isArray(entry?.names) ? entry.names : [entry?.name]).filter(Boolean)
|
||||
: [];
|
||||
const contracts = plugin.contracts && typeof plugin.contracts === "object" ? plugin.contracts : {};
|
||||
return {
|
||||
id: plugin.id,
|
||||
enabled: plugin.enabled,
|
||||
status: plugin.status,
|
||||
channels: [...new Set([...(plugin.channelIds ?? []), ...(plugin.channels ?? [])])],
|
||||
providers: [...new Set([...(plugin.providerIds ?? []), ...(plugin.providers ?? [])])],
|
||||
tools: [...new Set([...namesFromTools, ...(contracts.tools ?? [])])],
|
||||
diagnostics: [...(pluginList.diagnostics ?? []), ...(inspect.diagnostics ?? [])]
|
||||
.filter((entry) => entry?.level === "error")
|
||||
.map((entry) => String(entry.message ?? "")),
|
||||
};
|
||||
})()
|
||||
- assert:
|
||||
expr: "inspectFacts.id === config.pluginId && inspectFacts.enabled === true && inspectFacts.status === 'loaded'"
|
||||
message:
|
||||
expr: "`Kitchen Sink plugin did not inspect as enabled+loaded: ${JSON.stringify(inspectFacts)}`"
|
||||
- assert:
|
||||
expr: "inspectFacts.channels.includes(config.channelId)"
|
||||
message:
|
||||
expr: "`Kitchen Sink channel missing from inspect output: ${JSON.stringify(inspectFacts.channels)}`"
|
||||
- assert:
|
||||
expr: "config.expectedProviderAny.some((provider) => inspectFacts.providers.includes(provider))"
|
||||
message:
|
||||
expr: "`Kitchen Sink providers missing from inspect output: ${JSON.stringify(inspectFacts.providers)}`"
|
||||
- assert:
|
||||
expr: "config.expectedToolAny.some((tool) => inspectFacts.tools.includes(tool))"
|
||||
message:
|
||||
expr: "`Kitchen Sink tools missing from inspect output: ${JSON.stringify(inspectFacts.tools)}`"
|
||||
detailsExpr: inspectFacts
|
||||
|
||||
- name: restarts gateway with Kitchen Sink configured
|
||||
actions:
|
||||
- assert:
|
||||
expr: "typeof env.gateway.restartAfterStateMutation === 'function'"
|
||||
message: "qa gateway child does not expose restartAfterStateMutation"
|
||||
- call: env.gateway.restartAfterStateMutation
|
||||
args:
|
||||
- lambda:
|
||||
async: true
|
||||
params: [ctx]
|
||||
expr: |-
|
||||
(async () => {
|
||||
const raw = await fs.readFile(ctx.configPath, "utf8").catch(() => "{}");
|
||||
const cfg = JSON.parse(raw || "{}");
|
||||
cfg.plugins = cfg.plugins || {};
|
||||
cfg.plugins.allow = [...new Set([...(cfg.plugins.allow || []), config.pluginId])];
|
||||
cfg.plugins.entries = cfg.plugins.entries || {};
|
||||
cfg.plugins.entries[config.pluginId] = {
|
||||
...(cfg.plugins.entries[config.pluginId] || {}),
|
||||
enabled: true,
|
||||
hooks: {
|
||||
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
|
||||
allowConversationAccess: true,
|
||||
},
|
||||
};
|
||||
cfg.channels = {
|
||||
...(cfg.channels || {}),
|
||||
[config.channelId]: { enabled: true, token: "kitchen-sink-qa" },
|
||||
};
|
||||
await fs.writeFile(ctx.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
|
||||
})()
|
||||
- call: waitForGatewayHealthy
|
||||
args:
|
||||
- ref: env
|
||||
- 120000
|
||||
- call: waitForQaChannelReady
|
||||
args:
|
||||
- ref: env
|
||||
- 120000
|
||||
- set: perfStartedAtMs
|
||||
value:
|
||||
expr: "Date.now()"
|
||||
- set: cpuStartMs
|
||||
value:
|
||||
expr: "env.gateway.getProcessCpuMs?.() ?? null"
|
||||
- set: rssStartBytes
|
||||
value:
|
||||
expr: "env.gateway.getProcessRssBytes?.() ?? null"
|
||||
- call: env.gateway.call
|
||||
saveAs: channelStatus
|
||||
args:
|
||||
- channels.status
|
||||
- probe: true
|
||||
timeoutMs: 10000
|
||||
- timeoutMs: 15000
|
||||
- set: kitchenChannelAccount
|
||||
value:
|
||||
expr: "(channelStatus.channelAccounts?.[config.channelId] ?? []).find((entry) => entry.accountId === config.channelAccountId) ?? null"
|
||||
- assert:
|
||||
expr: "kitchenChannelAccount?.running === true && kitchenChannelAccount?.configured === true"
|
||||
message:
|
||||
expr: "`Kitchen Sink channel did not report running+configured: ${JSON.stringify(kitchenChannelAccount)}`"
|
||||
detailsExpr: kitchenChannelAccount
|
||||
|
||||
- name: exercises command inventory and MCP tool surfaces
|
||||
actions:
|
||||
- call: env.gateway.call
|
||||
saveAs: commandList
|
||||
args:
|
||||
- commands.list
|
||||
- agentId: qa
|
||||
scope: text
|
||||
- timeoutMs: 15000
|
||||
- set: pluginCommandNames
|
||||
value:
|
||||
expr: "(commandList.commands ?? []).filter((entry) => entry.source === 'plugin').map((entry) => entry.name).sort()"
|
||||
- assert:
|
||||
expr: "pluginCommandNames.includes('kitchen') && pluginCommandNames.includes('kitchen-sink')"
|
||||
message:
|
||||
expr: "`Kitchen Sink plugin commands missing from commands.list: ${JSON.stringify(pluginCommandNames)}`"
|
||||
- call: callPluginToolsMcp
|
||||
saveAs: mcpTool
|
||||
args:
|
||||
- env:
|
||||
ref: env
|
||||
toolName: kitchen_sink_search
|
||||
args:
|
||||
query: "kitchen sink qa live openai"
|
||||
- set: mcpToolText
|
||||
value:
|
||||
expr: "JSON.stringify(mcpTool.content ?? mcpTool)"
|
||||
- assert:
|
||||
expr: "mcpToolText.includes('Kitchen Sink image fixture')"
|
||||
message:
|
||||
expr: "`Kitchen Sink MCP tool output missed expected fixture: ${mcpToolText.slice(0, 500)}`"
|
||||
detailsExpr: "{ pluginCommandNames, mcpToolText: mcpToolText.slice(0, 500) }"
|
||||
|
||||
- name: runs live OpenAI turn with Kitchen Sink loaded
|
||||
actions:
|
||||
- call: reset
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
- sessionKey:
|
||||
expr: "`agent:qa:kitchen-sink-openai:${randomUUID().slice(0, 8)}`"
|
||||
message:
|
||||
expr: config.livePrompt
|
||||
timeoutMs:
|
||||
expr: liveTurnTimeoutMs(env, 60000)
|
||||
- call: waitForOutboundMessage
|
||||
saveAs: openaiReply
|
||||
args:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes('KITCHEN-SINK-OPENAI-OK')"
|
||||
- expr: liveTurnTimeoutMs(env, 30000)
|
||||
detailsExpr: "{ openaiReply: openaiReply.text }"
|
||||
|
||||
- name: records gateway CPU RSS and log anomaly evidence
|
||||
actions:
|
||||
- set: perfEvidence
|
||||
value:
|
||||
expr: |-
|
||||
(() => {
|
||||
const cpuStart = typeof vars.cpuStartMs === "number" ? vars.cpuStartMs : null;
|
||||
const cpuEnd = env.gateway.getProcessCpuMs?.() ?? null;
|
||||
const rssStart = typeof vars.rssStartBytes === "number" ? vars.rssStartBytes : null;
|
||||
const rssEnd = env.gateway.getProcessRssBytes?.() ?? null;
|
||||
const logs = env.gateway.logs?.() ?? "";
|
||||
const deny = [
|
||||
/\buncaught exception\b/iu,
|
||||
/\bunhandled rejection\b/iu,
|
||||
/\bfatal\b/iu,
|
||||
/\bpanic\b/iu,
|
||||
];
|
||||
const findings = logs
|
||||
.split(/\r?\n/u)
|
||||
.filter((line) => deny.some((pattern) => pattern.test(line)))
|
||||
.slice(0, 10)
|
||||
.map((line) => line.replaceAll(env.repoRoot, "<repo>").slice(0, 500));
|
||||
const wallMs = Date.now() - Number(vars.perfStartedAtMs ?? Date.now());
|
||||
const cpuDeltaMs = cpuStart === null || cpuEnd === null ? null : Math.max(0, cpuEnd - cpuStart);
|
||||
const cpuCoreRatio = cpuDeltaMs === null || wallMs <= 0 ? null : Math.round((cpuDeltaMs / wallMs) * 1000) / 1000;
|
||||
const rssMiB = rssEnd === null ? null : Math.round((rssEnd / 1024 / 1024) * 10) / 10;
|
||||
return {
|
||||
wallMs,
|
||||
cpuStart,
|
||||
cpuEnd,
|
||||
cpuDeltaMs,
|
||||
cpuCoreRatio,
|
||||
rssStartBytes: rssStart,
|
||||
rssEndBytes: rssEnd,
|
||||
rssMiB,
|
||||
logBytes: logs.length,
|
||||
findings,
|
||||
};
|
||||
})()
|
||||
- assert:
|
||||
expr: "perfEvidence.findings.length === 0"
|
||||
message:
|
||||
expr: "`Gateway logs contain fatal runtime lines: ${JSON.stringify(perfEvidence.findings)}`"
|
||||
- assert:
|
||||
expr: "perfEvidence.cpuCoreRatio === null || perfEvidence.cpuCoreRatio <= config.maxGatewayCpuCoreRatio"
|
||||
message:
|
||||
expr: "`Gateway CPU ratio exceeded Kitchen Sink anomaly threshold: ${JSON.stringify(perfEvidence)}`"
|
||||
- assert:
|
||||
expr: "perfEvidence.rssMiB === null || perfEvidence.rssMiB <= config.maxGatewayRssMiB"
|
||||
message:
|
||||
expr: "`Gateway RSS exceeded Kitchen Sink anomaly threshold: ${JSON.stringify(perfEvidence)}`"
|
||||
detailsExpr: perfEvidence
|
||||
```
|
||||
Reference in New Issue
Block a user