test(plugins): split kitchen sink qa personalities (#74612)

This commit is contained in:
Vincent Koc
2026-04-29 13:56:49 -07:00
committed by GitHub
parent 987af6805b
commit 1a4d55de43
6 changed files with 155 additions and 32 deletions

View File

@@ -187,6 +187,9 @@ describe("qa scenario catalog", () => {
requiredProvider?: string;
pluginSpec?: string;
pluginId?: string;
pluginPersonality?: string;
adversarialPersonality?: string;
expectedAdversarialDiagnostics?: string[];
}
| undefined;
@@ -195,12 +198,18 @@ describe("qa scenario catalog", () => {
expect(config?.requiredProvider).toBe("openai");
expect(config?.pluginSpec).toBe("npm:@openclaw/kitchen-sink@latest");
expect(config?.pluginId).toBe("openclaw-kitchen-sink-fixture");
expect(config?.pluginPersonality).toBe("conformance");
expect(config?.adversarialPersonality).toBe("adversarial");
expect(config?.expectedAdversarialDiagnostics).toContain(
"only bundled plugins can register agent tool result middleware",
);
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"installs and inspects the Kitchen Sink plugin",
"restarts gateway with Kitchen Sink configured",
"exercises command inventory and MCP tool surfaces",
"runs live OpenAI turn with Kitchen Sink loaded",
"records gateway CPU RSS and log anomaly evidence",
"verifies adversarial diagnostics personality",
]);
});

View File

@@ -36,6 +36,8 @@ execution:
requiredProvider: openai
pluginSpec: npm:@openclaw/kitchen-sink@latest
pluginId: openclaw-kitchen-sink-fixture
pluginPersonality: conformance
adversarialPersonality: adversarial
channelId: kitchen-sink-channel
channelAccountId: local
textProviderId: kitchen-sink-llm
@@ -52,6 +54,17 @@ execution:
agentTurnTimeoutMs: 120000
outboundTimeoutMs: 60000
livePrompt: "Kitchen Sink OpenAI marker. Reply exactly: KITCHEN-SINK-OPENAI-OK"
expectedAdversarialDiagnostics:
- only bundled plugins can register agent tool result middleware
- agent harness "kitchen-sink-agent-harness" registration missing required runtime methods
- channel "kitchen-sink-channel-probe" registration missing required config helpers
- cli registration missing explicit commands metadata
- only bundled plugins can register Codex app-server extension factories
- compaction provider "kitchen-sink-compaction-provider" registration missing summarize
- context engine registration missing id
- http route registration missing or invalid auth: /kitchen-sink/http-route
- "plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider"
- memory prompt supplement registration missing builder
```
```yaml qa-flow
@@ -84,6 +97,10 @@ steps:
cfg.plugins.entries[config.pluginId] = {
...(cfg.plugins.entries[config.pluginId] || {}),
enabled: true,
config: {
...(cfg.plugins.entries[config.pluginId]?.config || {}),
personality: config.pluginPersonality,
},
hooks: {
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
allowConversationAccess: true,
@@ -152,6 +169,10 @@ steps:
expr: "config.expectedToolAny.some((tool) => inspectFacts.tools.includes(tool))"
message:
expr: "`Kitchen Sink tools missing from inspect output: ${JSON.stringify(inspectFacts.tools)}`"
- assert:
expr: "inspectFacts.diagnostics.length === 0"
message:
expr: "`Kitchen Sink conformance personality emitted diagnostics: ${JSON.stringify(inspectFacts.diagnostics)}`"
detailsExpr: inspectFacts
- name: restarts gateway with Kitchen Sink configured
@@ -174,6 +195,10 @@ steps:
cfg.plugins.entries[config.pluginId] = {
...(cfg.plugins.entries[config.pluginId] || {}),
enabled: true,
config: {
...(cfg.plugins.entries[config.pluginId]?.config || {}),
personality: config.pluginPersonality,
},
hooks: {
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
allowConversationAccess: true,
@@ -325,4 +350,62 @@ steps:
message:
expr: "`Gateway RSS exceeded Kitchen Sink anomaly threshold: ${JSON.stringify(perfEvidence)}`"
detailsExpr: perfEvidence
# Flow step: switch the plugin entry to config.adversarialPersonality, restart the
# gateway, and check that the error-level diagnostics reported by `plugins inspect`
# are exactly the messages listed in config.expectedAdversarialDiagnostics.
- name: verifies adversarial diagnostics personality
actions:
# Restart via env.gateway.restartAfterStateMutation; the lambda rewrites the JSON at
# ctx.configPath first. An unreadable or empty config file is treated as `{}`. The
# plugin id is added to plugins.allow (de-duplicated) and its entry is enabled with
# personality = config.adversarialPersonality; existing entry, config and hooks keys
# are kept via spread. The step then waits for gateway health (second arg 120000).
- call: env.gateway.restartAfterStateMutation
args:
- lambda:
async: true
params: [ctx]
expr: |-
(async () => {
const raw = await fs.readFile(ctx.configPath, "utf8").catch(() => "{}");
const cfg = JSON.parse(raw || "{}");
cfg.plugins = cfg.plugins || {};
cfg.plugins.allow = [...new Set([...(cfg.plugins.allow || []), config.pluginId])];
cfg.plugins.entries = cfg.plugins.entries || {};
cfg.plugins.entries[config.pluginId] = {
...(cfg.plugins.entries[config.pluginId] || {}),
enabled: true,
config: {
...(cfg.plugins.entries[config.pluginId]?.config || {}),
personality: config.adversarialPersonality,
},
hooks: {
...(cfg.plugins.entries[config.pluginId]?.hooks || {}),
allowConversationAccess: true,
},
};
await fs.writeFile(ctx.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
})()
- call: waitForGatewayHealthy
args:
- ref: env
- 120000
# Run `plugins inspect <pluginId> --json` through runQaCli and keep the result as
# adversarialInspect (json: true, timeoutMs: 60000).
- call: runQaCli
saveAs: adversarialInspect
args:
- ref: env
- - plugins
- inspect
- expr: config.pluginId
- --json
- json: true
timeoutMs: 60000
# adversarialDiagnostics = messages of the inspect diagnostics whose level is "error"
# (a missing diagnostics list counts as empty; a missing message becomes "").
# The first assert below requires every expected message to be present.
- set: adversarialDiagnostics
value:
expr: |-
(adversarialInspect.diagnostics ?? [])
.filter((entry) => entry?.level === "error")
.map((entry) => String(entry.message ?? ""))
- assert:
expr: "config.expectedAdversarialDiagnostics.every((message) => adversarialDiagnostics.includes(message))"
message:
expr: "`Kitchen Sink adversarial diagnostics missing expected messages: ${JSON.stringify({ expected: config.expectedAdversarialDiagnostics, actual: adversarialDiagnostics })}`"
# Converse check: no error-level diagnostic outside the expected list is tolerated.
- assert:
expr: "adversarialDiagnostics.every((message) => config.expectedAdversarialDiagnostics.includes(message))"
message:
expr: "`Kitchen Sink adversarial diagnostics contained unexpected messages: ${JSON.stringify(adversarialDiagnostics)}`"
detailsExpr: "{ diagnostics: adversarialDiagnostics }"
```

View File

@@ -10,7 +10,9 @@ OPENCLAW_TEST_STATE_SCRIPT_B64="$(docker_e2e_test_state_shell_b64 kitchen-sink-p
DEFAULT_KITCHEN_SINK_SCENARIOS="$(
cat <<'SCENARIOS'
npm-latest|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|full
npm-latest-full|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|full
npm-latest-conformance|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|conformance|conformance
npm-latest-adversarial|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|adversarial|adversarial
npm-beta|npm:@openclaw/kitchen-sink@beta|openclaw-kitchen-sink-fixture|npm|failure|none
clawhub-latest|clawhub:openclaw-kitchen-sink@latest|openclaw-kitchen-sink-fixture|clawhub|success|basic
clawhub-beta|clawhub:openclaw-kitchen-sink@beta|openclaw-kitchen-sink-fixture|clawhub|failure|none

View File

@@ -87,11 +87,20 @@ function readConfig() {
function configureRuntime() {
const pluginId = process.env.KITCHEN_SINK_ID;
const personality = process.env.KITCHEN_SINK_PERSONALITY;
const { configPath, config } = readConfig();
config.plugins = config.plugins || {};
config.plugins.entries = config.plugins.entries || {};
config.plugins.entries[pluginId] = {
...config.plugins.entries[pluginId],
...(personality
? {
config: {
...config.plugins.entries[pluginId]?.config,
personality,
},
}
: {}),
hooks: {
...config.plugins.entries[pluginId]?.hooks,
allowConversationAccess: true,
@@ -124,6 +133,39 @@ const expectMissing = (listValue, expected, field) => {
}
};
/**
 * Checks the error-level diagnostic messages against what the surface mode allows.
 *
 * For the "full" and "adversarial" surface modes the messages must match the
 * known set of registration errors exactly (nothing extra, nothing missing).
 * For every other surface mode no error message is allowed at all.
 *
 * @param {string} surfaceMode - Surface mode under test (e.g. "full", "adversarial").
 * @param {Iterable<string>} errorMessages - Error messages to check; a Set is
 *   used as-is, any other iterable (such as an array) is copied into a Set.
 * @throws {TypeError} If errorMessages is nullish, a string, or not iterable.
 * @throws {Error} If an unexpected message is present or an expected one is missing.
 */
function assertExpectedDiagnostics(surfaceMode, errorMessages) {
  // Reject non-collections explicitly: without this, an array would pass the
  // `.size > 0` check below unnoticed (`undefined > 0` is false) and hide errors.
  if (
    errorMessages == null ||
    typeof errorMessages === "string" ||
    typeof errorMessages[Symbol.iterator] !== "function"
  ) {
    throw new TypeError("errorMessages must be an iterable of diagnostic messages");
  }
  const actualMessages = errorMessages instanceof Set ? errorMessages : new Set(errorMessages);

  if (surfaceMode !== "full" && surfaceMode !== "adversarial") {
    if (actualMessages.size > 0) {
      throw new Error(
        `unexpected kitchen-sink diagnostic errors: ${[...actualMessages].join(", ")}`,
      );
    }
    return;
  }

  // Only needed for the exact-match modes, so it is built after the early return.
  const expectedErrorMessages = new Set([
    "only bundled plugins can register agent tool result middleware",
    'agent harness "kitchen-sink-agent-harness" registration missing required runtime methods',
    'channel "kitchen-sink-channel-probe" registration missing required config helpers',
    "cli registration missing explicit commands metadata",
    "only bundled plugins can register Codex app-server extension factories",
    'compaction provider "kitchen-sink-compaction-provider" registration missing summarize',
    "context engine registration missing id",
    "http route registration missing or invalid auth: /kitchen-sink/http-route",
    "plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider",
    "memory prompt supplement registration missing builder",
  ]);

  // Unexpected messages are reported before missing ones, first offender only.
  for (const message of actualMessages) {
    if (!expectedErrorMessages.has(message)) {
      throw new Error(`unexpected kitchen-sink diagnostic error: ${message}`);
    }
  }
  for (const message of expectedErrorMessages) {
    if (!actualMessages.has(message)) {
      throw new Error(`missing expected kitchen-sink diagnostic error: ${message}`);
    }
  }
}
function assertRealPathInside(parentPath, childPath, label) {
const parentRealPath = fs.realpathSync(parentPath);
const childRealPath = fs.realpathSync(childPath);
@@ -181,8 +223,10 @@ function assertInstalled() {
);
}
expectIncludes(inspect.plugin?.channelIds, "kitchen-sink-channel", "channels");
expectIncludes(inspect.plugin?.providerIds, "kitchen-sink-provider", "providers");
if (surfaceMode !== "adversarial") {
expectIncludes(inspect.plugin?.channelIds, "kitchen-sink-channel", "channels");
expectIncludes(inspect.plugin?.providerIds, "kitchen-sink-provider", "providers");
}
const diagnostics = [
...(list.diagnostics || []),
@@ -193,7 +237,7 @@ function assertInstalled() {
diagnostics.filter((diag) => diag?.level === "error").map((diag) => String(diag.message || "")),
);
if (surfaceMode === "full") {
if (surfaceMode === "full" || surfaceMode === "conformance") {
const toolNames = Array.isArray(inspect.tools)
? inspect.tools.flatMap((entry) => (Array.isArray(entry?.names) ? entry.names : []))
: [];
@@ -232,8 +276,13 @@ function assertInstalled() {
}
expectMissing(inspect.plugin?.agentHarnessIds, "kitchen-sink-agent-harness", "agent harnesses");
expectIncludes(inspect.services, "kitchen-sink-service", "services");
expectIncludes(inspect.commands, "kitchen-sink-command", "commands");
expectIncludes(toolNames, "kitchen-sink-tool", "tools");
if (surfaceMode === "full") {
expectIncludes(inspect.commands, "kitchen-sink-command", "commands");
expectIncludes(toolNames, "kitchen-sink-tool", "tools");
} else {
expectIncludes(inspect.commands, "kitchen", "commands");
expectIncludes(toolNames, "kitchen_sink_text", "tools");
}
if (
(inspect.plugin?.hookCount || 0) < 30 ||
!Array.isArray(inspect.typedHooks) ||
@@ -243,32 +292,8 @@ function assertInstalled() {
`expected kitchen-sink typed hooks to load, got hookCount=${inspect.plugin?.hookCount} typedHooks=${inspect.typedHooks?.length}`,
);
}
const expectedErrorMessages = new Set([
"only bundled plugins can register agent tool result middleware",
'agent harness "kitchen-sink-agent-harness" registration missing required runtime methods',
'channel "kitchen-sink-channel-probe" registration missing required config helpers',
"cli registration missing explicit commands metadata",
"only bundled plugins can register Codex app-server extension factories",
'compaction provider "kitchen-sink-compaction-provider" registration missing summarize',
"context engine registration missing id",
"http route registration missing or invalid auth: /kitchen-sink/http-route",
"plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider",
"memory prompt supplement registration missing builder",
]);
for (const message of errorMessages) {
if (!expectedErrorMessages.has(message)) {
throw new Error(`unexpected kitchen-sink diagnostic error: ${message}`);
}
}
for (const message of expectedErrorMessages) {
if (!errorMessages.has(message)) {
throw new Error(`missing expected kitchen-sink diagnostic error: ${message}`);
}
}
} else if (errorMessages.size > 0) {
throw new Error(`unexpected kitchen-sink diagnostic errors: ${[...errorMessages].join(", ")}`);
}
assertExpectedDiagnostics(surfaceMode, errorMessages);
const indexPath = path.join(process.env.HOME, ".openclaw", "plugins", "installs.json");
const index = readJson(indexPath);

View File

@@ -108,7 +108,7 @@ if [[ "$KITCHEN_SINK_SCENARIOS" == *"clawhub:"* ]] &&
fi
scenario_count=0
while IFS='|' read -r label spec plugin_id source expectation surface_mode; do
while IFS='|' read -r label spec plugin_id source expectation surface_mode personality; do
if [ -z "${label:-}" ] || [[ "$label" == \#* ]]; then
continue
fi
@@ -118,6 +118,7 @@ while IFS='|' read -r label spec plugin_id source expectation surface_mode; do
export KITCHEN_SINK_ID="$plugin_id"
export KITCHEN_SINK_SOURCE="$source"
export KITCHEN_SINK_SURFACE_MODE="$surface_mode"
export KITCHEN_SINK_PERSONALITY="${personality:-}"
case "$expectation" in
success)
run_success_scenario

View File

@@ -100,11 +100,14 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
}),
);
expect(script).toContain("npm:@openclaw/kitchen-sink@latest");
expect(script).toContain("npm-latest-conformance");
expect(script).toContain("npm-latest-adversarial");
expect(script).toContain("npm:@openclaw/kitchen-sink@beta");
expect(script).toContain("clawhub:openclaw-kitchen-sink@latest");
expect(script).toContain("clawhub:openclaw-kitchen-sink@beta");
expect(script).toContain("scripts/e2e/lib/kitchen-sink-plugin/sweep.sh");
expect(sweepScript).toContain('plugins install "$KITCHEN_SINK_SPEC"');
expect(sweepScript).toContain("KITCHEN_SINK_PERSONALITY");
expect(sweepScript).toContain('plugins uninstall "$KITCHEN_SINK_SPEC" --force');
expect(sweepScript).toContain("run_failure_scenario");
expect(assertionsScript).toContain("record.source !== source");