From 1a4d55de430a94da9d87f73a960e3fbcdf6b1c8b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 29 Apr 2026 13:56:49 -0700 Subject: [PATCH] test(plugins): split kitchen sink qa personalities (#74612) --- .../qa-lab/src/scenario-catalog.test.ts | 9 ++ .../plugins/kitchen-sink-live-openai.md | 83 ++++++++++++++++++ scripts/e2e/kitchen-sink-plugin-docker.sh | 4 +- .../lib/kitchen-sink-plugin/assertions.mjs | 85 ++++++++++++------- scripts/e2e/lib/kitchen-sink-plugin/sweep.sh | 3 +- .../plugin-prerelease-test-plan.test.ts | 3 + 6 files changed, 155 insertions(+), 32 deletions(-) diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts index 36cd28eaae2..afe67d016be 100644 --- a/extensions/qa-lab/src/scenario-catalog.test.ts +++ b/extensions/qa-lab/src/scenario-catalog.test.ts @@ -187,6 +187,9 @@ describe("qa scenario catalog", () => { requiredProvider?: string; pluginSpec?: string; pluginId?: string; + pluginPersonality?: string; + adversarialPersonality?: string; + expectedAdversarialDiagnostics?: string[]; } | undefined; @@ -195,12 +198,18 @@ describe("qa scenario catalog", () => { expect(config?.requiredProvider).toBe("openai"); expect(config?.pluginSpec).toBe("npm:@openclaw/kitchen-sink@latest"); expect(config?.pluginId).toBe("openclaw-kitchen-sink-fixture"); + expect(config?.pluginPersonality).toBe("conformance"); + expect(config?.adversarialPersonality).toBe("adversarial"); + expect(config?.expectedAdversarialDiagnostics).toContain( + "only bundled plugins can register agent tool result middleware", + ); expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([ "installs and inspects the Kitchen Sink plugin", "restarts gateway with Kitchen Sink configured", "exercises command inventory and MCP tool surfaces", "runs live OpenAI turn with Kitchen Sink loaded", "records gateway CPU RSS and log anomaly evidence", + "verifies adversarial diagnostics personality", ]); }); diff --git a/qa/scenarios/plugins/kitchen-sink-live-openai.md b/qa/scenarios/plugins/kitchen-sink-live-openai.md index 8c2ba32bc7b..ecf62e53ccb 100644 --- a/qa/scenarios/plugins/kitchen-sink-live-openai.md +++ b/qa/scenarios/plugins/kitchen-sink-live-openai.md @@ -36,6 +36,8 @@ execution: requiredProvider: openai pluginSpec: npm:@openclaw/kitchen-sink@latest pluginId: openclaw-kitchen-sink-fixture + pluginPersonality: conformance + adversarialPersonality: adversarial channelId: kitchen-sink-channel channelAccountId: local textProviderId: kitchen-sink-llm @@ -52,6 +54,17 @@ execution: agentTurnTimeoutMs: 120000 outboundTimeoutMs: 60000 livePrompt: "Kitchen Sink OpenAI marker. Reply exactly: KITCHEN-SINK-OPENAI-OK" + expectedAdversarialDiagnostics: + - only bundled plugins can register agent tool result middleware + - agent harness "kitchen-sink-agent-harness" registration missing required runtime methods + - channel "kitchen-sink-channel-probe" registration missing required config helpers + - cli registration missing explicit commands metadata + - only bundled plugins can register Codex app-server extension factories + - compaction provider "kitchen-sink-compaction-provider" registration missing summarize + - context engine registration missing id + - http route registration missing or invalid auth: /kitchen-sink/http-route + - "plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider" + - memory prompt supplement registration missing builder ``` ```yaml qa-flow @@ -84,6 +97,10 @@ steps: cfg.plugins.entries[config.pluginId] = { ...(cfg.plugins.entries[config.pluginId] || {}), enabled: true, + config: { + ...(cfg.plugins.entries[config.pluginId]?.config || {}), + personality: config.pluginPersonality, + }, hooks: { ...(cfg.plugins.entries[config.pluginId]?.hooks || {}), allowConversationAccess: true, @@ -152,6 +169,10 @@ steps: expr: "config.expectedToolAny.some((tool) => inspectFacts.tools.includes(tool))" message: expr: "`Kitchen Sink tools missing from inspect output: ${JSON.stringify(inspectFacts.tools)}`" + - assert: + expr: "inspectFacts.diagnostics.length === 0" + message: + expr: "`Kitchen Sink conformance personality emitted diagnostics: ${JSON.stringify(inspectFacts.diagnostics)}`" detailsExpr: inspectFacts - name: restarts gateway with Kitchen Sink configured @@ -174,6 +195,10 @@ steps: cfg.plugins.entries[config.pluginId] = { ...(cfg.plugins.entries[config.pluginId] || {}), enabled: true, + config: { + ...(cfg.plugins.entries[config.pluginId]?.config || {}), + personality: config.pluginPersonality, + }, hooks: { ...(cfg.plugins.entries[config.pluginId]?.hooks || {}), allowConversationAccess: true, @@ -325,4 +350,62 @@ steps: message: expr: "`Gateway RSS exceeded Kitchen Sink anomaly threshold: ${JSON.stringify(perfEvidence)}`" detailsExpr: perfEvidence + + - name: verifies adversarial diagnostics personality + actions: + - call: env.gateway.restartAfterStateMutation + args: + - lambda: + async: true + params: [ctx] + expr: |- + (async () => { + const raw = await fs.readFile(ctx.configPath, "utf8").catch(() => "{}"); + const cfg = JSON.parse(raw || "{}"); + cfg.plugins = cfg.plugins || {}; + cfg.plugins.allow = [...new Set([...(cfg.plugins.allow || []), config.pluginId])]; + cfg.plugins.entries = cfg.plugins.entries || {}; + cfg.plugins.entries[config.pluginId] = { + ...(cfg.plugins.entries[config.pluginId] || {}), + enabled: true, + config: { + ...(cfg.plugins.entries[config.pluginId]?.config || {}), + personality: config.adversarialPersonality, + }, + hooks: { + ...(cfg.plugins.entries[config.pluginId]?.hooks || {}), + allowConversationAccess: true, + }, + }; + await fs.writeFile(ctx.configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8"); + })() + - call: waitForGatewayHealthy + args: + - ref: env + - 120000 + - call: runQaCli + saveAs: adversarialInspect + args: + - ref: env + - - plugins + - inspect + - expr: config.pluginId + - --json + - json: true + timeoutMs: 60000 + - set: adversarialDiagnostics + value: + expr: |- + (adversarialInspect.diagnostics ?? []) + .filter((entry) => entry?.level === "error") + .map((entry) => String(entry.message ?? "")) + - assert: + expr: "config.expectedAdversarialDiagnostics.every((message) => adversarialDiagnostics.includes(message))" + message: + expr: "`Kitchen Sink adversarial diagnostics missing expected messages: ${JSON.stringify({ expected: config.expectedAdversarialDiagnostics, actual: adversarialDiagnostics })}`" + - assert: + expr: "adversarialDiagnostics.every((message) => config.expectedAdversarialDiagnostics.includes(message))" + message: + expr: "`Kitchen Sink adversarial diagnostics contained unexpected messages: ${JSON.stringify(adversarialDiagnostics)}`" + detailsExpr: "{ diagnostics: adversarialDiagnostics }" ``` diff --git a/scripts/e2e/kitchen-sink-plugin-docker.sh b/scripts/e2e/kitchen-sink-plugin-docker.sh index 83a932cc729..d4716410706 100644 --- a/scripts/e2e/kitchen-sink-plugin-docker.sh +++ b/scripts/e2e/kitchen-sink-plugin-docker.sh @@ -10,7 +10,9 @@ OPENCLAW_TEST_STATE_SCRIPT_B64="$(docker_e2e_test_state_shell_b64 kitchen-sink-p DEFAULT_KITCHEN_SINK_SCENARIOS="$( cat <<'SCENARIOS' -npm-latest|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|full +npm-latest-full|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|full +npm-latest-conformance|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|conformance|conformance +npm-latest-adversarial|npm:@openclaw/kitchen-sink@latest|openclaw-kitchen-sink-fixture|npm|success|adversarial|adversarial npm-beta|npm:@openclaw/kitchen-sink@beta|openclaw-kitchen-sink-fixture|npm|failure|none clawhub-latest|clawhub:openclaw-kitchen-sink@latest|openclaw-kitchen-sink-fixture|clawhub|success|basic clawhub-beta|clawhub:openclaw-kitchen-sink@beta|openclaw-kitchen-sink-fixture|clawhub|failure|none diff --git a/scripts/e2e/lib/kitchen-sink-plugin/assertions.mjs b/scripts/e2e/lib/kitchen-sink-plugin/assertions.mjs index c00e45362fe..f2bcc3278cd 100644 --- a/scripts/e2e/lib/kitchen-sink-plugin/assertions.mjs +++ b/scripts/e2e/lib/kitchen-sink-plugin/assertions.mjs @@ -87,11 +87,20 @@ function readConfig() { function configureRuntime() { const pluginId = process.env.KITCHEN_SINK_ID; + const personality = process.env.KITCHEN_SINK_PERSONALITY; const { configPath, config } = readConfig(); config.plugins = config.plugins || {}; config.plugins.entries = config.plugins.entries || {}; config.plugins.entries[pluginId] = { ...config.plugins.entries[pluginId], + ...(personality + ? { + config: { + ...config.plugins.entries[pluginId]?.config, + personality, + }, + } + : {}), hooks: { ...config.plugins.entries[pluginId]?.hooks, allowConversationAccess: true, @@ -124,6 +133,39 @@ const expectMissing = (listValue, expected, field) => { } }; +function assertExpectedDiagnostics(surfaceMode, errorMessages) { + const expectedErrorMessages = new Set([ + "only bundled plugins can register agent tool result middleware", + 'agent harness "kitchen-sink-agent-harness" registration missing required runtime methods', + 'channel "kitchen-sink-channel-probe" registration missing required config helpers', + "cli registration missing explicit commands metadata", + "only bundled plugins can register Codex app-server extension factories", + 'compaction provider "kitchen-sink-compaction-provider" registration missing summarize', + "context engine registration missing id", + "http route registration missing or invalid auth: /kitchen-sink/http-route", + "plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider", + "memory prompt supplement registration missing builder", + ]); + if (surfaceMode !== "full" && surfaceMode !== "adversarial") { + if (errorMessages.size > 0) { + throw new Error( + `unexpected kitchen-sink diagnostic errors: ${[...errorMessages].join(", ")}`, + ); + } + return; + } + for (const message of errorMessages) { + if (!expectedErrorMessages.has(message)) { + throw new Error(`unexpected kitchen-sink diagnostic error: ${message}`); + } + } + for (const message of expectedErrorMessages) { + if (!errorMessages.has(message)) { + throw new Error(`missing expected kitchen-sink diagnostic error: ${message}`); + } + } +} + function assertRealPathInside(parentPath, childPath, label) { const parentRealPath = fs.realpathSync(parentPath); const childRealPath = fs.realpathSync(childPath); @@ -181,8 +223,10 @@ function assertInstalled() { ); } - expectIncludes(inspect.plugin?.channelIds, "kitchen-sink-channel", "channels"); - expectIncludes(inspect.plugin?.providerIds, "kitchen-sink-provider", "providers"); + if (surfaceMode !== "adversarial") { + expectIncludes(inspect.plugin?.channelIds, "kitchen-sink-channel", "channels"); + expectIncludes(inspect.plugin?.providerIds, "kitchen-sink-provider", "providers"); + } const diagnostics = [ ...(list.diagnostics || []), @@ -193,7 +237,7 @@ function assertInstalled() { diagnostics.filter((diag) => diag?.level === "error").map((diag) => String(diag.message || "")), ); - if (surfaceMode === "full") { + if (surfaceMode === "full" || surfaceMode === "conformance") { const toolNames = Array.isArray(inspect.tools) ? inspect.tools.flatMap((entry) => (Array.isArray(entry?.names) ? entry.names : [])) : []; @@ -232,8 +276,13 @@ function assertInstalled() { } expectMissing(inspect.plugin?.agentHarnessIds, "kitchen-sink-agent-harness", "agent harnesses"); expectIncludes(inspect.services, "kitchen-sink-service", "services"); - expectIncludes(inspect.commands, "kitchen-sink-command", "commands"); - expectIncludes(toolNames, "kitchen-sink-tool", "tools"); + if (surfaceMode === "full") { + expectIncludes(inspect.commands, "kitchen-sink-command", "commands"); + expectIncludes(toolNames, "kitchen-sink-tool", "tools"); + } else { + expectIncludes(inspect.commands, "kitchen", "commands"); + expectIncludes(toolNames, "kitchen_sink_text", "tools"); + } if ( (inspect.plugin?.hookCount || 0) < 30 || !Array.isArray(inspect.typedHooks) || @@ -243,32 +292,8 @@ function assertInstalled() { `expected kitchen-sink typed hooks to load, got hookCount=${inspect.plugin?.hookCount} typedHooks=${inspect.typedHooks?.length}`, ); } - - const expectedErrorMessages = new Set([ - "only bundled plugins can register agent tool result middleware", - 'agent harness "kitchen-sink-agent-harness" registration missing required runtime methods', - 'channel "kitchen-sink-channel-probe" registration missing required config helpers', - "cli registration missing explicit commands metadata", - "only bundled plugins can register Codex app-server extension factories", - 'compaction provider "kitchen-sink-compaction-provider" registration missing summarize', - "context engine registration missing id", - "http route registration missing or invalid auth: /kitchen-sink/http-route", - "plugin must own memory slot or declare contracts.memoryEmbeddingProviders for adapter: kitchen-sink-memory-embedding-provider", - "memory prompt supplement registration missing builder", - ]); - for (const message of errorMessages) { - if (!expectedErrorMessages.has(message)) { - throw new Error(`unexpected kitchen-sink diagnostic error: ${message}`); - } - } - for (const message of expectedErrorMessages) { - if (!errorMessages.has(message)) { - throw new Error(`missing expected kitchen-sink diagnostic error: ${message}`); - } - } - } else if (errorMessages.size > 0) { - throw new Error(`unexpected kitchen-sink diagnostic errors: ${[...errorMessages].join(", ")}`); } + assertExpectedDiagnostics(surfaceMode, errorMessages); const indexPath = path.join(process.env.HOME, ".openclaw", "plugins", "installs.json"); const index = readJson(indexPath); diff --git a/scripts/e2e/lib/kitchen-sink-plugin/sweep.sh b/scripts/e2e/lib/kitchen-sink-plugin/sweep.sh index 7efe7dadf80..9c2956c68c0 100644 --- a/scripts/e2e/lib/kitchen-sink-plugin/sweep.sh +++ b/scripts/e2e/lib/kitchen-sink-plugin/sweep.sh @@ -108,7 +108,7 @@ if [[ "$KITCHEN_SINK_SCENARIOS" == *"clawhub:"* ]] && fi scenario_count=0 -while IFS='|' read -r label spec plugin_id source expectation surface_mode; do +while IFS='|' read -r label spec plugin_id source expectation surface_mode personality; do if [ -z "${label:-}" ] || [[ "$label" == \#* ]]; then continue fi @@ -118,6 +118,7 @@ while IFS='|' read -r label spec plugin_id source expectation surface_mode; do export KITCHEN_SINK_ID="$plugin_id" export KITCHEN_SINK_SOURCE="$source" export KITCHEN_SINK_SURFACE_MODE="$surface_mode" + export KITCHEN_SINK_PERSONALITY="${personality:-}" case "$expectation" in success) run_success_scenario diff --git a/test/scripts/plugin-prerelease-test-plan.test.ts b/test/scripts/plugin-prerelease-test-plan.test.ts index ef5c5ad093e..c0042752ec0 100644 --- a/test/scripts/plugin-prerelease-test-plan.test.ts +++ b/test/scripts/plugin-prerelease-test-plan.test.ts @@ -100,11 +100,14 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { }), ); expect(script).toContain("npm:@openclaw/kitchen-sink@latest"); + expect(script).toContain("npm-latest-conformance"); + expect(script).toContain("npm-latest-adversarial"); expect(script).toContain("npm:@openclaw/kitchen-sink@beta"); expect(script).toContain("clawhub:openclaw-kitchen-sink@latest"); expect(script).toContain("clawhub:openclaw-kitchen-sink@beta"); expect(script).toContain("scripts/e2e/lib/kitchen-sink-plugin/sweep.sh"); expect(sweepScript).toContain('plugins install "$KITCHEN_SINK_SPEC"'); + expect(sweepScript).toContain("KITCHEN_SINK_PERSONALITY"); expect(sweepScript).toContain('plugins uninstall "$KITCHEN_SINK_SPEC" --force'); expect(sweepScript).toContain("run_failure_scenario"); expect(assertionsScript).toContain("record.source !== source");