diff --git a/CHANGELOG.md b/CHANGELOG.md index 456d5694fc0..b3fb7be5c2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ Docs: https://docs.openclaw.ai - Release/CI/E2E: exit Telegram credential proof wrappers promptly after forwarded shutdown signals while keeping the descendant force-kill guard armed. - Release/CI/E2E: reject oversized ClickClack fixture request bodies before release journey smokes can accumulate unbounded payloads. - Release/CI/E2E: reject oversized OpenAI image-auth mock request bodies before Docker proof runs can accumulate unbounded payloads. +- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures. - Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error. - Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind. - Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue. diff --git a/scripts/e2e/kitchen-sink-rpc-walk.mjs b/scripts/e2e/kitchen-sink-rpc-walk.mjs index 19eded762ec..d47ce37dcce 100644 --- a/scripts/e2e/kitchen-sink-rpc-walk.mjs +++ b/scripts/e2e/kitchen-sink-rpc-walk.mjs @@ -936,7 +936,7 @@ export function extractPluginCommandNames(payload) { .toSorted((left, right) => left.localeCompare(right)); } -function extractToolEntries(payload) { +export function extractToolEntries(payload) { return (Array.isArray(payload?.groups) ? payload.groups : []).flatMap((group) => Array.isArray(group?.tools) ? group.tools : [], ); @@ -959,6 +959,31 @@ function assertIncludesAll(actual, expected, label) { } } +export function assertExpectedKitchenSinkToolEntries( + entries, + label, + { requirePluginProvenance = false } = {}, +) { + const ids = entries.map((entry) => entry?.id).filter(isNonEmptyString); + assertIncludesAll(ids, EXPECTED_TOOLS, label); + if (requirePluginProvenance) { + const wrongProvenance = entries + .filter((entry) => EXPECTED_TOOLS.includes(entry?.id)) + .filter((entry) => entry.source !== "plugin" || entry.pluginId !== PLUGIN_ID) + .map((entry) => ({ + id: entry?.id, + pluginId: entry?.pluginId, + source: entry?.source, + })); + if (wrongProvenance.length > 0) { + throw new Error( + `${label} plugin provenance mismatch: ${JSON.stringify(wrongProvenance)}`, + ); + } + } + return ids; +} + function assertChannelAccountRunning(payload) { const accounts = Array.isArray(payload?.channelAccounts?.[CHANNEL_ID]) ? payload.channelAccounts[CHANNEL_ID] @@ -970,13 +995,27 @@ function assertChannelAccountRunning(payload) { return account; } -function assertToolInvokeResult(payload) { +export function assertKitchenSinkSearchInvokeResult(payload) { if (payload?.ok !== true || payload?.source !== "plugin") { - throw new Error(`Kitchen Sink tool invoke failed: ${JSON.stringify(payload)}`); + throw new Error(`Kitchen Sink search tool invoke failed: ${JSON.stringify(payload)}`); } const text = JSON.stringify(payload.output ?? payload); if (!text.includes("Kitchen Sink image fixture")) { - throw new Error(`Kitchen Sink tool output missed expected fixture: ${text.slice(0, 1000)}`); + throw new Error( + `Kitchen Sink search tool output missed expected fixture: ${text.slice(0, 1000)}`, + ); + } +} + +export function assertKitchenSinkTextInvokeResult(payload) { + if (payload?.ok !== true || payload?.source !== "plugin") { + throw new Error(`Kitchen Sink text tool invoke failed: ${JSON.stringify(payload)}`); + } + const text = JSON.stringify(payload.output ?? payload); + if (!text.includes("tool:kitchen_sink_text") || !text.includes("Kitchen Sink")) { + throw new Error( + `Kitchen Sink text tool output missed expected fixture: ${text.slice(0, 1000)}`, + ); } } @@ -1608,12 +1647,11 @@ export async function main() { rpcOptions, ); const catalogTools = extractToolEntries(catalog); - const catalogToolIds = catalogTools.map((entry) => entry?.id).filter(isNonEmptyString); - assertIncludesAny(catalogToolIds, EXPECTED_TOOLS, "tools.catalog plugin tools"); - const pluginTool = catalogTools.find((entry) => EXPECTED_TOOLS.includes(entry?.id)); - if (pluginTool?.source !== "plugin" || pluginTool?.pluginId !== PLUGIN_ID) { - throw new Error(`tools.catalog plugin provenance missing: ${JSON.stringify(pluginTool)}`); - } + const catalogToolIds = assertExpectedKitchenSinkToolEntries( + catalogTools, + "tools.catalog plugin tools", + { requirePluginProvenance: true }, + ); const createdSession = await retryRpcCall( "sessions.create", @@ -1625,10 +1663,12 @@ export async function main() { { sessionKey: createdSession.key, agentId: "main" }, rpcOptions, ); - const effectiveToolIds = extractToolEntries(effective).map((entry) => entry?.id); - assertIncludesAny(effectiveToolIds, EXPECTED_TOOLS, "tools.effective plugin tools"); + assertExpectedKitchenSinkToolEntries( + extractToolEntries(effective), + "tools.effective plugin tools", + ); - const invoked = await retryRpcCall( + const searchInvoked = await retryRpcCall( "tools.invoke", { name: "kitchen_sink_search", @@ -1639,7 +1679,20 @@ export async function main() { }, rpcOptions, ); - assertToolInvokeResult(invoked); + assertKitchenSinkSearchInvokeResult(searchInvoked); + + const textInvoked = await retryRpcCall( + "tools.invoke", + { + name: "kitchen_sink_text", + args: { prompt: "explain kitchen sink rpc walk" }, + sessionKey: createdSession.key, + agentId: "main", + idempotencyKey: "kitchen-sink-rpc-text", + }, + rpcOptions, + ); + assertKitchenSinkTextInvokeResult(textInvoked); const ttsProviders = await retryRpcCall("tts.providers", {}, rpcOptions); const ttsStatus = await retryRpcCall("tts.status", {}, rpcOptions); diff --git a/test/scripts/kitchen-sink-rpc-walk.test.ts b/test/scripts/kitchen-sink-rpc-walk.test.ts index 90610a4ad9b..3b12ecc1652 100644 --- a/test/scripts/kitchen-sink-rpc-walk.test.ts +++ b/test/scripts/kitchen-sink-rpc-walk.test.ts @@ -15,6 +15,9 @@ import { appendBoundedOutput, assertCommandResourceCeiling, assertDiagnosticStabilityClean, + assertExpectedKitchenSinkToolEntries, + assertKitchenSinkSearchInvokeResult, + assertKitchenSinkTextInvokeResult, assertResourceCeiling, cleanupKitchenSinkEnv, createGatewayReadyLogScanner, @@ -92,7 +95,7 @@ describe("kitchen-sink RPC isolated state", () => { }); it("can fail the walk when generated temp cleanup cannot remove the root", async () => { - const rmSync = vi.spyOn(fs, "rmSync").mockImplementation(() => { + const rmSyncSpy = vi.spyOn(fs, "rmSync").mockImplementation(() => { throw new Error("device busy"); }); @@ -107,9 +110,9 @@ describe("kitchen-sink RPC isolated state", () => { ).rejects.toThrow( "failed to remove Kitchen Sink RPC temp root: /tmp/openclaw-kitchen-sink-rpc-stuck", ); - expect(rmSync).toHaveBeenCalledTimes(3); + expect(rmSyncSpy).toHaveBeenCalledTimes(3); } finally { - rmSync.mockRestore(); + rmSyncSpy.mockRestore(); } }); }); @@ -522,6 +525,81 @@ describe("kitchen-sink RPC command catalog assertions", () => { }), ).toEqual(["kitchen", "kitchen-sink"]); }); + + it("requires every expected Kitchen Sink plugin tool", () => { + expect(() => + assertExpectedKitchenSinkToolEntries( + [ + { id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" }, + ], + "tools.catalog plugin tools", + { requirePluginProvenance: true }, + ), + ).toThrow("tools.catalog plugin tools missing kitchen_sink_search, kitchen_sink_image_job"); + }); + + it("requires plugin provenance for expected catalog tools", () => { + expect(() => + assertExpectedKitchenSinkToolEntries( + [ + { id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" }, + { id: "kitchen_sink_search", source: "core", pluginId: "openclaw-kitchen-sink-fixture" }, + { id: "kitchen_sink_image_job", source: "plugin", pluginId: "other-plugin" }, + ], + "tools.catalog plugin tools", + { requirePluginProvenance: true }, + ), + ).toThrow("tools.catalog plugin tools plugin provenance mismatch"); + }); + + it("accepts complete expected tool coverage", () => { + expect( + assertExpectedKitchenSinkToolEntries( + [ + { id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" }, + { + id: "kitchen_sink_search", + source: "plugin", + pluginId: "openclaw-kitchen-sink-fixture", + }, + { + id: "kitchen_sink_image_job", + source: "plugin", + pluginId: "openclaw-kitchen-sink-fixture", + }, + ], + "tools.catalog plugin tools", + { requirePluginProvenance: true }, + ), + ).toEqual(["kitchen_sink_text", "kitchen_sink_search", "kitchen_sink_image_job"]); + }); + + it("checks search and text tool invocation fixtures separately", () => { + expect(() => + assertKitchenSinkSearchInvokeResult({ + ok: true, + source: "plugin", + output: { results: [{ title: "Kitchen Sink image fixture" }] }, + }), + ).not.toThrow(); + expect(() => + assertKitchenSinkTextInvokeResult({ + ok: true, + source: "plugin", + output: { + route: "tool:kitchen_sink_text", + text: "Kitchen Sink text provider produced a deterministic reply.", + }, + }), + ).not.toThrow(); + expect(() => + assertKitchenSinkTextInvokeResult({ + ok: true, + source: "plugin", + output: { route: "tool:kitchen_sink_search" }, + }), + ).toThrow("Kitchen Sink text tool output missed expected fixture"); + }); }); describe("kitchen-sink RPC diagnostics assertions", () => {