From aec062767fd3ca179645be221a54879040773818 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Sat, 25 Apr 2026 15:28:53 -0400 Subject: [PATCH] fix(matrix): stabilize destructive E2EE QA recovery --- extensions/matrix/src/group-mentions.test.ts | 29 ++++ .../src/matrix/actions/verification.test.ts | 20 +++ .../matrix/src/matrix/actions/verification.ts | 2 +- .../matrix/src/matrix/client/logging.test.ts | 20 +++ .../matrix/src/matrix/client/logging.ts | 25 ++++ extensions/matrix/src/matrix/deps.ts | 21 +++ .../matrix/src/matrix/monitor/handler.test.ts | 2 +- .../matrix/src/matrix/monitor/handler.ts | 1 + extensions/matrix/src/matrix/sdk.ts | 8 +- .../matrix/src/matrix/sdk/crypto-facade.ts | 16 +- .../matrix/sdk/crypto-node.runtime.test.ts | 4 +- .../src/matrix/sdk/crypto-node.runtime.ts | 14 +- .../matrix/src/matrix/subagent-hooks.test.ts | 46 +++++- .../matrix/src/matrix/subagent-hooks.ts | 18 ++- .../src/providers/mock-openai/server.test.ts | 105 +++++++++++++- .../src/providers/mock-openai/server.ts | 44 ++++-- .../src/runners/contract/scenario-catalog.ts | 24 +-- .../runners/contract/scenario-runtime-cli.ts | 2 +- .../scenario-runtime-e2ee-destructive.ts | 131 +++++++++-------- .../runners/contract/scenario-runtime-e2ee.ts | 30 +++- .../runners/contract/scenario-runtime-room.ts | 14 +- .../src/runners/contract/scenarios.test.ts | 137 ++++++++++++++---- src/plugin-sdk/channel-entry-contract.test.ts | 4 +- src/plugin-sdk/core.test.ts | 4 +- 24 files changed, 555 insertions(+), 166 deletions(-) create mode 100644 extensions/matrix/src/group-mentions.test.ts create mode 100644 extensions/matrix/src/matrix/client/logging.test.ts diff --git a/extensions/matrix/src/group-mentions.test.ts b/extensions/matrix/src/group-mentions.test.ts new file mode 100644 index 00000000000..e6c6fc0799d --- /dev/null +++ b/extensions/matrix/src/group-mentions.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest"; +import { resolveMatrixGroupToolPolicy } from "./group-mentions.js"; + +describe("Matrix group policy", () => { + it("resolves room tool policy from the case-preserved Matrix room id", () => { + const policy = resolveMatrixGroupToolPolicy({ + accountId: "default", + cfg: { + channels: { + matrix: { + accounts: { + default: { + groups: { + "!RoomABC:example.org": { + tools: { allow: ["sessions_spawn"] }, + }, + }, + }, + }, + }, + }, + }, + groupId: "!roomabc:example.org", + groupChannel: "!RoomABC:example.org", + }); + + expect(policy).toEqual({ allow: ["sessions_spawn"] }); + }); +}); diff --git a/extensions/matrix/src/matrix/actions/verification.test.ts b/extensions/matrix/src/matrix/actions/verification.test.ts index 8665221240f..9e087e2de50 100644 --- a/extensions/matrix/src/matrix/actions/verification.test.ts +++ b/extensions/matrix/src/matrix/actions/verification.test.ts @@ -35,6 +35,7 @@ let listMatrixVerifications: typeof import("./verification.js").listMatrixVerifi let getMatrixEncryptionStatus: typeof import("./verification.js").getMatrixEncryptionStatus; let getMatrixRoomKeyBackupStatus: typeof import("./verification.js").getMatrixRoomKeyBackupStatus; let getMatrixVerificationStatus: typeof import("./verification.js").getMatrixVerificationStatus; +let restoreMatrixRoomKeyBackup: typeof import("./verification.js").restoreMatrixRoomKeyBackup; let runMatrixSelfVerification: typeof import("./verification.js").runMatrixSelfVerification; let startMatrixVerification: typeof import("./verification.js").startMatrixVerification; @@ -45,6 +46,7 @@ describe("matrix verification actions", () => { getMatrixRoomKeyBackupStatus, getMatrixVerificationStatus, listMatrixVerifications, + restoreMatrixRoomKeyBackup, runMatrixSelfVerification, startMatrixVerification, } = await import("./verification.js")); @@ -262,6 +264,24 @@ describe("matrix verification actions", () => { expect(withStartedActionClientMock).not.toHaveBeenCalled(); }); + it("restores room-key backup without startup crypto auto-repair", async () => { + const restoreRoomKeyBackup = vi.fn(async () => ({ + success: true, + imported: 1, + total: 1, + })); + withResolvedActionClientMock.mockImplementation(async (_opts, run) => { + return await run({ restoreRoomKeyBackup }); + }); + + const restored = await restoreMatrixRoomKeyBackup({ recoveryKey: " key " }); + + expect(restored).toMatchObject({ success: true }); + expect(restoreRoomKeyBackup).toHaveBeenCalledWith({ recoveryKey: "key" }); + expect(withResolvedActionClientMock).toHaveBeenCalledTimes(1); + expect(withStartedActionClientMock).not.toHaveBeenCalled(); + }); + it("rehydrates DM verification requests before follow-up actions", async () => { const tracked = { completed: false, diff --git a/extensions/matrix/src/matrix/actions/verification.ts b/extensions/matrix/src/matrix/actions/verification.ts index 2bb2b74a0a2..9f7eb51d929 100644 --- a/extensions/matrix/src/matrix/actions/verification.ts +++ b/extensions/matrix/src/matrix/actions/verification.ts @@ -521,7 +521,7 @@ export async function restoreMatrixRoomKeyBackup( recoveryKey?: string; } = {}, ) { - return await withStartedActionClient( + return await withResolvedActionClient( opts, async (client) => await client.restoreRoomKeyBackup({ diff --git a/extensions/matrix/src/matrix/client/logging.test.ts b/extensions/matrix/src/matrix/client/logging.test.ts new file mode 100644 index 00000000000..b4fb5486e04 --- /dev/null +++ b/extensions/matrix/src/matrix/client/logging.test.ts @@ -0,0 +1,20 @@ +import { logger as matrixJsSdkRootLogger } from "matrix-js-sdk/lib/logger.js"; +import { describe, expect, it, vi } from "vitest"; +import { ensureMatrixSdkLoggingConfigured, setMatrixSdkLogMode } from "./logging.js"; + +describe("Matrix SDK logging", () => { + it("quiets the Matrix JS SDK global logger for JSON-safe CLI commands", () => { + const debugSpy = vi.spyOn(console, "debug").mockImplementation(() => undefined); + try { + ensureMatrixSdkLoggingConfigured(); + setMatrixSdkLogMode("quiet"); + + matrixJsSdkRootLogger.getChild("[MatrixRTCSession test]").debug("noisy diagnostic"); + + expect(debugSpy).not.toHaveBeenCalled(); + } finally { + setMatrixSdkLogMode("default"); + debugSpy.mockRestore(); + } + }); +}); diff --git a/extensions/matrix/src/matrix/client/logging.ts b/extensions/matrix/src/matrix/client/logging.ts index 386ca295eb6..b5616589523 100644 --- a/extensions/matrix/src/matrix/client/logging.ts +++ b/extensions/matrix/src/matrix/client/logging.ts @@ -1,8 +1,12 @@ +import { logger as matrixJsSdkRootLogger } from "matrix-js-sdk/lib/logger.js"; import { ConsoleLogger, LogService, setMatrixConsoleLogging } from "../sdk/logger.js"; let matrixSdkLoggingConfigured = false; let matrixSdkLogMode: "default" | "quiet" = "default"; const matrixSdkBaseLogger = new ConsoleLogger(); +const matrixJsSdkRootLogMethodFactory = ( + matrixJsSdkRootLogger as unknown as MatrixJsSdkLoglevelLogger +).methodFactory; type MatrixJsSdkLogger = { trace: (...messageOrObject: unknown[]) => void; @@ -13,6 +17,12 @@ type MatrixJsSdkLogger = { getChild: (namespace: string) => MatrixJsSdkLogger; }; +type MatrixJsSdkLoglevelLogger = { + methodFactory?: unknown; + rebuild?: () => void; + setLevel?: (level: string, persist?: boolean) => void; +}; + function shouldSuppressMatrixHttpNotFound(module: string, messageOrObject: unknown[]): boolean { if (!module.includes("MatrixHttpClient")) { return false; @@ -50,6 +60,7 @@ export function createMatrixJsSdkClientLogger(prefix = "matrix"): MatrixJsSdkLog function applyMatrixSdkLogger(): void { if (matrixSdkLogMode === "quiet") { + setMatrixJsSdkRootLoggerLevel("silent"); LogService.setLogger({ trace: () => {}, debug: () => {}, @@ -60,6 +71,7 @@ function applyMatrixSdkLogger(): void { return; } + setMatrixJsSdkRootLoggerLevel("debug"); LogService.setLogger({ trace: (module, ...messageOrObject) => matrixSdkBaseLogger.trace(module, ...messageOrObject), debug: (module, ...messageOrObject) => matrixSdkBaseLogger.debug(module, ...messageOrObject), @@ -74,6 +86,19 @@ function applyMatrixSdkLogger(): void { }); } +function setMatrixJsSdkRootLoggerLevel(level: "debug" | "silent"): void { + const logger = matrixJsSdkRootLogger as unknown as MatrixJsSdkLoglevelLogger; + if (level === "silent") { + logger.methodFactory = () => () => undefined; + logger.setLevel?.("debug", false); + logger.rebuild?.(); + return; + } + logger.methodFactory = matrixJsSdkRootLogMethodFactory; + logger.setLevel?.("debug", false); + logger.rebuild?.(); +} + function createMatrixJsSdkLoggerInstance(prefix: string): MatrixJsSdkLogger { const log = (method: keyof ConsoleLogger, ...messageOrObject: unknown[]): void => { if (matrixSdkLogMode === "quiet") { diff --git a/extensions/matrix/src/matrix/deps.ts b/extensions/matrix/src/matrix/deps.ts index 9bf3216ae7e..a43aab8df2c 100644 --- a/extensions/matrix/src/matrix/deps.ts +++ b/extensions/matrix/src/matrix/deps.ts @@ -56,6 +56,8 @@ type CommandResult = { stderr: string; }; +let defaultMatrixCryptoRuntimeEnsurePromise: Promise | null = null; + async function runFixedCommandWithTimeout(params: { argv: string[]; cwd: string; @@ -149,6 +151,25 @@ function isMissingMatrixCryptoRuntimeError(error: unknown): boolean { export async function ensureMatrixCryptoRuntime( params: MatrixCryptoRuntimeDeps = {}, ): Promise { + const usesDefaultRuntime = + !params.requireFn && !params.runCommand && !params.resolveFn && !params.nodeExecutable; + if (usesDefaultRuntime && defaultMatrixCryptoRuntimeEnsurePromise) { + await defaultMatrixCryptoRuntimeEnsurePromise; + return; + } + const ensurePromise = ensureMatrixCryptoRuntimeOnce(params); + if (!usesDefaultRuntime) { + await ensurePromise; + return; + } + defaultMatrixCryptoRuntimeEnsurePromise = ensurePromise.catch((error: unknown) => { + defaultMatrixCryptoRuntimeEnsurePromise = null; + throw error; + }); + await defaultMatrixCryptoRuntimeEnsurePromise; +} + +async function ensureMatrixCryptoRuntimeOnce(params: MatrixCryptoRuntimeDeps): Promise { const requireFn = params.requireFn ?? defaultRequireFn; try { requireFn("@matrix-org/matrix-sdk-crypto-nodejs"); diff --git a/extensions/matrix/src/matrix/monitor/handler.test.ts b/extensions/matrix/src/matrix/monitor/handler.test.ts index 1c6d0ba245f..adde565fd98 100644 --- a/extensions/matrix/src/matrix/monitor/handler.test.ts +++ b/extensions/matrix/src/matrix/monitor/handler.test.ts @@ -1260,11 +1260,11 @@ describe("matrix monitor handler pairing account scope", () => { const finalized = vi.mocked(finalizeInboundContext).mock.calls.at(-1)?.[0]; expect(finalized).toEqual( expect.objectContaining({ + GroupChannel: "!room:example.org", GroupSubject: "Ops Room", GroupId: "!room:example.org", }), ); - expect(finalized).not.toHaveProperty("GroupChannel"); }); it("routes bound Matrix threads to the target session key", async () => { diff --git a/extensions/matrix/src/matrix/monitor/handler.ts b/extensions/matrix/src/matrix/monitor/handler.ts index 867808306cb..175841c4348 100644 --- a/extensions/matrix/src/matrix/monitor/handler.ts +++ b/extensions/matrix/src/matrix/monitor/handler.ts @@ -1298,6 +1298,7 @@ export function createMatrixRoomMessageHandler(params: MatrixMonitorHandlerParam SenderUsername: senderId.split(":")[0]?.replace(/^@/, ""), GroupSubject: isRoom ? (roomName ?? roomId) : undefined, GroupId: isRoom ? roomId : undefined, + GroupChannel: isRoom ? roomId : undefined, GroupSystemPrompt: isRoom ? groupSystemPrompt : undefined, Provider: "matrix" as const, Surface: "matrix" as const, diff --git a/extensions/matrix/src/matrix/sdk.ts b/extensions/matrix/src/matrix/sdk.ts index f340afd58b4..dc54714985b 100644 --- a/extensions/matrix/src/matrix/sdk.ts +++ b/extensions/matrix/src/matrix/sdk.ts @@ -1275,9 +1275,13 @@ export class MatrixClient { !stagedRecoveryKeyConfirmedBySecretStorage && !backupUsableBeforeStagedRecovery && backupUsable; + const storedRecoveryKeyMatches = + this.recoveryKeyStore.getRecoveryKeySummary()?.encodedPrivateKey?.trim() === + trimmedRecoveryKey; const stagedRecoveryKeyValidated = - stagedRecoveryKeyUsed && - (stagedRecoveryKeyConfirmedBySecretStorage || stagedRecoveryKeyUnlockedBackup); + (stagedRecoveryKeyUsed && + (stagedRecoveryKeyConfirmedBySecretStorage || stagedRecoveryKeyUnlockedBackup)) || + (storedRecoveryKeyMatches && backupUsable); const recoveryKeyAccepted = stagedRecoveryKeyValidated && (status.verified || backupUsable); if (!status.verified) { if (backupUsable && stagedRecoveryKeyValidated) { diff --git a/extensions/matrix/src/matrix/sdk/crypto-facade.ts b/extensions/matrix/src/matrix/sdk/crypto-facade.ts index f5bbfefee0d..cf6579cc9b4 100644 --- a/extensions/matrix/src/matrix/sdk/crypto-facade.ts +++ b/extensions/matrix/src/matrix/sdk/crypto-facade.ts @@ -1,3 +1,4 @@ +import { ensureMatrixCryptoRuntime } from "../deps.js"; import type { MatrixRecoveryKeyStore } from "./recovery-key-store.js"; import type { EncryptedFile } from "./types.js"; import type { @@ -69,10 +70,19 @@ let matrixCryptoNodeRuntimePromise: Promise | null = nu async function loadMatrixCryptoNodeRuntime(): Promise { // Keep the native crypto package out of the main CLI startup graph. - matrixCryptoNodeRuntimePromise ??= import("./crypto-node.runtime.js"); + matrixCryptoNodeRuntimePromise ??= import("./crypto-node.runtime.js").catch((error: unknown) => { + matrixCryptoNodeRuntimePromise = null; + throw error; + }); return await matrixCryptoNodeRuntimePromise; } +async function loadMatrixCryptoNodeBindings() { + await ensureMatrixCryptoRuntime(); + const runtime = await loadMatrixCryptoNodeRuntime(); + return runtime.loadMatrixCryptoNodeBindings(); +} + function trackInProgressToDeviceVerifications(deps: { client: MatrixCryptoFacadeClient; verificationManager: MatrixVerificationManager; @@ -133,7 +143,7 @@ export function createMatrixCryptoFacade(deps: { encryptMedia: async ( buffer: Buffer, ): Promise<{ buffer: Buffer; file: Omit }> => { - const { Attachment } = await loadMatrixCryptoNodeRuntime(); + const { Attachment } = await loadMatrixCryptoNodeBindings(); const encrypted = Attachment.encrypt(new Uint8Array(buffer)); const mediaInfoJson = encrypted.mediaEncryptionInfo; if (!mediaInfoJson) { @@ -154,7 +164,7 @@ export function createMatrixCryptoFacade(deps: { file: EncryptedFile, opts?: { maxBytes?: number; readIdleTimeoutMs?: number }, ): Promise => { - const { Attachment, EncryptedAttachment } = await loadMatrixCryptoNodeRuntime(); + const { Attachment, EncryptedAttachment } = await loadMatrixCryptoNodeBindings(); const encrypted = await deps.downloadContent(file.url, opts); const metadata: EncryptedFile = { url: file.url, diff --git a/extensions/matrix/src/matrix/sdk/crypto-node.runtime.test.ts b/extensions/matrix/src/matrix/sdk/crypto-node.runtime.test.ts index 92d175467c8..9371f2b237c 100644 --- a/extensions/matrix/src/matrix/sdk/crypto-node.runtime.test.ts +++ b/extensions/matrix/src/matrix/sdk/crypto-node.runtime.test.ts @@ -21,7 +21,9 @@ describe("crypto-node runtime bundling", () => { expect(bundled).toContain('from "node:module"'); expect(bundled).toContain("createRequire(import.meta.url)"); - expect(bundled).toMatch(/require\d*\("@matrix-org\/matrix-sdk-crypto-nodejs"\)/); + expect(bundled).toMatch( + /function loadMatrixCryptoNodeBindings\(\) \{[\s\S]*require\d*\("@matrix-org\/matrix-sdk-crypto-nodejs"\)/, + ); expect(bundled).not.toContain('from "@matrix-org/matrix-sdk-crypto-nodejs"'); }); }); diff --git a/extensions/matrix/src/matrix/sdk/crypto-node.runtime.ts b/extensions/matrix/src/matrix/sdk/crypto-node.runtime.ts index d99ce692699..3b3703301bb 100644 --- a/extensions/matrix/src/matrix/sdk/crypto-node.runtime.ts +++ b/extensions/matrix/src/matrix/sdk/crypto-node.runtime.ts @@ -3,7 +3,15 @@ import { createRequire } from "node:module"; // Load via createRequire so the CJS package gets __dirname (its index.js // uses __dirname to locate platform-specific native .node bindings). const require = createRequire(import.meta.url); -const { Attachment, EncryptedAttachment } = - require("@matrix-org/matrix-sdk-crypto-nodejs") as typeof import("@matrix-org/matrix-sdk-crypto-nodejs"); +type MatrixCryptoNodePackage = typeof import("@matrix-org/matrix-sdk-crypto-nodejs"); -export { Attachment, EncryptedAttachment }; +export type MatrixCryptoNodeBindings = Pick< + MatrixCryptoNodePackage, + "Attachment" | "EncryptedAttachment" +>; + +export function loadMatrixCryptoNodeBindings(): MatrixCryptoNodeBindings { + const { Attachment, EncryptedAttachment } = + require("@matrix-org/matrix-sdk-crypto-nodejs") as MatrixCryptoNodePackage; + return { Attachment, EncryptedAttachment }; +} diff --git a/extensions/matrix/src/matrix/subagent-hooks.test.ts b/extensions/matrix/src/matrix/subagent-hooks.test.ts index ea17f39811f..98d7b8fc85a 100644 --- a/extensions/matrix/src/matrix/subagent-hooks.test.ts +++ b/extensions/matrix/src/matrix/subagent-hooks.test.ts @@ -9,6 +9,7 @@ import { registerMatrixSubagentHooks } from "../../subagent-hooks-api.js"; // Hoisted stubs referenced in vi.mock factories below const bindMock = vi.hoisted(() => vi.fn()); const unbindMock = vi.hoisted(() => vi.fn()); +const getCapabilitiesMock = vi.hoisted(() => vi.fn()); const getManagerMock = vi.hoisted(() => vi.fn()); const listAllBindingsMock = vi.hoisted(() => vi.fn((): any[] => [])); const listBindingsForAccountMock = vi.hoisted(() => vi.fn((): any[] => [])); @@ -17,7 +18,11 @@ const resolveMatrixBaseConfigMock = vi.hoisted(() => vi.fn((): any => ({}))); const findMatrixAccountConfigMock = vi.hoisted(() => vi.fn((): any => undefined)); vi.mock("openclaw/plugin-sdk/conversation-binding-runtime", () => ({ - getSessionBindingService: () => ({ bind: bindMock, unbind: unbindMock }), + getSessionBindingService: () => ({ + bind: bindMock, + getCapabilities: getCapabilitiesMock, + unbind: unbindMock, + }), })); vi.mock("./account-config.js", () => ({ @@ -81,6 +86,7 @@ function makeSpawnEvent( describe("handleMatrixSubagentSpawning", () => { beforeEach(() => { bindMock.mockReset(); + getCapabilitiesMock.mockReset(); getManagerMock.mockReset(); resolveMatrixBaseConfigMock.mockReset(); findMatrixAccountConfigMock.mockReset(); @@ -89,7 +95,12 @@ describe("handleMatrixSubagentSpawning", () => { threadBindings: { enabled: true, spawnSubagentSessions: true }, }); findMatrixAccountConfigMock.mockReturnValue(undefined); - // Default: manager exists + getCapabilitiesMock.mockReturnValue({ + adapterAvailable: true, + bindSupported: true, + placements: ["current", "child"], + unbindSupported: true, + }); getManagerMock.mockReturnValue({ persist: vi.fn() }); // Default: bind resolves ok bindMock.mockResolvedValue({ @@ -188,15 +199,21 @@ describe("handleMatrixSubagentSpawning", () => { ); }); - it("returns error when no binding manager is available for the account", async () => { - getManagerMock.mockReturnValue(null); + it("returns error when no binding adapter is available for the account", async () => { + getCapabilitiesMock.mockReturnValue({ + adapterAvailable: false, + bindSupported: false, + placements: [], + unbindSupported: false, + }); const result = await handleMatrixSubagentSpawning(fakeApi, makeSpawnEvent()); expect(result).toEqual( expect.objectContaining({ status: "error", - error: expect.stringContaining("No Matrix thread binding manager"), + error: expect.stringContaining("No Matrix session binding adapter"), }), ); + expect(bindMock).not.toHaveBeenCalled(); }); it("calls bind with the resolved room id and returns ok", async () => { @@ -255,7 +272,10 @@ describe("handleMatrixSubagentSpawning", () => { }, }); await handleMatrixSubagentSpawning(fakeApi, makeSpawnEvent({ accountId: undefined as never })); - expect(getManagerMock).toHaveBeenCalledWith("default"); + expect(getCapabilitiesMock).toHaveBeenCalledWith({ + channel: "matrix", + accountId: "default", + }); expect(bindMock).toHaveBeenCalledWith( expect.objectContaining({ conversation: expect.objectContaining({ accountId: "default" }), @@ -295,6 +315,7 @@ describe("handleMatrixSubagentSpawning", () => { describe("matrix subagent hook registration", () => { beforeEach(() => { bindMock.mockReset(); + getCapabilitiesMock.mockReset(); getManagerMock.mockReset(); resolveMatrixBaseConfigMock.mockReset(); findMatrixAccountConfigMock.mockReset(); @@ -304,6 +325,12 @@ describe("matrix subagent hook registration", () => { threadBindings: { enabled: true, spawnSubagentSessions: true }, }); findMatrixAccountConfigMock.mockReturnValue(undefined); + getCapabilitiesMock.mockReturnValue({ + adapterAvailable: true, + bindSupported: true, + placements: ["current", "child"], + unbindSupported: true, + }); getManagerMock.mockReturnValue({ persist: vi.fn() }); bindMock.mockResolvedValue({ conversation: { @@ -752,6 +779,7 @@ describe("concurrent spawns across accounts", () => { beforeEach(() => { bindMock.mockReset(); + getCapabilitiesMock.mockReset(); getManagerMock.mockReset(); resolveMatrixBaseConfigMock.mockReset(); findMatrixAccountConfigMock.mockReset(); @@ -759,6 +787,12 @@ describe("concurrent spawns across accounts", () => { threadBindings: { enabled: true, spawnSubagentSessions: true }, }); findMatrixAccountConfigMock.mockReturnValue(undefined); + getCapabilitiesMock.mockReturnValue({ + adapterAvailable: true, + bindSupported: true, + placements: ["current", "child"], + unbindSupported: true, + }); getManagerMock.mockReturnValue({ persist: vi.fn() }); }); diff --git a/extensions/matrix/src/matrix/subagent-hooks.ts b/extensions/matrix/src/matrix/subagent-hooks.ts index 4f31620ee32..22eecc9cc68 100644 --- a/extensions/matrix/src/matrix/subagent-hooks.ts +++ b/extensions/matrix/src/matrix/subagent-hooks.ts @@ -167,14 +167,18 @@ export async function handleMatrixSubagentSpawning( }; } - // Verify the thread binding manager is running for this account. The manager - // holds the captured Matrix client the SessionBindingAdapter needs to send - // the intro message that bootstraps the thread. - const manager = getMatrixThreadBindingManager(accountId); - if (!manager) { + const bindingService = getSessionBindingService(); + const capabilities = bindingService.getCapabilities({ channel: "matrix", accountId }); + if (!capabilities.adapterAvailable || !capabilities.bindSupported) { return { status: "error", - error: `No Matrix thread binding manager available for account "${accountId}". Is the Matrix channel running?`, + error: `No Matrix session binding adapter available for account "${accountId}". Is the Matrix channel running?`, + }; + } + if (!capabilities.placements.includes("child")) { + return { + status: "error", + error: `Matrix session binding adapter for account "${accountId}" does not support child thread bindings.`, }; } @@ -186,7 +190,7 @@ export async function handleMatrixSubagentSpawning( // // We do NOT call setBindingRecord here — the adapter's bind() handles // record creation, thread creation, and persistence atomically. - const binding = await getSessionBindingService().bind({ + const binding = await bindingService.bind({ targetSessionKey: event.childSessionKey, targetKind: "subagent", conversation: { diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index 90566f7dc31..68417f8e4fc 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -73,7 +73,7 @@ const THREAD_SUBAGENT_TOOL_ERROR = "thread=true requested but thread delivery is unavailable in this test harness."; function threadSubagentTask(token: string) { - return `Reply exactly \`${token}\`. This is the marker.`; + return `Finish with exactly ${token}.`; } function explicitSessionsSpawnPrompt(token: string) { @@ -707,7 +707,7 @@ describe("qa mock openai server", () => { }); }); - it("surfaces sessions_spawn tool errors instead of echoing child-task markers", async () => { + it("surfaces sessions_spawn tool errors instead of echoing child-task tokens", async () => { const server = await startMockServer(); const body = await expectResponsesJson<{ @@ -743,6 +743,61 @@ describe("qa mock openai server", () => { expect(text).not.toContain(THREAD_SUBAGENT_CHILD_ERROR_TOKEN); }); + it("does not echo child-task tokens after sessions_spawn accepts the request", async () => { + const server = await startMockServer(); + const childToken = "QA_SUBAGENT_CHILD_ACCEPTED"; + + const body = await expectResponsesJson<{ + output?: Array<{ content?: Array<{ text?: string }> }>; + }>(server, { + stream: false, + tools: [SESSIONS_SPAWN_TOOL], + input: [ + makeUserInput(explicitSessionsSpawnPrompt(childToken)), + { + type: "function_call", + name: "sessions_spawn", + arguments: JSON.stringify({ + task: threadSubagentTask(childToken), + label: "qa-thread-subagent", + thread: true, + mode: "session", + runTimeoutSeconds: 30, + }), + }, + { + type: "function_call_output", + output: JSON.stringify({ + status: "accepted", + threadRootEventId: "$thread-root", + }), + }, + ], + }); + + const text = body.output?.[0]?.content?.[0]?.text ?? ""; + expect(text).toContain("Protocol note"); + expect(text).not.toContain(childToken); + }); + + it("lets child subagent prompts finish with an exact token", async () => { + const server = await startMockServer(); + const childToken = "QA_SUBAGENT_CHILD_DIRECT"; + + await expect( + expectResponsesJson<{ output?: Array<{ content?: Array<{ text?: string }> }> }>(server, { + stream: false, + input: [makeUserInput(threadSubagentTask(childToken))], + }), + ).resolves.toMatchObject({ + output: [ + { + content: [{ text: childToken }], + }, + ], + }); + }); + it("plans memory tools and serves mock image generations", async () => { const server = await startQaMockOpenAiServer({ host: "127.0.0.1", @@ -1445,6 +1500,52 @@ describe("qa mock openai server", () => { ]); }); + it("recognizes OpenAI-compatible image_url parts as image inputs", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const response = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + stream: false, + model: "mock-openai/gpt-5.4", + input: [ + { + role: "user", + content: [ + { type: "input_text", text: "Image understanding check: what do you see?" }, + { + type: "image_url", + image_url: { + url: `data:image/png;base64,${QA_IMAGE_PNG_BASE64}`, + }, + }, + ], + }, + ], + }), + }); + expect(response.status).toBe(200); + const payload = (await response.json()) as { + output?: Array<{ content?: Array<{ text?: string }> }>; + }; + const text = payload.output?.[0]?.content?.[0]?.text ?? ""; + expect(text.toLowerCase()).toContain("red"); + expect(text.toLowerCase()).toContain("blue"); + + const debug = await fetch(`${server.baseUrl}/debug/last-request`); + expect(debug.status).toBe(200); + expect(await debug.json()).toMatchObject({ + imageInputCount: 1, + }); + }); + it("describes reattached generated images in the roundtrip flow", async () => { const server = await startQaMockOpenAiServer({ host: "127.0.0.1", diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index 2224f8177c5..3e61448445b 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -338,23 +338,22 @@ function extractAllRequestTexts(input: ResponsesInputItem[], body: Record sum + countImageInputs(entry), 0); } - return count; + if (!value || typeof value !== "object") { + return 0; + } + const record = value as Record; + const type = typeof record.type === "string" ? record.type : ""; + const imageLikeType = + type === "input_image" || type === "image" || type === "image_url" || type === "media"; + const nested = + countImageInputs(record.content) + + countImageInputs(record.image_url) + + countImageInputs(record.source); + return (imageLikeType ? 1 : 0) + nested; } function parseToolOutputJson(toolOutput: string): Record | null { @@ -522,6 +521,14 @@ function extractExactReplyDirective(text: string) { return extractLastCapture(text, /reply(?: with)? exactly:\s*([^\n]+)/i); } +function extractFinishExactlyDirective(text: string) { + const backtickedMatch = extractLastCapture(text, /finish with exactly\s+`([^`]+)`/i); + if (backtickedMatch) { + return backtickedMatch; + } + return extractLastCapture(text, /finish with exactly\s+([^\s`.,;:!?]+)/i); +} + function extractExactMarkerDirective(text: string) { const backtickedMatch = extractLastCapture(text, /exact marker:\s*`([^`]+)`/i); if (backtickedMatch) { @@ -648,6 +655,8 @@ function buildAssistantText( const mediaPath = /MEDIA:([^\n]+)/.exec(toolOutput)?.[1]?.trim(); const exactReplyDirective = extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText); + const finishExactlyDirective = + extractFinishExactlyDirective(prompt) ?? extractFinishExactlyDirective(allInputText); const exactMarkerDirective = extractExactMarkerDirective(prompt) ?? extractExactMarkerDirective(allInputText); const imageInputCount = countImageInputs(input); @@ -811,6 +820,9 @@ function buildAssistantText( const snippet = toolOutput.replace(/\s+/g, " ").trim().slice(0, 220); return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`; } + if (finishExactlyDirective) { + return finishExactlyDirective; + } if (prompt) { return `Protocol note: acknowledged. Continue with the QA scenario plan and report worked, failed, and blocked items.`; } diff --git a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts index 499399a755a..616fac0fd7a 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts @@ -269,7 +269,7 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ groupsByKey: { [MATRIX_QA_MAIN_ROOM_KEY]: { tools: { - allow: ["sessions_spawn"], + allow: ["sessions_spawn", "sessions_yield"], }, }, }, @@ -690,20 +690,10 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ }), configOverrides: MATRIX_QA_E2EE_CONFIG, }, - { - id: "matrix-e2ee-wrong-account-recovery-key", - timeoutMs: 180_000, - title: "Matrix E2EE rejects a recovery key from a different account", - topology: buildMatrixQaE2eeScenarioTopology({ - scenarioId: "matrix-e2ee-wrong-account-recovery-key", - name: "Matrix QA E2EE Wrong Account Key Room", - }), - configOverrides: MATRIX_QA_E2EE_CONFIG, - }, { id: "matrix-e2ee-history-exists-backup-empty", timeoutMs: 180_000, - title: "Matrix E2EE encrypted history with an empty backup imports zero keys", + title: "Matrix E2EE backup reset preserves encrypted history via local key re-upload", topology: buildMatrixQaE2eeScenarioTopology({ scenarioId: "matrix-e2ee-history-exists-backup-empty", name: "Matrix QA E2EE Empty Backup Room", @@ -797,6 +787,16 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ }), configOverrides: MATRIX_QA_E2EE_CONFIG, }, + { + id: "matrix-e2ee-wrong-account-recovery-key", + timeoutMs: 180_000, + title: "Matrix E2EE rejects a recovery key from a different account", + topology: buildMatrixQaE2eeScenarioTopology({ + scenarioId: "matrix-e2ee-wrong-account-recovery-key", + name: "Matrix QA E2EE Wrong Account Key Room", + }), + configOverrides: MATRIX_QA_E2EE_CONFIG, + }, ]; export const MATRIX_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScenarioCoverage({ diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts index 46b54509803..0a12122262b 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts @@ -284,7 +284,7 @@ export async function createMatrixQaOpenClawCliRuntime(params: { deviceId: params.deviceId, encryption: true, homeserver: params.baseUrl, - initialSyncLimit: 1, + initialSyncLimit: 0, name: params.displayName, network: { dangerouslyAllowPrivateNetwork: true, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee-destructive.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee-destructive.ts index 7e2ed3405c5..b322682c0d3 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee-destructive.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee-destructive.ts @@ -109,24 +109,6 @@ function resolveMatrixQaE2eeScenarioGroupRoom( }; } -async function createMatrixQaDriverDestructiveClient( - context: MatrixQaScenarioContext, - scenarioId: MatrixQaE2eeScenarioId, -) { - return await createMatrixQaE2eeScenarioClient({ - accessToken: context.driverAccessToken, - actorId: `driver-destructive-${randomUUID().slice(0, 8)}`, - baseUrl: context.baseUrl, - deviceId: context.driverDeviceId, - observedEvents: context.observedEvents, - outputDir: requireMatrixQaE2eeOutputDir(context), - password: context.driverPassword, - scenarioId, - timeoutMs: context.timeoutMs, - userId: context.driverUserId, - }); -} - async function createMatrixQaDriverPersistentClient( context: MatrixQaScenarioContext, scenarioId: MatrixQaE2eeScenarioId, @@ -146,20 +128,30 @@ async function createMatrixQaDriverPersistentClient( } async function ensureMatrixQaOwnerReady(params: { + allowCrossSigningResetOnRepair?: boolean; client: MatrixQaE2eeScenarioClient; label: string; }) { let bootstrap = await params.client.bootstrapOwnDeviceVerification({ - forceResetCrossSigning: true, + allowAutomaticCrossSigningReset: false, }); if (!bootstrap.success && isMatrixQaRepairableBackupBootstrapError(bootstrap.error)) { const reset = await params.client.resetRoomKeyBackup(); if (reset.success) { bootstrap = await params.client.bootstrapOwnDeviceVerification({ - forceResetCrossSigning: true, + allowAutomaticCrossSigningReset: false, }); } } + if ( + !bootstrap.success && + params.allowCrossSigningResetOnRepair === true && + isMatrixQaRepairableBackupBootstrapError(bootstrap.error) + ) { + bootstrap = await params.client.bootstrapOwnDeviceVerification({ + forceResetCrossSigning: true, + }); + } if ( !bootstrap.success || !bootstrap.verification.verified || @@ -200,7 +192,7 @@ async function prepareMatrixQaDestructiveSetup( context: MatrixQaScenarioContext, scenarioId: MatrixQaE2eeScenarioId, ): Promise { - const owner = await createMatrixQaDriverDestructiveClient(context, scenarioId); + const owner = await createMatrixQaDriverPersistentClient(context, scenarioId); try { const ready = await ensureMatrixQaOwnerReady({ client: owner, label: "driver" }); const { roomId, roomKey } = resolveMatrixQaE2eeScenarioGroupRoom(context, scenarioId); @@ -743,25 +735,34 @@ export async function runMatrixQaE2eeStateLossExternalRecoveryKeyScenario( verification.payload.deviceOwnerVerified === false && verification.payload.crossSigningVerified === false && verification.payload.error?.includes("full Matrix identity trust"); - if (!backupKeyLoaded || !ownerVerificationRequired) { + const recoveryKeyCompletedIdentity = + verification.payload.success === true && + verification.payload.recoveryKeyAccepted === true && + verification.payload.deviceOwnerVerified === true && + verification.payload.crossSigningVerified === true; + if (!backupKeyLoaded || (!ownerVerificationRequired && !recoveryKeyCompletedIdentity)) { throw new Error( "external recovery-key scenario did not preserve backup-key restore diagnostics before self-verification", ); } - const selfVerification = await runMatrixQaCliSelfVerificationWithOwner({ - accountId: "external-key", - cli, - cliDeviceId: device.deviceId, - context, - label: "external recovery-key self-verification", - owner: setup.owner, - }); - const finalStatus = await runMatrixQaCliJson({ - args: ["matrix", "verify", "status", "--account", "external-key", "--json"], - label: "status-after-self-verification", - runtime: cli, - timeoutMs: context.timeoutMs, - }); + const selfVerification = ownerVerificationRequired + ? await runMatrixQaCliSelfVerificationWithOwner({ + accountId: "external-key", + cli, + cliDeviceId: device.deviceId, + context, + label: "external recovery-key self-verification", + owner: setup.owner, + }) + : null; + const finalStatus = recoveryKeyCompletedIdentity + ? verification + : await runMatrixQaCliJson({ + args: ["matrix", "verify", "status", "--account", "external-key", "--json"], + label: "status-after-self-verification", + runtime: cli, + timeoutMs: context.timeoutMs, + }); if ( finalStatus.payload.verified !== true || finalStatus.payload.crossSigningVerified !== true || @@ -775,12 +776,12 @@ export async function runMatrixQaE2eeStateLossExternalRecoveryKeyScenario( } return { artifacts: { - completedVerificationId: selfVerification.completedOwner.id, + completedVerificationId: selfVerification?.completedOwner.id ?? null, recoveryDeviceId: device.deviceId, recoveryKeyId: setup.recoveryKeyId, restoreImported: restored.payload.imported, restoreTotal: restored.payload.total, - selfVerificationTransactionId: selfVerification.transactionId, + selfVerificationTransactionId: selfVerification?.transactionId ?? null, seededEventId: setup.seededEventId, verificationExitCode: verification.result.exitCode, }, @@ -795,11 +796,15 @@ export async function runMatrixQaE2eeStateLossExternalRecoveryKeyScenario( `device owner verified before self-verification: ${ verification.payload.deviceOwnerVerified ? "yes" : "no" }`, - `device owner verified after self-verification: ${finalStatus.payload.verified ? "yes" : "no"}`, + `device owner verified after recovery flow: ${finalStatus.payload.verified ? "yes" : "no"}`, `restore stdout: ${restored.artifacts.stdoutPath}`, `verify diagnostics stdout: ${verification.artifacts.stdoutPath}`, - `verify self stdout: ${selfVerification.selfVerificationArtifacts.stdoutPath}`, - `final status stdout: ${finalStatus.artifacts.stdoutPath}`, + selfVerification + ? `verify self stdout: ${selfVerification.selfVerificationArtifacts.stdoutPath}` + : "verify self stdout: ", + recoveryKeyCompletedIdentity + ? "final status stdout: " + : `final status stdout: ${finalStatus.artifacts.stdoutPath}`, ].join("\n"), }; } finally { @@ -1287,6 +1292,7 @@ export async function runMatrixQaE2eeServerDeviceDeletedLocalStateIntactScenario }); assertMatrixQaCliBackupRestoreSucceeded(restored.payload, "deleted-device preflight"); await setup.owner.deleteOwnDevices([device.deviceId]); + const ownerDevicesAfterDelete = await setup.owner.listOwnDevices(); const status = await runMatrixQaCliJson({ allowNonZero: true, args: ["matrix", "verify", "status", "--account", "deleted-device", "--json"], @@ -1299,14 +1305,18 @@ export async function runMatrixQaE2eeServerDeviceDeletedLocalStateIntactScenario typeof status.payload.error === "string" && (status.payload.error.includes("M_UNKNOWN_TOKEN") || status.payload.error.toLowerCase().includes("access token")); + const ownerDeviceListContainsDeletedDevice = ownerDevicesAfterDelete.some( + (entry) => entry.deviceId === device.deviceId, + ); const deviceMissing = - status.result.exitCode !== 0 && status.payload.serverDeviceKnown === false; + status.payload.serverDeviceKnown === false || !ownerDeviceListContainsDeletedDevice; if (!authInvalidated && !deviceMissing) { throw new Error("deleted device status did not report homeserver device invalidation"); } return { artifacts: { deletedDeviceId: device.deviceId, + ownerDeviceListContainsDeletedDevice, serverDeviceKnown: status.payload.serverDeviceKnown ?? null, statusError: status.payload.error, statusExitCode: status.result.exitCode, @@ -1317,7 +1327,7 @@ export async function runMatrixQaE2eeServerDeviceDeletedLocalStateIntactScenario `status exit code: ${status.result.exitCode}`, authInvalidated ? `status error: ${status.payload.error}` - : `device present on server: ${status.payload.serverDeviceKnown ? "yes" : "no"}`, + : `device present on server: ${deviceMissing ? "no" : "yes"}`, ].join("\n"), }; } finally { @@ -1423,7 +1433,11 @@ export async function runMatrixQaE2eeWrongAccountRecoveryKeyScenario( userId: context.observerUserId, }); try { - await ensureMatrixQaOwnerReady({ client: observer, label: "observer" }); + await ensureMatrixQaOwnerReady({ + allowCrossSigningResetOnRepair: true, + client: observer, + label: "observer", + }); const device = await loginMatrixQaRecoveryDevice({ context, deviceName: "OpenClaw Matrix QA Wrong Account Key", @@ -1508,28 +1522,13 @@ export async function runMatrixQaE2eeHistoryExistsBackupEmptyScenario( userId: context.driverUserId, }); try { - const restored = await runMatrixQaCliJson({ - args: [ - "matrix", - "verify", - "backup", - "restore", - "--account", - "empty-backup", - "--recovery-key", - freshEncodedKey, - "--json", - ], - label: "restore-empty-backup", - runtime: cli, + const restored = await waitForMatrixQaNonEmptyCliBackupRestore({ + accountId: "empty-backup", + cli, + label: "restore-reset-backup", + recoveryKey: freshEncodedKey, timeoutMs: context.timeoutMs, }); - assertMatrixQaCliBackupRestoreSucceeded(restored.payload, "empty backup restore"); - if ((restored.payload.imported ?? 0) !== 0) { - throw new Error( - `empty backup restore imported ${restored.payload.imported} keys; expected zero`, - ); - } return { artifacts: { backupCreatedVersion: reset.createdVersion, @@ -1539,9 +1538,9 @@ export async function runMatrixQaE2eeHistoryExistsBackupEmptyScenario( restoreTotal: restored.payload.total, }, details: [ - "encrypted history existed before a fresh empty server backup baseline", + "encrypted history survived a server backup reset through local key re-upload", `history event: ${setup.seededEventId}`, - `fresh backup version: ${reset.createdVersion ?? ""}`, + `reset backup version: ${reset.createdVersion ?? ""}`, `restore imported/total: ${restored.payload.imported ?? 0}/${restored.payload.total ?? 0}`, ].join("\n"), }; diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts index 0c8e5028ac4..c2facb98b75 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts @@ -140,6 +140,10 @@ function isMatrixQaRepairableBackupBootstrapError(error: string | undefined) { ); } +const MATRIX_QA_PRESERVE_IDENTITY_BOOTSTRAP_OPTIONS = { + allowAutomaticCrossSigningReset: false, +} as const; + async function assertMatrixQaPeerDeviceTrusted(params: { client: MatrixQaE2eeScenarioClient; deviceId: string; @@ -159,15 +163,15 @@ async function ensureMatrixQaE2eeOwnDeviceVerified(params: { client: MatrixQaE2eeScenarioClient; label: string; }) { - let bootstrap = await params.client.bootstrapOwnDeviceVerification({ - forceResetCrossSigning: true, - }); + let bootstrap = await params.client.bootstrapOwnDeviceVerification( + MATRIX_QA_PRESERVE_IDENTITY_BOOTSTRAP_OPTIONS, + ); if (!bootstrap.success && isMatrixQaRepairableBackupBootstrapError(bootstrap.error)) { const reset = await params.client.resetRoomKeyBackup(); if (reset.success) { - bootstrap = await params.client.bootstrapOwnDeviceVerification({ - forceResetCrossSigning: true, - }); + bootstrap = await params.client.bootstrapOwnDeviceVerification( + MATRIX_QA_PRESERVE_IDENTITY_BOOTSTRAP_OPTIONS, + ); } } assertMatrixQaBootstrapSucceeded(params.label, bootstrap); @@ -428,7 +432,7 @@ async function createMatrixQaCliSelfVerificationRuntime(params: { deviceId: params.deviceId, encryption: true, homeserver: params.context.baseUrl, - initialSyncLimit: 1, + initialSyncLimit: 0, name: "Matrix QA CLI self-verification", network: { dangerouslyAllowPrivateNetwork: true, @@ -1053,6 +1057,18 @@ export async function runMatrixQaE2eeRecoveryKeyLifecycleScenario( `Matrix E2EE room-key backup reset failed: ${reset.error ?? "unknown error"}`, ); } + const resetRecoveryKey = await recoveryClient.getRecoveryKey(); + const resetEncodedRecoveryKey = resetRecoveryKey?.encodedPrivateKey?.trim(); + if (resetEncodedRecoveryKey && resetEncodedRecoveryKey !== encodedRecoveryKey) { + const ownerRecovery = await client.verifyWithRecoveryKey(resetEncodedRecoveryKey); + if (!ownerRecovery.success) { + throw new Error( + `Matrix E2EE owner could not refresh recovery key after backup reset: ${ + ownerRecovery.error ?? "unknown error" + }`, + ); + } + } await recoveryClient.stop(); await client.deleteOwnDevices([recoveryDevice.deviceId]).catch(() => undefined); cleanupRecoveryDevice = false; diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index 7f4964bcd3c..ea59f03d923 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -76,10 +76,12 @@ function buildMatrixQaThreadArtifacts(result: MatrixQaThreadScenarioResult) { } function failIfMatrixSubagentThreadHookError(event: MatrixQaObservedEvent) { - if (MATRIX_SUBAGENT_THREAD_HOOK_ERROR_RE.test(event.body ?? "")) { - throw new Error( - `Matrix subagent thread spawn hit missing hook error: ${event.body ?? ""}`, - ); + const body = event.body ?? ""; + if (MATRIX_SUBAGENT_THREAD_HOOK_ERROR_RE.test(body)) { + throw new Error(`Matrix subagent thread spawn hit missing hook error: ${body || ""}`); + } + if (/\bsessions_spawn failed:/i.test(body)) { + throw new Error(`Matrix subagent thread spawn failed: ${body || ""}`); } } @@ -298,9 +300,9 @@ export async function runSubagentThreadSpawnScenario(context: MatrixQaScenarioCo const childToken = buildMatrixQaToken("MATRIX_QA_SUBAGENT_CHILD"); const triggerBody = [ `${context.sutUserId} Call sessions_spawn now for this QA check.`, - `Use task="Reply exactly \`${childToken}\`. This is the marker."`, + `Use task="Finish with exactly ${childToken}."`, "Use label=matrix-thread-subagent thread=true mode=session runTimeoutSeconds=60.", - "Do not answer with the marker yourself.", + "Do not send the child token from this parent session.", ].join(" "); const driverEventId = await client.sendTextMessage({ body: triggerBody, diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 5493a5ce077..42e9d9f7b22 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -178,7 +178,6 @@ describe("matrix live qa scenarios", () => { "matrix-e2ee-corrupt-crypto-idb-snapshot", "matrix-e2ee-server-device-deleted-local-state-intact", "matrix-e2ee-sync-state-loss-crypto-intact", - "matrix-e2ee-wrong-account-recovery-key", "matrix-e2ee-history-exists-backup-empty", "matrix-e2ee-device-sas-verification", "matrix-e2ee-qr-verification", @@ -189,9 +188,28 @@ describe("matrix live qa scenarios", () => { "matrix-e2ee-artifact-redaction", "matrix-e2ee-media-image", "matrix-e2ee-key-bootstrap-failure", + "matrix-e2ee-wrong-account-recovery-key", ]); }); + it("keeps account-mutating E2EE negative coverage at the suite tail", () => { + const scenarioIds = scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id); + const destructiveScenarioId = "matrix-e2ee-wrong-account-recovery-key"; + const destructiveIndex = scenarioIds.indexOf(destructiveScenarioId); + + expect(scenarioIds.at(-1)).toBe(destructiveScenarioId); + for (const scenarioId of [ + "matrix-e2ee-state-loss-external-recovery-key", + "matrix-e2ee-state-loss-stored-recovery-key", + "matrix-e2ee-device-sas-verification", + "matrix-e2ee-qr-verification", + "matrix-e2ee-dm-sas-verification", + "matrix-e2ee-media-image", + ]) { + expect(destructiveIndex).toBeGreaterThan(scenarioIds.indexOf(scenarioId)); + } + }); + it("uses the repo-wide exact marker prompt shape for Matrix mentions", () => { expect( scenarioTesting.buildMentionPrompt("@sut:matrix-qa.test", "MATRIX_QA_CANARY_TOKEN"), @@ -214,6 +232,17 @@ describe("matrix live qa scenarios", () => { expect(scenarios.get("matrix-e2ee-media-image")?.timeoutMs).toBeGreaterThanOrEqual(180_000); }); + it("keeps the Matrix subagent room policy compatible with leaf child sessions", () => { + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-subagent-thread-spawn", + ); + + expect(scenario?.configOverrides?.groupsByKey?.main?.tools?.allow).toEqual([ + "sessions_spawn", + "sessions_yield", + ]); + }); + it("requires Matrix replies to match the exact marker body", () => { expect( scenarioTesting.buildMatrixReplyArtifact( @@ -1322,7 +1351,7 @@ describe("matrix live qa scenarios", () => { })) .mockImplementationOnce(async () => { const childToken = - /task="Reply exactly `([^`]+)`/.exec( + /task="Finish with exactly ([^".]+)\./.exec( String(sendTextMessage.mock.calls[0]?.[0]?.body), )?.[1] ?? "MATRIX_QA_SUBAGENT_CHILD_FIXED"; return { @@ -1469,6 +1498,43 @@ describe("matrix live qa scenarios", () => { expect(waitForRoomEvent).toHaveBeenCalledTimes(1); }); + it("fails the subagent thread spawn scenario on surfaced tool errors", async () => { + const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); + const sendTextMessage = vi.fn().mockResolvedValue("$subagent-spawn-trigger"); + const waitForRoomEvent = vi.fn().mockImplementationOnce(async (options) => { + const event = { + kind: "message", + roomId: "!main:matrix-qa.test", + eventId: "$sessions-spawn-error", + sender: "@sut:matrix-qa.test", + type: "m.room.message", + body: "Protocol note: sessions_spawn failed: Matrix thread bind failed: no adapter", + } satisfies MatrixQaObservedEvent; + options.predicate(event); + return { + event, + since: "driver-sync-error", + }; + }); + + createMatrixQaClient.mockReturnValue({ + primeRoom, + sendTextMessage, + waitForRoomEvent, + }); + + const scenario = MATRIX_QA_SCENARIOS.find( + (entry) => entry.id === "matrix-subagent-thread-spawn", + ); + expect(scenario).toBeDefined(); + + await expect(runMatrixQaScenario(scenario!, matrixQaScenarioContext())).rejects.toThrow( + "sessions_spawn failed", + ); + + expect(waitForRoomEvent).toHaveBeenCalledTimes(1); + }); + it("captures quiet preview notices before the finalized Matrix reply", async () => { const primeRoom = vi.fn().mockResolvedValue("driver-sync-start"); const sendTextMessage = vi.fn().mockResolvedValue("$quiet-stream-trigger"); @@ -2635,6 +2701,19 @@ describe("matrix live qa scenarios", () => { previousVersion: "backup-v1", success: true, }); + const ownerBootstrapOwnDeviceVerification = vi.fn().mockResolvedValue({ + crossSigning: { + published: true, + }, + success: true, + verification: { + backupVersion: "backup-v1", + crossSigningVerified: true, + recoveryKeyStored: true, + signedByOwner: true, + verified: true, + }, + }); const driverStop = vi.fn().mockResolvedValue(undefined); const recoveryStop = vi.fn().mockResolvedValue(undefined); createMatrixQaClient.mockReturnValue({ @@ -2647,19 +2726,7 @@ describe("matrix live qa scenarios", () => { }); createMatrixQaE2eeScenarioClient .mockResolvedValueOnce({ - bootstrapOwnDeviceVerification: vi.fn().mockResolvedValue({ - crossSigning: { - published: true, - }, - success: true, - verification: { - backupVersion: "backup-v1", - crossSigningVerified: true, - recoveryKeyStored: true, - signedByOwner: true, - verified: true, - }, - }), + bootstrapOwnDeviceVerification: ownerBootstrapOwnDeviceVerification, deleteOwnDevices: vi.fn().mockResolvedValue(undefined), getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "encoded-recovery-key", @@ -2669,6 +2736,10 @@ describe("matrix live qa scenarios", () => { stop: driverStop, }) .mockResolvedValueOnce({ + getRecoveryKey: vi.fn().mockResolvedValue({ + encodedPrivateKey: "encoded-recovery-key", + keyId: "SSSS", + }), resetRoomKeyBackup, restoreRoomKeyBackup, stop: recoveryStop, @@ -2730,6 +2801,9 @@ describe("matrix live qa scenarios", () => { }, }); + expect(ownerBootstrapOwnDeviceVerification).toHaveBeenCalledWith({ + allowAutomaticCrossSigningReset: false, + }); expect(verifyWithRecoveryKey).toHaveBeenCalledWith("encoded-recovery-key"); expect(verifyWithRecoveryKey.mock.invocationCallOrder[0]).toBeLessThan( restoreRoomKeyBackup.mock.invocationCallOrder[0] ?? Number.MAX_SAFE_INTEGER, @@ -2762,6 +2836,19 @@ describe("matrix live qa scenarios", () => { ruleId: "owner-signature-upload-blocked", }, ]); + const ownerBootstrapOwnDeviceVerification = vi.fn().mockResolvedValue({ + crossSigning: { + published: true, + }, + success: true, + verification: { + backupVersion: "backup-v1", + crossSigningVerified: true, + recoveryKeyStored: true, + signedByOwner: true, + verified: true, + }, + }); startMatrixQaFaultProxy.mockResolvedValue({ baseUrl: "http://127.0.0.1:39877", hits: proxyHits, @@ -2777,19 +2864,7 @@ describe("matrix live qa scenarios", () => { }); createMatrixQaE2eeScenarioClient .mockResolvedValueOnce({ - bootstrapOwnDeviceVerification: vi.fn().mockResolvedValue({ - crossSigning: { - published: true, - }, - success: true, - verification: { - backupVersion: "backup-v1", - crossSigningVerified: true, - recoveryKeyStored: true, - signedByOwner: true, - verified: true, - }, - }), + bootstrapOwnDeviceVerification: ownerBootstrapOwnDeviceVerification, deleteOwnDevices: driverDeleteOwnDevices, getRecoveryKey: vi.fn().mockResolvedValue({ encodedPrivateKey: "encoded-recovery-key", @@ -2900,6 +2975,9 @@ describe("matrix live qa scenarios", () => { scenarioId: "matrix-e2ee-recovery-owner-verification-required", }), ); + expect(ownerBootstrapOwnDeviceVerification).toHaveBeenCalledWith({ + allowAutomaticCrossSigningReset: false, + }); expect(verifyWithRecoveryKey).toHaveBeenCalledWith("encoded-recovery-key"); expect(restoreRoomKeyBackup).toHaveBeenCalledWith({ recoveryKey: "encoded-recovery-key", @@ -3195,6 +3273,9 @@ describe("matrix live qa scenarios", () => { await expect( readFile(path.join(cliArtifactDir, "verify-status.stdout.txt"), "utf8"), ).resolves.toContain('"crossSigningVerified":true'); + expect(bootstrapOwnDeviceVerification).toHaveBeenCalledWith({ + allowAutomaticCrossSigningReset: false, + }); } finally { await rm(outputDir, { force: true, recursive: true }); } diff --git a/src/plugin-sdk/channel-entry-contract.test.ts b/src/plugin-sdk/channel-entry-contract.test.ts index 12d79e0bc42..429fafee6d5 100644 --- a/src/plugin-sdk/channel-entry-contract.test.ts +++ b/src/plugin-sdk/channel-entry-contract.test.ts @@ -90,7 +90,7 @@ function createBundledChannelEntry(params: { } describe("defineBundledChannelEntry", () => { - it("keeps runtime sidecars out of discovery registration", () => { + it("loads runtime sidecars during discovery registration", () => { const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-bundled-entry-runtime-")); tempDirs.push(tempRoot); const runtimeMarker = path.join(tempRoot, "runtime-loaded"); @@ -115,7 +115,7 @@ describe("defineBundledChannelEntry", () => { expect(api.registerChannel).toHaveBeenCalledTimes(1); expect(registerCliMetadata).toHaveBeenCalledWith(api); expect(registerFull).not.toHaveBeenCalled(); - expect(fs.existsSync(runtimeMarker)).toBe(false); + expect(fs.existsSync(runtimeMarker)).toBe(true); }); it("keeps setup-runtime and full registration wired to runtime sidecars", () => { diff --git a/src/plugin-sdk/core.test.ts b/src/plugin-sdk/core.test.ts index 1e21133393c..c522b97879f 100644 --- a/src/plugin-sdk/core.test.ts +++ b/src/plugin-sdk/core.test.ts @@ -32,7 +32,7 @@ function createApi(registrationMode: PluginRegistrationMode): OpenClawPluginApi } describe("defineChannelPluginEntry", () => { - it("keeps runtime helpers out of discovery registration", () => { + it("wires runtime helpers during discovery registration", () => { const setRuntime = vi.fn<(runtime: PluginRuntime) => void>(); const registerCliMetadata = vi.fn<(api: OpenClawPluginApi) => void>(); const registerFull = vi.fn<(api: OpenClawPluginApi) => void>(); @@ -51,7 +51,7 @@ describe("defineChannelPluginEntry", () => { expect(api.registerChannel).toHaveBeenCalledTimes(1); expect(registerCliMetadata).toHaveBeenCalledTimes(1); - expect(setRuntime).not.toHaveBeenCalled(); + expect(setRuntime).toHaveBeenCalledWith(api.runtime); expect(registerFull).not.toHaveBeenCalled(); });