From 8bef5d0d622b9f543a39728a86e70d99006f72cb Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Sun, 17 May 2026 06:35:54 +0800
Subject: [PATCH] fix(qa-lab): stabilize threaded memory parity

---
 CHANGELOG.md                                  |  1 +
 .../qa-lab/src/qa-gateway-config.test.ts      |  1 +
 extensions/qa-lab/src/qa-gateway-config.ts    |  3 +
 .../src/suite-runtime-agent-session.test.ts   | 29 ++++++-
 .../qa-lab/src/suite-runtime-agent-session.ts | 77 +++++++++++++------
 scripts/run-node.mjs                          |  1 +
 src/infra/run-node.test.ts                    | 33 ++++++++
 7 files changed, 122 insertions(+), 23 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 236ce6093fb..2d7751099c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai
 - CLI/config: show concise human config-write output with an indented backup path instead of printing checksum-heavy overwrite audit details by default.
 - CLI/docs: call the canonical lowercase docs MCP search tool and surface MCP errors instead of returning empty search results. Fixes #82702. (#82704) Thanks @hclsys.
 - QA-Lab: ignore heartbeat-only operational transcripts when capturing runtime parity cells so background checks cannot replace the scenario reply. (#80323) Thanks @100yenadmin.
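+- QA-Lab: pin threaded-memory parity runs to the `memory-core` memory slot, keep bundled plugin resolution enabled by default for QA commands (an explicit `OPENCLAW_DISABLE_BUNDLED_PLUGINS` is preserved), and retry QA gateway calls that hit transient session-store write-lock timeouts. (#72045) Thanks @WuKongAI-CMU.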
 - Gateway/exec approvals: wait for accepted async approval follow-up runs instead of direct-fallback sending duplicate completions when retries use different nonce keys. Fixes #82711. (#82717) Thanks @udaymanish6.
 - Agents/subagents: mark completed subagent handoffs as ready for parent review so requester agents verify results and continue required follow-up work before reporting done. (#82724) Thanks @100menotu001.
 - QA-Lab: validate Capture saved views loaded from browser storage so malformed local state cannot poison Capture inspector filters or layout controls. (#77722) Thanks @AsaZhou923.
diff --git a/extensions/qa-lab/src/qa-gateway-config.test.ts b/extensions/qa-lab/src/qa-gateway-config.test.ts
index 8357eeea060..1429ddd8869 100644
--- a/extensions/qa-lab/src/qa-gateway-config.test.ts
+++ b/extensions/qa-lab/src/qa-gateway-config.test.ts
@@ -60,6 +60,7 @@ describe("buildQaGatewayConfig", () => {
     expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080");
     expect(cfg.models?.providers?.anthropic?.request).toEqual({ allowPrivateNetwork: true });
     expect(cfg.plugins?.allow).toEqual(["acpx", "memory-core", "qa-channel"]);
+    expect(cfg.plugins?.slots?.memory).toBe("memory-core");
     expect(cfg.plugins?.entries?.acpx).toEqual({
       enabled: true,
       config: {
diff --git a/extensions/qa-lab/src/qa-gateway-config.ts b/extensions/qa-lab/src/qa-gateway-config.ts
index f0e1eef3d48..26897bb52e6 100644
--- a/extensions/qa-lab/src/qa-gateway-config.ts
+++ b/extensions/qa-lab/src/qa-gateway-config.ts
@@ -126,6 +126,9 @@ export function buildQaGatewayConfig(params: {
   return {
     plugins: {
       allow: allowedPlugins,
+      slots: {
+        memory: "memory-core",
+      },
       entries: {
         acpx: {
           enabled: true,
diff --git a/extensions/qa-lab/src/suite-runtime-agent-session.test.ts b/extensions/qa-lab/src/suite-runtime-agent-session.test.ts
index 97337967322..c47da79dcb4 100644
--- a/extensions/qa-lab/src/suite-runtime-agent-session.test.ts
+++ b/extensions/qa-lab/src/suite-runtime-agent-session.test.ts
@@ -11,7 +11,10 @@ import { createTempDirHarness } from "./temp-dir.test-helper.js";
 
 const { cleanup, makeTempDir } = createTempDirHarness();
 
-afterEach(cleanup);
+afterEach(async () => {
+  vi.useRealTimers();
+  await cleanup();
+});
 
 describe("qa suite runtime agent session helpers", () => {
   const gatewayCall = vi.fn();
@@ -44,6 +47,30 @@ describe("qa suite runtime agent session helpers", () => {
     expect(options?.timeoutMs).toBe(60_000);
   });
 
+  it("retries transient session store lock timeouts while creating sessions", async () => {
+    const lockTimeoutError = Object.assign(
+      new Error("SessionWriteLockTimeoutError: session file locked"),
+      { code: "OPENCLAW_SESSION_WRITE_LOCK_TIMEOUT" },
+    );
+    gatewayCall
+      .mockRejectedValueOnce(lockTimeoutError)
+      .mockResolvedValueOnce({ key: " session-2 " });
+
+    vi.useFakeTimers();
+    const pending = createSession(env, "Retry Session", "agent:qa:retry");
+
+    await vi.advanceTimersByTimeAsync(1_000);
+
+    await expect(pending).resolves.toBe("session-2");
+    expect(gatewayCall).toHaveBeenCalledTimes(2);
+    expect(gatewayCall).toHaveBeenNthCalledWith(
+      2,
+      "sessions.create",
+      { label: "Retry Session", key: "agent:qa:retry" },
+      expect.objectContaining({ timeoutMs: expect.any(Number) }),
+    );
+  });
+
   it("reads effective tool ids once and drops blanks", async () => {
     gatewayCall.mockResolvedValueOnce({
       groups: [
diff --git a/extensions/qa-lab/src/suite-runtime-agent-session.ts b/extensions/qa-lab/src/suite-runtime-agent-session.ts
index 7346d93c53a..1abec08a700 100644
--- a/extensions/qa-lab/src/suite-runtime-agent-session.ts
+++ b/extensions/qa-lab/src/suite-runtime-agent-session.ts
@@ -1,5 +1,7 @@
 import fs from "node:fs/promises";
 import path from "node:path";
+import { setTimeout as sleep } from "node:timers/promises";
+import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
 import { liveTurnTimeoutMs } from "./suite-runtime-agent-common.js";
 import type {
   QaRawSessionStoreEntry,
@@ -7,12 +9,47 @@ import type {
   QaSuiteRuntimeEnv,
 } from "./suite-runtime-types.js";
 
-async function createSession(
-  env: Pick<QaSuiteRuntimeEnv, "gateway" | "primaryModel" | "alternateModel" | "providerMode">,
-  label: string,
-  key?: string,
+type QaGatewayCallEnv = Pick<
+  QaSuiteRuntimeEnv,
+  "gateway" | "primaryModel" | "alternateModel" | "providerMode"
+>;
+
+const SESSION_STORE_LOCK_RETRY_DELAYS_MS = [1_000, 3_000, 5_000] as const;
+
+function isSessionStoreLockTimeout(error: unknown) {
+  const text = formatErrorMessage(error);
+  return (
+    text.includes("OPENCLAW_SESSION_WRITE_LOCK_TIMEOUT") ||
+    text.includes("SessionWriteLockTimeoutError") ||
+    text.includes("session file locked")
+  );
+}
+
+async function callGatewayWithSessionStoreLockRetry<T>(
+  env: QaGatewayCallEnv,
+  method: string,
+  params: Record<string, unknown>,
+  options: { timeoutMs: number },
 ) {
-  const created = (await env.gateway.call(
+  for (let attempt = 0; attempt <= SESSION_STORE_LOCK_RETRY_DELAYS_MS.length; attempt += 1) {
+    try {
+      return (await env.gateway.call(method, params, options)) as T;
+    } catch (error) {
+      if (
+        !isSessionStoreLockTimeout(error) ||
+        attempt === SESSION_STORE_LOCK_RETRY_DELAYS_MS.length
+      ) {
+        throw error;
+      }
+      await sleep(SESSION_STORE_LOCK_RETRY_DELAYS_MS[attempt]);
+    }
+  }
+  throw new Error(`${method} failed after session store lock retries`);
+}
+
+async function createSession(env: QaGatewayCallEnv, label: string, key?: string) {
+  const created = await callGatewayWithSessionStoreLockRetry<{ key?: string }>(
+    env,
     "sessions.create",
     {
       label,
@@ -21,7 +58,7 @@ async function createSession(
     {
       timeoutMs: liveTurnTimeoutMs(env, 60_000),
     },
-  )) as { key?: string };
+  );
   const sessionKey = created.key?.trim();
   if (!sessionKey) {
     throw new Error("sessions.create returned no key");
@@ -29,11 +66,11 @@ async function createSession(
   return sessionKey;
 }
 
-async function readEffectiveTools(
-  env: Pick<QaSuiteRuntimeEnv, "gateway" | "primaryModel" | "alternateModel" | "providerMode">,
-  sessionKey: string,
-) {
-  const payload = (await env.gateway.call(
+async function readEffectiveTools(env: QaGatewayCallEnv, sessionKey: string) {
+  const payload = await callGatewayWithSessionStoreLockRetry<{
+    groups?: Array<{ tools?: Array<{ id?: string }> }>;
+  }>(
+    env,
     "tools.effective",
     {
       sessionKey,
@@ -41,9 +78,7 @@ async function readEffectiveTools(
     {
       timeoutMs: liveTurnTimeoutMs(env, 90_000),
     },
-  )) as {
-    groups?: Array<{ tools?: Array<{ id?: string }> }>;
-  };
+  );
   const ids = new Set<string>();
   for (const group of payload.groups ?? []) {
     for (const tool of group.tools ?? []) {
@@ -55,11 +90,11 @@ async function readEffectiveTools(
   return ids;
 }
 
-async function readSkillStatus(
-  env: Pick<QaSuiteRuntimeEnv, "gateway" | "primaryModel" | "alternateModel" | "providerMode">,
-  agentId = "qa",
-) {
-  const payload = (await env.gateway.call(
+async function readSkillStatus(env: QaGatewayCallEnv, agentId = "qa") {
+  const payload = await callGatewayWithSessionStoreLockRetry<{
+    skills?: QaSkillStatusEntry[];
+  }>(
+    env,
     "skills.status",
     {
       agentId,
@@ -67,9 +102,7 @@ async function readSkillStatus(
     {
       timeoutMs: liveTurnTimeoutMs(env, 45_000),
     },
-  )) as {
-    skills?: QaSkillStatusEntry[];
-  };
+  );
   return payload.skills ?? [];
 }
 
diff --git a/scripts/run-node.mjs b/scripts/run-node.mjs
index fde703e33f7..aac55620c8c 100644
--- a/scripts/run-node.mjs
+++ b/scripts/run-node.mjs
@@ -1180,6 +1180,7 @@ export async function runNodeMain(params = {}) {
   if (deps.args[0] === "qa") {
     deps.env.OPENCLAW_BUILD_PRIVATE_QA = "1";
     deps.env.OPENCLAW_ENABLE_PRIVATE_QA_CLI = "1";
+    deps.env.OPENCLAW_DISABLE_BUNDLED_PLUGINS ??= "0";
   }
   deps.outputTee = createRunNodeOutputTee(deps);
 
diff --git a/src/infra/run-node.test.ts b/src/infra/run-node.test.ts
index c319f892dd5..194a82e6bcc 100644
--- a/src/infra/run-node.test.ts
+++ b/src/infra/run-node.test.ts
@@ -857,6 +857,39 @@ describe("run-node script", () => {
       expect(postBuildParams?.cwd).toBe(tmp);
       expect(postBuildParams?.env?.OPENCLAW_BUILD_PRIVATE_QA).toBe("1");
       expect(postBuildParams?.env?.OPENCLAW_ENABLE_PRIVATE_QA_CLI).toBe("1");
+      expect(postBuildParams?.env?.OPENCLAW_DISABLE_BUNDLED_PLUGINS).toBe("0");
+    });
+  });
+
+  it("preserves an explicit bundled plugin disable flag for QA runs", async () => {
+    await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
+      await setupTrackedProject(tmp, {
+        files: {
+          [ROOT_SRC]: "export const value = 1;\n",
+          [QA_LAB_PLUGIN_SDK_ENTRY]: "export const qaLab = true;\n",
+        },
+        oldPaths: [ROOT_SRC, ROOT_TSCONFIG, ROOT_PACKAGE, QA_LAB_PLUGIN_SDK_ENTRY],
+        buildPaths: [DIST_ENTRY, BUILD_STAMP],
+      });
+
+      const runRuntimePostBuild = vi.fn();
+      const { spawn, spawnSync } = createSpawnRecorder({
+        gitHead: "abc123\n",
+        gitStatus: "",
+      });
+      const exitCode = await runQaCommand({
+        tmp,
+        spawn,
+        spawnSync,
+        runRuntimePostBuild,
+        env: { OPENCLAW_DISABLE_BUNDLED_PLUGINS: "1" },
+      });
+
+      expect(exitCode).toBe(0);
+      const postBuildParams = firstMockCall(runRuntimePostBuild)?.[0] as
+        | { cwd?: string; env?: Record<string, string | undefined> }
+        | undefined;
+      expect(postBuildParams?.env?.OPENCLAW_DISABLE_BUNDLED_PLUGINS).toBe("1");
     });
   });