fix: stabilize live qa scenario suite

2026-04-23 07:01:40 +00:00 · 2026-04-08 08:17:27 +01:00
parent 55cbcd829d
commit 21d9bac5ec
14 changed files with 452 additions and 49 deletions
--- a/extensions/memory-core/index.test.ts
+++ b/extensions/memory-core/index.test.ts
@@ -20,6 +20,7 @@ describe("buildPromptSection", () => {
    expect(result[0]).toBe("## Memory Recall");
    expect(result[1]).toContain("run memory_search");
    expect(result[1]).toContain("then use memory_get");
+    expect(result[1]).toContain("indexed session transcripts");
    expect(result).toContain(
      "Citations: include Source: <path#line> when it helps the user verify memory snippets.",
    );
@@ -30,6 +31,7 @@ describe("buildPromptSection", () => {
    const result = buildPromptSection({ availableTools: new Set(["memory_search"]) });
    expect(result[0]).toBe("## Memory Recall");
    expect(result[1]).toContain("run memory_search");
+    expect(result[1]).toContain("indexed session transcripts");
    expect(result[1]).not.toContain("then use memory_get");
  });

--- a/extensions/memory-core/src/prompt-section.ts
+++ b/extensions/memory-core/src/prompt-section.ts
@@ -14,10 +14,10 @@ export const buildPromptSection: MemoryPromptSectionBuilder = ({
  let toolGuidance: string;
  if (hasMemorySearch && hasMemoryGet) {
    toolGuidance =
-      "Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.";
+      "Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md + indexed session transcripts; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.";
  } else if (hasMemorySearch) {
    toolGuidance =
-      "Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md and answer from the matching results. If low confidence after search, say you checked.";
+      "Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md + indexed session transcripts and answer from the matching results. If low confidence after search, say you checked.";
  } else {
    toolGuidance =
      "Before answering anything about prior work, decisions, dates, people, preferences, or todos that already point to a specific memory file or note: run memory_get to pull only the needed lines. If low confidence after reading them, say you checked.";
--- a/extensions/qa-lab/src/gateway-child.test.ts
+++ b/extensions/qa-lab/src/gateway-child.test.ts
@@ -1,4 +1,4 @@
-import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { lstat, mkdir, mkdtemp, readdir, rm, writeFile } from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 import { afterEach, describe, expect, it } from "vitest";
@@ -21,6 +21,8 @@ function createParams(baseEnv?: NodeJS.ProcessEnv) {
    xdgConfigHome: "/tmp/openclaw-qa/xdg-config",
    xdgDataHome: "/tmp/openclaw-qa/xdg-data",
    xdgCacheHome: "/tmp/openclaw-qa/xdg-cache",
+    bundledPluginsDir: "/tmp/openclaw-qa/bundled-plugins",
+    compatibilityHostVersion: "2026.4.8",
    baseEnv,
  };
 }
@@ -35,6 +37,8 @@ describe("buildQaRuntimeEnv", () => {
    expect(env.OPENCLAW_TEST_FAST).toBe("1");
    expect(env.OPENCLAW_QA_ALLOW_LOCAL_IMAGE_PROVIDER).toBe("1");
    expect(env.OPENCLAW_ALLOW_SLOW_REPLY_TESTS).toBe("1");
+    expect(env.OPENCLAW_BUNDLED_PLUGINS_DIR).toBe("/tmp/openclaw-qa/bundled-plugins");
+    expect(env.OPENCLAW_COMPATIBILITY_HOST_VERSION).toBe("2026.4.8");
  });

  it("maps live frontier key aliases into provider env vars", () => {
@@ -130,3 +134,117 @@ describe("resolveQaControlUiRoot", () => {
    expect(resolveQaControlUiRoot({ repoRoot, controlUiEnabled: false })).toBeUndefined();
  });
 });
+
+describe("qa bundled plugin dir", () => {
+  it("prefers the built bundled plugin tree when present", async () => {
+    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-root-"));
+    cleanups.push(async () => {
+      await rm(repoRoot, { recursive: true, force: true });
+    });
+    await mkdir(path.join(repoRoot, "dist", "extensions", "qa-channel"), {
+      recursive: true,
+    });
+    await writeFile(
+      path.join(repoRoot, "dist", "extensions", "qa-channel", "package.json"),
+      "{}",
+      "utf8",
+    );
+    await mkdir(path.join(repoRoot, "dist-runtime", "extensions", "qa-channel"), {
+      recursive: true,
+    });
+    await writeFile(
+      path.join(repoRoot, "dist-runtime", "extensions", "qa-channel", "package.json"),
+      "{}",
+      "utf8",
+    );
+    await mkdir(path.join(repoRoot, "extensions", "qa-channel"), { recursive: true });
+    // dist, dist-runtime and the source tree all exist here; the built dist tree must be chosen.
+    expect(__testing.resolveQaBundledPluginsSourceRoot(repoRoot)).toBe(
+      path.join(repoRoot, "dist", "extensions"),
+    );
+  });
+
+  it("creates a scoped bundled plugin tree for the allowed plugins only", async () => {
+    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-scope-"));
+    cleanups.push(async () => {
+      await rm(repoRoot, { recursive: true, force: true });
+    });
+    await mkdir(path.join(repoRoot, "dist", "extensions", "qa-channel"), { recursive: true });
+    await mkdir(path.join(repoRoot, "dist", "extensions", "memory-core"), { recursive: true });
+    await mkdir(path.join(repoRoot, "dist", "extensions", "unused-plugin"), { recursive: true });
+    await writeFile(path.join(repoRoot, "dist", "shared-chunk-abc123.js"), "export {};\n", "utf8");
+    const tempRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-target-"));
+    cleanups.push(async () => {
+      await rm(tempRoot, { recursive: true, force: true });
+    });
+    // tempRoot only contributes its basename: built trees are staged under repoRoot/.artifacts.
+    const { bundledPluginsDir, stagedRoot } = await __testing.createQaBundledPluginsDir({
+      repoRoot,
+      tempRoot,
+      allowedPluginIds: ["qa-channel", "memory-core"],
+    });
+    // "unused-plugin" must be pruned, while files beside extensions/ (the shared chunk) are kept.
+    expect((await readdir(bundledPluginsDir)).toSorted()).toEqual(["memory-core", "qa-channel"]);
+    expect(bundledPluginsDir).toBe(
+      path.join(
+        repoRoot,
+        ".artifacts",
+        "qa-runtime",
+        path.basename(tempRoot),
+        "dist",
+        "extensions",
+      ),
+    );
+    expect(stagedRoot).toBe(
+      path.join(repoRoot, ".artifacts", "qa-runtime", path.basename(tempRoot)),
+    );
+    expect((await lstat(path.join(bundledPluginsDir, "qa-channel"))).isDirectory()).toBe(true);
+    expect((await lstat(path.join(bundledPluginsDir, "memory-core"))).isDirectory()).toBe(true);
+    await expect(
+      lstat(
+        path.join(
+          repoRoot,
+          ".artifacts",
+          "qa-runtime",
+          path.basename(tempRoot),
+          "dist",
+          "shared-chunk-abc123.js",
+        ),
+      ),
+    ).resolves.toBeTruthy();
+  });
+
+  it("raises the QA runtime host version to the highest allowed plugin floor", async () => {
+    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-runtime-version-"));
+    cleanups.push(async () => {
+      await rm(repoRoot, { recursive: true, force: true });
+    });
+    await writeFile(
+      path.join(repoRoot, "package.json"),
+      JSON.stringify({ version: "2026.4.7-1" }),
+      "utf8",
+    );
+    const bundledRoot = path.join(repoRoot, "extensions");
+    await mkdir(path.join(bundledRoot, "qa-channel"), { recursive: true });
+    await writeFile(
+      path.join(bundledRoot, "qa-channel", "package.json"),
+      JSON.stringify({ openclaw: { install: { minHostVersion: ">=2026.4.8" } } }),
+      "utf8",
+    );
+    // A second allowed plugin whose floor (2026.4.7) is below qa-channel's (2026.4.8).
+    await mkdir(path.join(bundledRoot, "memory-core"), { recursive: true });
+    await writeFile(
+      path.join(bundledRoot, "memory-core", "package.json"),
+      JSON.stringify({ openclaw: { install: { minHostVersion: ">=2026.4.7" } } }),
+      "utf8",
+    );
+    // The root version "2026.4.7-1" parses to floor 2026.4.7, so qa-channel's 2026.4.8 wins.
+    await expect(
+      __testing.resolveQaRuntimeHostVersion({
+        repoRoot,
+        bundledPluginsSourceRoot: bundledRoot,
+        allowedPluginIds: ["memory-core", "qa-channel"],
+      }),
+    ).resolves.toBe("2026.4.8");
+  });
+});
--- a/extensions/qa-lab/src/gateway-child.ts
+++ b/extensions/qa-lab/src/gateway-child.ts
@@ -1,6 +1,6 @@
 import { spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
-import { existsSync } from "node:fs";
+import { createWriteStream, existsSync } from "node:fs";
 import fs from "node:fs/promises";
 import net from "node:net";
 import os from "node:os";
@@ -95,6 +95,8 @@ export function buildQaRuntimeEnv(params: {
  xdgConfigHome: string;
  xdgDataHome: string;
  xdgCacheHome: string;
+  bundledPluginsDir?: string;
+  compatibilityHostVersion?: string;
  providerMode?: "mock-openai" | "live-frontier";
  baseEnv?: NodeJS.ProcessEnv;
 }) {
@@ -118,6 +120,10 @@ export function buildQaRuntimeEnv(params: {
    XDG_CONFIG_HOME: params.xdgConfigHome,
    XDG_DATA_HOME: params.xdgDataHome,
    XDG_CACHE_HOME: params.xdgCacheHome,
+    ...(params.bundledPluginsDir ? { OPENCLAW_BUNDLED_PLUGINS_DIR: params.bundledPluginsDir } : {}),
+    ...(params.compatibilityHostVersion
+      ? { OPENCLAW_COMPATIBILITY_HOST_VERSION: params.compatibilityHostVersion }
+      : {}),
  };
  return normalizeQaProviderModeEnv(env, params.providerMode);
 }
@@ -136,7 +142,145 @@ function isRetryableGatewayCallError(details: string): boolean {
 export const __testing = {
  buildQaRuntimeEnv,
  isRetryableGatewayCallError,
+  resolveQaBundledPluginsSourceRoot,
+  resolveQaRuntimeHostVersion,
+  createQaBundledPluginsDir,
 };
+
+/**
+ * Returns the first existing bundled-plugin tree under `repoRoot`, preferring the built
+ * `dist` output, then `dist-runtime`, then the source `extensions` tree; throws if none exist.
+ */
+function resolveQaBundledPluginsSourceRoot(repoRoot: string) {
+  const found = [["dist", "extensions"], ["dist-runtime", "extensions"], ["extensions"]]
+    .map((segments) => path.join(repoRoot, ...segments))
+    .find((candidate) => existsSync(candidate));
+  if (found === undefined) {
+    throw new Error("failed to resolve qa bundled plugins source root");
+  }
+  return found;
+}
+
+/** Parses the first `major.minor.patch` triple in `value` (e.g. ">=2026.4.8"); null if none. */
+function parseStableSemverFloor(value: string | undefined) {
+  const triple = value ? /(\d+)\.(\d+)\.(\d+)/.exec(value.trim()) : null;
+  if (triple === null) {
+    return null;
+  }
+  const [, majorText, minorText, patchText] = triple;
+  const toInt = (digits: string | undefined) => Number.parseInt(digits ?? "", 10);
+  return {
+    major: toInt(majorText),
+    minor: toInt(minorText),
+    patch: toInt(patchText),
+    label: `${majorText}.${minorText}.${patchText}`,
+  };
+}
+
+/**
+ * Three-way comparison of two parsed version floors.
+ * Returns a negative number, zero, or a positive number when `left` is lower than,
+ * equal to, or higher than `right`; a missing (null) floor sorts below any parsed one.
+ */
+function compareSemverFloors(
+  left: ReturnType<typeof parseStableSemverFloor>,
+  right: ReturnType<typeof parseStableSemverFloor>,
+) {
+  if (!left || !right) {
+    // Only presence matters here: 0 when both are missing, -1 / 1 when one side is.
+    return Number(Boolean(left)) - Number(Boolean(right));
+  }
+  for (const field of ["major", "minor", "patch"] as const) {
+    const delta = left[field] - right[field];
+    if (delta !== 0) {
+      return delta;
+    }
+  }
+  return 0;
+}
+
+/**
+ * Resolves the host version reported to the QA runtime: the floor parsed from the repo's
+ * package.json `version`, raised to the highest `openclaw.install.minHostVersion` floor
+ * declared by the allowed plugins under `bundledPluginsSourceRoot`.
+ * Resolves to undefined when no version could be parsed at all.
+ */
+async function resolveQaRuntimeHostVersion(params: {
+  repoRoot: string;
+  bundledPluginsSourceRoot: string;
+  allowedPluginIds: readonly string[];
+}) {
+  type PluginManifest = { openclaw?: { install?: { minHostVersion?: string } } };
+  const { repoRoot, bundledPluginsSourceRoot, allowedPluginIds } = params;
+  const rootManifest = JSON.parse(
+    await fs.readFile(path.join(repoRoot, "package.json"), "utf8"),
+  ) as { version?: string };
+  let highest = parseStableSemverFloor(rootManifest.version);
+
+  for (const pluginId of allowedPluginIds) {
+    const manifestPath = path.join(bundledPluginsSourceRoot, pluginId, "package.json");
+    // Plugins without a package.json contribute no floor.
+    if (existsSync(manifestPath)) {
+      const manifest = JSON.parse(await fs.readFile(manifestPath, "utf8")) as PluginManifest;
+      const floor = parseStableSemverFloor(manifest.openclaw?.install?.minHostVersion);
+      highest = compareSemverFloors(floor, highest) > 0 ? floor : highest;
+    }
+  }
+
+  return highest?.label;
+}
+
+/**
+ * Builds the bundled-plugins directory for the QA gateway child, limited to `allowedPluginIds`.
+ * Built trees (`dist`/`dist-runtime`) are copied whole into `<repoRoot>/.artifacts/qa-runtime/`
+ * and pruned; the caller removes the returned `stagedRoot`. Source trees are copied per plugin
+ * under `tempRoot` with `stagedRoot: null`. A partially staged copy is removed on failure.
+ */
+async function createQaBundledPluginsDir(params: {
+  repoRoot: string;
+  tempRoot: string;
+  allowedPluginIds: readonly string[];
+}) {
+  const sourceRoot = resolveQaBundledPluginsSourceRoot(params.repoRoot);
+  const sourceTreeRoot = path.dirname(sourceRoot);
+  const builtTreeRoots = ["dist", "dist-runtime"].map((name) => path.join(params.repoRoot, name));
+  if (builtTreeRoots.includes(sourceTreeRoot)) {
+    const stagedName = path.basename(params.tempRoot);
+    const stagedRoot = path.join(params.repoRoot, ".artifacts", "qa-runtime", stagedName);
+    try {
+      await fs.rm(stagedRoot, { recursive: true, force: true });
+      await fs.mkdir(stagedRoot, { recursive: true });
+      const stagedTreeRoot = path.join(stagedRoot, path.basename(sourceTreeRoot));
+      await fs.cp(sourceTreeRoot, stagedTreeRoot, { recursive: true });
+      const stagedExtensionsDir = path.join(stagedTreeRoot, "extensions");
+      // NOTE(review): unlike the source-tree branch, a missing allowed plugin is not reported here.
+      for (const entry of await fs.readdir(stagedExtensionsDir, { withFileTypes: true })) {
+        if (entry.isDirectory() && !params.allowedPluginIds.includes(entry.name)) {
+          await fs.rm(path.join(stagedExtensionsDir, entry.name), { recursive: true, force: true });
+        }
+      }
+      return { bundledPluginsDir: stagedExtensionsDir, stagedRoot };
+    } catch (error) {
+      // The staged copy lives outside tempRoot, so nothing else would clean it up on failure.
+      await fs.rm(stagedRoot, { recursive: true, force: true }).catch(() => {});
+      throw error;
+    }
+  }
+
+  const bundledPluginsDir = path.join(params.tempRoot, "bundled-plugins");
+  await fs.mkdir(bundledPluginsDir, { recursive: true });
+  for (const pluginId of params.allowedPluginIds) {
+    const sourceDir = path.join(sourceRoot, pluginId);
+    if (!existsSync(sourceDir)) {
+      throw new Error(`qa bundled plugin not found: ${pluginId} (${sourceDir})`);
+    }
+    // Plugin discovery walks real directories; copying avoids symlink-only
+    // trees being skipped by Dirent-based scans in the child runtime.
+    await fs.cp(sourceDir, path.join(bundledPluginsDir, pluginId), { recursive: true });
+  }
+  return { bundledPluginsDir, stagedRoot: null };
+}
+
 async function waitForGatewayReady(params: {
  baseUrl: string;
  logs: () => string;
@@ -242,9 +386,28 @@ export async function startQaGatewayChild(params: {
    controlUiEnabled: params.controlUiEnabled,
  });
  await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
+  const allowedPluginIds = (cfg.plugins?.allow ?? []).filter(
+    (pluginId): pluginId is string => typeof pluginId === "string" && pluginId.length > 0,
+  );
+  const bundledPluginsSourceRoot = resolveQaBundledPluginsSourceRoot(params.repoRoot);
+  const { bundledPluginsDir, stagedRoot: stagedBundledPluginsRoot } =
+    await createQaBundledPluginsDir({
+      repoRoot: params.repoRoot,
+      tempRoot,
+      allowedPluginIds,
+    });
+  const runtimeHostVersion = await resolveQaRuntimeHostVersion({
+    repoRoot: params.repoRoot,
+    bundledPluginsSourceRoot,
+    allowedPluginIds,
+  });

  const stdout: Buffer[] = [];
  const stderr: Buffer[] = [];
+  const stdoutLogPath = path.join(tempRoot, "gateway.stdout.log");
+  const stderrLogPath = path.join(tempRoot, "gateway.stderr.log");
+  const stdoutLog = createWriteStream(stdoutLogPath, { flags: "a" });
+  const stderrLog = createWriteStream(stderrLogPath, { flags: "a" });
  const env = buildQaRuntimeEnv({
    configPath,
    gatewayToken,
@@ -253,6 +416,8 @@ export async function startQaGatewayChild(params: {
    xdgConfigHome,
    xdgDataHome,
    xdgCacheHome,
+    bundledPluginsDir,
+    compatibilityHostVersion: runtimeHostVersion,
    providerMode: params.providerMode,
  });

@@ -274,8 +439,16 @@ export async function startQaGatewayChild(params: {
      stdio: ["ignore", "pipe", "pipe"],
    },
  );
-  child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
-  child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
+  // The log files are end()ed before the child is signalled, so late output is only buffered:
+  // writing to an ended stream emits an unhandled ERR_STREAM_WRITE_AFTER_END error.
+  child.stdout.on("data", (chunk) => {
+    stdout.push(Buffer.from(chunk));
+    if (!stdoutLog.writableEnded) stdoutLog.write(chunk);
+  });
+  child.stderr.on("data", (chunk) => {
+    stderr.push(Buffer.from(chunk));
+    if (!stderrLog.writableEnded) stderrLog.write(chunk);
+  });

  const baseUrl = `http://127.0.0.1:${gatewayPort}`;
  const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
@@ -318,7 +491,12 @@ export async function startQaGatewayChild(params: {
      throw lastRpcError ?? new Error("qa gateway rpc client failed to start");
    }
  } catch (error) {
+    stdoutLog.end();
+    stderrLog.end();
    child.kill("SIGTERM");
+    if (!keepTemp && stagedBundledPluginsRoot) {
+      await fs.rm(stagedBundledPluginsRoot, { recursive: true, force: true }).catch(() => {});
+    }
    throw error;
  }

@@ -370,6 +548,8 @@ export async function startQaGatewayChild(params: {
    },
    async stop(opts?: { keepTemp?: boolean }) {
      await rpcClient.stop().catch(() => {});
+      stdoutLog.end();
+      stderrLog.end();
      if (!child.killed) {
        child.kill("SIGTERM");
        await Promise.race([
@@ -383,6 +563,9 @@ export async function startQaGatewayChild(params: {
      }
      if (!(opts?.keepTemp ?? keepTemp)) {
        await fs.rm(tempRoot, { recursive: true, force: true });
+        if (stagedBundledPluginsRoot) {
+          await fs.rm(stagedBundledPluginsRoot, { recursive: true, force: true });
+        }
      }
    },
  };
--- a/extensions/qa-lab/src/qa-gateway-config.ts
+++ b/extensions/qa-lab/src/qa-gateway-config.ts
@@ -8,28 +8,6 @@ import {
  type QaProviderMode,
 } from "./model-selection.js";

-const DISABLED_BUNDLED_CHANNELS = Object.freeze({
-  bluebubbles: { enabled: false },
-  discord: { enabled: false },
-  feishu: { enabled: false },
-  googlechat: { enabled: false },
-  imessage: { enabled: false },
-  irc: { enabled: false },
-  line: { enabled: false },
-  mattermost: { enabled: false },
-  matrix: { enabled: false },
-  msteams: { enabled: false },
-  qqbot: { enabled: false },
-  signal: { enabled: false },
-  slack: { enabled: false },
-  "synology-chat": { enabled: false },
-  telegram: { enabled: false },
-  tlon: { enabled: false },
-  whatsapp: { enabled: false },
-  zalo: { enabled: false },
-  zalouser: { enabled: false },
-} satisfies Record<string, { enabled: false }>);
-
 export const DEFAULT_QA_CONTROL_UI_ALLOWED_ORIGINS = Object.freeze([
  "http://127.0.0.1:18789",
  "http://localhost:18789",
@@ -273,7 +251,6 @@ export function buildQaGatewayConfig(params: {
      },
    },
    channels: {
-      ...DISABLED_BUNDLED_CHANNELS,
      "qa-channel": {
        enabled: true,
        baseUrl: params.qaBusBaseUrl,
--- a/extensions/qa-lab/src/suite.ts
+++ b/extensions/qa-lab/src/suite.ts
@@ -65,9 +65,12 @@ type QaSuiteEnvironment = {

 const _QA_IMAGE_UNDERSTANDING_PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII=";
-const QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
+const _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAACuklEQVR4Ae3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+2YE/z8AAAAASUVORK5CYII=";

+const QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64 =
+  "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
+
 type QaSkillStatusEntry = {
  name?: string;
  eligible?: boolean;
@@ -170,12 +173,14 @@ async function waitForOutboundMessage(
  state: QaBusState,
  predicate: (message: QaBusMessage) => boolean,
  timeoutMs = 15_000,
+  options?: { sinceIndex?: number },
 ) {
  return await waitForCondition(
    () =>
      state
        .getSnapshot()
        .messages.filter((message) => message.direction === "outbound")
+        .slice(options?.sinceIndex ?? 0)
        .find(predicate),
    timeoutMs,
  );
@@ -1131,9 +1136,19 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
            name: "stores the canary fact",
            run: async () => {
              const config = readScenarioExecutionConfig<{
+                resetDurableMemory?: boolean;
                rememberPrompt?: string;
+                rememberAckAny?: string[];
                recallPrompt?: string;
+                recallExpectedAny?: string[];
              }>("memory-recall");
+              if (config.resetDurableMemory) {
+                const today = formatMemoryDreamingDay(Date.now());
+                await fs.rm(path.join(env.gateway.workspaceDir, "MEMORY.md"), { force: true });
+                await fs.rm(path.join(env.gateway.workspaceDir, "memory", `${today}.md`), {
+                  force: true,
+                });
+              }
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:memory",
@@ -1141,9 +1156,16 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                  config.rememberPrompt ??
                  "Please remember this fact for later: the QA canary code is ALPHA-7.",
              });
+              const rememberAckAny = (config.rememberAckAny ?? ["remembered alpha-7"]).map(
+                (needle) => needle.toLowerCase(),
+              );
              const outbound = await waitForOutboundMessage(
                state,
-                (candidate) => candidate.conversation.id === "qa-operator",
+                (candidate) =>
+                  candidate.conversation.id === "qa-operator" &&
+                  rememberAckAny.some((needle) =>
+                    normalizeLowercaseStringOrEmpty(candidate.text).includes(needle),
+                  ),
              );
              return outbound.text;
            },
@@ -1152,8 +1174,11 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
            name: "recalls the same fact later",
            run: async () => {
              const config = readScenarioExecutionConfig<{
+                resetDurableMemory?: boolean;
                rememberPrompt?: string;
+                rememberAckAny?: string[];
                recallPrompt?: string;
+                recallExpectedAny?: string[];
              }>("memory-recall");
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:memory",
@@ -1161,6 +1186,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                  config.recallPrompt ??
                  "What was the QA canary code I asked you to remember earlier?",
              });
+              const recallExpectedAny = (config.recallExpectedAny ?? ["alpha-7"]).map((needle) =>
+                needle.toLowerCase(),
+              );
              const outbound = await waitForCondition(
                () =>
                  state
@@ -1169,7 +1197,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                      (candidate) =>
                        candidate.direction === "outbound" &&
                        candidate.conversation.id === "qa-operator" &&
-                        candidate.text.includes("ALPHA-7"),
+                        recallExpectedAny.some((needle) =>
+                          normalizeLowercaseStringOrEmpty(candidate.text).includes(needle),
+                        ),
                    )
                    .at(-1),
                20_000,
@@ -2049,6 +2079,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
          {
            name: "prefers the newer transcript-backed fact over the stale durable note",
            run: async () => {
+              const config = readScenarioExecutionConfig<{
+                staleFact?: string;
+                currentFact?: string;
+                transcriptQuestion?: string;
+                transcriptAnswer?: string;
+                prompt?: string;
+              }>("session-memory-ranking");
+              const staleFact = config.staleFact ?? "ORBIT-9";
+              const currentFact = config.currentFact ?? "ORBIT-10";
              const original = await readConfigSnapshot(env);
              const originalMemorySearch =
                original.config.agents &&
@@ -2090,7 +2129,11 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
              await waitForQaChannelReady(env, 60_000);
              try {
                const memoryPath = path.join(env.gateway.workspaceDir, "MEMORY.md");
-                await fs.writeFile(memoryPath, "Project Nebula stale codename: ORBIT-9.\n", "utf8");
+                await fs.writeFile(
+                  memoryPath,
+                  `Project Nebula stale codename: ${staleFact}.\n`,
+                  "utf8",
+                );
                const staleAt = new Date("2020-01-01T00:00:00.000Z");
                await fs.utimes(memoryPath, staleAt, staleAt);
                const transcriptsDir = resolveSessionTranscriptsDirForAgent(
@@ -2117,7 +2160,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                        content: [
                          {
                            type: "text",
-                            text: "What is the current Project Nebula codename?",
+                            text:
+                              config.transcriptQuestion ??
+                              "What is the current Project Nebula codename?",
                          },
                        ],
                      },
@@ -2130,7 +2175,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                        content: [
                          {
                            type: "text",
-                            text: "The current Project Nebula codename is ORBIT-10.",
+                            text:
+                              config.transcriptAnswer ??
+                              `The current Project Nebula codename is ${currentFact}.`,
                          },
                        ],
                      },
@@ -2140,26 +2187,27 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                );
                await forceMemoryIndex({
                  env,
-                  query: "current Project Nebula codename ORBIT-10",
-                  expectedNeedle: "ORBIT-10",
+                  query: `current Project Nebula codename ${currentFact}`,
+                  expectedNeedle: currentFact,
                });
                await reset();
                await runAgentPrompt(env, {
                  sessionKey: "agent:qa:session-memory-ranking",
                  message:
-                    "Session memory ranking check: what is the current Project Nebula codename? Use memory tools first.",
+                    config.prompt ??
+                    `Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact.`,
                  timeoutMs: liveTurnTimeoutMs(env, 45_000),
                });
                const outbound = await waitForOutboundMessage(
                  state,
                  (candidate) =>
                    candidate.conversation.id === "qa-operator" &&
-                    candidate.text.includes("ORBIT-10"),
+                    candidate.text.includes(currentFact),
                  liveTurnTimeoutMs(env, 45_000),
                );
                const lower = normalizeLowercaseStringOrEmpty(outbound.text);
                const staleLeak =
-                  outbound.text.includes("ORBIT-9") &&
+                  outbound.text.includes(staleFact) &&
                  !lower.includes("stale") &&
                  !lower.includes("older") &&
                  !lower.includes("previous");
@@ -2380,18 +2428,23 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
              const config = readScenarioExecutionConfig<{
                prompt?: string;
                expectedContains?: string;
+                skillName?: string;
+                skillBody?: string;
              }>("skill-visibility-invocation");
+              const skillName = config.skillName ?? "qa-visible-skill";
              await writeWorkspaceSkill({
                env,
-                name: "qa-visible-skill",
-                body: `---
+                name: skillName,
+                body:
+                  config.skillBody ??
+                  `---
 name: qa-visible-skill
 description: Visible QA skill marker
 ---
 When the user asks for the visible skill marker exactly, reply with exactly: VISIBLE-SKILL-OK`,
              });
              const skills = await readSkillStatus(env);
-              const visible = findSkill(skills, "qa-visible-skill");
+              const visible = findSkill(skills, skillName);
              if (!visible?.eligible || visible.disabled || visible.blockedByAllowlist) {
                throw new Error(`skill not visible/eligible: ${JSON.stringify(visible)}`);
              }
@@ -2635,16 +2688,24 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
          {
            name: "describes an attached image in one short sentence",
            run: async () => {
+              const config = readScenarioExecutionConfig<{
+                prompt?: string;
+                requiredColorGroups?: string[][];
+              }>("image-understanding-attachment");
              await reset();
+              const outboundStartIndex = state
+                .getSnapshot()
+                .messages.filter((message) => message.direction === "outbound").length;
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:image-understanding",
                message:
+                  config.prompt ??
                  "Image understanding check: describe the top and bottom colors in the attached image in one short sentence.",
                attachments: [
                  {
                    mimeType: "image/png",
                    fileName: "red-top-blue-bottom.png",
-                    content: QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
+                    content: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
                  },
                ],
                timeoutMs: liveTurnTimeoutMs(env, 45_000),
@@ -2653,9 +2714,17 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
                state,
                (candidate) => candidate.conversation.id === "qa-operator",
                liveTurnTimeoutMs(env, 45_000),
+                { sinceIndex: outboundStartIndex },
              );
              const lower = normalizeLowercaseStringOrEmpty(outbound.text);
-              if (!lower.includes("red") || !lower.includes("blue")) {
+              const requiredColorGroups = config.requiredColorGroups ?? [
+                ["red", "scarlet", "crimson"],
+                ["blue", "azure", "teal", "cyan", "aqua"],
+              ];
+              const missingColorGroup = requiredColorGroups.find(
+                (group) => !group.some((candidate) => lower.includes(candidate)),
+              );
+              if (missingColorGroup) {
                throw new Error(`missing expected colors in image description: ${outbound.text}`);
              }
              if (env.mock) {
@@ -2835,6 +2904,11 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
          {
            name: "restores image_generate after restart and uses it in the same session",
            run: async () => {
+              const config = readScenarioExecutionConfig<{
+                setupPrompt?: string;
+                imagePrompt?: string;
+                imagePromptSnippet?: string;
+              }>("config-restart-capability-flip");
              await ensureImageGenerationConfigured(env);
              const original = await readConfigSnapshot(env);
              const originalTools =
@@ -2868,6 +2942,7 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
                await runAgentPrompt(env, {
                  sessionKey,
                  message:
+                    config.setupPrompt ??
                    "Capability flip setup: acknowledge this setup so restart wake-up has a route.",
                  timeoutMs: liveTurnTimeoutMs(env, 30_000),
                });
@@ -2907,12 +2982,13 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
                await runAgentPrompt(env, {
                  sessionKey,
                  message:
+                    config.imagePrompt ??
                    "Capability flip image check: generate a QA lighthouse image now and keep the media path in the reply.",
                  timeoutMs: liveTurnTimeoutMs(env, 45_000),
                });
                const mediaPath = await resolveGeneratedImagePath({
                  env,
-                  promptSnippet: "Capability flip image check",
+                  promptSnippet: config.imagePromptSnippet ?? "Capability flip image check",
                  startedAtMs: imageStartedAtMs,
                  timeoutMs: liveTurnTimeoutMs(env, 45_000),
                });
--- a/qa/scenarios/approval-turn-tool-followthrough.md
+++ b/qa/scenarios/approval-turn-tool-followthrough.md
@@ -27,4 +27,9 @@ execution:
      - qa
      - mission
      - testing
+      - repo
+      - worked
+      - failed
+      - blocked
+      - chat flows
 ```
--- a/qa/scenarios/config-restart-capability-flip.md
+++ b/qa/scenarios/config-restart-capability-flip.md
@@ -22,4 +22,8 @@ execution:
  kind: custom
  handler: config-restart-capability-flip
  summary: Verify a restart-triggering config change flips capability inventory and the same session successfully uses the newly restored tool after wake-up.
+  config:
+    setupPrompt: "Capability flip setup: acknowledge this setup so restart wake-up has a route."
+    imagePrompt: "Capability flip image check: generate a QA lighthouse image in this turn right now. Do not acknowledge first, do not promise future work, and do not stop before using image_generate. Final reply must include the MEDIA path."
+    imagePromptSnippet: "Capability flip image check"
 ```
--- a/qa/scenarios/image-understanding-attachment.md
+++ b/qa/scenarios/image-understanding-attachment.md
@@ -20,4 +20,9 @@ execution:
  kind: custom
  handler: image-understanding-attachment
  summary: Verify an attached image reaches the agent model and the agent can describe what it sees.
+  config:
+    prompt: "Image understanding check: describe the top and bottom colors in the attached image in one short sentence."
+    requiredColorGroups:
+      - [red, scarlet, crimson]
+      - [blue, azure, teal, cyan, aqua]
 ```
--- a/qa/scenarios/memory-recall.md
+++ b/qa/scenarios/memory-recall.md
@@ -18,6 +18,11 @@ execution:
  handler: memory-recall
  summary: Verify the agent can store a fact, switch topics, then recall the fact accurately later.
  config:
-    rememberPrompt: "Please remember this fact for later: the QA canary code is ALPHA-7."
-    recallPrompt: "What was the QA canary code I asked you to remember earlier?"
+    resetDurableMemory: true
+    rememberPrompt: "Please remember this fact for later: the QA canary code is ALPHA-7. Use your normal memory mechanism, avoid manual repo cleanup, and reply exactly `Remembered ALPHA-7.` once stored."
+    rememberAckAny:
+      - remembered alpha-7
+    recallPrompt: "What was the QA canary code I asked you to remember earlier? Reply with the code only, plus at most one short sentence."
+    recallExpectedAny:
+      - alpha-7
 ```
--- a/qa/scenarios/session-memory-ranking.md
+++ b/qa/scenarios/session-memory-ranking.md
@@ -20,4 +20,10 @@ execution:
  kind: custom
  handler: session-memory-ranking
  summary: Verify session-transcript memory can outrank stale durable notes and drive the final answer toward the newer fact.
+  config:
+    staleFact: ORBIT-9
+    currentFact: ORBIT-10
+    transcriptQuestion: "What is the current Project Nebula codename?"
+    transcriptAnswer: "The current Project Nebula codename is ORBIT-10."
+    prompt: "Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact."
 ```
--- a/qa/scenarios/skill-visibility-invocation.md
+++ b/qa/scenarios/skill-visibility-invocation.md
@@ -20,6 +20,13 @@ execution:
  handler: skill-visibility-invocation
  summary: Verify a workspace skill becomes visible in skills.status and influences the next agent turn.
  config:
-    prompt: "Visible skill marker: give me the visible skill marker exactly."
+    skillName: qa-visible-skill
+    skillBody: |-
+      ---
+      name: qa-visible-skill
+      description: Visible QA skill marker
+      ---
+      When the user asks for the visible skill marker exactly, or explicitly asks you to use qa-visible-skill, reply with exactly: VISIBLE-SKILL-OK
+    prompt: "Use qa-visible-skill now. Reply exactly with the visible skill marker and nothing else."
    expectedContains: "VISIBLE-SKILL-OK"
 ```
--- a/src/version.test.ts
+++ b/src/version.test.ts
@@ -161,6 +161,17 @@ describe("version resolution", () => {
    ).toBe("2026.3.99");
  });

+  it("prefers explicit compatibility host overrides over runtime and stale env versions", () => {
+    // The explicit override is expected to win even though the other env version variables are set.
+    const env = {
+      OPENCLAW_COMPATIBILITY_HOST_VERSION: "2026.4.8",
+      OPENCLAW_VERSION: "2026.3.99",
+      OPENCLAW_SERVICE_VERSION: "2026.3.98",
+      npm_package_version: "2026.3.97",
+    };
+    expect(resolveCompatibilityHostVersion(env)).toBe("2026.4.8");
+  });
+
  it("normalizes runtime version candidate for fallback handling", () => {
    expect(resolveUsableRuntimeVersion(undefined)).toBeUndefined();
    expect(resolveUsableRuntimeVersion("")).toBeUndefined();
--- a/src/version.ts
+++ b/src/version.ts
@@ -139,6 +139,10 @@ export function resolveCompatibilityHostVersion(
  env: RuntimeVersionEnv = process.env as RuntimeVersionEnv,
  fallback = RUNTIME_SERVICE_VERSION_FALLBACK,
 ): string {
+  const explicitCompatibilityVersion = firstNonEmpty(env.OPENCLAW_COMPATIBILITY_HOST_VERSION);
+  if (explicitCompatibilityVersion) {
+    return explicitCompatibilityVersion;
+  }
  return resolveVersionFromRuntimeSources({
    env,
    runtimeVersion: resolveUsableRuntimeVersion(VERSION),