fix: stabilize live qa scenario suite

This commit is contained in:
Peter Steinberger
2026-04-08 08:17:27 +01:00
parent 55cbcd829d
commit 21d9bac5ec
14 changed files with 452 additions and 49 deletions

View File

@@ -20,6 +20,7 @@ describe("buildPromptSection", () => {
expect(result[0]).toBe("## Memory Recall");
expect(result[1]).toContain("run memory_search");
expect(result[1]).toContain("then use memory_get");
expect(result[1]).toContain("indexed session transcripts");
expect(result).toContain(
"Citations: include Source: <path#line> when it helps the user verify memory snippets.",
);
@@ -30,6 +31,7 @@ describe("buildPromptSection", () => {
const result = buildPromptSection({ availableTools: new Set(["memory_search"]) });
expect(result[0]).toBe("## Memory Recall");
expect(result[1]).toContain("run memory_search");
expect(result[1]).toContain("indexed session transcripts");
expect(result[1]).not.toContain("then use memory_get");
});

View File

@@ -14,10 +14,10 @@ export const buildPromptSection: MemoryPromptSectionBuilder = ({
let toolGuidance: string;
if (hasMemorySearch && hasMemoryGet) {
toolGuidance =
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.";
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md + indexed session transcripts; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.";
} else if (hasMemorySearch) {
toolGuidance =
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md and answer from the matching results. If low confidence after search, say you checked.";
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md + indexed session transcripts and answer from the matching results. If low confidence after search, say you checked.";
} else {
toolGuidance =
"Before answering anything about prior work, decisions, dates, people, preferences, or todos that already point to a specific memory file or note: run memory_get to pull only the needed lines. If low confidence after reading them, say you checked.";

View File

@@ -1,4 +1,4 @@
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
import { lstat, mkdir, mkdtemp, readdir, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
@@ -21,6 +21,8 @@ function createParams(baseEnv?: NodeJS.ProcessEnv) {
xdgConfigHome: "/tmp/openclaw-qa/xdg-config",
xdgDataHome: "/tmp/openclaw-qa/xdg-data",
xdgCacheHome: "/tmp/openclaw-qa/xdg-cache",
bundledPluginsDir: "/tmp/openclaw-qa/bundled-plugins",
compatibilityHostVersion: "2026.4.8",
baseEnv,
};
}
@@ -35,6 +37,8 @@ describe("buildQaRuntimeEnv", () => {
expect(env.OPENCLAW_TEST_FAST).toBe("1");
expect(env.OPENCLAW_QA_ALLOW_LOCAL_IMAGE_PROVIDER).toBe("1");
expect(env.OPENCLAW_ALLOW_SLOW_REPLY_TESTS).toBe("1");
expect(env.OPENCLAW_BUNDLED_PLUGINS_DIR).toBe("/tmp/openclaw-qa/bundled-plugins");
expect(env.OPENCLAW_COMPATIBILITY_HOST_VERSION).toBe("2026.4.8");
});
it("maps live frontier key aliases into provider env vars", () => {
@@ -130,3 +134,117 @@ describe("resolveQaControlUiRoot", () => {
expect(resolveQaControlUiRoot({ repoRoot, controlUiEnabled: false })).toBeUndefined();
});
});
// Covers the bundled-plugin helpers exposed through `__testing`: source-root
// selection, staging of a plugin tree limited to allowed plugins, and the
// host-version floor derived from plugin package.json files.
describe("qa bundled plugin dir", () => {
// All three candidate roots (dist, dist-runtime, extensions) exist here; the
// `dist/extensions` one is expected to be returned.
it("prefers the built bundled plugin tree when present", async () => {
const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-root-"));
cleanups.push(async () => {
await rm(repoRoot, { recursive: true, force: true });
});
await mkdir(path.join(repoRoot, "dist", "extensions", "qa-channel"), {
recursive: true,
});
await writeFile(
path.join(repoRoot, "dist", "extensions", "qa-channel", "package.json"),
"{}",
"utf8",
);
await mkdir(path.join(repoRoot, "dist-runtime", "extensions", "qa-channel"), {
recursive: true,
});
await writeFile(
path.join(repoRoot, "dist-runtime", "extensions", "qa-channel", "package.json"),
"{}",
"utf8",
);
await mkdir(path.join(repoRoot, "extensions", "qa-channel"), { recursive: true });
expect(__testing.resolveQaBundledPluginsSourceRoot(repoRoot)).toBe(
path.join(repoRoot, "dist", "extensions"),
);
});
// Builds a `dist` tree with two allowed plugins, one extra plugin, and a file
// next to `extensions`, then checks what ends up in the staged copy.
it("creates a scoped bundled plugin tree for the allowed plugins only", async () => {
const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-scope-"));
cleanups.push(async () => {
await rm(repoRoot, { recursive: true, force: true });
});
await mkdir(path.join(repoRoot, "dist", "extensions", "qa-channel"), { recursive: true });
await mkdir(path.join(repoRoot, "dist", "extensions", "memory-core"), { recursive: true });
await mkdir(path.join(repoRoot, "dist", "extensions", "unused-plugin"), { recursive: true });
await writeFile(path.join(repoRoot, "dist", "shared-chunk-abc123.js"), "export {};\n", "utf8");
const tempRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-target-"));
cleanups.push(async () => {
await rm(tempRoot, { recursive: true, force: true });
});
const { bundledPluginsDir, stagedRoot } = await __testing.createQaBundledPluginsDir({
repoRoot,
tempRoot,
allowedPluginIds: ["qa-channel", "memory-core"],
});
// Only the allowed plugin directories remain; `unused-plugin` is gone.
expect((await readdir(bundledPluginsDir)).toSorted()).toEqual(["memory-core", "qa-channel"]);
// The staged tree lives under the repo's `.artifacts/qa-runtime`, keyed by the
// temp root's basename, rather than inside `tempRoot` itself.
expect(bundledPluginsDir).toBe(
path.join(
repoRoot,
".artifacts",
"qa-runtime",
path.basename(tempRoot),
"dist",
"extensions",
),
);
expect(stagedRoot).toBe(
path.join(repoRoot, ".artifacts", "qa-runtime", path.basename(tempRoot)),
);
// `lstat(...).isDirectory()` is true only for real directories, not symlinks.
expect((await lstat(path.join(bundledPluginsDir, "qa-channel"))).isDirectory()).toBe(true);
expect((await lstat(path.join(bundledPluginsDir, "memory-core"))).isDirectory()).toBe(true);
// The file that sat beside `extensions` in `dist` is present in the staged copy.
await expect(
lstat(
path.join(
repoRoot,
".artifacts",
"qa-runtime",
path.basename(tempRoot),
"dist",
"shared-chunk-abc123.js",
),
),
).resolves.toBeTruthy();
});
// Root package version is 2026.4.7-1; plugin floors are >=2026.4.8 and
// >=2026.4.7, so the highest value (2026.4.8) is the expected result.
it("raises the QA runtime host version to the highest allowed plugin floor", async () => {
const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-runtime-version-"));
cleanups.push(async () => {
await rm(repoRoot, { recursive: true, force: true });
});
await writeFile(
path.join(repoRoot, "package.json"),
JSON.stringify({ version: "2026.4.7-1" }),
"utf8",
);
const bundledRoot = path.join(repoRoot, "extensions");
await mkdir(path.join(bundledRoot, "qa-channel"), { recursive: true });
await writeFile(
path.join(bundledRoot, "qa-channel", "package.json"),
JSON.stringify({ openclaw: { install: { minHostVersion: ">=2026.4.8" } } }),
"utf8",
);
await mkdir(path.join(bundledRoot, "memory-core"), { recursive: true });
await writeFile(
path.join(bundledRoot, "memory-core", "package.json"),
JSON.stringify({ openclaw: { install: { minHostVersion: ">=2026.4.7" } } }),
"utf8",
);
await expect(
__testing.resolveQaRuntimeHostVersion({
repoRoot,
bundledPluginsSourceRoot: bundledRoot,
allowedPluginIds: ["memory-core", "qa-channel"],
}),
).resolves.toBe("2026.4.8");
});
});

View File

@@ -1,6 +1,6 @@
import { spawn } from "node:child_process";
import { randomUUID } from "node:crypto";
import { existsSync } from "node:fs";
import { createWriteStream, existsSync } from "node:fs";
import fs from "node:fs/promises";
import net from "node:net";
import os from "node:os";
@@ -95,6 +95,8 @@ export function buildQaRuntimeEnv(params: {
xdgConfigHome: string;
xdgDataHome: string;
xdgCacheHome: string;
bundledPluginsDir?: string;
compatibilityHostVersion?: string;
providerMode?: "mock-openai" | "live-frontier";
baseEnv?: NodeJS.ProcessEnv;
}) {
@@ -118,6 +120,10 @@ export function buildQaRuntimeEnv(params: {
XDG_CONFIG_HOME: params.xdgConfigHome,
XDG_DATA_HOME: params.xdgDataHome,
XDG_CACHE_HOME: params.xdgCacheHome,
...(params.bundledPluginsDir ? { OPENCLAW_BUNDLED_PLUGINS_DIR: params.bundledPluginsDir } : {}),
...(params.compatibilityHostVersion
? { OPENCLAW_COMPATIBILITY_HOST_VERSION: params.compatibilityHostVersion }
: {}),
};
return normalizeQaProviderModeEnv(env, params.providerMode);
}
@@ -136,7 +142,145 @@ function isRetryableGatewayCallError(details: string): boolean {
// Groups this module's helpers on a single exported object so they can be
// referenced as `__testing.<name>` from outside the module.
export const __testing = {
buildQaRuntimeEnv,
isRetryableGatewayCallError,
resolveQaBundledPluginsSourceRoot,
resolveQaRuntimeHostVersion,
createQaBundledPluginsDir,
};
/**
 * Picks the path that bundled plugins are read from for QA runs.
 *
 * Probes `dist/extensions`, `dist-runtime/extensions`, and `extensions` under
 * `repoRoot`, in that priority order, and returns the first path that exists.
 *
 * @param repoRoot - Root path the candidate locations are resolved against.
 * @returns The first candidate path that exists on disk.
 * @throws Error when none of the candidate paths exists.
 */
function resolveQaBundledPluginsSourceRoot(repoRoot: string) {
  const sourceRoot = [["dist", "extensions"], ["dist-runtime", "extensions"], ["extensions"]]
    .map((segments) => path.join(repoRoot, ...segments))
    .find((location) => existsSync(location));
  if (sourceRoot === undefined) {
    throw new Error("failed to resolve qa bundled plugins source root");
  }
  return sourceRoot;
}
/**
 * Extracts the first `major.minor.patch` triple found in a version-like string.
 *
 * Anything around the triple (range operators such as `>=`, suffixes such as
 * `-1`) is ignored, so `">=2026.4.8"` and `"2026.4.7-1"` both parse.
 *
 * @param value - Version or version-range text; may be undefined or empty.
 * @returns The numeric components plus a `label` built from the matched digit
 *   text, or `null` when `value` is empty or contains no such triple.
 */
function parseStableSemverFloor(value: string | undefined) {
  const found = value ? /(\d+)\.(\d+)\.(\d+)/.exec(value.trim()) : null;
  if (found === null) {
    return null;
  }
  const [, majorText = "", minorText = "", patchText = ""] = found;
  return {
    major: Number.parseInt(majorText, 10),
    minor: Number.parseInt(minorText, 10),
    patch: Number.parseInt(patchText, 10),
    // The label keeps the digits exactly as written (no leading-zero stripping).
    label: `${majorText}.${minorText}.${patchText}`,
  };
}
/**
 * Orders two parsed version floors.
 *
 * A missing (`null`) floor sorts below any present floor, and two missing
 * floors compare equal. Present floors are compared by major, then minor,
 * then patch.
 *
 * @param left - First floor, or `null` when absent.
 * @param right - Second floor, or `null` when absent.
 * @returns A negative number when `left` is lower, a positive number when it
 *   is higher, and `0` when both are equal (or both absent).
 */
function compareSemverFloors(
  left: ReturnType<typeof parseStableSemverFloor>,
  right: ReturnType<typeof parseStableSemverFloor>,
) {
  if (!left) {
    return right ? -1 : 0;
  }
  if (!right) {
    return 1;
  }
  // The first component that differs decides the ordering.
  for (const component of ["major", "minor", "patch"] as const) {
    const delta = left[component] - right[component];
    if (delta !== 0) {
      return delta;
    }
  }
  return 0;
}
/**
 * Computes the host version string to advertise to the QA runtime.
 *
 * Starts from the `version` in `<repoRoot>/package.json` and raises it to the
 * highest `openclaw.install.minHostVersion` declared by any allowed plugin's
 * `package.json` under `bundledPluginsSourceRoot`. Plugins without a
 * `package.json` are ignored.
 *
 * @param params.repoRoot - Directory containing the root `package.json`.
 * @param params.bundledPluginsSourceRoot - Directory with one sub-directory per plugin id.
 * @param params.allowedPluginIds - Plugin ids whose version floors are considered.
 * @returns The selected `major.minor.patch` label, or `undefined` when neither
 *   the root version nor any plugin floor could be parsed.
 * @throws When the root `package.json` cannot be read, or any read manifest is
 *   not valid JSON.
 */
async function resolveQaRuntimeHostVersion(params: {
  repoRoot: string;
  bundledPluginsSourceRoot: string;
  allowedPluginIds: readonly string[];
}) {
  const { repoRoot, bundledPluginsSourceRoot, allowedPluginIds } = params;
  const rootManifest = JSON.parse(
    await fs.readFile(path.join(repoRoot, "package.json"), "utf8"),
  ) as { version?: string };
  let highest = parseStableSemverFloor(rootManifest.version);
  for (const pluginId of allowedPluginIds) {
    const manifestPath = path.join(bundledPluginsSourceRoot, pluginId, "package.json");
    if (existsSync(manifestPath)) {
      const manifest = JSON.parse(await fs.readFile(manifestPath, "utf8")) as {
        openclaw?: {
          install?: {
            minHostVersion?: string;
          };
        };
      };
      const floor = parseStableSemverFloor(manifest.openclaw?.install?.minHostVersion);
      // Only a strictly higher floor replaces the current selection.
      highest = compareSemverFloors(floor, highest) > 0 ? floor : highest;
    }
  }
  return highest?.label;
}
/**
 * Prepares a bundled-plugins directory that contains only the allowed plugins.
 *
 * Two layouts are handled, depending on where the plugin source root was found:
 * - Source root under `dist` or `dist-runtime`: the whole tree is copied to
 *   `<repoRoot>/.artifacts/qa-runtime/<basename of tempRoot>/<tree name>` and
 *   every directory under its `extensions` folder that is not in
 *   `allowedPluginIds` is removed.
 * - Any other source root: each allowed plugin directory is copied into
 *   `<tempRoot>/bundled-plugins`.
 *
 * @param params.repoRoot - Repository root used to locate plugin sources.
 * @param params.tempRoot - Per-run temp directory; its basename names the staged tree.
 * @param params.allowedPluginIds - Plugin ids to keep or copy.
 * @returns `bundledPluginsDir` (the directory holding the plugins) and
 *   `stagedRoot` (the staged tree root under `.artifacts`, or `null` when the
 *   plugins were copied into `tempRoot` instead).
 * @throws Error when no plugin source root exists, or, in the second layout,
 *   when an allowed plugin directory is missing.
 */
async function createQaBundledPluginsDir(params: {
  repoRoot: string;
  tempRoot: string;
  allowedPluginIds: readonly string[];
}) {
  const { repoRoot, tempRoot, allowedPluginIds } = params;
  const sourceRoot = resolveQaBundledPluginsSourceRoot(repoRoot);
  const sourceTreeRoot = path.dirname(sourceRoot);
  const builtTreeRoots = [path.join(repoRoot, "dist"), path.join(repoRoot, "dist-runtime")];

  if (!builtTreeRoots.includes(sourceTreeRoot)) {
    const bundledPluginsDir = path.join(tempRoot, "bundled-plugins");
    await fs.mkdir(bundledPluginsDir, { recursive: true });
    for (const pluginId of allowedPluginIds) {
      const pluginSourceDir = path.join(sourceRoot, pluginId);
      if (!existsSync(pluginSourceDir)) {
        throw new Error(`qa bundled plugin not found: ${pluginId} (${pluginSourceDir})`);
      }
      // Copy rather than symlink: plugin discovery walks real directories, and
      // Dirent-based scans in the child runtime skip symlink-only trees.
      await fs.cp(pluginSourceDir, path.join(bundledPluginsDir, pluginId), { recursive: true });
    }
    return {
      bundledPluginsDir,
      stagedRoot: null,
    };
  }

  const stagedRoot = path.join(repoRoot, ".artifacts", "qa-runtime", path.basename(tempRoot));
  // Start from an empty staging directory so nothing from an earlier run remains.
  await fs.rm(stagedRoot, { recursive: true, force: true });
  await fs.mkdir(stagedRoot, { recursive: true });
  const stagedTreeRoot = path.join(stagedRoot, path.basename(sourceTreeRoot));
  await fs.cp(sourceTreeRoot, stagedTreeRoot, { recursive: true });
  const stagedExtensionsDir = path.join(stagedTreeRoot, "extensions");
  const stagedEntries = await fs.readdir(stagedExtensionsDir, { withFileTypes: true });
  for (const entry of stagedEntries) {
    // Non-directory entries are left untouched; only disallowed plugin
    // directories are pruned from the staged copy.
    if (entry.isDirectory() && !allowedPluginIds.includes(entry.name)) {
      await fs.rm(path.join(stagedExtensionsDir, entry.name), { recursive: true, force: true });
    }
  }
  return {
    bundledPluginsDir: stagedExtensionsDir,
    stagedRoot,
  };
}
async function waitForGatewayReady(params: {
baseUrl: string;
logs: () => string;
@@ -242,9 +386,28 @@ export async function startQaGatewayChild(params: {
controlUiEnabled: params.controlUiEnabled,
});
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
const allowedPluginIds = (cfg.plugins?.allow ?? []).filter(
(pluginId): pluginId is string => typeof pluginId === "string" && pluginId.length > 0,
);
const bundledPluginsSourceRoot = resolveQaBundledPluginsSourceRoot(params.repoRoot);
const { bundledPluginsDir, stagedRoot: stagedBundledPluginsRoot } =
await createQaBundledPluginsDir({
repoRoot: params.repoRoot,
tempRoot,
allowedPluginIds,
});
const runtimeHostVersion = await resolveQaRuntimeHostVersion({
repoRoot: params.repoRoot,
bundledPluginsSourceRoot,
allowedPluginIds,
});
const stdout: Buffer[] = [];
const stderr: Buffer[] = [];
const stdoutLogPath = path.join(tempRoot, "gateway.stdout.log");
const stderrLogPath = path.join(tempRoot, "gateway.stderr.log");
const stdoutLog = createWriteStream(stdoutLogPath, { flags: "a" });
const stderrLog = createWriteStream(stderrLogPath, { flags: "a" });
const env = buildQaRuntimeEnv({
configPath,
gatewayToken,
@@ -253,6 +416,8 @@ export async function startQaGatewayChild(params: {
xdgConfigHome,
xdgDataHome,
xdgCacheHome,
bundledPluginsDir,
compatibilityHostVersion: runtimeHostVersion,
providerMode: params.providerMode,
});
@@ -274,8 +439,16 @@ export async function startQaGatewayChild(params: {
stdio: ["ignore", "pipe", "pipe"],
},
);
child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
child.stdout.on("data", (chunk) => {
const buffer = Buffer.from(chunk);
stdout.push(buffer);
stdoutLog.write(buffer);
});
child.stderr.on("data", (chunk) => {
const buffer = Buffer.from(chunk);
stderr.push(buffer);
stderrLog.write(buffer);
});
const baseUrl = `http://127.0.0.1:${gatewayPort}`;
const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
@@ -318,7 +491,12 @@ export async function startQaGatewayChild(params: {
throw lastRpcError ?? new Error("qa gateway rpc client failed to start");
}
} catch (error) {
stdoutLog.end();
stderrLog.end();
child.kill("SIGTERM");
if (!keepTemp && stagedBundledPluginsRoot) {
await fs.rm(stagedBundledPluginsRoot, { recursive: true, force: true }).catch(() => {});
}
throw error;
}
@@ -370,6 +548,8 @@ export async function startQaGatewayChild(params: {
},
async stop(opts?: { keepTemp?: boolean }) {
await rpcClient.stop().catch(() => {});
stdoutLog.end();
stderrLog.end();
if (!child.killed) {
child.kill("SIGTERM");
await Promise.race([
@@ -383,6 +563,9 @@ export async function startQaGatewayChild(params: {
}
if (!(opts?.keepTemp ?? keepTemp)) {
await fs.rm(tempRoot, { recursive: true, force: true });
if (stagedBundledPluginsRoot) {
await fs.rm(stagedBundledPluginsRoot, { recursive: true, force: true });
}
}
},
};

View File

@@ -8,28 +8,6 @@ import {
type QaProviderMode,
} from "./model-selection.js";
// Frozen map from channel id to `{ enabled: false }`. It is spread into the
// `channels` section of the QA gateway config so each listed channel is
// explicitly marked as disabled there.
const DISABLED_BUNDLED_CHANNELS = Object.freeze({
bluebubbles: { enabled: false },
discord: { enabled: false },
feishu: { enabled: false },
googlechat: { enabled: false },
imessage: { enabled: false },
irc: { enabled: false },
line: { enabled: false },
mattermost: { enabled: false },
matrix: { enabled: false },
msteams: { enabled: false },
qqbot: { enabled: false },
signal: { enabled: false },
slack: { enabled: false },
"synology-chat": { enabled: false },
telegram: { enabled: false },
tlon: { enabled: false },
whatsapp: { enabled: false },
zalo: { enabled: false },
zalouser: { enabled: false },
} satisfies Record<string, { enabled: false }>);
export const DEFAULT_QA_CONTROL_UI_ALLOWED_ORIGINS = Object.freeze([
"http://127.0.0.1:18789",
"http://localhost:18789",
@@ -273,7 +251,6 @@ export function buildQaGatewayConfig(params: {
},
},
channels: {
...DISABLED_BUNDLED_CHANNELS,
"qa-channel": {
enabled: true,
baseUrl: params.qaBusBaseUrl,

View File

@@ -65,9 +65,12 @@ type QaSuiteEnvironment = {
const _QA_IMAGE_UNDERSTANDING_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII=";
const QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
const _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAACuklEQVR4Ae3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+2YE/z8AAAAASUVORK5CYII=";
const QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
type QaSkillStatusEntry = {
name?: string;
eligible?: boolean;
@@ -170,12 +173,14 @@ async function waitForOutboundMessage(
state: QaBusState,
predicate: (message: QaBusMessage) => boolean,
timeoutMs = 15_000,
options?: { sinceIndex?: number },
) {
return await waitForCondition(
() =>
state
.getSnapshot()
.messages.filter((message) => message.direction === "outbound")
.slice(options?.sinceIndex ?? 0)
.find(predicate),
timeoutMs,
);
@@ -1131,9 +1136,19 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
name: "stores the canary fact",
run: async () => {
const config = readScenarioExecutionConfig<{
resetDurableMemory?: boolean;
rememberPrompt?: string;
rememberAckAny?: string[];
recallPrompt?: string;
recallExpectedAny?: string[];
}>("memory-recall");
if (config.resetDurableMemory) {
const today = formatMemoryDreamingDay(Date.now());
await fs.rm(path.join(env.gateway.workspaceDir, "MEMORY.md"), { force: true });
await fs.rm(path.join(env.gateway.workspaceDir, "memory", `${today}.md`), {
force: true,
});
}
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:memory",
@@ -1141,9 +1156,16 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
config.rememberPrompt ??
"Please remember this fact for later: the QA canary code is ALPHA-7.",
});
const rememberAckAny = (config.rememberAckAny ?? ["remembered alpha-7"]).map(
(needle) => needle.toLowerCase(),
);
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-operator",
(candidate) =>
candidate.conversation.id === "qa-operator" &&
rememberAckAny.some((needle) =>
normalizeLowercaseStringOrEmpty(candidate.text).includes(needle),
),
);
return outbound.text;
},
@@ -1152,8 +1174,11 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
name: "recalls the same fact later",
run: async () => {
const config = readScenarioExecutionConfig<{
resetDurableMemory?: boolean;
rememberPrompt?: string;
rememberAckAny?: string[];
recallPrompt?: string;
recallExpectedAny?: string[];
}>("memory-recall");
await runAgentPrompt(env, {
sessionKey: "agent:qa:memory",
@@ -1161,6 +1186,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
config.recallPrompt ??
"What was the QA canary code I asked you to remember earlier?",
});
const recallExpectedAny = (config.recallExpectedAny ?? ["alpha-7"]).map((needle) =>
needle.toLowerCase(),
);
const outbound = await waitForCondition(
() =>
state
@@ -1169,7 +1197,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
(candidate) =>
candidate.direction === "outbound" &&
candidate.conversation.id === "qa-operator" &&
candidate.text.includes("ALPHA-7"),
recallExpectedAny.some((needle) =>
normalizeLowercaseStringOrEmpty(candidate.text).includes(needle),
),
)
.at(-1),
20_000,
@@ -2049,6 +2079,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
{
name: "prefers the newer transcript-backed fact over the stale durable note",
run: async () => {
const config = readScenarioExecutionConfig<{
staleFact?: string;
currentFact?: string;
transcriptQuestion?: string;
transcriptAnswer?: string;
prompt?: string;
}>("session-memory-ranking");
const staleFact = config.staleFact ?? "ORBIT-9";
const currentFact = config.currentFact ?? "ORBIT-10";
const original = await readConfigSnapshot(env);
const originalMemorySearch =
original.config.agents &&
@@ -2090,7 +2129,11 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
await waitForQaChannelReady(env, 60_000);
try {
const memoryPath = path.join(env.gateway.workspaceDir, "MEMORY.md");
await fs.writeFile(memoryPath, "Project Nebula stale codename: ORBIT-9.\n", "utf8");
await fs.writeFile(
memoryPath,
`Project Nebula stale codename: ${staleFact}.\n`,
"utf8",
);
const staleAt = new Date("2020-01-01T00:00:00.000Z");
await fs.utimes(memoryPath, staleAt, staleAt);
const transcriptsDir = resolveSessionTranscriptsDirForAgent(
@@ -2117,7 +2160,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
content: [
{
type: "text",
text: "What is the current Project Nebula codename?",
text:
config.transcriptQuestion ??
"What is the current Project Nebula codename?",
},
],
},
@@ -2130,7 +2175,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
content: [
{
type: "text",
text: "The current Project Nebula codename is ORBIT-10.",
text:
config.transcriptAnswer ??
`The current Project Nebula codename is ${currentFact}.`,
},
],
},
@@ -2140,26 +2187,27 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
);
await forceMemoryIndex({
env,
query: "current Project Nebula codename ORBIT-10",
expectedNeedle: "ORBIT-10",
query: `current Project Nebula codename ${currentFact}`,
expectedNeedle: currentFact,
});
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:session-memory-ranking",
message:
"Session memory ranking check: what is the current Project Nebula codename? Use memory tools first.",
config.prompt ??
`Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact.`,
timeoutMs: liveTurnTimeoutMs(env, 45_000),
});
const outbound = await waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-operator" &&
candidate.text.includes("ORBIT-10"),
candidate.text.includes(currentFact),
liveTurnTimeoutMs(env, 45_000),
);
const lower = normalizeLowercaseStringOrEmpty(outbound.text);
const staleLeak =
outbound.text.includes("ORBIT-9") &&
outbound.text.includes(staleFact) &&
!lower.includes("stale") &&
!lower.includes("older") &&
!lower.includes("previous");
@@ -2380,18 +2428,23 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
const config = readScenarioExecutionConfig<{
prompt?: string;
expectedContains?: string;
skillName?: string;
skillBody?: string;
}>("skill-visibility-invocation");
const skillName = config.skillName ?? "qa-visible-skill";
await writeWorkspaceSkill({
env,
name: "qa-visible-skill",
body: `---
name: skillName,
body:
config.skillBody ??
`---
name: qa-visible-skill
description: Visible QA skill marker
---
When the user asks for the visible skill marker exactly, reply with exactly: VISIBLE-SKILL-OK`,
});
const skills = await readSkillStatus(env);
const visible = findSkill(skills, "qa-visible-skill");
const visible = findSkill(skills, skillName);
if (!visible?.eligible || visible.disabled || visible.blockedByAllowlist) {
throw new Error(`skill not visible/eligible: ${JSON.stringify(visible)}`);
}
@@ -2635,16 +2688,24 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
{
name: "describes an attached image in one short sentence",
run: async () => {
const config = readScenarioExecutionConfig<{
prompt?: string;
requiredColorGroups?: string[][];
}>("image-understanding-attachment");
await reset();
const outboundStartIndex = state
.getSnapshot()
.messages.filter((message) => message.direction === "outbound").length;
await runAgentPrompt(env, {
sessionKey: "agent:qa:image-understanding",
message:
config.prompt ??
"Image understanding check: describe the top and bottom colors in the attached image in one short sentence.",
attachments: [
{
mimeType: "image/png",
fileName: "red-top-blue-bottom.png",
content: QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
content: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
},
],
timeoutMs: liveTurnTimeoutMs(env, 45_000),
@@ -2653,9 +2714,17 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
state,
(candidate) => candidate.conversation.id === "qa-operator",
liveTurnTimeoutMs(env, 45_000),
{ sinceIndex: outboundStartIndex },
);
const lower = normalizeLowercaseStringOrEmpty(outbound.text);
if (!lower.includes("red") || !lower.includes("blue")) {
const requiredColorGroups = config.requiredColorGroups ?? [
["red", "scarlet", "crimson"],
["blue", "azure", "teal", "cyan", "aqua"],
];
const missingColorGroup = requiredColorGroups.find(
(group) => !group.some((candidate) => lower.includes(candidate)),
);
if (missingColorGroup) {
throw new Error(`missing expected colors in image description: ${outbound.text}`);
}
if (env.mock) {
@@ -2835,6 +2904,11 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
{
name: "restores image_generate after restart and uses it in the same session",
run: async () => {
const config = readScenarioExecutionConfig<{
setupPrompt?: string;
imagePrompt?: string;
imagePromptSnippet?: string;
}>("config-restart-capability-flip");
await ensureImageGenerationConfigured(env);
const original = await readConfigSnapshot(env);
const originalTools =
@@ -2868,6 +2942,7 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
await runAgentPrompt(env, {
sessionKey,
message:
config.setupPrompt ??
"Capability flip setup: acknowledge this setup so restart wake-up has a route.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
@@ -2907,12 +2982,13 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
await runAgentPrompt(env, {
sessionKey,
message:
config.imagePrompt ??
"Capability flip image check: generate a QA lighthouse image now and keep the media path in the reply.",
timeoutMs: liveTurnTimeoutMs(env, 45_000),
});
const mediaPath = await resolveGeneratedImagePath({
env,
promptSnippet: "Capability flip image check",
promptSnippet: config.imagePromptSnippet ?? "Capability flip image check",
startedAtMs: imageStartedAtMs,
timeoutMs: liveTurnTimeoutMs(env, 45_000),
});

View File

@@ -27,4 +27,9 @@ execution:
- qa
- mission
- testing
- repo
- worked
- failed
- blocked
- chat flows
```

View File

@@ -22,4 +22,8 @@ execution:
kind: custom
handler: config-restart-capability-flip
summary: Verify a restart-triggering config change flips capability inventory and the same session successfully uses the newly restored tool after wake-up.
config:
setupPrompt: "Capability flip setup: acknowledge this setup so restart wake-up has a route."
imagePrompt: "Capability flip image check: generate a QA lighthouse image in this turn right now. Do not acknowledge first, do not promise future work, and do not stop before using image_generate. Final reply must include the MEDIA path."
imagePromptSnippet: "Capability flip image check"
```

View File

@@ -20,4 +20,9 @@ execution:
kind: custom
handler: image-understanding-attachment
summary: Verify an attached image reaches the agent model and the agent can describe what it sees.
config:
prompt: "Image understanding check: describe the top and bottom colors in the attached image in one short sentence."
requiredColorGroups:
- [red, scarlet, crimson]
- [blue, azure, teal, cyan, aqua]
```

View File

@@ -18,6 +18,11 @@ execution:
handler: memory-recall
summary: Verify the agent can store a fact, switch topics, then recall the fact accurately later.
config:
rememberPrompt: "Please remember this fact for later: the QA canary code is ALPHA-7."
recallPrompt: "What was the QA canary code I asked you to remember earlier?"
resetDurableMemory: true
rememberPrompt: "Please remember this fact for later: the QA canary code is ALPHA-7. Use your normal memory mechanism, avoid manual repo cleanup, and reply exactly `Remembered ALPHA-7.` once stored."
rememberAckAny:
- remembered alpha-7
recallPrompt: "What was the QA canary code I asked you to remember earlier? Reply with the code only, plus at most one short sentence."
recallExpectedAny:
- alpha-7
```

View File

@@ -20,4 +20,10 @@ execution:
kind: custom
handler: session-memory-ranking
summary: Verify session-transcript memory can outrank stale durable notes and drive the final answer toward the newer fact.
config:
staleFact: ORBIT-9
currentFact: ORBIT-10
transcriptQuestion: "What is the current Project Nebula codename?"
transcriptAnswer: "The current Project Nebula codename is ORBIT-10."
prompt: "Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact."
```

View File

@@ -20,6 +20,13 @@ execution:
handler: skill-visibility-invocation
summary: Verify a workspace skill becomes visible in skills.status and influences the next agent turn.
config:
prompt: "Visible skill marker: give me the visible skill marker exactly."
skillName: qa-visible-skill
skillBody: |-
---
name: qa-visible-skill
description: Visible QA skill marker
---
When the user asks for the visible skill marker exactly, or explicitly asks you to use qa-visible-skill, reply with exactly: VISIBLE-SKILL-OK
prompt: "Use qa-visible-skill now. Reply exactly with the visible skill marker and nothing else."
expectedContains: "VISIBLE-SKILL-OK"
```

View File

@@ -161,6 +161,17 @@ describe("version resolution", () => {
).toBe("2026.3.99");
});
it("prefers explicit compatibility host overrides over runtime and stale env versions", () => {
  // The explicit override is the newest value; every other version variable
  // carries an older one, so any of them winning would fail the assertion.
  const env = {
    OPENCLAW_COMPATIBILITY_HOST_VERSION: "2026.4.8",
    OPENCLAW_VERSION: "2026.3.99",
    OPENCLAW_SERVICE_VERSION: "2026.3.98",
    npm_package_version: "2026.3.97",
  };
  const resolved = resolveCompatibilityHostVersion(env);
  expect(resolved).toBe("2026.4.8");
});
it("normalizes runtime version candidate for fallback handling", () => {
expect(resolveUsableRuntimeVersion(undefined)).toBeUndefined();
expect(resolveUsableRuntimeVersion("")).toBeUndefined();

View File

@@ -139,6 +139,10 @@ export function resolveCompatibilityHostVersion(
env: RuntimeVersionEnv = process.env as RuntimeVersionEnv,
fallback = RUNTIME_SERVICE_VERSION_FALLBACK,
): string {
const explicitCompatibilityVersion = firstNonEmpty(env.OPENCLAW_COMPATIBILITY_HOST_VERSION);
if (explicitCompatibilityVersion) {
return explicitCompatibilityVersion;
}
return resolveVersionFromRuntimeSources({
env,
runtimeVersion: resolveUsableRuntimeVersion(VERSION),