mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-23 07:01:40 +00:00
fix: stabilize live qa scenario suite
This commit is contained in:
@@ -20,6 +20,7 @@ describe("buildPromptSection", () => {
|
||||
expect(result[0]).toBe("## Memory Recall");
|
||||
expect(result[1]).toContain("run memory_search");
|
||||
expect(result[1]).toContain("then use memory_get");
|
||||
expect(result[1]).toContain("indexed session transcripts");
|
||||
expect(result).toContain(
|
||||
"Citations: include Source: <path#line> when it helps the user verify memory snippets.",
|
||||
);
|
||||
@@ -30,6 +31,7 @@ describe("buildPromptSection", () => {
|
||||
const result = buildPromptSection({ availableTools: new Set(["memory_search"]) });
|
||||
expect(result[0]).toBe("## Memory Recall");
|
||||
expect(result[1]).toContain("run memory_search");
|
||||
expect(result[1]).toContain("indexed session transcripts");
|
||||
expect(result[1]).not.toContain("then use memory_get");
|
||||
});
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ export const buildPromptSection: MemoryPromptSectionBuilder = ({
|
||||
let toolGuidance: string;
|
||||
if (hasMemorySearch && hasMemoryGet) {
|
||||
toolGuidance =
|
||||
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.";
|
||||
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md + indexed session transcripts; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.";
|
||||
} else if (hasMemorySearch) {
|
||||
toolGuidance =
|
||||
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md and answer from the matching results. If low confidence after search, say you checked.";
|
||||
"Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md + indexed session transcripts and answer from the matching results. If low confidence after search, say you checked.";
|
||||
} else {
|
||||
toolGuidance =
|
||||
"Before answering anything about prior work, decisions, dates, people, preferences, or todos that already point to a specific memory file or note: run memory_get to pull only the needed lines. If low confidence after reading them, say you checked.";
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import { lstat, mkdir, mkdtemp, readdir, rm, writeFile } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
@@ -21,6 +21,8 @@ function createParams(baseEnv?: NodeJS.ProcessEnv) {
|
||||
xdgConfigHome: "/tmp/openclaw-qa/xdg-config",
|
||||
xdgDataHome: "/tmp/openclaw-qa/xdg-data",
|
||||
xdgCacheHome: "/tmp/openclaw-qa/xdg-cache",
|
||||
bundledPluginsDir: "/tmp/openclaw-qa/bundled-plugins",
|
||||
compatibilityHostVersion: "2026.4.8",
|
||||
baseEnv,
|
||||
};
|
||||
}
|
||||
@@ -35,6 +37,8 @@ describe("buildQaRuntimeEnv", () => {
|
||||
expect(env.OPENCLAW_TEST_FAST).toBe("1");
|
||||
expect(env.OPENCLAW_QA_ALLOW_LOCAL_IMAGE_PROVIDER).toBe("1");
|
||||
expect(env.OPENCLAW_ALLOW_SLOW_REPLY_TESTS).toBe("1");
|
||||
expect(env.OPENCLAW_BUNDLED_PLUGINS_DIR).toBe("/tmp/openclaw-qa/bundled-plugins");
|
||||
expect(env.OPENCLAW_COMPATIBILITY_HOST_VERSION).toBe("2026.4.8");
|
||||
});
|
||||
|
||||
it("maps live frontier key aliases into provider env vars", () => {
|
||||
@@ -130,3 +134,117 @@ describe("resolveQaControlUiRoot", () => {
|
||||
expect(resolveQaControlUiRoot({ repoRoot, controlUiEnabled: false })).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Exercises the bundled-plugin helpers reached through `__testing`:
 * source-root resolution, scoped staging of allowed plugins, and the
 * host-version floor derived from plugin manifests.
 */
describe("qa bundled plugin dir", () => {
  it("prefers the built bundled plugin tree when present", async () => {
    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-root-"));
    cleanups.push(async () => {
      await rm(repoRoot, { recursive: true, force: true });
    });
    // All three candidate roots exist, so the result shows which one wins.
    await mkdir(path.join(repoRoot, "dist", "extensions", "qa-channel"), {
      recursive: true,
    });
    await writeFile(
      path.join(repoRoot, "dist", "extensions", "qa-channel", "package.json"),
      "{}",
      "utf8",
    );
    await mkdir(path.join(repoRoot, "dist-runtime", "extensions", "qa-channel"), {
      recursive: true,
    });
    await writeFile(
      path.join(repoRoot, "dist-runtime", "extensions", "qa-channel", "package.json"),
      "{}",
      "utf8",
    );
    await mkdir(path.join(repoRoot, "extensions", "qa-channel"), { recursive: true });

    expect(__testing.resolveQaBundledPluginsSourceRoot(repoRoot)).toBe(
      path.join(repoRoot, "dist", "extensions"),
    );
  });

  it("creates a scoped bundled plugin tree for the allowed plugins only", async () => {
    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-scope-"));
    cleanups.push(async () => {
      await rm(repoRoot, { recursive: true, force: true });
    });
    await mkdir(path.join(repoRoot, "dist", "extensions", "qa-channel"), { recursive: true });
    await mkdir(path.join(repoRoot, "dist", "extensions", "memory-core"), { recursive: true });
    await mkdir(path.join(repoRoot, "dist", "extensions", "unused-plugin"), { recursive: true });
    // A file beside `extensions/` that must survive staging.
    await writeFile(path.join(repoRoot, "dist", "shared-chunk-abc123.js"), "export {};\n", "utf8");
    const tempRoot = await mkdtemp(path.join(os.tmpdir(), "qa-bundled-target-"));
    cleanups.push(async () => {
      await rm(tempRoot, { recursive: true, force: true });
    });

    const { bundledPluginsDir, stagedRoot } = await __testing.createQaBundledPluginsDir({
      repoRoot,
      tempRoot,
      allowedPluginIds: ["qa-channel", "memory-core"],
    });

    // Only the allowed plugin directories remain in the staged extensions dir.
    expect((await readdir(bundledPluginsDir)).toSorted()).toEqual(["memory-core", "qa-channel"]);
    expect(bundledPluginsDir).toBe(
      path.join(
        repoRoot,
        ".artifacts",
        "qa-runtime",
        path.basename(tempRoot),
        "dist",
        "extensions",
      ),
    );
    expect(stagedRoot).toBe(
      path.join(repoRoot, ".artifacts", "qa-runtime", path.basename(tempRoot)),
    );
    // lstat does not follow symlinks, so these are real directories, not links.
    expect((await lstat(path.join(bundledPluginsDir, "qa-channel"))).isDirectory()).toBe(true);
    expect((await lstat(path.join(bundledPluginsDir, "memory-core"))).isDirectory()).toBe(true);
    await expect(
      lstat(
        path.join(
          repoRoot,
          ".artifacts",
          "qa-runtime",
          path.basename(tempRoot),
          "dist",
          "shared-chunk-abc123.js",
        ),
      ),
    ).resolves.toBeTruthy();
  });

  it("raises the QA runtime host version to the highest allowed plugin floor", async () => {
    const repoRoot = await mkdtemp(path.join(os.tmpdir(), "qa-runtime-version-"));
    cleanups.push(async () => {
      await rm(repoRoot, { recursive: true, force: true });
    });
    // Root version sits below the highest plugin floor declared further down.
    await writeFile(
      path.join(repoRoot, "package.json"),
      JSON.stringify({ version: "2026.4.7-1" }),
      "utf8",
    );
    const bundledRoot = path.join(repoRoot, "extensions");
    await mkdir(path.join(bundledRoot, "qa-channel"), { recursive: true });
    await writeFile(
      path.join(bundledRoot, "qa-channel", "package.json"),
      JSON.stringify({ openclaw: { install: { minHostVersion: ">=2026.4.8" } } }),
      "utf8",
    );

    await mkdir(path.join(bundledRoot, "memory-core"), { recursive: true });
    await writeFile(
      path.join(bundledRoot, "memory-core", "package.json"),
      JSON.stringify({ openclaw: { install: { minHostVersion: ">=2026.4.7" } } }),
      "utf8",
    );

    await expect(
      __testing.resolveQaRuntimeHostVersion({
        repoRoot,
        bundledPluginsSourceRoot: bundledRoot,
        allowedPluginIds: ["memory-core", "qa-channel"],
      }),
    ).resolves.toBe("2026.4.8");
  });
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { existsSync } from "node:fs";
|
||||
import { createWriteStream, existsSync } from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import net from "node:net";
|
||||
import os from "node:os";
|
||||
@@ -95,6 +95,8 @@ export function buildQaRuntimeEnv(params: {
|
||||
xdgConfigHome: string;
|
||||
xdgDataHome: string;
|
||||
xdgCacheHome: string;
|
||||
bundledPluginsDir?: string;
|
||||
compatibilityHostVersion?: string;
|
||||
providerMode?: "mock-openai" | "live-frontier";
|
||||
baseEnv?: NodeJS.ProcessEnv;
|
||||
}) {
|
||||
@@ -118,6 +120,10 @@ export function buildQaRuntimeEnv(params: {
|
||||
XDG_CONFIG_HOME: params.xdgConfigHome,
|
||||
XDG_DATA_HOME: params.xdgDataHome,
|
||||
XDG_CACHE_HOME: params.xdgCacheHome,
|
||||
...(params.bundledPluginsDir ? { OPENCLAW_BUNDLED_PLUGINS_DIR: params.bundledPluginsDir } : {}),
|
||||
...(params.compatibilityHostVersion
|
||||
? { OPENCLAW_COMPATIBILITY_HOST_VERSION: params.compatibilityHostVersion }
|
||||
: {}),
|
||||
};
|
||||
return normalizeQaProviderModeEnv(env, params.providerMode);
|
||||
}
|
||||
@@ -136,7 +142,145 @@ function isRetryableGatewayCallError(details: string): boolean {
|
||||
/**
 * Test-only handle on helpers from this module, including functions that
 * are not exported on their own.
 */
export const __testing = {
  buildQaRuntimeEnv,
  isRetryableGatewayCallError,
  resolveQaBundledPluginsSourceRoot,
  resolveQaRuntimeHostVersion,
  createQaBundledPluginsDir,
};
|
||||
|
||||
/**
 * Picks the directory that holds the bundled plugin packages for a checkout.
 *
 * Candidates are probed in priority order: `dist/extensions`, then
 * `dist-runtime/extensions`, then `extensions`. The first path that exists wins.
 *
 * @param repoRoot - Root of the repository checkout to probe.
 * @returns The first candidate path that exists on disk.
 * @throws Error when none of the candidate paths exist.
 */
function resolveQaBundledPluginsSourceRoot(repoRoot: string): string {
  const candidates = [
    path.join(repoRoot, "dist", "extensions"),
    path.join(repoRoot, "dist-runtime", "extensions"),
    path.join(repoRoot, "extensions"),
  ];
  const resolved = candidates.find((candidate) => existsSync(candidate));
  if (resolved === undefined) {
    throw new Error("failed to resolve qa bundled plugins source root");
  }
  return resolved;
}
|
||||
|
||||
/**
 * Extracts the first `major.minor.patch` triple from a version-like string.
 *
 * The pattern is unanchored, so surrounding text such as a `>=` range
 * operator or a `-1` suffix is ignored: `">=2026.4.8"` and `"2026.4.8-1"`
 * both yield 2026.4.8.
 *
 * @param value - Version or version-range text; may be undefined or empty.
 * @returns The numeric parts plus a `major.minor.patch` label, or `null`
 *   when `value` is empty/undefined or contains no such triple.
 */
function parseStableSemverFloor(value: string | undefined) {
  if (!value) {
    return null;
  }
  const match = /(\d+)\.(\d+)\.(\d+)/.exec(value.trim());
  if (!match) {
    return null;
  }
  // The defaults only satisfy strict index checks; a successful match always
  // populates all three capture groups.
  const [, major = "", minor = "", patch = ""] = match;
  return {
    major: Number.parseInt(major, 10),
    minor: Number.parseInt(minor, 10),
    patch: Number.parseInt(patch, 10),
    label: `${major}.${minor}.${patch}`,
  };
}
|
||||
|
||||
/**
 * Orders two parsed version floors.
 *
 * A missing (`null`) floor sorts below any parsed floor, and two missing
 * floors compare equal. Parsed floors are compared by major, then minor,
 * then patch.
 *
 * @param left - First floor, or `null` when it could not be parsed.
 * @param right - Second floor, or `null` when it could not be parsed.
 * @returns A negative number when `left` is lower, a positive number when it
 *   is higher, and `0` when both are equal.
 */
function compareSemverFloors(
  left: ReturnType<typeof parseStableSemverFloor>,
  right: ReturnType<typeof parseStableSemverFloor>,
): number {
  if (!left || !right) {
    if (left) {
      return 1;
    }
    if (right) {
      return -1;
    }
    return 0;
  }
  if (left.major !== right.major) {
    return left.major - right.major;
  }
  if (left.minor !== right.minor) {
    return left.minor - right.minor;
  }
  return left.patch - right.patch;
}
|
||||
|
||||
/**
 * Computes the host version to report for a QA run.
 *
 * Starts from the `version` in `<repoRoot>/package.json` and raises it to the
 * highest `openclaw.install.minHostVersion` declared by any allowed plugin's
 * `package.json`. Plugins without a `package.json` are skipped. Versions are
 * reduced to their `major.minor.patch` triple before comparison.
 *
 * @param params.repoRoot - Repository root containing the root `package.json`.
 * @param params.bundledPluginsSourceRoot - Directory holding one folder per plugin.
 * @param params.allowedPluginIds - Plugin folder names to inspect.
 * @returns The selected `major.minor.patch` label, or `undefined` when neither
 *   the root version nor any plugin floor could be parsed.
 * @throws When the root `package.json` cannot be read, or any inspected
 *   `package.json` is not valid JSON.
 */
async function resolveQaRuntimeHostVersion(params: {
  repoRoot: string;
  bundledPluginsSourceRoot: string;
  allowedPluginIds: readonly string[];
}) {
  // Parsed JSON is untrusted: only string values may reach the parser, which
  // calls `.trim()` on its input.
  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  const rootPackageRaw = await fs.readFile(path.join(params.repoRoot, "package.json"), "utf8");
  const rootPackage = JSON.parse(rootPackageRaw) as { version?: unknown } | null;
  let selected = parseStableSemverFloor(stringOrUndefined(rootPackage?.version));

  for (const pluginId of params.allowedPluginIds) {
    const packagePath = path.join(params.bundledPluginsSourceRoot, pluginId, "package.json");
    if (!existsSync(packagePath)) {
      continue;
    }
    const packageRaw = await fs.readFile(packagePath, "utf8");
    const packageJson = JSON.parse(packageRaw) as {
      openclaw?: {
        install?: {
          minHostVersion?: unknown;
        };
      };
    } | null;
    const candidate = parseStableSemverFloor(
      stringOrUndefined(packageJson?.openclaw?.install?.minHostVersion),
    );
    // Only ever raise the floor; an unparsable candidate compares lowest.
    if (compareSemverFloors(candidate, selected) > 0) {
      selected = candidate;
    }
  }

  return selected?.label;
}
|
||||
|
||||
/**
 * Builds a bundled-plugins directory that contains only the allowed plugins.
 *
 * Two modes, chosen by where the plugin source root resolves:
 *
 * - Built tree (`dist` or `dist-runtime`): the whole tree is copied to
 *   `<repoRoot>/.artifacts/qa-runtime/<basename of tempRoot>/<tree name>`.
 *   Any previous staging at that location is removed first. Every directory
 *   under the staged `extensions/` that is not allowed is then deleted.
 *   Files beside `extensions/` are kept.
 * - Otherwise: each allowed plugin is copied into `<tempRoot>/bundled-plugins`.
 *
 * NOTE(review): only the second mode throws when an allowed plugin is
 * missing; the built-tree mode ignores it. Confirm whether that is intended.
 *
 * @param params.repoRoot - Repository root used to locate the plugin sources.
 * @param params.tempRoot - Per-run temp directory; its basename names the staging dir.
 * @param params.allowedPluginIds - Plugin folder names to keep.
 * @returns `bundledPluginsDir` (the scoped extensions directory) and
 *   `stagedRoot` (the staging directory under `.artifacts`, or `null` when
 *   plugins were copied into `tempRoot`).
 * @throws Error when no source root exists, or in the second mode when an
 *   allowed plugin directory is missing.
 */
async function createQaBundledPluginsDir(params: {
  repoRoot: string;
  tempRoot: string;
  allowedPluginIds: readonly string[];
}) {
  const sourceRoot = resolveQaBundledPluginsSourceRoot(params.repoRoot);
  const sourceTreeRoot = path.dirname(sourceRoot);
  const isBuiltTree =
    sourceTreeRoot === path.join(params.repoRoot, "dist") ||
    sourceTreeRoot === path.join(params.repoRoot, "dist-runtime");

  if (isBuiltTree) {
    const stagedRoot = path.join(
      params.repoRoot,
      ".artifacts",
      "qa-runtime",
      path.basename(params.tempRoot),
    );
    // Start from a clean staging directory.
    await fs.rm(stagedRoot, { recursive: true, force: true });
    await fs.mkdir(stagedRoot, { recursive: true });
    const stagedTreeRoot = path.join(stagedRoot, path.basename(sourceTreeRoot));
    await fs.cp(sourceTreeRoot, stagedTreeRoot, { recursive: true });

    const stagedExtensionsDir = path.join(stagedTreeRoot, "extensions");
    const allowed = new Set(params.allowedPluginIds);
    for (const entry of await fs.readdir(stagedExtensionsDir, { withFileTypes: true })) {
      // Only directories are pruned; loose files in `extensions/` stay.
      if (!entry.isDirectory() || allowed.has(entry.name)) {
        continue;
      }
      await fs.rm(path.join(stagedExtensionsDir, entry.name), { recursive: true, force: true });
    }
    return {
      bundledPluginsDir: stagedExtensionsDir,
      stagedRoot,
    };
  }

  const bundledPluginsDir = path.join(params.tempRoot, "bundled-plugins");
  await fs.mkdir(bundledPluginsDir, { recursive: true });
  for (const pluginId of params.allowedPluginIds) {
    const sourceDir = path.join(sourceRoot, pluginId);
    if (!existsSync(sourceDir)) {
      throw new Error(`qa bundled plugin not found: ${pluginId} (${sourceDir})`);
    }
    // Plugin discovery walks real directories; copying avoids symlink-only
    // trees being skipped by Dirent-based scans in the child runtime.
    await fs.cp(sourceDir, path.join(bundledPluginsDir, pluginId), { recursive: true });
  }
  return {
    bundledPluginsDir,
    stagedRoot: null,
  };
}
|
||||
|
||||
async function waitForGatewayReady(params: {
|
||||
baseUrl: string;
|
||||
logs: () => string;
|
||||
@@ -242,9 +386,28 @@ export async function startQaGatewayChild(params: {
|
||||
controlUiEnabled: params.controlUiEnabled,
|
||||
});
|
||||
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
|
||||
const allowedPluginIds = (cfg.plugins?.allow ?? []).filter(
|
||||
(pluginId): pluginId is string => typeof pluginId === "string" && pluginId.length > 0,
|
||||
);
|
||||
const bundledPluginsSourceRoot = resolveQaBundledPluginsSourceRoot(params.repoRoot);
|
||||
const { bundledPluginsDir, stagedRoot: stagedBundledPluginsRoot } =
|
||||
await createQaBundledPluginsDir({
|
||||
repoRoot: params.repoRoot,
|
||||
tempRoot,
|
||||
allowedPluginIds,
|
||||
});
|
||||
const runtimeHostVersion = await resolveQaRuntimeHostVersion({
|
||||
repoRoot: params.repoRoot,
|
||||
bundledPluginsSourceRoot,
|
||||
allowedPluginIds,
|
||||
});
|
||||
|
||||
const stdout: Buffer[] = [];
|
||||
const stderr: Buffer[] = [];
|
||||
const stdoutLogPath = path.join(tempRoot, "gateway.stdout.log");
|
||||
const stderrLogPath = path.join(tempRoot, "gateway.stderr.log");
|
||||
const stdoutLog = createWriteStream(stdoutLogPath, { flags: "a" });
|
||||
const stderrLog = createWriteStream(stderrLogPath, { flags: "a" });
|
||||
const env = buildQaRuntimeEnv({
|
||||
configPath,
|
||||
gatewayToken,
|
||||
@@ -253,6 +416,8 @@ export async function startQaGatewayChild(params: {
|
||||
xdgConfigHome,
|
||||
xdgDataHome,
|
||||
xdgCacheHome,
|
||||
bundledPluginsDir,
|
||||
compatibilityHostVersion: runtimeHostVersion,
|
||||
providerMode: params.providerMode,
|
||||
});
|
||||
|
||||
@@ -274,8 +439,16 @@ export async function startQaGatewayChild(params: {
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
},
|
||||
);
|
||||
child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
|
||||
child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
|
||||
child.stdout.on("data", (chunk) => {
|
||||
const buffer = Buffer.from(chunk);
|
||||
stdout.push(buffer);
|
||||
stdoutLog.write(buffer);
|
||||
});
|
||||
child.stderr.on("data", (chunk) => {
|
||||
const buffer = Buffer.from(chunk);
|
||||
stderr.push(buffer);
|
||||
stderrLog.write(buffer);
|
||||
});
|
||||
|
||||
const baseUrl = `http://127.0.0.1:${gatewayPort}`;
|
||||
const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
|
||||
@@ -318,7 +491,12 @@ export async function startQaGatewayChild(params: {
|
||||
throw lastRpcError ?? new Error("qa gateway rpc client failed to start");
|
||||
}
|
||||
} catch (error) {
|
||||
stdoutLog.end();
|
||||
stderrLog.end();
|
||||
child.kill("SIGTERM");
|
||||
if (!keepTemp && stagedBundledPluginsRoot) {
|
||||
await fs.rm(stagedBundledPluginsRoot, { recursive: true, force: true }).catch(() => {});
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
|
||||
@@ -370,6 +548,8 @@ export async function startQaGatewayChild(params: {
|
||||
},
|
||||
async stop(opts?: { keepTemp?: boolean }) {
|
||||
await rpcClient.stop().catch(() => {});
|
||||
stdoutLog.end();
|
||||
stderrLog.end();
|
||||
if (!child.killed) {
|
||||
child.kill("SIGTERM");
|
||||
await Promise.race([
|
||||
@@ -383,6 +563,9 @@ export async function startQaGatewayChild(params: {
|
||||
}
|
||||
if (!(opts?.keepTemp ?? keepTemp)) {
|
||||
await fs.rm(tempRoot, { recursive: true, force: true });
|
||||
if (stagedBundledPluginsRoot) {
|
||||
await fs.rm(stagedBundledPluginsRoot, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
@@ -8,28 +8,6 @@ import {
|
||||
type QaProviderMode,
|
||||
} from "./model-selection.js";
|
||||
|
||||
/**
 * Frozen map of bundled channel ids to `{ enabled: false }`, intended to be
 * spread into a gateway config's `channels` section to switch each listed
 * channel off.
 */
const DISABLED_BUNDLED_CHANNELS = Object.freeze({
  bluebubbles: { enabled: false },
  discord: { enabled: false },
  feishu: { enabled: false },
  googlechat: { enabled: false },
  imessage: { enabled: false },
  irc: { enabled: false },
  line: { enabled: false },
  mattermost: { enabled: false },
  matrix: { enabled: false },
  msteams: { enabled: false },
  qqbot: { enabled: false },
  signal: { enabled: false },
  slack: { enabled: false },
  "synology-chat": { enabled: false },
  telegram: { enabled: false },
  tlon: { enabled: false },
  whatsapp: { enabled: false },
  zalo: { enabled: false },
  zalouser: { enabled: false },
} satisfies Record<string, { enabled: false }>);
|
||||
|
||||
export const DEFAULT_QA_CONTROL_UI_ALLOWED_ORIGINS = Object.freeze([
|
||||
"http://127.0.0.1:18789",
|
||||
"http://localhost:18789",
|
||||
@@ -273,7 +251,6 @@ export function buildQaGatewayConfig(params: {
|
||||
},
|
||||
},
|
||||
channels: {
|
||||
...DISABLED_BUNDLED_CHANNELS,
|
||||
"qa-channel": {
|
||||
enabled: true,
|
||||
baseUrl: params.qaBusBaseUrl,
|
||||
|
||||
@@ -65,9 +65,12 @@ type QaSuiteEnvironment = {
|
||||
|
||||
const _QA_IMAGE_UNDERSTANDING_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII=";
|
||||
const QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
|
||||
const _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAACuklEQVR4Ae3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+2YE/z8AAAAASUVORK5CYII=";
|
||||
|
||||
const QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
|
||||
|
||||
type QaSkillStatusEntry = {
|
||||
name?: string;
|
||||
eligible?: boolean;
|
||||
@@ -170,12 +173,14 @@ async function waitForOutboundMessage(
|
||||
state: QaBusState,
|
||||
predicate: (message: QaBusMessage) => boolean,
|
||||
timeoutMs = 15_000,
|
||||
options?: { sinceIndex?: number },
|
||||
) {
|
||||
return await waitForCondition(
|
||||
() =>
|
||||
state
|
||||
.getSnapshot()
|
||||
.messages.filter((message) => message.direction === "outbound")
|
||||
.slice(options?.sinceIndex ?? 0)
|
||||
.find(predicate),
|
||||
timeoutMs,
|
||||
);
|
||||
@@ -1131,9 +1136,19 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
name: "stores the canary fact",
|
||||
run: async () => {
|
||||
const config = readScenarioExecutionConfig<{
|
||||
resetDurableMemory?: boolean;
|
||||
rememberPrompt?: string;
|
||||
rememberAckAny?: string[];
|
||||
recallPrompt?: string;
|
||||
recallExpectedAny?: string[];
|
||||
}>("memory-recall");
|
||||
if (config.resetDurableMemory) {
|
||||
const today = formatMemoryDreamingDay(Date.now());
|
||||
await fs.rm(path.join(env.gateway.workspaceDir, "MEMORY.md"), { force: true });
|
||||
await fs.rm(path.join(env.gateway.workspaceDir, "memory", `${today}.md`), {
|
||||
force: true,
|
||||
});
|
||||
}
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:memory",
|
||||
@@ -1141,9 +1156,16 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
config.rememberPrompt ??
|
||||
"Please remember this fact for later: the QA canary code is ALPHA-7.",
|
||||
});
|
||||
const rememberAckAny = (config.rememberAckAny ?? ["remembered alpha-7"]).map(
|
||||
(needle) => needle.toLowerCase(),
|
||||
);
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-operator",
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
rememberAckAny.some((needle) =>
|
||||
normalizeLowercaseStringOrEmpty(candidate.text).includes(needle),
|
||||
),
|
||||
);
|
||||
return outbound.text;
|
||||
},
|
||||
@@ -1152,8 +1174,11 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
name: "recalls the same fact later",
|
||||
run: async () => {
|
||||
const config = readScenarioExecutionConfig<{
|
||||
resetDurableMemory?: boolean;
|
||||
rememberPrompt?: string;
|
||||
rememberAckAny?: string[];
|
||||
recallPrompt?: string;
|
||||
recallExpectedAny?: string[];
|
||||
}>("memory-recall");
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:memory",
|
||||
@@ -1161,6 +1186,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
config.recallPrompt ??
|
||||
"What was the QA canary code I asked you to remember earlier?",
|
||||
});
|
||||
const recallExpectedAny = (config.recallExpectedAny ?? ["alpha-7"]).map((needle) =>
|
||||
needle.toLowerCase(),
|
||||
);
|
||||
const outbound = await waitForCondition(
|
||||
() =>
|
||||
state
|
||||
@@ -1169,7 +1197,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
(candidate) =>
|
||||
candidate.direction === "outbound" &&
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.includes("ALPHA-7"),
|
||||
recallExpectedAny.some((needle) =>
|
||||
normalizeLowercaseStringOrEmpty(candidate.text).includes(needle),
|
||||
),
|
||||
)
|
||||
.at(-1),
|
||||
20_000,
|
||||
@@ -2049,6 +2079,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
{
|
||||
name: "prefers the newer transcript-backed fact over the stale durable note",
|
||||
run: async () => {
|
||||
const config = readScenarioExecutionConfig<{
|
||||
staleFact?: string;
|
||||
currentFact?: string;
|
||||
transcriptQuestion?: string;
|
||||
transcriptAnswer?: string;
|
||||
prompt?: string;
|
||||
}>("session-memory-ranking");
|
||||
const staleFact = config.staleFact ?? "ORBIT-9";
|
||||
const currentFact = config.currentFact ?? "ORBIT-10";
|
||||
const original = await readConfigSnapshot(env);
|
||||
const originalMemorySearch =
|
||||
original.config.agents &&
|
||||
@@ -2090,7 +2129,11 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
await waitForQaChannelReady(env, 60_000);
|
||||
try {
|
||||
const memoryPath = path.join(env.gateway.workspaceDir, "MEMORY.md");
|
||||
await fs.writeFile(memoryPath, "Project Nebula stale codename: ORBIT-9.\n", "utf8");
|
||||
await fs.writeFile(
|
||||
memoryPath,
|
||||
`Project Nebula stale codename: ${staleFact}.\n`,
|
||||
"utf8",
|
||||
);
|
||||
const staleAt = new Date("2020-01-01T00:00:00.000Z");
|
||||
await fs.utimes(memoryPath, staleAt, staleAt);
|
||||
const transcriptsDir = resolveSessionTranscriptsDirForAgent(
|
||||
@@ -2117,7 +2160,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "What is the current Project Nebula codename?",
|
||||
text:
|
||||
config.transcriptQuestion ??
|
||||
"What is the current Project Nebula codename?",
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -2130,7 +2175,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "The current Project Nebula codename is ORBIT-10.",
|
||||
text:
|
||||
config.transcriptAnswer ??
|
||||
`The current Project Nebula codename is ${currentFact}.`,
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -2140,26 +2187,27 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
);
|
||||
await forceMemoryIndex({
|
||||
env,
|
||||
query: "current Project Nebula codename ORBIT-10",
|
||||
expectedNeedle: "ORBIT-10",
|
||||
query: `current Project Nebula codename ${currentFact}`,
|
||||
expectedNeedle: currentFact,
|
||||
});
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:session-memory-ranking",
|
||||
message:
|
||||
"Session memory ranking check: what is the current Project Nebula codename? Use memory tools first.",
|
||||
config.prompt ??
|
||||
`Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact.`,
|
||||
timeoutMs: liveTurnTimeoutMs(env, 45_000),
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.includes("ORBIT-10"),
|
||||
candidate.text.includes(currentFact),
|
||||
liveTurnTimeoutMs(env, 45_000),
|
||||
);
|
||||
const lower = normalizeLowercaseStringOrEmpty(outbound.text);
|
||||
const staleLeak =
|
||||
outbound.text.includes("ORBIT-9") &&
|
||||
outbound.text.includes(staleFact) &&
|
||||
!lower.includes("stale") &&
|
||||
!lower.includes("older") &&
|
||||
!lower.includes("previous");
|
||||
@@ -2380,18 +2428,23 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
||||
const config = readScenarioExecutionConfig<{
|
||||
prompt?: string;
|
||||
expectedContains?: string;
|
||||
skillName?: string;
|
||||
skillBody?: string;
|
||||
}>("skill-visibility-invocation");
|
||||
const skillName = config.skillName ?? "qa-visible-skill";
|
||||
await writeWorkspaceSkill({
|
||||
env,
|
||||
name: "qa-visible-skill",
|
||||
body: `---
|
||||
name: skillName,
|
||||
body:
|
||||
config.skillBody ??
|
||||
`---
|
||||
name: qa-visible-skill
|
||||
description: Visible QA skill marker
|
||||
---
|
||||
When the user asks for the visible skill marker exactly, reply with exactly: VISIBLE-SKILL-OK`,
|
||||
});
|
||||
const skills = await readSkillStatus(env);
|
||||
const visible = findSkill(skills, "qa-visible-skill");
|
||||
const visible = findSkill(skills, skillName);
|
||||
if (!visible?.eligible || visible.disabled || visible.blockedByAllowlist) {
|
||||
throw new Error(`skill not visible/eligible: ${JSON.stringify(visible)}`);
|
||||
}
|
||||
@@ -2635,16 +2688,24 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
|
||||
{
|
||||
name: "describes an attached image in one short sentence",
|
||||
run: async () => {
|
||||
const config = readScenarioExecutionConfig<{
|
||||
prompt?: string;
|
||||
requiredColorGroups?: string[][];
|
||||
}>("image-understanding-attachment");
|
||||
await reset();
|
||||
const outboundStartIndex = state
|
||||
.getSnapshot()
|
||||
.messages.filter((message) => message.direction === "outbound").length;
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:image-understanding",
|
||||
message:
|
||||
config.prompt ??
|
||||
"Image understanding check: describe the top and bottom colors in the attached image in one short sentence.",
|
||||
attachments: [
|
||||
{
|
||||
mimeType: "image/png",
|
||||
fileName: "red-top-blue-bottom.png",
|
||||
content: QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
|
||||
content: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
|
||||
},
|
||||
],
|
||||
timeoutMs: liveTurnTimeoutMs(env, 45_000),
|
||||
@@ -2653,9 +2714,17 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-operator",
|
||||
liveTurnTimeoutMs(env, 45_000),
|
||||
{ sinceIndex: outboundStartIndex },
|
||||
);
|
||||
const lower = normalizeLowercaseStringOrEmpty(outbound.text);
|
||||
if (!lower.includes("red") || !lower.includes("blue")) {
|
||||
const requiredColorGroups = config.requiredColorGroups ?? [
|
||||
["red", "scarlet", "crimson"],
|
||||
["blue", "azure", "teal", "cyan", "aqua"],
|
||||
];
|
||||
const missingColorGroup = requiredColorGroups.find(
|
||||
(group) => !group.some((candidate) => lower.includes(candidate)),
|
||||
);
|
||||
if (missingColorGroup) {
|
||||
throw new Error(`missing expected colors in image description: ${outbound.text}`);
|
||||
}
|
||||
if (env.mock) {
|
||||
@@ -2835,6 +2904,11 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
|
||||
{
|
||||
name: "restores image_generate after restart and uses it in the same session",
|
||||
run: async () => {
|
||||
const config = readScenarioExecutionConfig<{
|
||||
setupPrompt?: string;
|
||||
imagePrompt?: string;
|
||||
imagePromptSnippet?: string;
|
||||
}>("config-restart-capability-flip");
|
||||
await ensureImageGenerationConfigured(env);
|
||||
const original = await readConfigSnapshot(env);
|
||||
const originalTools =
|
||||
@@ -2868,6 +2942,7 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey,
|
||||
message:
|
||||
config.setupPrompt ??
|
||||
"Capability flip setup: acknowledge this setup so restart wake-up has a route.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
@@ -2907,12 +2982,13 @@ When the user asks for the hot disable marker exactly, reply with exactly: HOT-P
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey,
|
||||
message:
|
||||
config.imagePrompt ??
|
||||
"Capability flip image check: generate a QA lighthouse image now and keep the media path in the reply.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 45_000),
|
||||
});
|
||||
const mediaPath = await resolveGeneratedImagePath({
|
||||
env,
|
||||
promptSnippet: "Capability flip image check",
|
||||
promptSnippet: config.imagePromptSnippet ?? "Capability flip image check",
|
||||
startedAtMs: imageStartedAtMs,
|
||||
timeoutMs: liveTurnTimeoutMs(env, 45_000),
|
||||
});
|
||||
|
||||
@@ -27,4 +27,9 @@ execution:
|
||||
- qa
|
||||
- mission
|
||||
- testing
|
||||
- repo
|
||||
- worked
|
||||
- failed
|
||||
- blocked
|
||||
- chat flows
|
||||
```
|
||||
|
||||
@@ -22,4 +22,8 @@ execution:
|
||||
kind: custom
|
||||
handler: config-restart-capability-flip
|
||||
summary: Verify a restart-triggering config change flips capability inventory and the same session successfully uses the newly restored tool after wake-up.
|
||||
config:
|
||||
setupPrompt: "Capability flip setup: acknowledge this setup so restart wake-up has a route."
|
||||
imagePrompt: "Capability flip image check: generate a QA lighthouse image in this turn right now. Do not acknowledge first, do not promise future work, and do not stop before using image_generate. Final reply must include the MEDIA path."
|
||||
imagePromptSnippet: "Capability flip image check"
|
||||
```
|
||||
|
||||
@@ -20,4 +20,9 @@ execution:
|
||||
kind: custom
|
||||
handler: image-understanding-attachment
|
||||
summary: Verify an attached image reaches the agent model and the agent can describe what it sees.
|
||||
config:
|
||||
prompt: "Image understanding check: describe the top and bottom colors in the attached image in one short sentence."
|
||||
requiredColorGroups:
|
||||
- [red, scarlet, crimson]
|
||||
- [blue, azure, teal, cyan, aqua]
|
||||
```
|
||||
|
||||
@@ -18,6 +18,11 @@ execution:
|
||||
handler: memory-recall
|
||||
summary: Verify the agent can store a fact, switch topics, then recall the fact accurately later.
|
||||
config:
|
||||
rememberPrompt: "Please remember this fact for later: the QA canary code is ALPHA-7."
|
||||
recallPrompt: "What was the QA canary code I asked you to remember earlier?"
|
||||
resetDurableMemory: true
|
||||
rememberPrompt: "Please remember this fact for later: the QA canary code is ALPHA-7. Use your normal memory mechanism, avoid manual repo cleanup, and reply exactly `Remembered ALPHA-7.` once stored."
|
||||
rememberAckAny:
|
||||
- remembered alpha-7
|
||||
recallPrompt: "What was the QA canary code I asked you to remember earlier? Reply with the code only, plus at most one short sentence."
|
||||
recallExpectedAny:
|
||||
- alpha-7
|
||||
```
|
||||
|
||||
@@ -20,4 +20,10 @@ execution:
|
||||
kind: custom
|
||||
handler: session-memory-ranking
|
||||
summary: Verify session-transcript memory can outrank stale durable notes and drive the final answer toward the newer fact.
|
||||
config:
|
||||
staleFact: ORBIT-9
|
||||
currentFact: ORBIT-10
|
||||
transcriptQuestion: "What is the current Project Nebula codename?"
|
||||
transcriptAnswer: "The current Project Nebula codename is ORBIT-10."
|
||||
prompt: "Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact."
|
||||
```
|
||||
|
||||
@@ -20,6 +20,13 @@ execution:
|
||||
handler: skill-visibility-invocation
|
||||
summary: Verify a workspace skill becomes visible in skills.status and influences the next agent turn.
|
||||
config:
|
||||
prompt: "Visible skill marker: give me the visible skill marker exactly."
|
||||
skillName: qa-visible-skill
|
||||
skillBody: |-
|
||||
---
|
||||
name: qa-visible-skill
|
||||
description: Visible QA skill marker
|
||||
---
|
||||
When the user asks for the visible skill marker exactly, or explicitly asks you to use qa-visible-skill, reply with exactly: VISIBLE-SKILL-OK
|
||||
prompt: "Use qa-visible-skill now. Reply exactly with the visible skill marker and nothing else."
|
||||
expectedContains: "VISIBLE-SKILL-OK"
|
||||
```
|
||||
|
||||
@@ -161,6 +161,17 @@ describe("version resolution", () => {
|
||||
).toBe("2026.3.99");
|
||||
});
|
||||
|
||||
it("prefers explicit compatibility host overrides over runtime and stale env versions", () => {
|
||||
expect(
|
||||
resolveCompatibilityHostVersion({
|
||||
OPENCLAW_COMPATIBILITY_HOST_VERSION: "2026.4.8",
|
||||
OPENCLAW_VERSION: "2026.3.99",
|
||||
OPENCLAW_SERVICE_VERSION: "2026.3.98",
|
||||
npm_package_version: "2026.3.97",
|
||||
}),
|
||||
).toBe("2026.4.8");
|
||||
});
|
||||
|
||||
it("normalizes runtime version candidate for fallback handling", () => {
|
||||
expect(resolveUsableRuntimeVersion(undefined)).toBeUndefined();
|
||||
expect(resolveUsableRuntimeVersion("")).toBeUndefined();
|
||||
|
||||
@@ -139,6 +139,10 @@ export function resolveCompatibilityHostVersion(
|
||||
env: RuntimeVersionEnv = process.env as RuntimeVersionEnv,
|
||||
fallback = RUNTIME_SERVICE_VERSION_FALLBACK,
|
||||
): string {
|
||||
const explicitCompatibilityVersion = firstNonEmpty(env.OPENCLAW_COMPATIBILITY_HOST_VERSION);
|
||||
if (explicitCompatibilityVersion) {
|
||||
return explicitCompatibilityVersion;
|
||||
}
|
||||
return resolveVersionFromRuntimeSources({
|
||||
env,
|
||||
runtimeVersion: resolveUsableRuntimeVersion(VERSION),
|
||||
|
||||
Reference in New Issue
Block a user