feat(qa-lab): support scenario-defined plugin runs

This commit is contained in:
Peter Steinberger
2026-04-12 11:57:44 -07:00
parent ea71a59127
commit fcee268373
17 changed files with 1131 additions and 191 deletions

View File

@@ -166,7 +166,7 @@ export function registerQaLabCli(program: Command) {
.option(
"--provider-mode <mode>",
"Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
"mock-openai",
"live-frontier",
)
.option("--model <ref>", "Primary provider/model ref")
.option("--alt-model <ref>", "Alternate provider/model ref")

View File

@@ -836,6 +836,7 @@ export async function startQaGatewayChild(params: {
thinkingDefault?: QaThinkingLevel;
claudeCliAuthMode?: QaCliBackendAuthMode;
controlUiEnabled?: boolean;
enabledPluginIds?: string[];
mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig;
}) {
const tempRoot = await fs.mkdtemp(
@@ -873,14 +874,17 @@ export async function startQaGatewayChild(params: {
const liveProviderConfigs = await readQaLiveProviderConfigOverrides({
providerIds: liveProviderIds,
});
const enabledPluginIds =
const liveOwnerPluginIds =
liveProviderIds.length > 0
? await resolveQaOwnerPluginIdsForProviderIds({
repoRoot: params.repoRoot,
providerIds: liveProviderIds,
providerConfigs: liveProviderConfigs,
})
: undefined;
: [];
const enabledPluginIds = [
...new Set([...(liveOwnerPluginIds ?? []), ...(params.enabledPluginIds ?? [])]),
];
const buildGatewayConfig = (gatewayPort: number) =>
buildQaGatewayConfig({
bind: "loopback",

View File

@@ -116,7 +116,7 @@ describe("qa-lab server", () => {
expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10);
expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true);
expect(bootstrap.runner.status).toBe("idle");
expect(bootstrap.runner.selection.providerMode).toBe("mock-openai");
expect(bootstrap.runner.selection.providerMode).toBe("live-frontier");
expect(bootstrap.runner.selection.scenarioIds).toHaveLength(bootstrap.scenarios.length);
const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, {

View File

@@ -433,6 +433,151 @@ describe("qa mock openai server", () => {
"Protocol note: I checked memory and the current Project Nebula codename is ORBIT-10.",
);
const activeMemorySearch = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: true,
input: [
{
role: "user",
content: [
{
type: "input_text",
text: [
"You are a memory search agent.",
"Use only memory_search and memory_get.",
"",
"Conversation context:",
"Latest user message:",
"Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
].join("\n"),
},
],
},
],
}),
});
expect(activeMemorySearch.status).toBe(200);
expect(await activeMemorySearch.text()).toContain('"name":"memory_search"');
const activeMemoryGet = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: true,
input: [
{
role: "user",
content: [
{
type: "input_text",
text: [
"You are a memory search agent.",
"Use only memory_search and memory_get.",
"",
"Conversation context:",
"Latest user message:",
"Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
].join("\n"),
},
],
},
{
type: "function_call_output",
output: JSON.stringify({
results: [
{
path: "MEMORY.md",
startLine: 1,
endLine: 1,
},
],
}),
},
],
}),
});
expect(activeMemoryGet.status).toBe(200);
expect(await activeMemoryGet.text()).toContain('"name":"memory_get"');
const activeMemorySummary = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: false,
input: [
{
role: "user",
content: [
{
type: "input_text",
text: [
"You are a memory search agent.",
"Use only memory_search and memory_get.",
"",
"Conversation context:",
"Latest user message:",
"Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
].join("\n"),
},
],
},
{
type: "function_call_output",
output: JSON.stringify({
text: "Stable QA movie night snack preference: lemon pepper wings with blue cheese.",
}),
},
],
}),
});
expect(activeMemorySummary.status).toBe(200);
expect(JSON.stringify(await activeMemorySummary.json())).toContain(
"lemon pepper wings with blue cheese",
);
const injectedMainReply = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: false,
instructions: [
"System context:",
"<active_memory_plugin>User usually wants lemon pepper wings with blue cheese for QA movie night.</active_memory_plugin>",
].join("\n"),
input: [
{
role: "user",
content: [
{
type: "input_text",
text: "Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
},
],
},
],
}),
});
expect(injectedMainReply.status).toBe(200);
expect(JSON.stringify(await injectedMainReply.json())).toContain(
"lemon pepper wings with blue cheese",
);
const lastRequest = await fetch(`${server.baseUrl}/debug/last-request`);
expect(lastRequest.status).toBe(200);
expect(await lastRequest.json()).toMatchObject({
instructions: expect.stringContaining("<active_memory_plugin>"),
allInputText: expect.stringContaining("<active_memory_plugin>"),
});
const spawn = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {

View File

@@ -27,6 +27,7 @@ type MockOpenAiRequestSnapshot = {
body: Record<string, unknown>;
prompt: string;
allInputText: string;
instructions?: string;
toolOutput: string;
model: string;
imageInputCount: number;
@@ -181,6 +182,23 @@ function extractAllInputTexts(input: ResponsesInputItem[]) {
return texts.join("\n");
}
/**
 * Reads the top-level `instructions` field of a request body.
 *
 * @param body - Parsed request body.
 * @returns The trimmed instructions string, or `""` when the field is missing
 *   or is not a string.
 */
function extractInstructionsText(body: Record<string, unknown>) {
  const { instructions } = body;
  if (typeof instructions !== "string") {
    return "";
  }
  return instructions.trim();
}
/**
 * Combines the request-level instructions with the text gathered from the
 * input items into one newline-separated string.
 *
 * Instructions come first; either part is left out when it is empty.
 *
 * @param input - Input items of the request.
 * @param body - Parsed request body (source of `instructions`).
 * @returns The non-empty parts joined with `"\n"`, or `""` when both are empty.
 */
function extractAllRequestTexts(input: ResponsesInputItem[], body: Record<string, unknown>) {
  const instructionsPart = extractInstructionsText(body);
  const inputPart = extractAllInputTexts(input);
  return [instructionsPart, inputPart].filter((part) => Boolean(part)).join("\n");
}
function countImageInputs(input: ResponsesInputItem[]) {
let count = 0;
for (const item of input) {
@@ -320,6 +338,33 @@ function extractOrbitCode(text: string) {
return /\bORBIT-\d+\b/i.exec(text)?.[0]?.toUpperCase() ?? null;
}
/**
 * Decodes the entities `&lt;`, `&gt;`, `&amp;`, `&quot;` and `&#39;`.
 *
 * Decoding happens in a single pass so every entity in the input is decoded
 * exactly once. Chained replacements would decode `&amp;quot;` to `&quot;`
 * and then again to `"`, corrupting text that legitimately contains an
 * escaped ampersand followed by `quot;` or `#39;`.
 *
 * @param text - Text that may contain the entities listed above.
 * @returns The text with each recognised entity replaced by its character;
 *   anything else is left untouched.
 */
function decodeXmlEntities(text: string) {
  const replacements: Record<string, string> = {
    "&lt;": "<",
    "&gt;": ">",
    "&amp;": "&",
    "&quot;": '"',
    "&#39;": "'",
  };
  return text.replace(
    /&(?:lt|gt|amp|quot|#39);/g,
    // The pattern only matches keys of `replacements`; the fallback keeps the
    // lookup total under `noUncheckedIndexedAccess`.
    (entity) => replacements[entity] ?? entity,
  );
}
/**
 * Extracts the content of the first `<active_memory_plugin>` element in `text`.
 *
 * The tag match is case-insensitive and whitespace directly inside the tags
 * is dropped. The captured content is entity-decoded and trimmed.
 *
 * @param text - Text to scan.
 * @returns The decoded summary, or `null` when there is no such element or
 *   its content is empty.
 */
function extractActiveMemorySummary(text: string) {
  const captured = /<active_memory_plugin>\s*([\s\S]*?)\s*<\/active_memory_plugin>/i.exec(
    text,
  )?.[1];
  if (!captured) {
    return null;
  }
  return decodeXmlEntities(captured).trim();
}
/**
 * Reports whether `text` contains the marker sentence that identifies a
 * memory search agent prompt. The check is a case-sensitive substring match.
 */
function isActiveMemorySubagentPrompt(text: string) {
  const marker = "You are a memory search agent.";
  return text.indexOf(marker) !== -1;
}
/**
 * Finds the snack phrase in `text`.
 *
 * Whitespace runs are collapsed to single spaces first, so a phrase split
 * across lines still matches. Matching is case-insensitive and the matched
 * text is returned with its original casing.
 *
 * @param text - Text to search.
 * @returns The matched phrase ("lemon pepper wings", "blue cheese", or either
 *   one followed by "with" and the other), or `null` when nothing matches.
 */
function extractSnackPreference(text: string) {
  const snackPattern =
    /(lemon pepper wings(?:\s+with\s+blue cheese)?|blue cheese(?:\s+with\s+lemon pepper wings)?)/i;
  const collapsed = text.replace(/\s+/g, " ").trim();
  const found = snackPattern.exec(collapsed);
  if (found === null) {
    return null;
  }
  return found[0].trim();
}
function extractLastCapture(text: string, pattern: RegExp) {
let lastMatch: RegExpExecArray | null = null;
const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
@@ -355,7 +400,7 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
const toolOutput = extractToolOutput(input);
const toolJson = parseToolOutputJson(toolOutput);
const userTexts = extractAllUserTexts(input);
const allInputText = extractAllInputTexts(input);
const allInputText = extractAllRequestTexts(input, body);
const rememberedFact = extractRememberedFact(userTexts);
const model = typeof body.model === "string" ? body.model : "";
const memorySnippet =
@@ -369,6 +414,8 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
const exactReplyDirective = extractExactReplyDirective(allInputText);
const exactMarkerDirective = extractExactMarkerDirective(allInputText);
const imageInputCount = countImageInputs(input);
const activeMemorySummary = extractActiveMemorySummary(allInputText);
const snackPreference = extractSnackPreference(activeMemorySummary ?? memorySnippet);
if (/what was the qa canary code/i.test(prompt) && rememberedFact) {
return `Protocol note: the QA canary code was ${rememberedFact}.`;
@@ -400,6 +447,12 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
if (/memory tools check/i.test(prompt) && orbitCode) {
return `Protocol note: I checked memory and the project codename is ${orbitCode}.`;
}
if (/silent snack recall check/i.test(prompt) && snackPreference) {
return `Protocol note: you usually want ${snackPreference} for QA movie night.`;
}
if (/silent snack recall check/i.test(prompt)) {
return "Protocol note: I do not have enough context to say what you usually want for QA movie night.";
}
if (/tool continuity check/i.test(prompt) && toolOutput) {
return `Protocol note: model switch handoff confirmed on ${model || "the requested model"}. QA mission from QA_KICKOFF_TASK.md still applies: understand this OpenClaw repo from source + docs before acting.`;
}
@@ -531,7 +584,7 @@ async function buildResponsesPayload(body: Record<string, unknown>) {
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
const toolJson = parseToolOutputJson(toolOutput);
const allInputText = extractAllInputTexts(input);
const allInputText = extractAllRequestTexts(input, body);
const isGroupChat = allInputText.includes('"is_group_chat": true');
const isBaselineUnmentionedChannelChatter = /\bno bot ping here\b/i.test(prompt);
if (isHeartbeatPrompt(prompt)) {
@@ -591,6 +644,48 @@ async function buildResponsesPayload(body: Record<string, unknown>) {
});
}
}
if (
isActiveMemorySubagentPrompt(allInputText) &&
/silent snack recall check/i.test(allInputText)
) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("memory_search", {
query: "QA movie night snack lemon pepper wings blue cheese",
maxResults: 3,
});
}
const results = Array.isArray(toolJson?.results)
? (toolJson.results as Array<Record<string, unknown>>)
: [];
const first = results[0];
if (
typeof first?.path === "string" &&
(typeof first.startLine === "number" || typeof first.endLine === "number")
) {
const from =
typeof first.startLine === "number"
? Math.max(1, first.startLine)
: typeof first.endLine === "number"
? Math.max(1, first.endLine)
: 1;
return buildToolCallEventsWithArgs("memory_get", {
path: first.path,
from,
lines: 4,
});
}
const memorySnippet =
typeof toolJson?.text === "string"
? toolJson.text
: Array.isArray(toolJson?.results)
? JSON.stringify(toolJson.results)
: toolOutput;
const snackPreference = extractSnackPreference(memorySnippet);
if (snackPreference) {
return buildAssistantEvents(`User usually wants ${snackPreference} for QA movie night.`);
}
return buildAssistantEvents("NONE");
}
if (/session memory ranking check/i.test(prompt)) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("memory_search", {
@@ -798,7 +893,8 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
raw,
body,
prompt: extractLastUserText(input),
allInputText: extractAllInputTexts(input),
allInputText: extractAllRequestTexts(input, body),
instructions: extractInstructionsText(body) || undefined,
toolOutput: extractToolOutput(input),
model: typeof body.model === "string" ? body.model : "",
imageInputCount: countImageInputs(input),

View File

@@ -81,7 +81,7 @@ describe("qa multipass runtime", () => {
expect(plan.summaryPath).toBe(path.join(outputDir, "qa-suite-summary.json"));
});
it("renders a guest script that runs the mock qa suite with explicit scenarios", () => {
it("renders a guest script that runs the live qa suite by default", () => {
const plan = createQaMultipassPlan({
repoRoot: process.cwd(),
outputDir: path.join(process.cwd(), ".artifacts", "qa-e2e", "multipass-test"),
@@ -93,9 +93,8 @@ describe("qa multipass runtime", () => {
expect(script).toContain("pnpm install --frozen-lockfile");
expect(script).toContain("pnpm build");
expect(script).toContain("corepack prepare 'pnpm@10.32.1' --activate");
expect(script).toContain(
"'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel' '--provider-mode' 'mock-openai'",
);
expect(script).toContain("'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel'");
expect(script).toContain("'--provider-mode' 'live-frontier'");
expect(script).toContain("'--scenario' 'channel-chat-baseline'");
expect(script).toContain("'--scenario' 'thread-follow-up'");
expect(script).toContain("/workspace/openclaw-host/.artifacts/qa-e2e/multipass-test");
@@ -128,9 +127,8 @@ describe("qa multipass runtime", () => {
);
expect(plan.forwardedEnv.OPENAI_API_KEY).toBe("test-openai-key");
expect(script).toContain("OPENAI_API_KEY='test-openai-key'");
expect(script).toContain(
"'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel' '--provider-mode' 'live-frontier'",
);
expect(script).toContain("'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel'");
expect(script).toContain("'--provider-mode' 'live-frontier'");
});
it("redacts forwarded live secrets in the persisted artifact script", () => {

View File

@@ -345,7 +345,7 @@ export function createQaMultipassPlan(params: {
const outputDir = params.outputDir ?? createQaMultipassOutputDir(params.repoRoot);
const scenarioIds = [...new Set(params.scenarioIds ?? [])];
const transportId = params.transportId?.trim() || "qa-channel";
const providerMode = params.providerMode ?? "mock-openai";
const providerMode = params.providerMode ?? "live-frontier";
const forwardedEnv = providerMode === "live-frontier" ? resolveForwardedLiveEnv() : {};
const hostCodexHomePath = forwardedEnv.CODEX_HOME;
const liveProviderConfig =

View File

@@ -82,6 +82,21 @@ describe("buildQaGatewayConfig", () => {
expect(cfg.channels?.["qa-channel"]).toBeUndefined();
});
it("can stage extra bundled plugins in the mock lane", () => {
  // No providerMode is passed; the extra plugin arrives only via enabledPluginIds.
  const extraPluginId = "active-memory";
  const cfg = buildQaGatewayConfig({
    bind: "loopback",
    gatewayPort: 18789,
    gatewayToken: "token",
    providerBaseUrl: "http://127.0.0.1:44080/v1",
    workspaceDir: "/tmp/qa-workspace",
    enabledPluginIds: [extraPluginId],
    ...createQaChannelTransportParams(),
  });

  const plugins = cfg.plugins;
  // The extra plugin must be allow-listed between memory-core and the transport plugin...
  expect(plugins?.allow).toEqual(["memory-core", extraPluginId, "qa-channel"]);
  // ...and must also get an enabled entry.
  expect(plugins?.entries?.[extraPluginId]).toEqual({ enabled: true });
});
it("uses built-in provider wiring in frontier live mode", () => {
const cfg = buildQaGatewayConfig({
bind: "loopback",

View File

@@ -162,24 +162,23 @@ export function buildQaGatewayConfig(params: {
: selectedProviderIds,
),
]
: [];
: [
...new Set(
(params.enabledPluginIds ?? [])
.map((pluginId) => pluginId.trim())
.filter((pluginId) => pluginId.length > 0),
),
];
const transportPluginIds = [...new Set(params.transportPluginIds ?? [])]
.map((pluginId) => pluginId.trim())
.filter((pluginId) => pluginId.length > 0);
const pluginEntries =
providerMode === "live-frontier"
? Object.fromEntries(selectedPluginIds.map((pluginId) => [pluginId, { enabled: true }]))
: {};
const pluginEntries = Object.fromEntries(
selectedPluginIds.map((pluginId) => [pluginId, { enabled: true }]),
);
const transportPluginEntries = Object.fromEntries(
transportPluginIds.map((pluginId) => [pluginId, { enabled: true }]),
);
const allowedPlugins = [
...new Set(
providerMode === "live-frontier"
? ["memory-core", ...selectedPluginIds, ...transportPluginIds]
: ["memory-core", ...transportPluginIds],
),
];
const allowedPlugins = [...new Set(["memory-core", ...selectedPluginIds, ...transportPluginIds])];
const liveModelParams =
providerMode === "live-frontier"
? (modelRef: string) => ({

View File

@@ -24,12 +24,12 @@ const scenarios = [
];
describe("qa run config", () => {
it("creates a synthetic-by-default selection that arms every scenario", () => {
it("creates a live-by-default selection that arms every scenario", () => {
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "mock-openai",
primaryModel: "mock-openai/gpt-5.4",
alternateModel: "mock-openai/gpt-5.4-alt",
fastMode: false,
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: true,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});
});

View File

@@ -38,19 +38,21 @@ export function defaultQaModelForMode(mode: QaProviderMode, alternate = false) {
}
export function createDefaultQaRunSelection(scenarios: QaSeedScenario[]): QaLabRunSelection {
const providerMode: QaProviderMode = "mock-openai";
const providerMode: QaProviderMode = "live-frontier";
return {
providerMode,
primaryModel: defaultQaModelForMode(providerMode),
alternateModel: defaultQaModelForMode(providerMode, true),
fastMode: false,
fastMode: true,
scenarioIds: scenarios.map((scenario) => scenario.id),
};
}
export function normalizeQaProviderMode(input: unknown): QaProviderMode {
return normalizeQaProviderModeInput(
input === "live-frontier" || input === "live-openai" ? input : "mock-openai",
input === "mock-openai" || input === "live-frontier" || input === "live-openai"
? input
: "live-frontier",
);
}

View File

@@ -135,6 +135,8 @@ const qaSeedScenarioSchema = z.object({
surface: z.string().trim().min(1),
objective: z.string().trim().min(1),
successCriteria: z.array(z.string().trim().min(1)).min(1),
plugins: z.array(z.string().trim().min(1)).optional(),
gatewayConfigPatch: z.record(z.string(), z.unknown()).optional(),
docsRefs: z.array(z.string().trim().min(1)).optional(),
codeRefs: z.array(z.string().trim().min(1)).optional(),
execution: qaScenarioExecutionSchema.optional(),

View File

@@ -0,0 +1,160 @@
import { randomUUID } from "node:crypto";
import * as fs from "node:fs/promises";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { createQaBusState } from "./bus-state.js";
import {
createQaScenarioRuntimeApi,
type QaScenarioRuntimeConstants,
type QaScenarioRuntimeDeps,
} from "./scenario-runtime-api.js";
/**
 * Builds a complete `QaScenarioRuntimeDeps` object for tests.
 *
 * `fs`, `path` and `randomUUID` are the real Node imports and `sleep` is a mock
 * that resolves to `undefined`. Every other function-typed dependency is the
 * SAME `vi.fn()` instance, so call counts on those are shared across all of them.
 * `overrides` is spread last and therefore replaces any of the defaults.
 */
function createDeps(overrides?: Partial<QaScenarioRuntimeDeps>): QaScenarioRuntimeDeps {
// One shared mock reused for every function-typed dependency below.
const fn = vi.fn();
return {
fs,
path,
sleep: vi.fn(async () => undefined),
randomUUID,
runScenario: fn,
waitForOutboundMessage: fn,
waitForTransportOutboundMessage: fn,
waitForChannelOutboundMessage: fn,
waitForNoOutbound: fn,
waitForNoTransportOutbound: fn,
recentOutboundSummary: fn,
formatConversationTranscript: fn,
readTransportTranscript: fn,
formatTransportTranscript: fn,
fetchJson: fn,
waitForGatewayHealthy: fn,
waitForTransportReady: fn,
waitForQaChannelReady: fn,
waitForConfigRestartSettle: fn,
patchConfig: fn,
applyConfig: fn,
readConfigSnapshot: fn,
createSession: fn,
readEffectiveTools: fn,
readSkillStatus: fn,
readRawQaSessionStore: fn,
runQaCli: fn,
extractMediaPathFromText: fn,
resolveGeneratedImagePath: fn,
startAgentRun: fn,
waitForAgentRun: fn,
listCronJobs: fn,
waitForCronRunCompletion: fn,
readDoctorMemoryStatus: fn,
forceMemoryIndex: fn,
findSkill: fn,
writeWorkspaceSkill: fn,
callPluginToolsMcp: fn,
runAgentPrompt: fn,
ensureImageGenerationConfigured: fn,
handleQaAction: fn,
extractQaToolPayload: fn,
formatMemoryDreamingDay: fn,
resolveSessionTranscriptsDirForAgent: fn,
buildAgentSessionKey: fn,
normalizeLowercaseStringOrEmpty: fn,
formatErrorMessage: fn,
liveTurnTimeoutMs: fn,
resolveQaLiveTurnTimeoutMs: fn,
splitModelRef: fn,
// Minimal stand-in object; the dependency type is `unknown`.
qaChannelPlugin: { id: "qa-channel" },
hasDiscoveryLabels: fn,
reportsDiscoveryScopeLeak: fn,
reportsMissingDiscoveryFiles: fn,
hasModelSwitchContinuityEvidence: fn,
// Caller-supplied overrides win over every default above.
...overrides,
};
}
// Placeholder strings standing in for the image fixtures; the test below only
// checks that one of these values is passed through to the runtime API unchanged.
const constants: QaScenarioRuntimeConstants = {
imageUnderstandingPngBase64: "png-small",
imageUnderstandingLargePngBase64: "png-large",
imageUnderstandingValidPngBase64: "png-valid",
};
// Verifies that the runtime API is wired from the environment's transport
// capabilities and the injected deps.
describe("createQaScenarioRuntimeApi", () => {
it("builds a markdown-flow runtime surface from generic transport capabilities", async () => {
const state = createQaBusState();
// Spies are installed BEFORE the methods are bound into `capabilities` below,
// so calls made through the bound capabilities are counted by the spies.
const resetSpy = vi.spyOn(state, "reset");
const inboundSpy = vi.spyOn(state, "addInboundMessage");
const outboundSpy = vi.spyOn(state, "addOutboundMessage");
const readSpy = vi.spyOn(state, "readMessage");
// Stub that invokes the check once and returns its result.
const waitForCondition = vi.fn(async (check: () => unknown) => check());
const sleep = vi.fn(async () => undefined);
const env = {
lab: { baseUrl: "http://127.0.0.1:1234" },
transport: {
state,
capabilities: {
waitForCondition,
getNormalizedMessageState: state.getSnapshot.bind(state),
resetNormalizedMessageState: async () => {
state.reset();
},
sendInboundMessage: state.addInboundMessage.bind(state),
injectOutboundMessage: state.addOutboundMessage.bind(state),
readNormalizedMessage: state.readMessage.bind(state),
},
},
};
const scenario = {
id: "generic-flow",
title: "Generic Flow",
surface: "test",
objective: "test",
successCriteria: ["works"],
sourcePath: "qa/scenarios/generic-flow.md",
execution: {
kind: "flow" as const,
config: { expected: "value" },
flow: {
steps: [{ name: "noop", actions: [{ assert: "true" }] }],
},
},
};
const api = createQaScenarioRuntimeApi({
env,
scenario,
deps: createDeps({ sleep }),
constants,
});
// Pass-through fields must be the very same references / values.
expect(api.lab).toBe(env.lab);
expect(api.state).toBe(state);
expect(api.config).toEqual({ expected: "value" });
expect(api.waitForCondition).toBe(waitForCondition);
// waitForChannelReady is an alias of waitForTransportReady.
expect(api.waitForChannelReady).toBe(api.waitForTransportReady);
expect(api.getTransportSnapshot()).toEqual(state.getSnapshot());
expect(api.imageUnderstandingPngBase64).toBe("png-small");
const inbound = await api.injectInboundMessage({
accountId: "qa-channel",
conversation: { id: "qa-operator", kind: "direct" },
senderId: "qa-operator",
text: "hello",
});
const outbound = await api.injectOutboundMessage({
accountId: "qa-channel",
to: "dm:qa-operator",
text: "hi",
});
expect(inbound.id).toBeTruthy();
expect(outbound.id).toBeTruthy();
await api.readTransportMessage({ accountId: "qa-channel", messageId: outbound.id });
// reset, resetBus and resetTransport are each invoked once here.
await api.reset();
await api.resetBus();
await api.resetTransport();
expect(inboundSpy).toHaveBeenCalledTimes(1);
expect(outboundSpy).toHaveBeenCalledTimes(1);
expect(readSpy).toHaveBeenCalledTimes(1);
// Three reset calls => three state resets and three sleeps.
expect(resetSpy).toHaveBeenCalledTimes(3);
expect(sleep).toHaveBeenCalledTimes(3);
});
});

View File

@@ -0,0 +1,256 @@
import type * as NodeFs from "node:fs/promises";
import type * as NodePath from "node:path";
import type { QaTransportState } from "./qa-transport.js";
import type { QaSeedScenarioWithSource } from "./scenario-catalog.js";
/**
 * Loosest callable shape used for injected helpers. Because the parameters are
 * `never[]`, a function with any parameter list is assignable to it; the
 * return value is `unknown`.
 */
type QaScenarioRuntimeFunction = (...args: never[]) => unknown;
/**
 * Environment handed to the scenario runtime: an opaque `lab` value plus a
 * transport made of its state object and a set of capabilities.
 *
 * Most capability signatures are derived from `TTransportState`, so they stay
 * in sync with the concrete transport state's methods.
 */
export type QaScenarioRuntimeEnv<
TLab = unknown,
TTransportState extends QaTransportState = QaTransportState,
> = {
lab: TLab;
transport: {
state: TTransportState;
capabilities: {
waitForCondition: QaScenarioRuntimeFunction;
// Typed by the return type of the state's `getSnapshot`.
getNormalizedMessageState: () => ReturnType<TTransportState["getSnapshot"]>;
resetNormalizedMessageState: () => Promise<void>;
// The next three reuse the state's own method signatures.
sendInboundMessage: TTransportState["addInboundMessage"];
injectOutboundMessage: TTransportState["addOutboundMessage"];
readNormalizedMessage: TTransportState["readMessage"];
};
};
};
/**
 * Helpers injected into the scenario runtime API.
 *
 * `fs`, `path`, `sleep` and `randomUUID` are precisely typed. The remaining
 * function members use the loose `QaScenarioRuntimeFunction` shape, so callers
 * can supply concrete signatures through a narrower `TDeps` type argument.
 */
export type QaScenarioRuntimeDeps = {
fs: typeof NodeFs;
path: typeof NodePath;
sleep: (ms?: number) => Promise<unknown>;
randomUUID: () => string;
runScenario: QaScenarioRuntimeFunction;
waitForOutboundMessage: QaScenarioRuntimeFunction;
waitForTransportOutboundMessage: QaScenarioRuntimeFunction;
waitForChannelOutboundMessage: QaScenarioRuntimeFunction;
waitForNoOutbound: QaScenarioRuntimeFunction;
waitForNoTransportOutbound: QaScenarioRuntimeFunction;
recentOutboundSummary: QaScenarioRuntimeFunction;
formatConversationTranscript: QaScenarioRuntimeFunction;
readTransportTranscript: QaScenarioRuntimeFunction;
formatTransportTranscript: QaScenarioRuntimeFunction;
fetchJson: QaScenarioRuntimeFunction;
waitForGatewayHealthy: QaScenarioRuntimeFunction;
waitForTransportReady: QaScenarioRuntimeFunction;
waitForQaChannelReady: QaScenarioRuntimeFunction;
waitForConfigRestartSettle: QaScenarioRuntimeFunction;
patchConfig: QaScenarioRuntimeFunction;
applyConfig: QaScenarioRuntimeFunction;
readConfigSnapshot: QaScenarioRuntimeFunction;
createSession: QaScenarioRuntimeFunction;
readEffectiveTools: QaScenarioRuntimeFunction;
readSkillStatus: QaScenarioRuntimeFunction;
readRawQaSessionStore: QaScenarioRuntimeFunction;
runQaCli: QaScenarioRuntimeFunction;
extractMediaPathFromText: QaScenarioRuntimeFunction;
resolveGeneratedImagePath: QaScenarioRuntimeFunction;
startAgentRun: QaScenarioRuntimeFunction;
waitForAgentRun: QaScenarioRuntimeFunction;
listCronJobs: QaScenarioRuntimeFunction;
waitForCronRunCompletion: QaScenarioRuntimeFunction;
readDoctorMemoryStatus: QaScenarioRuntimeFunction;
forceMemoryIndex: QaScenarioRuntimeFunction;
findSkill: QaScenarioRuntimeFunction;
writeWorkspaceSkill: QaScenarioRuntimeFunction;
callPluginToolsMcp: QaScenarioRuntimeFunction;
runAgentPrompt: QaScenarioRuntimeFunction;
ensureImageGenerationConfigured: QaScenarioRuntimeFunction;
handleQaAction: QaScenarioRuntimeFunction;
extractQaToolPayload: QaScenarioRuntimeFunction;
formatMemoryDreamingDay: QaScenarioRuntimeFunction;
resolveSessionTranscriptsDirForAgent: QaScenarioRuntimeFunction;
buildAgentSessionKey: QaScenarioRuntimeFunction;
normalizeLowercaseStringOrEmpty: QaScenarioRuntimeFunction;
formatErrorMessage: QaScenarioRuntimeFunction;
liveTurnTimeoutMs: QaScenarioRuntimeFunction;
resolveQaLiveTurnTimeoutMs: QaScenarioRuntimeFunction;
splitModelRef: QaScenarioRuntimeFunction;
// Intentionally opaque: passed through without any structural requirement.
qaChannelPlugin: unknown;
hasDiscoveryLabels: QaScenarioRuntimeFunction;
reportsDiscoveryScopeLeak: QaScenarioRuntimeFunction;
reportsMissingDiscoveryFiles: QaScenarioRuntimeFunction;
hasModelSwitchContinuityEvidence: QaScenarioRuntimeFunction;
};
/**
 * String constants exposed to scenarios unchanged. The names suggest
 * base64-encoded PNG fixtures (TODO confirm against the caller that supplies them).
 */
export type QaScenarioRuntimeConstants = {
imageUnderstandingPngBase64: string;
imageUnderstandingLargePngBase64: string;
imageUnderstandingValidPngBase64: string;
};
/**
 * Flat API surface returned by `createQaScenarioRuntimeApi`.
 *
 * Member types are looked up on `TEnv` / `TDeps`, so the precise signatures
 * of the supplied environment and deps are carried through to the API.
 */
export type QaScenarioRuntimeApi<
TEnv extends QaScenarioRuntimeEnv = QaScenarioRuntimeEnv,
TDeps extends QaScenarioRuntimeDeps = QaScenarioRuntimeDeps,
> = {
env: TEnv;
lab: TEnv["lab"];
state: TEnv["transport"]["state"];
scenario: QaSeedScenarioWithSource;
config: Record<string, unknown>;
fs: typeof NodeFs;
path: typeof NodePath;
sleep: (ms?: number) => Promise<unknown>;
randomUUID: () => string;
runScenario: TDeps["runScenario"];
// Comes from the transport capabilities, not from deps.
waitForCondition: TEnv["transport"]["capabilities"]["waitForCondition"];
waitForOutboundMessage: TDeps["waitForOutboundMessage"];
waitForTransportOutboundMessage: TDeps["waitForTransportOutboundMessage"];
waitForChannelOutboundMessage: TDeps["waitForChannelOutboundMessage"];
waitForNoOutbound: TDeps["waitForNoOutbound"];
waitForNoTransportOutbound: TDeps["waitForNoTransportOutbound"];
recentOutboundSummary: TDeps["recentOutboundSummary"];
formatConversationTranscript: TDeps["formatConversationTranscript"];
readTransportTranscript: TDeps["readTransportTranscript"];
formatTransportTranscript: TDeps["formatTransportTranscript"];
fetchJson: TDeps["fetchJson"];
waitForGatewayHealthy: TDeps["waitForGatewayHealthy"];
waitForTransportReady: TDeps["waitForTransportReady"];
// Alias: shares the type of `waitForTransportReady`.
waitForChannelReady: TDeps["waitForTransportReady"];
waitForQaChannelReady: TDeps["waitForQaChannelReady"];
waitForConfigRestartSettle: TDeps["waitForConfigRestartSettle"];
patchConfig: TDeps["patchConfig"];
applyConfig: TDeps["applyConfig"];
readConfigSnapshot: TDeps["readConfigSnapshot"];
createSession: TDeps["createSession"];
readEffectiveTools: TDeps["readEffectiveTools"];
readSkillStatus: TDeps["readSkillStatus"];
readRawQaSessionStore: TDeps["readRawQaSessionStore"];
runQaCli: TDeps["runQaCli"];
extractMediaPathFromText: TDeps["extractMediaPathFromText"];
resolveGeneratedImagePath: TDeps["resolveGeneratedImagePath"];
startAgentRun: TDeps["startAgentRun"];
waitForAgentRun: TDeps["waitForAgentRun"];
listCronJobs: TDeps["listCronJobs"];
waitForCronRunCompletion: TDeps["waitForCronRunCompletion"];
readDoctorMemoryStatus: TDeps["readDoctorMemoryStatus"];
forceMemoryIndex: TDeps["forceMemoryIndex"];
findSkill: TDeps["findSkill"];
writeWorkspaceSkill: TDeps["writeWorkspaceSkill"];
callPluginToolsMcp: TDeps["callPluginToolsMcp"];
runAgentPrompt: TDeps["runAgentPrompt"];
ensureImageGenerationConfigured: TDeps["ensureImageGenerationConfigured"];
handleQaAction: TDeps["handleQaAction"];
extractQaToolPayload: TDeps["extractQaToolPayload"];
formatMemoryDreamingDay: TDeps["formatMemoryDreamingDay"];
resolveSessionTranscriptsDirForAgent: TDeps["resolveSessionTranscriptsDirForAgent"];
buildAgentSessionKey: TDeps["buildAgentSessionKey"];
normalizeLowercaseStringOrEmpty: TDeps["normalizeLowercaseStringOrEmpty"];
formatErrorMessage: TDeps["formatErrorMessage"];
liveTurnTimeoutMs: TDeps["liveTurnTimeoutMs"];
resolveQaLiveTurnTimeoutMs: TDeps["resolveQaLiveTurnTimeoutMs"];
splitModelRef: TDeps["splitModelRef"];
qaChannelPlugin: unknown;
hasDiscoveryLabels: TDeps["hasDiscoveryLabels"];
reportsDiscoveryScopeLeak: TDeps["reportsDiscoveryScopeLeak"];
reportsMissingDiscoveryFiles: TDeps["reportsMissingDiscoveryFiles"];
hasModelSwitchContinuityEvidence: TDeps["hasModelSwitchContinuityEvidence"];
imageUnderstandingPngBase64: string;
imageUnderstandingLargePngBase64: string;
imageUnderstandingValidPngBase64: string;
// Transport helpers: scenario-facing names for the transport capabilities.
getTransportSnapshot: TEnv["transport"]["capabilities"]["getNormalizedMessageState"];
resetTransport: () => Promise<void>;
injectInboundMessage: TEnv["transport"]["capabilities"]["sendInboundMessage"];
injectOutboundMessage: TEnv["transport"]["capabilities"]["injectOutboundMessage"];
readTransportMessage: TEnv["transport"]["capabilities"]["readNormalizedMessage"];
// `resetBus` and `reset` have the same signature as `resetTransport`.
resetBus: () => Promise<void>;
reset: () => Promise<void>;
};
/**
 * Assembles the flat runtime API that scenarios run against.
 *
 * Nearly every member is a direct pass-through of a dependency, a transport
 * capability or a constant. The exceptions are:
 * - `config`, which falls back to `{}` when the scenario's execution has no config;
 * - `waitForChannelReady`, an alias of `deps.waitForTransportReady`;
 * - `reset`, `resetBus` and `resetTransport`, which all share one function that
 *   resets the normalized message state and then awaits `deps.sleep(100)`.
 *
 * @param params.env - Lab handle plus transport state and capabilities.
 * @param params.scenario - Scenario being executed.
 * @param params.deps - Injected helper functions.
 * @param params.constants - String constants exposed to the scenario.
 * @returns The runtime API object.
 */
export function createQaScenarioRuntimeApi<
  TEnv extends QaScenarioRuntimeEnv,
  TDeps extends QaScenarioRuntimeDeps,
>(params: {
  env: TEnv;
  scenario: QaSeedScenarioWithSource;
  deps: TDeps;
  constants: QaScenarioRuntimeConstants;
}): QaScenarioRuntimeApi<TEnv, TDeps> {
  const { env, scenario, deps, constants } = params;
  const transport = env.transport;
  const capabilities = transport.capabilities;

  // Shared by reset / resetBus / resetTransport. Reads through `params` at
  // call time, exactly as each invocation did before.
  async function resetTransportState(): Promise<void> {
    await params.env.transport.capabilities.resetNormalizedMessageState();
    await params.deps.sleep(100);
  }

  return {
    env,
    lab: env.lab,
    state: transport.state,
    scenario,
    config: scenario.execution.config ?? {},
    fs: deps.fs,
    path: deps.path,
    sleep: deps.sleep,
    randomUUID: deps.randomUUID,
    runScenario: deps.runScenario,
    waitForCondition: capabilities.waitForCondition,
    waitForOutboundMessage: deps.waitForOutboundMessage,
    waitForTransportOutboundMessage: deps.waitForTransportOutboundMessage,
    waitForChannelOutboundMessage: deps.waitForChannelOutboundMessage,
    waitForNoOutbound: deps.waitForNoOutbound,
    waitForNoTransportOutbound: deps.waitForNoTransportOutbound,
    recentOutboundSummary: deps.recentOutboundSummary,
    formatConversationTranscript: deps.formatConversationTranscript,
    readTransportTranscript: deps.readTransportTranscript,
    formatTransportTranscript: deps.formatTransportTranscript,
    fetchJson: deps.fetchJson,
    waitForGatewayHealthy: deps.waitForGatewayHealthy,
    waitForTransportReady: deps.waitForTransportReady,
    // Alias of waitForTransportReady.
    waitForChannelReady: deps.waitForTransportReady,
    waitForQaChannelReady: deps.waitForQaChannelReady,
    waitForConfigRestartSettle: deps.waitForConfigRestartSettle,
    patchConfig: deps.patchConfig,
    applyConfig: deps.applyConfig,
    readConfigSnapshot: deps.readConfigSnapshot,
    createSession: deps.createSession,
    readEffectiveTools: deps.readEffectiveTools,
    readSkillStatus: deps.readSkillStatus,
    readRawQaSessionStore: deps.readRawQaSessionStore,
    runQaCli: deps.runQaCli,
    extractMediaPathFromText: deps.extractMediaPathFromText,
    resolveGeneratedImagePath: deps.resolveGeneratedImagePath,
    startAgentRun: deps.startAgentRun,
    waitForAgentRun: deps.waitForAgentRun,
    listCronJobs: deps.listCronJobs,
    waitForCronRunCompletion: deps.waitForCronRunCompletion,
    readDoctorMemoryStatus: deps.readDoctorMemoryStatus,
    forceMemoryIndex: deps.forceMemoryIndex,
    findSkill: deps.findSkill,
    writeWorkspaceSkill: deps.writeWorkspaceSkill,
    callPluginToolsMcp: deps.callPluginToolsMcp,
    runAgentPrompt: deps.runAgentPrompt,
    ensureImageGenerationConfigured: deps.ensureImageGenerationConfigured,
    handleQaAction: deps.handleQaAction,
    extractQaToolPayload: deps.extractQaToolPayload,
    formatMemoryDreamingDay: deps.formatMemoryDreamingDay,
    resolveSessionTranscriptsDirForAgent: deps.resolveSessionTranscriptsDirForAgent,
    buildAgentSessionKey: deps.buildAgentSessionKey,
    normalizeLowercaseStringOrEmpty: deps.normalizeLowercaseStringOrEmpty,
    formatErrorMessage: deps.formatErrorMessage,
    liveTurnTimeoutMs: deps.liveTurnTimeoutMs,
    resolveQaLiveTurnTimeoutMs: deps.resolveQaLiveTurnTimeoutMs,
    splitModelRef: deps.splitModelRef,
    qaChannelPlugin: deps.qaChannelPlugin,
    hasDiscoveryLabels: deps.hasDiscoveryLabels,
    reportsDiscoveryScopeLeak: deps.reportsDiscoveryScopeLeak,
    reportsMissingDiscoveryFiles: deps.reportsMissingDiscoveryFiles,
    hasModelSwitchContinuityEvidence: deps.hasModelSwitchContinuityEvidence,
    imageUnderstandingPngBase64: constants.imageUnderstandingPngBase64,
    imageUnderstandingLargePngBase64: constants.imageUnderstandingLargePngBase64,
    imageUnderstandingValidPngBase64: constants.imageUnderstandingValidPngBase64,
    getTransportSnapshot: capabilities.getNormalizedMessageState,
    resetTransport: resetTransportState,
    injectInboundMessage: capabilities.sendInboundMessage,
    injectOutboundMessage: capabilities.injectOutboundMessage,
    readTransportMessage: capabilities.readNormalizedMessage,
    resetBus: resetTransportState,
    reset: resetTransportState,
  };
}

View File

@@ -9,6 +9,8 @@ describe("qa suite failure reply handling", () => {
const makeScenario = (
id: string,
config?: Record<string, unknown>,
plugins?: string[],
gatewayConfigPatch?: Record<string, unknown>,
): Parameters<typeof qaSuiteTesting.selectQaSuiteScenarios>[0]["scenarios"][number] =>
({
id,
@@ -16,6 +18,8 @@ describe("qa suite failure reply handling", () => {
surface: "test",
objective: "test",
successCriteria: ["test"],
plugins,
gatewayConfigPatch,
sourcePath: `qa/scenarios/${id}.md`,
execution: {
kind: "flow",
@@ -129,6 +133,72 @@ describe("qa suite failure reply handling", () => {
).toEqual(["anthropic-only"]);
});
// Plugin ids repeated across scenarios must appear once, ordered by first appearance;
// a scenario that declares no plugins contributes nothing.
it("collects unique scenario-declared bundled plugins in encounter order", () => {
  const collected = qaSuiteTesting.collectQaSuitePluginIds([
    makeScenario("generic", undefined, ["active-memory", "memory-wiki"]),
    makeScenario("other", undefined, ["memory-wiki", "openai"]),
    makeScenario("plain"),
  ]);
  const expectedOrder = ["active-memory", "memory-wiki", "openai"];
  expect(collected).toEqual(expectedOrder);
});
// The second scenario's patch is merged onto the first: keys it does not mention survive,
// and the shared "active-memory" config object is combined member by member.
it("merge-patches scenario startup config in encounter order", () => {
  const enableActiveMemoryPatch = {
    plugins: {
      entries: {
        "active-memory": {
          config: {
            enabled: true,
            agents: ["qa"],
          },
        },
      },
    },
  };
  const liveDefaultsPatch = {
    agents: {
      defaults: {
        thinkingDefault: "minimal",
      },
    },
    plugins: {
      entries: {
        "active-memory": {
          config: {
            transcriptDir: "qa-memory-e2e",
          },
        },
      },
    },
  };
  const mergedPatch = qaSuiteTesting.collectQaSuiteGatewayConfigPatch([
    makeScenario("active-memory", undefined, ["active-memory"], enableActiveMemoryPatch),
    makeScenario("live-defaults", undefined, undefined, liveDefaultsPatch),
  ]);
  expect(mergedPatch).toEqual({
    agents: {
      defaults: {
        thinkingDefault: "minimal",
      },
    },
    plugins: {
      entries: {
        "active-memory": {
          config: {
            enabled: true,
            agents: ["qa"],
            transcriptDir: "qa-memory-e2e",
          },
        },
      },
    },
  });
});
it("filters provider-specific scenarios from an implicit live lane", () => {
const scenarios = [
makeScenario("generic"),

View File

@@ -59,6 +59,7 @@ import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } fro
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
import { runScenarioFlow } from "./scenario-flow-runner.js";
import { createQaScenarioRuntimeApi } from "./scenario-runtime-api.js";
type QaSuiteStep = {
name: string;
@@ -261,6 +262,57 @@ function selectQaSuiteScenarios(params: {
);
}
/**
 * Gathers the plugin ids declared on the given scenarios.
 *
 * Each id is trimmed, blank ids are dropped, and duplicates are removed while
 * keeping the order in which ids were first encountered. Scenarios whose
 * `plugins` field is not an array are ignored.
 *
 * @param scenarios Scenarios to scan.
 * @returns The de-duplicated plugin ids in first-seen order.
 */
function collectQaSuitePluginIds(
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"],
) {
  const seenIds = new Set<string>();
  for (const scenario of scenarios) {
    if (!Array.isArray(scenario.plugins)) {
      continue;
    }
    // Trim every id before filtering, matching a map-then-filter pass over the list.
    const trimmedIds = scenario.plugins.map((rawId) => rawId.trim());
    for (const trimmedId of trimmedIds) {
      if (trimmedId.length > 0) {
        // A Set keeps insertion order, so the first occurrence fixes the position.
        seenIds.add(trimmedId);
      }
    }
  }
  return [...seenIds];
}
/**
 * Type guard for "object-like, but neither null nor an array".
 *
 * Only `typeof` and `Array.isArray` are consulted, so any non-array object
 * (including class instances) is accepted.
 */
function isQaPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Applies a JSON-Merge-Patch-style `patch` to `base` and returns the result.
 *
 * - A `patch` that is not an object (per `isQaPlainObject`) replaces `base` outright.
 * - Otherwise a shallow copy of `base` is taken (or an empty object when `base`
 *   is not an object) and each patch member is applied to it:
 *   `null` deletes the key, an object value is merged recursively, and anything
 *   else (scalars, arrays) overwrites the key.
 *
 * Neither argument is mutated; values that are not merged are shared by reference.
 *
 * @param base  The value being patched.
 * @param patch The patch to apply.
 * @returns The patched value.
 */
function applyQaMergePatch(base: unknown, patch: unknown): unknown {
  if (!isQaPlainObject(patch)) {
    return patch;
  }
  const result: Record<string, unknown> = isQaPlainObject(base) ? { ...base } : {};
  for (const [key, value] of Object.entries(patch)) {
    if (key === "__proto__") {
      // Parsed JSON/YAML can carry "__proto__" as an own key. Assigning it through
      // `result[key] = …` would invoke the Object.prototype setter and swap the
      // result's prototype instead of storing a property, so the member is ignored.
      continue;
    }
    if (value === null) {
      delete result[key];
      continue;
    }
    result[key] = isQaPlainObject(value) ? applyQaMergePatch(result[key], value) : value;
  }
  return result;
}
/**
 * Folds every scenario's `gatewayConfigPatch` into a single patch object.
 *
 * Scenarios are processed in order; each object-valued patch is merged onto the
 * accumulated result with `applyQaMergePatch`, starting from an empty object.
 * Because of that, a `null` member removes a key contributed by an earlier
 * scenario rather than being carried into the returned patch.
 *
 * @param scenarios Scenarios to scan.
 * @returns The combined patch, or `undefined` when no scenario supplies an object patch.
 */
function collectQaSuiteGatewayConfigPatch(
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"],
): Record<string, unknown> | undefined {
  return scenarios.reduce<Record<string, unknown> | undefined>((combined, scenario) => {
    const scenarioPatch = scenario.gatewayConfigPatch;
    if (!isQaPlainObject(scenarioPatch)) {
      // Missing or non-object patches leave the accumulator as it was.
      return combined;
    }
    return applyQaMergePatch(combined ?? {}, scenarioPatch) as Record<string, unknown>;
  }, undefined);
}
/**
 * Short alias for `resolveQaLiveTurnTimeoutMs`: forwards both arguments unchanged
 * and returns whatever that function returns.
 */
function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) {
  const resolvedTimeoutMs = resolveQaLiveTurnTimeoutMs(env, fallbackMs);
  return resolvedTimeoutMs;
}
@@ -1158,171 +1210,81 @@ async function handleQaAction(params: {
return extractQaToolPayload(result as Parameters<typeof extractQaToolPayload>[0]);
}
/**
 * Shape of the object returned by `createScenarioFlowApi`.
 *
 * It bundles the suite environment and per-scenario data with Node utilities and
 * the suite's helper functions. Most members are typed with `typeof <name>`, so
 * each one tracks the signature of the implementation it is named after.
 */
type QaScenarioFlowApi = {
env: QaSuiteEnvironment;
lab: QaSuiteEnvironment["lab"];
state: QaTransportState;
scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number];
config: Record<string, unknown>;
fs: typeof fs;
path: typeof path;
sleep: typeof sleep;
randomUUID: typeof randomUUID;
runScenario: typeof runScenario;
waitForCondition: typeof waitForCondition;
waitForOutboundMessage: typeof waitForOutboundMessage;
waitForTransportOutboundMessage: typeof waitForTransportOutboundMessage;
waitForChannelOutboundMessage: typeof waitForChannelOutboundMessage;
waitForNoOutbound: typeof waitForNoOutbound;
waitForNoTransportOutbound: typeof waitForNoTransportOutbound;
recentOutboundSummary: typeof recentOutboundSummary;
formatConversationTranscript: typeof formatConversationTranscript;
readTransportTranscript: typeof readTransportTranscript;
formatTransportTranscript: typeof formatTransportTranscript;
fetchJson: typeof fetchJson;
waitForGatewayHealthy: typeof waitForGatewayHealthy;
waitForTransportReady: typeof waitForTransportReady;
// Typed against waitForTransportReady, so this member shares that function's signature.
waitForChannelReady: typeof waitForTransportReady;
waitForQaChannelReady: typeof waitForQaChannelReady;
waitForConfigRestartSettle: typeof waitForConfigRestartSettle;
patchConfig: typeof patchConfig;
applyConfig: typeof applyConfig;
readConfigSnapshot: typeof readConfigSnapshot;
createSession: typeof createSession;
readEffectiveTools: typeof readEffectiveTools;
readSkillStatus: typeof readSkillStatus;
readRawQaSessionStore: typeof readRawQaSessionStore;
runQaCli: typeof runQaCli;
extractMediaPathFromText: typeof extractMediaPathFromText;
resolveGeneratedImagePath: typeof resolveGeneratedImagePath;
startAgentRun: typeof startAgentRun;
waitForAgentRun: typeof waitForAgentRun;
listCronJobs: typeof listCronJobs;
waitForCronRunCompletion: typeof waitForCronRunCompletion;
readDoctorMemoryStatus: typeof readDoctorMemoryStatus;
forceMemoryIndex: typeof forceMemoryIndex;
findSkill: typeof findSkill;
writeWorkspaceSkill: typeof writeWorkspaceSkill;
callPluginToolsMcp: typeof callPluginToolsMcp;
runAgentPrompt: typeof runAgentPrompt;
ensureImageGenerationConfigured: typeof ensureImageGenerationConfigured;
handleQaAction: typeof handleQaAction;
extractQaToolPayload: typeof extractQaToolPayload;
formatMemoryDreamingDay: typeof formatMemoryDreamingDay;
resolveSessionTranscriptsDirForAgent: typeof resolveSessionTranscriptsDirForAgent;
buildAgentSessionKey: typeof buildAgentSessionKey;
normalizeLowercaseStringOrEmpty: typeof normalizeLowercaseStringOrEmpty;
formatErrorMessage: typeof formatErrorMessage;
liveTurnTimeoutMs: typeof liveTurnTimeoutMs;
resolveQaLiveTurnTimeoutMs: typeof resolveQaLiveTurnTimeoutMs;
splitModelRef: typeof splitModelRef;
qaChannelPlugin: typeof qaChannelPlugin;
hasDiscoveryLabels: typeof hasDiscoveryLabels;
reportsDiscoveryScopeLeak: typeof reportsDiscoveryScopeLeak;
reportsMissingDiscoveryFiles: typeof reportsMissingDiscoveryFiles;
hasModelSwitchContinuityEvidence: typeof hasModelSwitchContinuityEvidence;
// Plain strings; going by the names these hold base64-encoded PNG payloads (confirm at the definitions).
imageUnderstandingPngBase64: string;
imageUnderstandingLargePngBase64: string;
imageUnderstandingValidPngBase64: string;
// Transport accessors, typed via the corresponding QaTransportState members.
getTransportSnapshot: () => ReturnType<QaTransportState["getSnapshot"]>;
resetTransport: () => Promise<void>;
injectInboundMessage: QaTransportState["addInboundMessage"];
injectOutboundMessage: QaTransportState["addOutboundMessage"];
readTransportMessage: QaTransportState["readMessage"];
// resetBus and reset have the same signature as resetTransport above.
resetBus: () => Promise<void>;
reset: () => Promise<void>;
};
function createScenarioFlowApi(
env: QaSuiteEnvironment,
scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number],
): QaScenarioFlowApi {
return {
) {
return createQaScenarioRuntimeApi({
env,
lab: env.lab,
state: env.transport.state,
scenario,
config: scenario.execution.config ?? {},
fs,
path,
sleep,
randomUUID,
runScenario,
waitForCondition: env.transport.capabilities.waitForCondition,
waitForOutboundMessage,
waitForTransportOutboundMessage,
waitForChannelOutboundMessage,
waitForNoOutbound,
waitForNoTransportOutbound,
recentOutboundSummary,
formatConversationTranscript,
readTransportTranscript,
formatTransportTranscript,
fetchJson,
waitForGatewayHealthy,
waitForTransportReady,
waitForChannelReady: waitForTransportReady,
waitForQaChannelReady,
waitForConfigRestartSettle,
patchConfig,
applyConfig,
readConfigSnapshot,
createSession,
readEffectiveTools,
readSkillStatus,
readRawQaSessionStore,
runQaCli,
extractMediaPathFromText,
resolveGeneratedImagePath,
startAgentRun,
waitForAgentRun,
listCronJobs,
waitForCronRunCompletion,
readDoctorMemoryStatus,
forceMemoryIndex,
findSkill,
writeWorkspaceSkill,
callPluginToolsMcp,
runAgentPrompt,
ensureImageGenerationConfigured,
handleQaAction,
extractQaToolPayload,
formatMemoryDreamingDay,
resolveSessionTranscriptsDirForAgent,
buildAgentSessionKey,
normalizeLowercaseStringOrEmpty,
formatErrorMessage,
liveTurnTimeoutMs,
resolveQaLiveTurnTimeoutMs,
splitModelRef,
qaChannelPlugin,
hasDiscoveryLabels,
reportsDiscoveryScopeLeak,
reportsMissingDiscoveryFiles,
hasModelSwitchContinuityEvidence,
imageUnderstandingPngBase64: _QA_IMAGE_UNDERSTANDING_PNG_BASE64,
imageUnderstandingLargePngBase64: _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
imageUnderstandingValidPngBase64: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
getTransportSnapshot: env.transport.capabilities.getNormalizedMessageState,
resetTransport: async () => {
await env.transport.capabilities.resetNormalizedMessageState();
await sleep(100);
deps: {
fs,
path,
sleep,
randomUUID,
runScenario,
waitForOutboundMessage,
waitForTransportOutboundMessage,
waitForChannelOutboundMessage,
waitForNoOutbound,
waitForNoTransportOutbound,
recentOutboundSummary,
formatConversationTranscript,
readTransportTranscript,
formatTransportTranscript,
fetchJson,
waitForGatewayHealthy,
waitForTransportReady,
waitForQaChannelReady,
waitForConfigRestartSettle,
patchConfig,
applyConfig,
readConfigSnapshot,
createSession,
readEffectiveTools,
readSkillStatus,
readRawQaSessionStore,
runQaCli,
extractMediaPathFromText,
resolveGeneratedImagePath,
startAgentRun,
waitForAgentRun,
listCronJobs,
waitForCronRunCompletion,
readDoctorMemoryStatus,
forceMemoryIndex,
findSkill,
writeWorkspaceSkill,
callPluginToolsMcp,
runAgentPrompt,
ensureImageGenerationConfigured,
handleQaAction,
extractQaToolPayload,
formatMemoryDreamingDay,
resolveSessionTranscriptsDirForAgent,
buildAgentSessionKey,
normalizeLowercaseStringOrEmpty,
formatErrorMessage,
liveTurnTimeoutMs,
resolveQaLiveTurnTimeoutMs,
splitModelRef,
qaChannelPlugin,
hasDiscoveryLabels,
reportsDiscoveryScopeLeak,
reportsMissingDiscoveryFiles,
hasModelSwitchContinuityEvidence,
},
injectInboundMessage: env.transport.capabilities.sendInboundMessage,
injectOutboundMessage: env.transport.capabilities.injectOutboundMessage,
readTransportMessage: env.transport.capabilities.readNormalizedMessage,
resetBus: async () => {
await env.transport.capabilities.resetNormalizedMessageState();
await sleep(100);
constants: {
imageUnderstandingPngBase64: _QA_IMAGE_UNDERSTANDING_PNG_BASE64,
imageUnderstandingLargePngBase64: _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
imageUnderstandingValidPngBase64: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
},
reset: async () => {
await env.transport.capabilities.resetNormalizedMessageState();
await sleep(100);
},
};
});
}
export const qaSuiteTesting = {
collectQaSuiteGatewayConfigPatch,
collectQaSuitePluginIds,
createScenarioWaitForCondition,
findFailureOutboundMessage,
getGatewayRetryAfterMs,
@@ -1415,7 +1377,7 @@ async function writeQaSuiteArtifacts(params: {
export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResult> {
const startedAt = new Date();
const repoRoot = path.resolve(params?.repoRoot ?? process.cwd());
const providerMode = normalizeQaProviderMode(params?.providerMode ?? "mock-openai");
const providerMode = normalizeQaProviderMode(params?.providerMode ?? "live-frontier");
const transportId = normalizeQaTransportId(params?.transportId);
const primaryModel = params?.primaryModel ?? defaultQaModelForMode(providerMode);
const alternateModel =
@@ -1433,6 +1395,8 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
primaryModel,
claudeCliAuthMode: params?.claudeCliAuthMode,
});
const enabledPluginIds = collectQaSuitePluginIds(selectedCatalogScenarios);
const gatewayConfigPatch = collectQaSuiteGatewayConfigPatch(selectedCatalogScenarios);
const concurrency = normalizeQaSuiteConcurrency(
params?.concurrency,
selectedCatalogScenarios.length,
@@ -1629,6 +1593,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
thinkingDefault: params?.thinkingDefault,
claudeCliAuthMode: params?.claudeCliAuthMode,
controlUiEnabled: params?.controlUiEnabled ?? true,
enabledPluginIds,
mutateConfig: gatewayConfigPatch
? (cfg) => applyQaMergePatch(cfg, gatewayConfigPatch) as OpenClawConfig
: undefined,
});
lab.setControlUi({
controlUiProxyTarget: gateway.baseUrl,

View File

@@ -0,0 +1,225 @@
# Active Memory pre-reply recall
```yaml qa-scenario
id: active-memory-preprompt-recall
title: Active Memory pre-reply recall
surface: memory
objective: Verify Active Memory surfaces a memory-only preference before the main reply, and that the same question stays unresolved when the plugin is off.
plugins:
- active-memory
gatewayConfigPatch:
plugins:
entries:
active-memory:
enabled: true
config:
enabled: true
agents:
- qa
allowedChatTypes:
- direct
logging: true
persistTranscripts: true
transcriptDir: qa-memory-e2e
queryMode: recent
maxSummaryChars: 220
successCriteria:
- With Active Memory off, the session shows no Active Memory plugin activity.
- With Active Memory on, plugin-owned evidence shows the Active Memory sub-agent searched memory before the main reply.
- Live lane proves the first user-visible reply uses the recalled preference.
docsRefs:
- docs/concepts/active-memory.md
- docs/concepts/memory-search.md
codeRefs:
- extensions/active-memory/index.ts
- extensions/qa-lab/src/suite.ts
- extensions/qa-lab/src/mock-openai-server.ts
execution:
kind: flow
summary: Verify Active Memory stays off when session-toggled off, runs memory search/get when enabled, and helps a live model answer with the recalled preference in the first visible reply.
config:
baselineConversationId: qa-active-memory-off
activeConversationId: qa-active-memory-on
memoryFact: "Stable QA movie night snack preference: lemon pepper wings with blue cheese."
memoryQuery: "QA movie night snack lemon pepper wings blue cheese"
expectedNeedle: lemon pepper wings
prompt: "Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence."
promptSnippet: "Silent snack recall check"
transcriptDir: qa-memory-e2e
```
```yaml qa-flow
steps:
- name: only active memory surfaces the hidden snack preference
actions:
- call: reset
- call: fs.rm
args:
- expr: "path.join(env.gateway.workspaceDir, 'MEMORY.md')"
- force: true
- call: fs.rm
args:
- expr: "path.join(env.gateway.workspaceDir, 'memory', `${formatMemoryDreamingDay(Date.now())}.md`)"
- force: true
- call: fs.writeFile
args:
- expr: "path.join(env.gateway.workspaceDir, 'MEMORY.md')"
- expr: "`${config.memoryFact}\\n`"
- utf8
- call: forceMemoryIndex
args:
- env:
ref: env
query:
expr: config.memoryQuery
expectedNeedle:
expr: config.expectedNeedle
- set: baselineSessionKey
value:
expr: "'agent:qa:qa-channel:direct:active-memory-off'"
- set: activeSessionKey
value:
expr: "'agent:qa:qa-channel:direct:active-memory-on'"
- set: transcriptRoot
value:
expr: "path.join(env.gateway.tempRoot, 'state', 'plugins', 'active-memory', 'transcripts', 'agents', 'qa', config.transcriptDir)"
- set: toggleStorePath
value:
expr: "path.join(env.gateway.tempRoot, 'state', 'plugins', 'active-memory', 'session-toggles.json')"
- call: fs.rm
args:
- ref: transcriptRoot
- recursive: true
force: true
- call: fs.rm
args:
- ref: toggleStorePath
- force: true
- call: fs.mkdir
args:
- expr: "path.dirname(toggleStorePath)"
- recursive: true
- call: fs.writeFile
args:
- ref: toggleStorePath
- expr: "`${JSON.stringify({ sessions: { [baselineSessionKey]: { disabled: true, updatedAt: Date.now() } } }, null, 2)}\\n`"
- utf8
- set: requestCountBeforeBaseline
value:
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
- set: baselineStartIndex
value:
expr: "state.getSnapshot().messages.length"
- call: runAgentPrompt
args:
- ref: env
- sessionKey:
ref: baselineSessionKey
message:
expr: config.prompt
timeoutMs:
expr: liveTurnTimeoutMs(env, 45000)
- call: waitForOutboundMessage
saveAs: baselineOutbound
args:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'qa-operator'"
- expr: liveTurnTimeoutMs(env, 30000)
- sinceIndex:
ref: baselineStartIndex
- set: baselineLower
value:
expr: "normalizeLowercaseStringOrEmpty(baselineOutbound.text)"
- if:
expr: "Boolean(env.mock)"
then:
- set: baselineMockRequests
value:
expr: "(await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBeforeBaseline)"
- set: baselineSessionStore
value:
expr: "await readRawQaSessionStore(env)"
- assert:
expr: "!Array.isArray(baselineSessionStore[baselineSessionKey]?.pluginDebugEntries) || !baselineSessionStore[baselineSessionKey].pluginDebugEntries.some((pluginEntry) => pluginEntry?.pluginId === 'active-memory')"
message: baseline session unexpectedly recorded active-memory plugin activity
- set: requestCountBeforeActive
value:
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
- call: fs.writeFile
args:
- ref: toggleStorePath
- expr: "'{}\\n'"
- utf8
- set: activeStartIndex
value:
expr: "state.getSnapshot().messages.length"
- call: runAgentPrompt
args:
- ref: env
- sessionKey:
ref: activeSessionKey
message:
expr: config.prompt
timeoutMs:
expr: liveTurnTimeoutMs(env, 45000)
- call: waitForOutboundMessage
saveAs: activeOutbound
args:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'qa-operator'"
- expr: liveTurnTimeoutMs(env, 30000)
- sinceIndex:
ref: activeStartIndex
- set: activeLower
value:
expr: "normalizeLowercaseStringOrEmpty(activeOutbound.text)"
- if:
expr: "!env.mock"
then:
- assert:
expr: "activeLower.includes(normalizeLowercaseStringOrEmpty(config.expectedNeedle))"
message:
expr: "`active memory reply missed the hidden preference: ${activeOutbound.text}`"
- call: waitForCondition
saveAs: transcriptPath
args:
- lambda:
async: true
expr: "await (async () => { const entries = (await fs.readdir(transcriptRoot).catch(() => [])).filter((entry) => entry.endsWith('.jsonl')).toSorted(); return entries.length > 0 ? path.join(transcriptRoot, entries.at(-1)) : undefined; })()"
- 10000
- call: fs.readFile
saveAs: transcriptText
args:
- ref: transcriptPath
- utf8
- assert:
expr: "transcriptText.includes('memory_search')"
message: active memory transcript missing memory_search
- assert:
expr: "transcriptText.includes('memory_get')"
message: active memory transcript missing memory_get
- call: waitForCondition
saveAs: activeSessionEntry
args:
- lambda:
async: true
expr: "await (async () => { const store = await readRawQaSessionStore(env); const entry = store[activeSessionKey]; if (!entry || !Array.isArray(entry.pluginDebugEntries)) return undefined; return entry.pluginDebugEntries.some((pluginEntry) => pluginEntry?.pluginId === 'active-memory' && Array.isArray(pluginEntry.lines) && pluginEntry.lines.some((line) => line.includes('Active Memory: ok'))) ? entry : undefined; })()"
- 10000
- if:
expr: "Boolean(env.mock)"
then:
- set: mockRequests
value:
expr: "(await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBeforeActive)"
- assert:
expr: "mockRequests.some((request) => request.allInputText.includes('You are a memory search agent.') && request.plannedToolName === 'memory_search')"
message: expected mock Active Memory search request
- assert:
expr: "mockRequests.some((request) => request.allInputText.includes('You are a memory search agent.') && request.plannedToolName === 'memory_get')"
message: expected mock Active Memory memory_get request
detailsExpr: "`${activeOutbound.text}\\n\\ntranscript=${transcriptPath}`"
```