mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:30:43 +00:00
feat(qa-lab): support scenario-defined plugin runs
This commit is contained in:
@@ -166,7 +166,7 @@ export function registerQaLabCli(program: Command) {
|
||||
.option(
|
||||
"--provider-mode <mode>",
|
||||
"Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
|
||||
"mock-openai",
|
||||
"live-frontier",
|
||||
)
|
||||
.option("--model <ref>", "Primary provider/model ref")
|
||||
.option("--alt-model <ref>", "Alternate provider/model ref")
|
||||
|
||||
@@ -836,6 +836,7 @@ export async function startQaGatewayChild(params: {
|
||||
thinkingDefault?: QaThinkingLevel;
|
||||
claudeCliAuthMode?: QaCliBackendAuthMode;
|
||||
controlUiEnabled?: boolean;
|
||||
enabledPluginIds?: string[];
|
||||
mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig;
|
||||
}) {
|
||||
const tempRoot = await fs.mkdtemp(
|
||||
@@ -873,14 +874,17 @@ export async function startQaGatewayChild(params: {
|
||||
const liveProviderConfigs = await readQaLiveProviderConfigOverrides({
|
||||
providerIds: liveProviderIds,
|
||||
});
|
||||
const enabledPluginIds =
|
||||
const liveOwnerPluginIds =
|
||||
liveProviderIds.length > 0
|
||||
? await resolveQaOwnerPluginIdsForProviderIds({
|
||||
repoRoot: params.repoRoot,
|
||||
providerIds: liveProviderIds,
|
||||
providerConfigs: liveProviderConfigs,
|
||||
})
|
||||
: undefined;
|
||||
: [];
|
||||
const enabledPluginIds = [
|
||||
...new Set([...(liveOwnerPluginIds ?? []), ...(params.enabledPluginIds ?? [])]),
|
||||
];
|
||||
const buildGatewayConfig = (gatewayPort: number) =>
|
||||
buildQaGatewayConfig({
|
||||
bind: "loopback",
|
||||
|
||||
@@ -116,7 +116,7 @@ describe("qa-lab server", () => {
|
||||
expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10);
|
||||
expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true);
|
||||
expect(bootstrap.runner.status).toBe("idle");
|
||||
expect(bootstrap.runner.selection.providerMode).toBe("mock-openai");
|
||||
expect(bootstrap.runner.selection.providerMode).toBe("live-frontier");
|
||||
expect(bootstrap.runner.selection.scenarioIds).toHaveLength(bootstrap.scenarios.length);
|
||||
|
||||
const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, {
|
||||
|
||||
@@ -433,6 +433,151 @@ describe("qa mock openai server", () => {
|
||||
"Protocol note: I checked memory and the current Project Nebula codename is ORBIT-10.",
|
||||
);
|
||||
|
||||
const activeMemorySearch = await fetch(`${server.baseUrl}/v1/responses`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"content-type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
stream: true,
|
||||
input: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: [
|
||||
"You are a memory search agent.",
|
||||
"Use only memory_search and memory_get.",
|
||||
"",
|
||||
"Conversation context:",
|
||||
"Latest user message:",
|
||||
"Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
|
||||
].join("\n"),
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
expect(activeMemorySearch.status).toBe(200);
|
||||
expect(await activeMemorySearch.text()).toContain('"name":"memory_search"');
|
||||
|
||||
const activeMemoryGet = await fetch(`${server.baseUrl}/v1/responses`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"content-type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
stream: true,
|
||||
input: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: [
|
||||
"You are a memory search agent.",
|
||||
"Use only memory_search and memory_get.",
|
||||
"",
|
||||
"Conversation context:",
|
||||
"Latest user message:",
|
||||
"Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
|
||||
].join("\n"),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "function_call_output",
|
||||
output: JSON.stringify({
|
||||
results: [
|
||||
{
|
||||
path: "MEMORY.md",
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
},
|
||||
],
|
||||
}),
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
expect(activeMemoryGet.status).toBe(200);
|
||||
expect(await activeMemoryGet.text()).toContain('"name":"memory_get"');
|
||||
|
||||
const activeMemorySummary = await fetch(`${server.baseUrl}/v1/responses`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"content-type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
stream: false,
|
||||
input: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: [
|
||||
"You are a memory search agent.",
|
||||
"Use only memory_search and memory_get.",
|
||||
"",
|
||||
"Conversation context:",
|
||||
"Latest user message:",
|
||||
"Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
|
||||
].join("\n"),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "function_call_output",
|
||||
output: JSON.stringify({
|
||||
text: "Stable QA movie night snack preference: lemon pepper wings with blue cheese.",
|
||||
}),
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
expect(activeMemorySummary.status).toBe(200);
|
||||
expect(JSON.stringify(await activeMemorySummary.json())).toContain(
|
||||
"lemon pepper wings with blue cheese",
|
||||
);
|
||||
|
||||
const injectedMainReply = await fetch(`${server.baseUrl}/v1/responses`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"content-type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
stream: false,
|
||||
instructions: [
|
||||
"System context:",
|
||||
"<active_memory_plugin>User usually wants lemon pepper wings with blue cheese for QA movie night.</active_memory_plugin>",
|
||||
].join("\n"),
|
||||
input: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: "Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence.",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
expect(injectedMainReply.status).toBe(200);
|
||||
expect(JSON.stringify(await injectedMainReply.json())).toContain(
|
||||
"lemon pepper wings with blue cheese",
|
||||
);
|
||||
const lastRequest = await fetch(`${server.baseUrl}/debug/last-request`);
|
||||
expect(lastRequest.status).toBe(200);
|
||||
expect(await lastRequest.json()).toMatchObject({
|
||||
instructions: expect.stringContaining("<active_memory_plugin>"),
|
||||
allInputText: expect.stringContaining("<active_memory_plugin>"),
|
||||
});
|
||||
|
||||
const spawn = await fetch(`${server.baseUrl}/v1/responses`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
|
||||
@@ -27,6 +27,7 @@ type MockOpenAiRequestSnapshot = {
|
||||
body: Record<string, unknown>;
|
||||
prompt: string;
|
||||
allInputText: string;
|
||||
instructions?: string;
|
||||
toolOutput: string;
|
||||
model: string;
|
||||
imageInputCount: number;
|
||||
@@ -181,6 +182,23 @@ function extractAllInputTexts(input: ResponsesInputItem[]) {
|
||||
return texts.join("\n");
|
||||
}
|
||||
|
||||
/**
 * Reads the top-level `instructions` field from a parsed request body.
 *
 * @param body - Parsed JSON request body.
 * @returns The trimmed instructions string, or `""` when the field is missing
 *   or is not a string.
 */
function extractInstructionsText(body: Record<string, unknown>): string {
  const { instructions } = body;
  return typeof instructions === "string" ? instructions.trim() : "";
}
|
||||
|
||||
/**
 * Builds one searchable text blob for a request: the body-level instructions
 * first, followed by the text extracted from the input items.
 *
 * @param input - Input items of the request.
 * @param body - Parsed JSON request body (source of `instructions`).
 * @returns The non-empty parts joined with a newline; `""` when both are empty.
 */
function extractAllRequestTexts(
  input: ResponsesInputItem[],
  body: Record<string, unknown>,
): string {
  // Instructions come first so they precede the input text in the joined blob.
  return [extractInstructionsText(body), extractAllInputTexts(input)]
    .filter((part) => part.length > 0)
    .join("\n");
}
|
||||
|
||||
function countImageInputs(input: ResponsesInputItem[]) {
|
||||
let count = 0;
|
||||
for (const item of input) {
|
||||
@@ -320,6 +338,33 @@ function extractOrbitCode(text: string) {
|
||||
return /\bORBIT-\d+\b/i.exec(text)?.[0]?.toUpperCase() ?? null;
|
||||
}
|
||||
|
||||
/** Replacement text for each entity reference recognised by `decodeXmlEntities`. */
const XML_ENTITY_REPLACEMENTS: Readonly<Record<string, string>> = {
  "&lt;": "<",
  "&gt;": ">",
  "&amp;": "&",
  "&quot;": '"',
  "&apos;": "'",
  "&#39;": "'",
};

/**
 * Decodes the predefined XML entities (`&lt;`, `&gt;`, `&amp;`, `&quot;`,
 * `&apos;`) plus the numeric apostrophe form `&#39;`.
 *
 * The decode is a single pass over the input, so text produced by one
 * replacement is never decoded again: `&amp;quot;` becomes `&quot;`, not `"`.
 * A chain of sequential replacements that handles `&amp;` before the quote
 * entities would double-decode such input.
 *
 * NOTE(review): the entity literals in the reviewed copy were lost to HTML
 * decoding, so whether the apostrophe was spelled `&apos;` or `&#39;` could not
 * be confirmed; both spellings are accepted here.
 *
 * @param text - Text that may contain XML entity references.
 * @returns The text with recognised entity references replaced; anything else
 *   is left untouched.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(
    /&(?:lt|gt|amp|quot|apos|#39);/g,
    (entity) => XML_ENTITY_REPLACEMENTS[entity] ?? entity,
  );
}
|
||||
|
||||
/**
 * Pulls the payload of the first `<active_memory_plugin>…</active_memory_plugin>`
 * element out of a text blob (tag match is case-insensitive).
 *
 * @param text - Text to search.
 * @returns The entity-decoded, trimmed element content, or `null` when the
 *   element is absent or its content is empty after surrounding whitespace
 *   is stripped.
 */
function extractActiveMemorySummary(text: string): string | null {
  const match = /<active_memory_plugin>\s*([\s\S]*?)\s*<\/active_memory_plugin>/i.exec(text);
  const inner = match?.[1];
  // An empty capture is treated the same as a missing element.
  return inner ? decodeXmlEntities(inner).trim() : null;
}
|
||||
|
||||
/**
 * Tells whether a text blob contains the marker sentence
 * "You are a memory search agent." (exact, case-sensitive match).
 *
 * @param text - Request text to inspect.
 * @returns `true` when the marker sentence occurs anywhere in `text`.
 */
function isActiveMemorySubagentPrompt(text: string): boolean {
  return text.includes("You are a memory search agent.");
}
|
||||
|
||||
/**
 * Finds the first snack-preference phrase in a text blob.
 *
 * Whitespace runs are collapsed to single spaces before matching, so the
 * phrase may span line breaks in the input. Recognised phrases
 * (case-insensitive): "lemon pepper wings", optionally followed by
 * "with blue cheese", or "blue cheese", optionally followed by
 * "with lemon pepper wings".
 *
 * @param text - Text to search.
 * @returns The matched phrase as it appears in the whitespace-normalised
 *   text, or `null` when no phrase is present.
 */
function extractSnackPreference(text: string): string | null {
  const normalized = text.replace(/\s+/g, " ").trim();
  const match =
    /(lemon pepper wings(?:\s+with\s+blue cheese)?|blue cheese(?:\s+with\s+lemon pepper wings)?)/i.exec(
      normalized,
    );
  return match?.[0]?.trim() ?? null;
}
|
||||
|
||||
function extractLastCapture(text: string, pattern: RegExp) {
|
||||
let lastMatch: RegExpExecArray | null = null;
|
||||
const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
|
||||
@@ -355,7 +400,7 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
|
||||
const toolOutput = extractToolOutput(input);
|
||||
const toolJson = parseToolOutputJson(toolOutput);
|
||||
const userTexts = extractAllUserTexts(input);
|
||||
const allInputText = extractAllInputTexts(input);
|
||||
const allInputText = extractAllRequestTexts(input, body);
|
||||
const rememberedFact = extractRememberedFact(userTexts);
|
||||
const model = typeof body.model === "string" ? body.model : "";
|
||||
const memorySnippet =
|
||||
@@ -369,6 +414,8 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
|
||||
const exactReplyDirective = extractExactReplyDirective(allInputText);
|
||||
const exactMarkerDirective = extractExactMarkerDirective(allInputText);
|
||||
const imageInputCount = countImageInputs(input);
|
||||
const activeMemorySummary = extractActiveMemorySummary(allInputText);
|
||||
const snackPreference = extractSnackPreference(activeMemorySummary ?? memorySnippet);
|
||||
|
||||
if (/what was the qa canary code/i.test(prompt) && rememberedFact) {
|
||||
return `Protocol note: the QA canary code was ${rememberedFact}.`;
|
||||
@@ -400,6 +447,12 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
|
||||
if (/memory tools check/i.test(prompt) && orbitCode) {
|
||||
return `Protocol note: I checked memory and the project codename is ${orbitCode}.`;
|
||||
}
|
||||
if (/silent snack recall check/i.test(prompt) && snackPreference) {
|
||||
return `Protocol note: you usually want ${snackPreference} for QA movie night.`;
|
||||
}
|
||||
if (/silent snack recall check/i.test(prompt)) {
|
||||
return "Protocol note: I do not have enough context to say what you usually want for QA movie night.";
|
||||
}
|
||||
if (/tool continuity check/i.test(prompt) && toolOutput) {
|
||||
return `Protocol note: model switch handoff confirmed on ${model || "the requested model"}. QA mission from QA_KICKOFF_TASK.md still applies: understand this OpenClaw repo from source + docs before acting.`;
|
||||
}
|
||||
@@ -531,7 +584,7 @@ async function buildResponsesPayload(body: Record<string, unknown>) {
|
||||
const prompt = extractLastUserText(input);
|
||||
const toolOutput = extractToolOutput(input);
|
||||
const toolJson = parseToolOutputJson(toolOutput);
|
||||
const allInputText = extractAllInputTexts(input);
|
||||
const allInputText = extractAllRequestTexts(input, body);
|
||||
const isGroupChat = allInputText.includes('"is_group_chat": true');
|
||||
const isBaselineUnmentionedChannelChatter = /\bno bot ping here\b/i.test(prompt);
|
||||
if (isHeartbeatPrompt(prompt)) {
|
||||
@@ -591,6 +644,48 @@ async function buildResponsesPayload(body: Record<string, unknown>) {
|
||||
});
|
||||
}
|
||||
}
|
||||
if (
|
||||
isActiveMemorySubagentPrompt(allInputText) &&
|
||||
/silent snack recall check/i.test(allInputText)
|
||||
) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("memory_search", {
|
||||
query: "QA movie night snack lemon pepper wings blue cheese",
|
||||
maxResults: 3,
|
||||
});
|
||||
}
|
||||
const results = Array.isArray(toolJson?.results)
|
||||
? (toolJson.results as Array<Record<string, unknown>>)
|
||||
: [];
|
||||
const first = results[0];
|
||||
if (
|
||||
typeof first?.path === "string" &&
|
||||
(typeof first.startLine === "number" || typeof first.endLine === "number")
|
||||
) {
|
||||
const from =
|
||||
typeof first.startLine === "number"
|
||||
? Math.max(1, first.startLine)
|
||||
: typeof first.endLine === "number"
|
||||
? Math.max(1, first.endLine)
|
||||
: 1;
|
||||
return buildToolCallEventsWithArgs("memory_get", {
|
||||
path: first.path,
|
||||
from,
|
||||
lines: 4,
|
||||
});
|
||||
}
|
||||
const memorySnippet =
|
||||
typeof toolJson?.text === "string"
|
||||
? toolJson.text
|
||||
: Array.isArray(toolJson?.results)
|
||||
? JSON.stringify(toolJson.results)
|
||||
: toolOutput;
|
||||
const snackPreference = extractSnackPreference(memorySnippet);
|
||||
if (snackPreference) {
|
||||
return buildAssistantEvents(`User usually wants ${snackPreference} for QA movie night.`);
|
||||
}
|
||||
return buildAssistantEvents("NONE");
|
||||
}
|
||||
if (/session memory ranking check/i.test(prompt)) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("memory_search", {
|
||||
@@ -798,7 +893,8 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
|
||||
raw,
|
||||
body,
|
||||
prompt: extractLastUserText(input),
|
||||
allInputText: extractAllInputTexts(input),
|
||||
allInputText: extractAllRequestTexts(input, body),
|
||||
instructions: extractInstructionsText(body) || undefined,
|
||||
toolOutput: extractToolOutput(input),
|
||||
model: typeof body.model === "string" ? body.model : "",
|
||||
imageInputCount: countImageInputs(input),
|
||||
|
||||
@@ -81,7 +81,7 @@ describe("qa multipass runtime", () => {
|
||||
expect(plan.summaryPath).toBe(path.join(outputDir, "qa-suite-summary.json"));
|
||||
});
|
||||
|
||||
it("renders a guest script that runs the mock qa suite with explicit scenarios", () => {
|
||||
it("renders a guest script that runs the live qa suite by default", () => {
|
||||
const plan = createQaMultipassPlan({
|
||||
repoRoot: process.cwd(),
|
||||
outputDir: path.join(process.cwd(), ".artifacts", "qa-e2e", "multipass-test"),
|
||||
@@ -93,9 +93,8 @@ describe("qa multipass runtime", () => {
|
||||
expect(script).toContain("pnpm install --frozen-lockfile");
|
||||
expect(script).toContain("pnpm build");
|
||||
expect(script).toContain("corepack prepare 'pnpm@10.32.1' --activate");
|
||||
expect(script).toContain(
|
||||
"'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel' '--provider-mode' 'mock-openai'",
|
||||
);
|
||||
expect(script).toContain("'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel'");
|
||||
expect(script).toContain("'--provider-mode' 'live-frontier'");
|
||||
expect(script).toContain("'--scenario' 'channel-chat-baseline'");
|
||||
expect(script).toContain("'--scenario' 'thread-follow-up'");
|
||||
expect(script).toContain("/workspace/openclaw-host/.artifacts/qa-e2e/multipass-test");
|
||||
@@ -128,9 +127,8 @@ describe("qa multipass runtime", () => {
|
||||
);
|
||||
expect(plan.forwardedEnv.OPENAI_API_KEY).toBe("test-openai-key");
|
||||
expect(script).toContain("OPENAI_API_KEY='test-openai-key'");
|
||||
expect(script).toContain(
|
||||
"'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel' '--provider-mode' 'live-frontier'",
|
||||
);
|
||||
expect(script).toContain("'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel'");
|
||||
expect(script).toContain("'--provider-mode' 'live-frontier'");
|
||||
});
|
||||
|
||||
it("redacts forwarded live secrets in the persisted artifact script", () => {
|
||||
|
||||
@@ -345,7 +345,7 @@ export function createQaMultipassPlan(params: {
|
||||
const outputDir = params.outputDir ?? createQaMultipassOutputDir(params.repoRoot);
|
||||
const scenarioIds = [...new Set(params.scenarioIds ?? [])];
|
||||
const transportId = params.transportId?.trim() || "qa-channel";
|
||||
const providerMode = params.providerMode ?? "mock-openai";
|
||||
const providerMode = params.providerMode ?? "live-frontier";
|
||||
const forwardedEnv = providerMode === "live-frontier" ? resolveForwardedLiveEnv() : {};
|
||||
const hostCodexHomePath = forwardedEnv.CODEX_HOME;
|
||||
const liveProviderConfig =
|
||||
|
||||
@@ -82,6 +82,21 @@ describe("buildQaGatewayConfig", () => {
|
||||
expect(cfg.channels?.["qa-channel"]).toBeUndefined();
|
||||
});
|
||||
|
||||
it("can stage extra bundled plugins in the mock lane", () => {
|
||||
const cfg = buildQaGatewayConfig({
|
||||
bind: "loopback",
|
||||
gatewayPort: 18789,
|
||||
gatewayToken: "token",
|
||||
providerBaseUrl: "http://127.0.0.1:44080/v1",
|
||||
workspaceDir: "/tmp/qa-workspace",
|
||||
enabledPluginIds: ["active-memory"],
|
||||
...createQaChannelTransportParams(),
|
||||
});
|
||||
|
||||
expect(cfg.plugins?.allow).toEqual(["memory-core", "active-memory", "qa-channel"]);
|
||||
expect(cfg.plugins?.entries?.["active-memory"]).toEqual({ enabled: true });
|
||||
});
|
||||
|
||||
it("uses built-in provider wiring in frontier live mode", () => {
|
||||
const cfg = buildQaGatewayConfig({
|
||||
bind: "loopback",
|
||||
|
||||
@@ -162,24 +162,23 @@ export function buildQaGatewayConfig(params: {
|
||||
: selectedProviderIds,
|
||||
),
|
||||
]
|
||||
: [];
|
||||
: [
|
||||
...new Set(
|
||||
(params.enabledPluginIds ?? [])
|
||||
.map((pluginId) => pluginId.trim())
|
||||
.filter((pluginId) => pluginId.length > 0),
|
||||
),
|
||||
];
|
||||
const transportPluginIds = [...new Set(params.transportPluginIds ?? [])]
|
||||
.map((pluginId) => pluginId.trim())
|
||||
.filter((pluginId) => pluginId.length > 0);
|
||||
const pluginEntries =
|
||||
providerMode === "live-frontier"
|
||||
? Object.fromEntries(selectedPluginIds.map((pluginId) => [pluginId, { enabled: true }]))
|
||||
: {};
|
||||
const pluginEntries = Object.fromEntries(
|
||||
selectedPluginIds.map((pluginId) => [pluginId, { enabled: true }]),
|
||||
);
|
||||
const transportPluginEntries = Object.fromEntries(
|
||||
transportPluginIds.map((pluginId) => [pluginId, { enabled: true }]),
|
||||
);
|
||||
const allowedPlugins = [
|
||||
...new Set(
|
||||
providerMode === "live-frontier"
|
||||
? ["memory-core", ...selectedPluginIds, ...transportPluginIds]
|
||||
: ["memory-core", ...transportPluginIds],
|
||||
),
|
||||
];
|
||||
const allowedPlugins = [...new Set(["memory-core", ...selectedPluginIds, ...transportPluginIds])];
|
||||
const liveModelParams =
|
||||
providerMode === "live-frontier"
|
||||
? (modelRef: string) => ({
|
||||
|
||||
@@ -24,12 +24,12 @@ const scenarios = [
|
||||
];
|
||||
|
||||
describe("qa run config", () => {
|
||||
it("creates a synthetic-by-default selection that arms every scenario", () => {
|
||||
it("creates a live-by-default selection that arms every scenario", () => {
|
||||
expect(createDefaultQaRunSelection(scenarios)).toEqual({
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "mock-openai/gpt-5.4",
|
||||
alternateModel: "mock-openai/gpt-5.4-alt",
|
||||
fastMode: false,
|
||||
providerMode: "live-frontier",
|
||||
primaryModel: "openai/gpt-5.4",
|
||||
alternateModel: "openai/gpt-5.4",
|
||||
fastMode: true,
|
||||
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
|
||||
});
|
||||
});
|
||||
|
||||
@@ -38,19 +38,21 @@ export function defaultQaModelForMode(mode: QaProviderMode, alternate = false) {
|
||||
}
|
||||
|
||||
/**
 * Creates the default run selection: live-frontier provider mode with the
 * default primary and alternate models for that mode, fast mode enabled, and
 * every given scenario selected.
 *
 * @param scenarios - Scenarios to arm; each scenario's `id` is selected.
 * @returns The default run selection.
 */
export function createDefaultQaRunSelection(scenarios: QaSeedScenario[]): QaLabRunSelection {
  const providerMode: QaProviderMode = "live-frontier";
  return {
    providerMode,
    primaryModel: defaultQaModelForMode(providerMode),
    alternateModel: defaultQaModelForMode(providerMode, true),
    fastMode: true,
    scenarioIds: scenarios.map((scenario) => scenario.id),
  };
}
|
||||
|
||||
/**
 * Coerces arbitrary input into a provider mode.
 *
 * The recognised strings `"mock-openai"`, `"live-frontier"` and the legacy
 * `"live-openai"` are passed through to `normalizeQaProviderModeInput`; any
 * other value falls back to `"live-frontier"`.
 *
 * @param input - Untrusted value (for example from a request payload).
 * @returns The normalized provider mode.
 */
export function normalizeQaProviderMode(input: unknown): QaProviderMode {
  return normalizeQaProviderModeInput(
    input === "mock-openai" || input === "live-frontier" || input === "live-openai"
      ? input
      : "live-frontier",
  );
}
|
||||
|
||||
|
||||
@@ -135,6 +135,8 @@ const qaSeedScenarioSchema = z.object({
|
||||
surface: z.string().trim().min(1),
|
||||
objective: z.string().trim().min(1),
|
||||
successCriteria: z.array(z.string().trim().min(1)).min(1),
|
||||
plugins: z.array(z.string().trim().min(1)).optional(),
|
||||
gatewayConfigPatch: z.record(z.string(), z.unknown()).optional(),
|
||||
docsRefs: z.array(z.string().trim().min(1)).optional(),
|
||||
codeRefs: z.array(z.string().trim().min(1)).optional(),
|
||||
execution: qaScenarioExecutionSchema.optional(),
|
||||
|
||||
160
extensions/qa-lab/src/scenario-runtime-api.test.ts
Normal file
160
extensions/qa-lab/src/scenario-runtime-api.test.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import * as fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { createQaBusState } from "./bus-state.js";
|
||||
import {
|
||||
createQaScenarioRuntimeApi,
|
||||
type QaScenarioRuntimeConstants,
|
||||
type QaScenarioRuntimeDeps,
|
||||
} from "./scenario-runtime-api.js";
|
||||
|
||||
/**
 * Builds a `QaScenarioRuntimeDeps` fixture for tests.
 *
 * `fs`, `path` and `randomUUID` are the real Node implementations, `sleep` is
 * a resolved-immediately mock, and every other function dependency points at
 * one shared `vi.fn()` instance (so call counts on those are not
 * distinguishable per dependency).
 *
 * @param overrides - Fields to replace; spread last so they win over defaults.
 * @returns The complete dependency object.
 */
function createDeps(overrides?: Partial<QaScenarioRuntimeDeps>): QaScenarioRuntimeDeps {
  const fn = vi.fn();
  return {
    fs,
    path,
    sleep: vi.fn(async () => undefined),
    randomUUID,
    runScenario: fn,
    waitForOutboundMessage: fn,
    waitForTransportOutboundMessage: fn,
    waitForChannelOutboundMessage: fn,
    waitForNoOutbound: fn,
    waitForNoTransportOutbound: fn,
    recentOutboundSummary: fn,
    formatConversationTranscript: fn,
    readTransportTranscript: fn,
    formatTransportTranscript: fn,
    fetchJson: fn,
    waitForGatewayHealthy: fn,
    waitForTransportReady: fn,
    waitForQaChannelReady: fn,
    waitForConfigRestartSettle: fn,
    patchConfig: fn,
    applyConfig: fn,
    readConfigSnapshot: fn,
    createSession: fn,
    readEffectiveTools: fn,
    readSkillStatus: fn,
    readRawQaSessionStore: fn,
    runQaCli: fn,
    extractMediaPathFromText: fn,
    resolveGeneratedImagePath: fn,
    startAgentRun: fn,
    waitForAgentRun: fn,
    listCronJobs: fn,
    waitForCronRunCompletion: fn,
    readDoctorMemoryStatus: fn,
    forceMemoryIndex: fn,
    findSkill: fn,
    writeWorkspaceSkill: fn,
    callPluginToolsMcp: fn,
    runAgentPrompt: fn,
    ensureImageGenerationConfigured: fn,
    handleQaAction: fn,
    extractQaToolPayload: fn,
    formatMemoryDreamingDay: fn,
    resolveSessionTranscriptsDirForAgent: fn,
    buildAgentSessionKey: fn,
    normalizeLowercaseStringOrEmpty: fn,
    formatErrorMessage: fn,
    liveTurnTimeoutMs: fn,
    resolveQaLiveTurnTimeoutMs: fn,
    splitModelRef: fn,
    qaChannelPlugin: { id: "qa-channel" },
    hasDiscoveryLabels: fn,
    reportsDiscoveryScopeLeak: fn,
    reportsMissingDiscoveryFiles: fn,
    hasModelSwitchContinuityEvidence: fn,
    ...overrides,
  };
}
|
||||
|
||||
const constants: QaScenarioRuntimeConstants = {
|
||||
imageUnderstandingPngBase64: "png-small",
|
||||
imageUnderstandingLargePngBase64: "png-large",
|
||||
imageUnderstandingValidPngBase64: "png-valid",
|
||||
};
|
||||
|
||||
describe("createQaScenarioRuntimeApi", () => {
|
||||
it("builds a markdown-flow runtime surface from generic transport capabilities", async () => {
|
||||
const state = createQaBusState();
|
||||
const resetSpy = vi.spyOn(state, "reset");
|
||||
const inboundSpy = vi.spyOn(state, "addInboundMessage");
|
||||
const outboundSpy = vi.spyOn(state, "addOutboundMessage");
|
||||
const readSpy = vi.spyOn(state, "readMessage");
|
||||
const waitForCondition = vi.fn(async (check: () => unknown) => check());
|
||||
const sleep = vi.fn(async () => undefined);
|
||||
const env = {
|
||||
lab: { baseUrl: "http://127.0.0.1:1234" },
|
||||
transport: {
|
||||
state,
|
||||
capabilities: {
|
||||
waitForCondition,
|
||||
getNormalizedMessageState: state.getSnapshot.bind(state),
|
||||
resetNormalizedMessageState: async () => {
|
||||
state.reset();
|
||||
},
|
||||
sendInboundMessage: state.addInboundMessage.bind(state),
|
||||
injectOutboundMessage: state.addOutboundMessage.bind(state),
|
||||
readNormalizedMessage: state.readMessage.bind(state),
|
||||
},
|
||||
},
|
||||
};
|
||||
const scenario = {
|
||||
id: "generic-flow",
|
||||
title: "Generic Flow",
|
||||
surface: "test",
|
||||
objective: "test",
|
||||
successCriteria: ["works"],
|
||||
sourcePath: "qa/scenarios/generic-flow.md",
|
||||
execution: {
|
||||
kind: "flow" as const,
|
||||
config: { expected: "value" },
|
||||
flow: {
|
||||
steps: [{ name: "noop", actions: [{ assert: "true" }] }],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const api = createQaScenarioRuntimeApi({
|
||||
env,
|
||||
scenario,
|
||||
deps: createDeps({ sleep }),
|
||||
constants,
|
||||
});
|
||||
|
||||
expect(api.lab).toBe(env.lab);
|
||||
expect(api.state).toBe(state);
|
||||
expect(api.config).toEqual({ expected: "value" });
|
||||
expect(api.waitForCondition).toBe(waitForCondition);
|
||||
expect(api.waitForChannelReady).toBe(api.waitForTransportReady);
|
||||
expect(api.getTransportSnapshot()).toEqual(state.getSnapshot());
|
||||
expect(api.imageUnderstandingPngBase64).toBe("png-small");
|
||||
|
||||
const inbound = await api.injectInboundMessage({
|
||||
accountId: "qa-channel",
|
||||
conversation: { id: "qa-operator", kind: "direct" },
|
||||
senderId: "qa-operator",
|
||||
text: "hello",
|
||||
});
|
||||
const outbound = await api.injectOutboundMessage({
|
||||
accountId: "qa-channel",
|
||||
to: "dm:qa-operator",
|
||||
text: "hi",
|
||||
});
|
||||
expect(inbound.id).toBeTruthy();
|
||||
expect(outbound.id).toBeTruthy();
|
||||
await api.readTransportMessage({ accountId: "qa-channel", messageId: outbound.id });
|
||||
await api.reset();
|
||||
await api.resetBus();
|
||||
await api.resetTransport();
|
||||
|
||||
expect(inboundSpy).toHaveBeenCalledTimes(1);
|
||||
expect(outboundSpy).toHaveBeenCalledTimes(1);
|
||||
expect(readSpy).toHaveBeenCalledTimes(1);
|
||||
expect(resetSpy).toHaveBeenCalledTimes(3);
|
||||
expect(sleep).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
});
|
||||
256
extensions/qa-lab/src/scenario-runtime-api.ts
Normal file
256
extensions/qa-lab/src/scenario-runtime-api.ts
Normal file
@@ -0,0 +1,256 @@
|
||||
import type * as NodeFs from "node:fs/promises";
|
||||
import type * as NodePath from "node:path";
|
||||
import type { QaTransportState } from "./qa-transport.js";
|
||||
import type { QaSeedScenarioWithSource } from "./scenario-catalog.js";
|
||||
|
||||
type QaScenarioRuntimeFunction = (...args: never[]) => unknown;
|
||||
|
||||
export type QaScenarioRuntimeEnv<
|
||||
TLab = unknown,
|
||||
TTransportState extends QaTransportState = QaTransportState,
|
||||
> = {
|
||||
lab: TLab;
|
||||
transport: {
|
||||
state: TTransportState;
|
||||
capabilities: {
|
||||
waitForCondition: QaScenarioRuntimeFunction;
|
||||
getNormalizedMessageState: () => ReturnType<TTransportState["getSnapshot"]>;
|
||||
resetNormalizedMessageState: () => Promise<void>;
|
||||
sendInboundMessage: TTransportState["addInboundMessage"];
|
||||
injectOutboundMessage: TTransportState["addOutboundMessage"];
|
||||
readNormalizedMessage: TTransportState["readMessage"];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
export type QaScenarioRuntimeDeps = {
|
||||
fs: typeof NodeFs;
|
||||
path: typeof NodePath;
|
||||
sleep: (ms?: number) => Promise<unknown>;
|
||||
randomUUID: () => string;
|
||||
runScenario: QaScenarioRuntimeFunction;
|
||||
waitForOutboundMessage: QaScenarioRuntimeFunction;
|
||||
waitForTransportOutboundMessage: QaScenarioRuntimeFunction;
|
||||
waitForChannelOutboundMessage: QaScenarioRuntimeFunction;
|
||||
waitForNoOutbound: QaScenarioRuntimeFunction;
|
||||
waitForNoTransportOutbound: QaScenarioRuntimeFunction;
|
||||
recentOutboundSummary: QaScenarioRuntimeFunction;
|
||||
formatConversationTranscript: QaScenarioRuntimeFunction;
|
||||
readTransportTranscript: QaScenarioRuntimeFunction;
|
||||
formatTransportTranscript: QaScenarioRuntimeFunction;
|
||||
fetchJson: QaScenarioRuntimeFunction;
|
||||
waitForGatewayHealthy: QaScenarioRuntimeFunction;
|
||||
waitForTransportReady: QaScenarioRuntimeFunction;
|
||||
waitForQaChannelReady: QaScenarioRuntimeFunction;
|
||||
waitForConfigRestartSettle: QaScenarioRuntimeFunction;
|
||||
patchConfig: QaScenarioRuntimeFunction;
|
||||
applyConfig: QaScenarioRuntimeFunction;
|
||||
readConfigSnapshot: QaScenarioRuntimeFunction;
|
||||
createSession: QaScenarioRuntimeFunction;
|
||||
readEffectiveTools: QaScenarioRuntimeFunction;
|
||||
readSkillStatus: QaScenarioRuntimeFunction;
|
||||
readRawQaSessionStore: QaScenarioRuntimeFunction;
|
||||
runQaCli: QaScenarioRuntimeFunction;
|
||||
extractMediaPathFromText: QaScenarioRuntimeFunction;
|
||||
resolveGeneratedImagePath: QaScenarioRuntimeFunction;
|
||||
startAgentRun: QaScenarioRuntimeFunction;
|
||||
waitForAgentRun: QaScenarioRuntimeFunction;
|
||||
listCronJobs: QaScenarioRuntimeFunction;
|
||||
waitForCronRunCompletion: QaScenarioRuntimeFunction;
|
||||
readDoctorMemoryStatus: QaScenarioRuntimeFunction;
|
||||
forceMemoryIndex: QaScenarioRuntimeFunction;
|
||||
findSkill: QaScenarioRuntimeFunction;
|
||||
writeWorkspaceSkill: QaScenarioRuntimeFunction;
|
||||
callPluginToolsMcp: QaScenarioRuntimeFunction;
|
||||
runAgentPrompt: QaScenarioRuntimeFunction;
|
||||
ensureImageGenerationConfigured: QaScenarioRuntimeFunction;
|
||||
handleQaAction: QaScenarioRuntimeFunction;
|
||||
extractQaToolPayload: QaScenarioRuntimeFunction;
|
||||
formatMemoryDreamingDay: QaScenarioRuntimeFunction;
|
||||
resolveSessionTranscriptsDirForAgent: QaScenarioRuntimeFunction;
|
||||
buildAgentSessionKey: QaScenarioRuntimeFunction;
|
||||
normalizeLowercaseStringOrEmpty: QaScenarioRuntimeFunction;
|
||||
formatErrorMessage: QaScenarioRuntimeFunction;
|
||||
liveTurnTimeoutMs: QaScenarioRuntimeFunction;
|
||||
resolveQaLiveTurnTimeoutMs: QaScenarioRuntimeFunction;
|
||||
splitModelRef: QaScenarioRuntimeFunction;
|
||||
qaChannelPlugin: unknown;
|
||||
hasDiscoveryLabels: QaScenarioRuntimeFunction;
|
||||
reportsDiscoveryScopeLeak: QaScenarioRuntimeFunction;
|
||||
reportsMissingDiscoveryFiles: QaScenarioRuntimeFunction;
|
||||
hasModelSwitchContinuityEvidence: QaScenarioRuntimeFunction;
|
||||
};
|
||||
|
||||
export type QaScenarioRuntimeConstants = {
|
||||
imageUnderstandingPngBase64: string;
|
||||
imageUnderstandingLargePngBase64: string;
|
||||
imageUnderstandingValidPngBase64: string;
|
||||
};
|
||||
|
||||
/**
 * Shape of the object returned by `createQaScenarioRuntimeApi`.
 *
 * Most members are typed by indexed access into the supplied environment
 * (`TEnv`) or dependency bag (`TDeps`), so the concrete signatures of the
 * caller's helpers are carried through unchanged.
 *
 * @typeParam TEnv - Environment type; must provide `lab` and `transport`
 *   (`state` plus `capabilities`).
 * @typeParam TDeps - Dependency bag whose members are re-exposed by name.
 */
export type QaScenarioRuntimeApi<
  TEnv extends QaScenarioRuntimeEnv = QaScenarioRuntimeEnv,
  TDeps extends QaScenarioRuntimeDeps = QaScenarioRuntimeDeps,
> = {
  // --- Environment handles and scenario metadata ---
  env: TEnv;
  lab: TEnv["lab"];
  state: TEnv["transport"]["state"];
  scenario: QaSeedScenarioWithSource;
  config: Record<string, unknown>;

  // --- Generic utilities ---
  fs: typeof NodeFs;
  path: typeof NodePath;
  sleep: (ms?: number) => Promise<unknown>;
  randomUUID: () => string;
  runScenario: TDeps["runScenario"];

  // --- Waiting on / inspecting transport traffic ---
  waitForCondition: TEnv["transport"]["capabilities"]["waitForCondition"];
  waitForOutboundMessage: TDeps["waitForOutboundMessage"];
  waitForTransportOutboundMessage: TDeps["waitForTransportOutboundMessage"];
  waitForChannelOutboundMessage: TDeps["waitForChannelOutboundMessage"];
  waitForNoOutbound: TDeps["waitForNoOutbound"];
  waitForNoTransportOutbound: TDeps["waitForNoTransportOutbound"];
  recentOutboundSummary: TDeps["recentOutboundSummary"];
  formatConversationTranscript: TDeps["formatConversationTranscript"];
  readTransportTranscript: TDeps["readTransportTranscript"];
  formatTransportTranscript: TDeps["formatTransportTranscript"];

  // --- Gateway / config helpers ---
  fetchJson: TDeps["fetchJson"];
  waitForGatewayHealthy: TDeps["waitForGatewayHealthy"];
  waitForTransportReady: TDeps["waitForTransportReady"];
  // Same signature as `waitForTransportReady`; exposed under a second name.
  waitForChannelReady: TDeps["waitForTransportReady"];
  waitForQaChannelReady: TDeps["waitForQaChannelReady"];
  waitForConfigRestartSettle: TDeps["waitForConfigRestartSettle"];
  patchConfig: TDeps["patchConfig"];
  applyConfig: TDeps["applyConfig"];
  readConfigSnapshot: TDeps["readConfigSnapshot"];

  // --- Sessions, agent runs, tools and skills ---
  createSession: TDeps["createSession"];
  readEffectiveTools: TDeps["readEffectiveTools"];
  readSkillStatus: TDeps["readSkillStatus"];
  readRawQaSessionStore: TDeps["readRawQaSessionStore"];
  runQaCli: TDeps["runQaCli"];
  extractMediaPathFromText: TDeps["extractMediaPathFromText"];
  resolveGeneratedImagePath: TDeps["resolveGeneratedImagePath"];
  startAgentRun: TDeps["startAgentRun"];
  waitForAgentRun: TDeps["waitForAgentRun"];
  listCronJobs: TDeps["listCronJobs"];
  waitForCronRunCompletion: TDeps["waitForCronRunCompletion"];
  readDoctorMemoryStatus: TDeps["readDoctorMemoryStatus"];
  forceMemoryIndex: TDeps["forceMemoryIndex"];
  findSkill: TDeps["findSkill"];
  writeWorkspaceSkill: TDeps["writeWorkspaceSkill"];
  callPluginToolsMcp: TDeps["callPluginToolsMcp"];
  runAgentPrompt: TDeps["runAgentPrompt"];
  ensureImageGenerationConfigured: TDeps["ensureImageGenerationConfigured"];
  handleQaAction: TDeps["handleQaAction"];
  extractQaToolPayload: TDeps["extractQaToolPayload"];

  // --- Formatting / normalisation helpers ---
  formatMemoryDreamingDay: TDeps["formatMemoryDreamingDay"];
  resolveSessionTranscriptsDirForAgent: TDeps["resolveSessionTranscriptsDirForAgent"];
  buildAgentSessionKey: TDeps["buildAgentSessionKey"];
  normalizeLowercaseStringOrEmpty: TDeps["normalizeLowercaseStringOrEmpty"];
  formatErrorMessage: TDeps["formatErrorMessage"];
  liveTurnTimeoutMs: TDeps["liveTurnTimeoutMs"];
  resolveQaLiveTurnTimeoutMs: TDeps["resolveQaLiveTurnTimeoutMs"];
  splitModelRef: TDeps["splitModelRef"];
  // Typed from the supplied deps like its siblings, so a caller's concrete
  // plugin type is preserved; resolves to `unknown` for the default `TDeps`.
  qaChannelPlugin: TDeps["qaChannelPlugin"];

  // --- Reply classification predicates ---
  hasDiscoveryLabels: TDeps["hasDiscoveryLabels"];
  reportsDiscoveryScopeLeak: TDeps["reportsDiscoveryScopeLeak"];
  reportsMissingDiscoveryFiles: TDeps["reportsMissingDiscoveryFiles"];
  hasModelSwitchContinuityEvidence: TDeps["hasModelSwitchContinuityEvidence"];

  // --- Fixture constants ---
  imageUnderstandingPngBase64: string;
  imageUnderstandingLargePngBase64: string;
  imageUnderstandingValidPngBase64: string;

  // --- Transport state access (names differ from the capability names) ---
  getTransportSnapshot: TEnv["transport"]["capabilities"]["getNormalizedMessageState"];
  resetTransport: () => Promise<void>;
  injectInboundMessage: TEnv["transport"]["capabilities"]["sendInboundMessage"];
  injectOutboundMessage: TEnv["transport"]["capabilities"]["injectOutboundMessage"];
  readTransportMessage: TEnv["transport"]["capabilities"]["readNormalizedMessage"];
  // `resetBus` and `reset` have the same signature as `resetTransport`.
  resetBus: () => Promise<void>;
  reset: () => Promise<void>;
};
|
||||
|
||||
/**
 * Builds the flat API object that scenario flows run against.
 *
 * The result is a plain aggregation: every member is a reference to something
 * on `params.env`, `params.deps`, `params.constants` or `params.scenario`.
 * The only logic added here is the transport reset helper, which is exposed
 * under three names (`resetTransport`, `resetBus`, `reset`).
 *
 * @param params.env - Environment providing `lab` and `transport`.
 * @param params.scenario - Scenario being executed; `execution.config`
 *   becomes `config` (an empty object when it is null/undefined).
 * @param params.deps - Helper functions re-exposed by name.
 * @param params.constants - Fixture strings re-exposed by name.
 * @returns The assembled runtime API.
 */
export function createQaScenarioRuntimeApi<
  TEnv extends QaScenarioRuntimeEnv,
  TDeps extends QaScenarioRuntimeDeps,
>(params: {
  env: TEnv;
  scenario: QaSeedScenarioWithSource;
  deps: TDeps;
  constants: QaScenarioRuntimeConstants;
}): QaScenarioRuntimeApi<TEnv, TDeps> {
  const { env, scenario, deps, constants } = params;
  // Capability references handed out below are captured once, at construction.
  const { capabilities } = env.transport;

  // Clears the normalized message state, then waits 100 ms before resolving.
  // The capability is looked up at call time, not captured above.
  const resetTransportState = async () => {
    await env.transport.capabilities.resetNormalizedMessageState();
    await deps.sleep(100);
  };

  return {
    env,
    lab: env.lab,
    state: env.transport.state,
    scenario,
    config: scenario.execution.config ?? {},
    fs: deps.fs,
    path: deps.path,
    sleep: deps.sleep,
    randomUUID: deps.randomUUID,
    runScenario: deps.runScenario,
    waitForCondition: capabilities.waitForCondition,
    waitForOutboundMessage: deps.waitForOutboundMessage,
    waitForTransportOutboundMessage: deps.waitForTransportOutboundMessage,
    waitForChannelOutboundMessage: deps.waitForChannelOutboundMessage,
    waitForNoOutbound: deps.waitForNoOutbound,
    waitForNoTransportOutbound: deps.waitForNoTransportOutbound,
    recentOutboundSummary: deps.recentOutboundSummary,
    formatConversationTranscript: deps.formatConversationTranscript,
    readTransportTranscript: deps.readTransportTranscript,
    formatTransportTranscript: deps.formatTransportTranscript,
    fetchJson: deps.fetchJson,
    waitForGatewayHealthy: deps.waitForGatewayHealthy,
    waitForTransportReady: deps.waitForTransportReady,
    // Second name for the same dependency as `waitForTransportReady`.
    waitForChannelReady: deps.waitForTransportReady,
    waitForQaChannelReady: deps.waitForQaChannelReady,
    waitForConfigRestartSettle: deps.waitForConfigRestartSettle,
    patchConfig: deps.patchConfig,
    applyConfig: deps.applyConfig,
    readConfigSnapshot: deps.readConfigSnapshot,
    createSession: deps.createSession,
    readEffectiveTools: deps.readEffectiveTools,
    readSkillStatus: deps.readSkillStatus,
    readRawQaSessionStore: deps.readRawQaSessionStore,
    runQaCli: deps.runQaCli,
    extractMediaPathFromText: deps.extractMediaPathFromText,
    resolveGeneratedImagePath: deps.resolveGeneratedImagePath,
    startAgentRun: deps.startAgentRun,
    waitForAgentRun: deps.waitForAgentRun,
    listCronJobs: deps.listCronJobs,
    waitForCronRunCompletion: deps.waitForCronRunCompletion,
    readDoctorMemoryStatus: deps.readDoctorMemoryStatus,
    forceMemoryIndex: deps.forceMemoryIndex,
    findSkill: deps.findSkill,
    writeWorkspaceSkill: deps.writeWorkspaceSkill,
    callPluginToolsMcp: deps.callPluginToolsMcp,
    runAgentPrompt: deps.runAgentPrompt,
    ensureImageGenerationConfigured: deps.ensureImageGenerationConfigured,
    handleQaAction: deps.handleQaAction,
    extractQaToolPayload: deps.extractQaToolPayload,
    formatMemoryDreamingDay: deps.formatMemoryDreamingDay,
    resolveSessionTranscriptsDirForAgent: deps.resolveSessionTranscriptsDirForAgent,
    buildAgentSessionKey: deps.buildAgentSessionKey,
    normalizeLowercaseStringOrEmpty: deps.normalizeLowercaseStringOrEmpty,
    formatErrorMessage: deps.formatErrorMessage,
    liveTurnTimeoutMs: deps.liveTurnTimeoutMs,
    resolveQaLiveTurnTimeoutMs: deps.resolveQaLiveTurnTimeoutMs,
    splitModelRef: deps.splitModelRef,
    qaChannelPlugin: deps.qaChannelPlugin,
    hasDiscoveryLabels: deps.hasDiscoveryLabels,
    reportsDiscoveryScopeLeak: deps.reportsDiscoveryScopeLeak,
    reportsMissingDiscoveryFiles: deps.reportsMissingDiscoveryFiles,
    hasModelSwitchContinuityEvidence: deps.hasModelSwitchContinuityEvidence,
    imageUnderstandingPngBase64: constants.imageUnderstandingPngBase64,
    imageUnderstandingLargePngBase64: constants.imageUnderstandingLargePngBase64,
    imageUnderstandingValidPngBase64: constants.imageUnderstandingValidPngBase64,
    getTransportSnapshot: capabilities.getNormalizedMessageState,
    resetTransport: resetTransportState,
    injectInboundMessage: capabilities.sendInboundMessage,
    injectOutboundMessage: capabilities.injectOutboundMessage,
    readTransportMessage: capabilities.readNormalizedMessage,
    // All three reset names share one function instance.
    resetBus: resetTransportState,
    reset: resetTransportState,
  };
}
|
||||
@@ -9,6 +9,8 @@ describe("qa suite failure reply handling", () => {
|
||||
const makeScenario = (
|
||||
id: string,
|
||||
config?: Record<string, unknown>,
|
||||
plugins?: string[],
|
||||
gatewayConfigPatch?: Record<string, unknown>,
|
||||
): Parameters<typeof qaSuiteTesting.selectQaSuiteScenarios>[0]["scenarios"][number] =>
|
||||
({
|
||||
id,
|
||||
@@ -16,6 +18,8 @@ describe("qa suite failure reply handling", () => {
|
||||
surface: "test",
|
||||
objective: "test",
|
||||
successCriteria: ["test"],
|
||||
plugins,
|
||||
gatewayConfigPatch,
|
||||
sourcePath: `qa/scenarios/${id}.md`,
|
||||
execution: {
|
||||
kind: "flow",
|
||||
@@ -129,6 +133,72 @@ describe("qa suite failure reply handling", () => {
|
||||
).toEqual(["anthropic-only"]);
|
||||
});
|
||||
|
||||
// "memory-wiki" is declared by two scenarios and must appear once; the
// expected list follows the order in which ids are first encountered.
it("collects unique scenario-declared bundled plugins in encounter order", () => {
  const scenarios = [
    makeScenario("generic", undefined, ["active-memory", "memory-wiki"]),
    makeScenario("other", undefined, ["memory-wiki", "openai"]),
    makeScenario("plain"),
  ];

  expect(qaSuiteTesting.collectQaSuitePluginIds(scenarios)).toEqual([
    "active-memory",
    "memory-wiki",
    "openai",
  ]);
});
|
||||
|
||||
// Two scenarios patch the same `plugins.entries["active-memory"].config`
// object; the merged result must keep keys from both, plus the second
// scenario's unrelated `agents.defaults` branch.
it("merge-patches scenario startup config in encounter order", () => {
  const scenarios = [
    makeScenario("active-memory", undefined, ["active-memory"], {
      plugins: {
        entries: {
          "active-memory": {
            config: {
              enabled: true,
              agents: ["qa"],
            },
          },
        },
      },
    }),
    makeScenario("live-defaults", undefined, undefined, {
      agents: {
        defaults: {
          thinkingDefault: "minimal",
        },
      },
      plugins: {
        entries: {
          "active-memory": {
            config: {
              transcriptDir: "qa-memory-e2e",
            },
          },
        },
      },
    }),
  ];

  expect(qaSuiteTesting.collectQaSuiteGatewayConfigPatch(scenarios)).toEqual({
    agents: {
      defaults: {
        thinkingDefault: "minimal",
      },
    },
    plugins: {
      entries: {
        "active-memory": {
          config: {
            enabled: true,
            agents: ["qa"],
            transcriptDir: "qa-memory-e2e",
          },
        },
      },
    },
  });
});
|
||||
|
||||
it("filters provider-specific scenarios from an implicit live lane", () => {
|
||||
const scenarios = [
|
||||
makeScenario("generic"),
|
||||
|
||||
@@ -59,6 +59,7 @@ import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } fro
|
||||
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
import { runScenarioFlow } from "./scenario-flow-runner.js";
|
||||
import { createQaScenarioRuntimeApi } from "./scenario-runtime-api.js";
|
||||
|
||||
type QaSuiteStep = {
|
||||
name: string;
|
||||
@@ -261,6 +262,57 @@ function selectQaSuiteScenarios(params: {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Gathers the plugin ids declared by the given scenarios.
 *
 * Each id is trimmed; ids that are empty after trimming are dropped and
 * duplicates are removed. The result keeps first-seen order. Scenarios whose
 * `plugins` value is not an array contribute nothing.
 *
 * @param scenarios - Scenarios to scan.
 * @returns Unique, trimmed plugin ids in encounter order.
 */
function collectQaSuitePluginIds(
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"],
): string[] {
  // A Set iterates in insertion order, which gives encounter-order de-duplication.
  const uniquePluginIds = new Set<string>();
  for (const scenario of scenarios) {
    if (!Array.isArray(scenario.plugins)) {
      continue;
    }
    for (const rawPluginId of scenario.plugins) {
      const pluginId = rawPluginId.trim();
      if (pluginId.length > 0) {
        uniquePluginIds.add(pluginId);
      }
    }
  }
  return [...uniquePluginIds];
}
|
||||
|
||||
/**
 * Type guard: true for any non-null, non-array value whose `typeof` is
 * "object". This is not a strict plain-object check — class instances, Dates
 * and Maps also pass.
 */
function isQaPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Applies `patch` on top of `base` using JSON-merge-patch style rules.
 *
 * - A non-object patch (primitive, array, null) replaces `base` outright.
 * - Otherwise a shallow copy of `base` is made (or `{}` when `base` is not an
 *   object) and each patch key is applied to it:
 *   - `null` removes the key;
 *   - an object value is merged recursively into the existing value;
 *   - anything else (including arrays) overwrites the existing value.
 *
 * Neither argument is mutated. Branches of `base` the patch does not touch
 * are shared by reference with the result.
 *
 * @param base - Value to patch.
 * @param patch - Patch to apply.
 * @returns The patched value.
 */
function applyQaMergePatch(base: unknown, patch: unknown): unknown {
  if (!isQaPlainObject(patch)) {
    return patch;
  }
  const result: Record<string, unknown> = isQaPlainObject(base) ? { ...base } : {};
  for (const [key, value] of Object.entries(patch)) {
    // An own "__proto__" key (e.g. from parsed JSON/YAML) must not reach the
    // assignment below: `result["__proto__"] = …` invokes the prototype setter
    // and would re-parent `result` instead of storing a property.
    if (key === "__proto__") {
      continue;
    }
    if (value === null) {
      delete result[key];
      continue;
    }
    result[key] = isQaPlainObject(value) ? applyQaMergePatch(result[key], value) : value;
  }
  return result;
}
|
||||
|
||||
/**
 * Folds every scenario's `gatewayConfigPatch` into one patch object.
 *
 * Patches are applied in array order via `applyQaMergePatch`, so a later
 * scenario's values win where two scenarios set the same key. Scenarios
 * whose `gatewayConfigPatch` fails `isQaPlainObject` are ignored.
 *
 * @param scenarios - Scenarios to scan.
 * @returns The merged patch, or `undefined` when no scenario supplied one.
 */
function collectQaSuiteGatewayConfigPatch(
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"],
): Record<string, unknown> | undefined {
  // Stays undefined until the first usable patch, so callers can tell
  // "no patch declared" apart from "an empty patch was declared".
  let merged: Record<string, unknown> | undefined;
  for (const scenario of scenarios) {
    if (isQaPlainObject(scenario.gatewayConfigPatch)) {
      merged = applyQaMergePatch(merged ?? {}, scenario.gatewayConfigPatch) as Record<
        string,
        unknown
      >;
    }
  }
  return merged;
}
|
||||
|
||||
/**
 * Short alias for `resolveQaLiveTurnTimeoutMs`; forwards both arguments
 * unchanged and returns its result.
 *
 * @param env - Suite environment passed through to the resolver.
 * @param fallbackMs - Fallback timeout in milliseconds passed through to the resolver.
 */
function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) {
  return resolveQaLiveTurnTimeoutMs(env, fallbackMs);
}
|
||||
@@ -1158,171 +1210,81 @@ async function handleQaAction(params: {
|
||||
return extractQaToolPayload(result as Parameters<typeof extractQaToolPayload>[0]);
|
||||
}
|
||||
|
||||
type QaScenarioFlowApi = {
|
||||
env: QaSuiteEnvironment;
|
||||
lab: QaSuiteEnvironment["lab"];
|
||||
state: QaTransportState;
|
||||
scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number];
|
||||
config: Record<string, unknown>;
|
||||
fs: typeof fs;
|
||||
path: typeof path;
|
||||
sleep: typeof sleep;
|
||||
randomUUID: typeof randomUUID;
|
||||
runScenario: typeof runScenario;
|
||||
waitForCondition: typeof waitForCondition;
|
||||
waitForOutboundMessage: typeof waitForOutboundMessage;
|
||||
waitForTransportOutboundMessage: typeof waitForTransportOutboundMessage;
|
||||
waitForChannelOutboundMessage: typeof waitForChannelOutboundMessage;
|
||||
waitForNoOutbound: typeof waitForNoOutbound;
|
||||
waitForNoTransportOutbound: typeof waitForNoTransportOutbound;
|
||||
recentOutboundSummary: typeof recentOutboundSummary;
|
||||
formatConversationTranscript: typeof formatConversationTranscript;
|
||||
readTransportTranscript: typeof readTransportTranscript;
|
||||
formatTransportTranscript: typeof formatTransportTranscript;
|
||||
fetchJson: typeof fetchJson;
|
||||
waitForGatewayHealthy: typeof waitForGatewayHealthy;
|
||||
waitForTransportReady: typeof waitForTransportReady;
|
||||
waitForChannelReady: typeof waitForTransportReady;
|
||||
waitForQaChannelReady: typeof waitForQaChannelReady;
|
||||
waitForConfigRestartSettle: typeof waitForConfigRestartSettle;
|
||||
patchConfig: typeof patchConfig;
|
||||
applyConfig: typeof applyConfig;
|
||||
readConfigSnapshot: typeof readConfigSnapshot;
|
||||
createSession: typeof createSession;
|
||||
readEffectiveTools: typeof readEffectiveTools;
|
||||
readSkillStatus: typeof readSkillStatus;
|
||||
readRawQaSessionStore: typeof readRawQaSessionStore;
|
||||
runQaCli: typeof runQaCli;
|
||||
extractMediaPathFromText: typeof extractMediaPathFromText;
|
||||
resolveGeneratedImagePath: typeof resolveGeneratedImagePath;
|
||||
startAgentRun: typeof startAgentRun;
|
||||
waitForAgentRun: typeof waitForAgentRun;
|
||||
listCronJobs: typeof listCronJobs;
|
||||
waitForCronRunCompletion: typeof waitForCronRunCompletion;
|
||||
readDoctorMemoryStatus: typeof readDoctorMemoryStatus;
|
||||
forceMemoryIndex: typeof forceMemoryIndex;
|
||||
findSkill: typeof findSkill;
|
||||
writeWorkspaceSkill: typeof writeWorkspaceSkill;
|
||||
callPluginToolsMcp: typeof callPluginToolsMcp;
|
||||
runAgentPrompt: typeof runAgentPrompt;
|
||||
ensureImageGenerationConfigured: typeof ensureImageGenerationConfigured;
|
||||
handleQaAction: typeof handleQaAction;
|
||||
extractQaToolPayload: typeof extractQaToolPayload;
|
||||
formatMemoryDreamingDay: typeof formatMemoryDreamingDay;
|
||||
resolveSessionTranscriptsDirForAgent: typeof resolveSessionTranscriptsDirForAgent;
|
||||
buildAgentSessionKey: typeof buildAgentSessionKey;
|
||||
normalizeLowercaseStringOrEmpty: typeof normalizeLowercaseStringOrEmpty;
|
||||
formatErrorMessage: typeof formatErrorMessage;
|
||||
liveTurnTimeoutMs: typeof liveTurnTimeoutMs;
|
||||
resolveQaLiveTurnTimeoutMs: typeof resolveQaLiveTurnTimeoutMs;
|
||||
splitModelRef: typeof splitModelRef;
|
||||
qaChannelPlugin: typeof qaChannelPlugin;
|
||||
hasDiscoveryLabels: typeof hasDiscoveryLabels;
|
||||
reportsDiscoveryScopeLeak: typeof reportsDiscoveryScopeLeak;
|
||||
reportsMissingDiscoveryFiles: typeof reportsMissingDiscoveryFiles;
|
||||
hasModelSwitchContinuityEvidence: typeof hasModelSwitchContinuityEvidence;
|
||||
imageUnderstandingPngBase64: string;
|
||||
imageUnderstandingLargePngBase64: string;
|
||||
imageUnderstandingValidPngBase64: string;
|
||||
getTransportSnapshot: () => ReturnType<QaTransportState["getSnapshot"]>;
|
||||
resetTransport: () => Promise<void>;
|
||||
injectInboundMessage: QaTransportState["addInboundMessage"];
|
||||
injectOutboundMessage: QaTransportState["addOutboundMessage"];
|
||||
readTransportMessage: QaTransportState["readMessage"];
|
||||
resetBus: () => Promise<void>;
|
||||
reset: () => Promise<void>;
|
||||
};
|
||||
|
||||
function createScenarioFlowApi(
|
||||
env: QaSuiteEnvironment,
|
||||
scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number],
|
||||
): QaScenarioFlowApi {
|
||||
return {
|
||||
) {
|
||||
return createQaScenarioRuntimeApi({
|
||||
env,
|
||||
lab: env.lab,
|
||||
state: env.transport.state,
|
||||
scenario,
|
||||
config: scenario.execution.config ?? {},
|
||||
fs,
|
||||
path,
|
||||
sleep,
|
||||
randomUUID,
|
||||
runScenario,
|
||||
waitForCondition: env.transport.capabilities.waitForCondition,
|
||||
waitForOutboundMessage,
|
||||
waitForTransportOutboundMessage,
|
||||
waitForChannelOutboundMessage,
|
||||
waitForNoOutbound,
|
||||
waitForNoTransportOutbound,
|
||||
recentOutboundSummary,
|
||||
formatConversationTranscript,
|
||||
readTransportTranscript,
|
||||
formatTransportTranscript,
|
||||
fetchJson,
|
||||
waitForGatewayHealthy,
|
||||
waitForTransportReady,
|
||||
waitForChannelReady: waitForTransportReady,
|
||||
waitForQaChannelReady,
|
||||
waitForConfigRestartSettle,
|
||||
patchConfig,
|
||||
applyConfig,
|
||||
readConfigSnapshot,
|
||||
createSession,
|
||||
readEffectiveTools,
|
||||
readSkillStatus,
|
||||
readRawQaSessionStore,
|
||||
runQaCli,
|
||||
extractMediaPathFromText,
|
||||
resolveGeneratedImagePath,
|
||||
startAgentRun,
|
||||
waitForAgentRun,
|
||||
listCronJobs,
|
||||
waitForCronRunCompletion,
|
||||
readDoctorMemoryStatus,
|
||||
forceMemoryIndex,
|
||||
findSkill,
|
||||
writeWorkspaceSkill,
|
||||
callPluginToolsMcp,
|
||||
runAgentPrompt,
|
||||
ensureImageGenerationConfigured,
|
||||
handleQaAction,
|
||||
extractQaToolPayload,
|
||||
formatMemoryDreamingDay,
|
||||
resolveSessionTranscriptsDirForAgent,
|
||||
buildAgentSessionKey,
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
formatErrorMessage,
|
||||
liveTurnTimeoutMs,
|
||||
resolveQaLiveTurnTimeoutMs,
|
||||
splitModelRef,
|
||||
qaChannelPlugin,
|
||||
hasDiscoveryLabels,
|
||||
reportsDiscoveryScopeLeak,
|
||||
reportsMissingDiscoveryFiles,
|
||||
hasModelSwitchContinuityEvidence,
|
||||
imageUnderstandingPngBase64: _QA_IMAGE_UNDERSTANDING_PNG_BASE64,
|
||||
imageUnderstandingLargePngBase64: _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
|
||||
imageUnderstandingValidPngBase64: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
|
||||
getTransportSnapshot: env.transport.capabilities.getNormalizedMessageState,
|
||||
resetTransport: async () => {
|
||||
await env.transport.capabilities.resetNormalizedMessageState();
|
||||
await sleep(100);
|
||||
deps: {
|
||||
fs,
|
||||
path,
|
||||
sleep,
|
||||
randomUUID,
|
||||
runScenario,
|
||||
waitForOutboundMessage,
|
||||
waitForTransportOutboundMessage,
|
||||
waitForChannelOutboundMessage,
|
||||
waitForNoOutbound,
|
||||
waitForNoTransportOutbound,
|
||||
recentOutboundSummary,
|
||||
formatConversationTranscript,
|
||||
readTransportTranscript,
|
||||
formatTransportTranscript,
|
||||
fetchJson,
|
||||
waitForGatewayHealthy,
|
||||
waitForTransportReady,
|
||||
waitForQaChannelReady,
|
||||
waitForConfigRestartSettle,
|
||||
patchConfig,
|
||||
applyConfig,
|
||||
readConfigSnapshot,
|
||||
createSession,
|
||||
readEffectiveTools,
|
||||
readSkillStatus,
|
||||
readRawQaSessionStore,
|
||||
runQaCli,
|
||||
extractMediaPathFromText,
|
||||
resolveGeneratedImagePath,
|
||||
startAgentRun,
|
||||
waitForAgentRun,
|
||||
listCronJobs,
|
||||
waitForCronRunCompletion,
|
||||
readDoctorMemoryStatus,
|
||||
forceMemoryIndex,
|
||||
findSkill,
|
||||
writeWorkspaceSkill,
|
||||
callPluginToolsMcp,
|
||||
runAgentPrompt,
|
||||
ensureImageGenerationConfigured,
|
||||
handleQaAction,
|
||||
extractQaToolPayload,
|
||||
formatMemoryDreamingDay,
|
||||
resolveSessionTranscriptsDirForAgent,
|
||||
buildAgentSessionKey,
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
formatErrorMessage,
|
||||
liveTurnTimeoutMs,
|
||||
resolveQaLiveTurnTimeoutMs,
|
||||
splitModelRef,
|
||||
qaChannelPlugin,
|
||||
hasDiscoveryLabels,
|
||||
reportsDiscoveryScopeLeak,
|
||||
reportsMissingDiscoveryFiles,
|
||||
hasModelSwitchContinuityEvidence,
|
||||
},
|
||||
injectInboundMessage: env.transport.capabilities.sendInboundMessage,
|
||||
injectOutboundMessage: env.transport.capabilities.injectOutboundMessage,
|
||||
readTransportMessage: env.transport.capabilities.readNormalizedMessage,
|
||||
resetBus: async () => {
|
||||
await env.transport.capabilities.resetNormalizedMessageState();
|
||||
await sleep(100);
|
||||
constants: {
|
||||
imageUnderstandingPngBase64: _QA_IMAGE_UNDERSTANDING_PNG_BASE64,
|
||||
imageUnderstandingLargePngBase64: _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64,
|
||||
imageUnderstandingValidPngBase64: QA_IMAGE_UNDERSTANDING_VALID_PNG_BASE64,
|
||||
},
|
||||
reset: async () => {
|
||||
await env.transport.capabilities.resetNormalizedMessageState();
|
||||
await sleep(100);
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
export const qaSuiteTesting = {
|
||||
collectQaSuiteGatewayConfigPatch,
|
||||
collectQaSuitePluginIds,
|
||||
createScenarioWaitForCondition,
|
||||
findFailureOutboundMessage,
|
||||
getGatewayRetryAfterMs,
|
||||
@@ -1415,7 +1377,7 @@ async function writeQaSuiteArtifacts(params: {
|
||||
export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResult> {
|
||||
const startedAt = new Date();
|
||||
const repoRoot = path.resolve(params?.repoRoot ?? process.cwd());
|
||||
const providerMode = normalizeQaProviderMode(params?.providerMode ?? "mock-openai");
|
||||
const providerMode = normalizeQaProviderMode(params?.providerMode ?? "live-frontier");
|
||||
const transportId = normalizeQaTransportId(params?.transportId);
|
||||
const primaryModel = params?.primaryModel ?? defaultQaModelForMode(providerMode);
|
||||
const alternateModel =
|
||||
@@ -1433,6 +1395,8 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
primaryModel,
|
||||
claudeCliAuthMode: params?.claudeCliAuthMode,
|
||||
});
|
||||
const enabledPluginIds = collectQaSuitePluginIds(selectedCatalogScenarios);
|
||||
const gatewayConfigPatch = collectQaSuiteGatewayConfigPatch(selectedCatalogScenarios);
|
||||
const concurrency = normalizeQaSuiteConcurrency(
|
||||
params?.concurrency,
|
||||
selectedCatalogScenarios.length,
|
||||
@@ -1629,6 +1593,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
thinkingDefault: params?.thinkingDefault,
|
||||
claudeCliAuthMode: params?.claudeCliAuthMode,
|
||||
controlUiEnabled: params?.controlUiEnabled ?? true,
|
||||
enabledPluginIds,
|
||||
mutateConfig: gatewayConfigPatch
|
||||
? (cfg) => applyQaMergePatch(cfg, gatewayConfigPatch) as OpenClawConfig
|
||||
: undefined,
|
||||
});
|
||||
lab.setControlUi({
|
||||
controlUiProxyTarget: gateway.baseUrl,
|
||||
|
||||
225
qa/scenarios/active-memory-preprompt-recall.md
Normal file
225
qa/scenarios/active-memory-preprompt-recall.md
Normal file
@@ -0,0 +1,225 @@
|
||||
# Active Memory pre-reply recall
|
||||
|
||||
```yaml qa-scenario
|
||||
id: active-memory-preprompt-recall
|
||||
title: Active Memory pre-reply recall
|
||||
surface: memory
|
||||
objective: Verify Active Memory surfaces a memory-only preference before the main reply, and that the same question stays unresolved when the plugin is off.
|
||||
plugins:
|
||||
- active-memory
|
||||
gatewayConfigPatch:
|
||||
plugins:
|
||||
entries:
|
||||
active-memory:
|
||||
enabled: true
|
||||
config:
|
||||
enabled: true
|
||||
agents:
|
||||
- qa
|
||||
allowedChatTypes:
|
||||
- direct
|
||||
logging: true
|
||||
persistTranscripts: true
|
||||
transcriptDir: qa-memory-e2e
|
||||
queryMode: recent
|
||||
maxSummaryChars: 220
|
||||
successCriteria:
|
||||
- With Active Memory off, the session shows no Active Memory plugin activity.
|
||||
- With Active Memory on, plugin-owned evidence shows the Active Memory sub-agent searched memory before the main reply.
|
||||
- Live lane proves the first user-visible reply uses the recalled preference.
|
||||
docsRefs:
|
||||
- docs/concepts/active-memory.md
|
||||
- docs/concepts/memory-search.md
|
||||
codeRefs:
|
||||
- extensions/active-memory/index.ts
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
- extensions/qa-lab/src/mock-openai-server.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Verify Active Memory stays off when session-toggled off, runs memory search/get when enabled, and helps a live model answer with the recalled preference in the first visible reply.
|
||||
config:
|
||||
baselineConversationId: qa-active-memory-off
|
||||
activeConversationId: qa-active-memory-on
|
||||
memoryFact: "Stable QA movie night snack preference: lemon pepper wings with blue cheese."
|
||||
memoryQuery: "QA movie night snack lemon pepper wings blue cheese"
|
||||
expectedNeedle: lemon pepper wings
|
||||
prompt: "Silent snack recall check: what snack do I usually want for QA movie night? Reply in one short sentence."
|
||||
promptSnippet: "Silent snack recall check"
|
||||
transcriptDir: qa-memory-e2e
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: only active memory surfaces the hidden snack preference
|
||||
actions:
|
||||
- call: reset
|
||||
- call: fs.rm
|
||||
args:
|
||||
- expr: "path.join(env.gateway.workspaceDir, 'MEMORY.md')"
|
||||
- force: true
|
||||
- call: fs.rm
|
||||
args:
|
||||
- expr: "path.join(env.gateway.workspaceDir, 'memory', `${formatMemoryDreamingDay(Date.now())}.md`)"
|
||||
- force: true
|
||||
- call: fs.writeFile
|
||||
args:
|
||||
- expr: "path.join(env.gateway.workspaceDir, 'MEMORY.md')"
|
||||
- expr: "`${config.memoryFact}\\n`"
|
||||
- utf8
|
||||
- call: forceMemoryIndex
|
||||
args:
|
||||
- env:
|
||||
ref: env
|
||||
query:
|
||||
expr: config.memoryQuery
|
||||
expectedNeedle:
|
||||
expr: config.expectedNeedle
|
||||
- set: baselineSessionKey
|
||||
value:
|
||||
expr: "'agent:qa:qa-channel:direct:active-memory-off'"
|
||||
- set: activeSessionKey
|
||||
value:
|
||||
expr: "'agent:qa:qa-channel:direct:active-memory-on'"
|
||||
- set: transcriptRoot
|
||||
value:
|
||||
expr: "path.join(env.gateway.tempRoot, 'state', 'plugins', 'active-memory', 'transcripts', 'agents', 'qa', config.transcriptDir)"
|
||||
- set: toggleStorePath
|
||||
value:
|
||||
expr: "path.join(env.gateway.tempRoot, 'state', 'plugins', 'active-memory', 'session-toggles.json')"
|
||||
- call: fs.rm
|
||||
args:
|
||||
- ref: transcriptRoot
|
||||
- recursive: true
|
||||
force: true
|
||||
- call: fs.rm
|
||||
args:
|
||||
- ref: toggleStorePath
|
||||
- force: true
|
||||
- call: fs.mkdir
|
||||
args:
|
||||
- expr: "path.dirname(toggleStorePath)"
|
||||
- recursive: true
|
||||
- call: fs.writeFile
|
||||
args:
|
||||
- ref: toggleStorePath
|
||||
- expr: "`${JSON.stringify({ sessions: { [baselineSessionKey]: { disabled: true, updatedAt: Date.now() } } }, null, 2)}\\n`"
|
||||
- utf8
|
||||
- set: requestCountBeforeBaseline
|
||||
value:
|
||||
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
|
||||
- set: baselineStartIndex
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.length"
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
- sessionKey:
|
||||
ref: baselineSessionKey
|
||||
message:
|
||||
expr: config.prompt
|
||||
timeoutMs:
|
||||
expr: liveTurnTimeoutMs(env, 45000)
|
||||
- call: waitForOutboundMessage
|
||||
saveAs: baselineOutbound
|
||||
args:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-operator'"
|
||||
- expr: liveTurnTimeoutMs(env, 30000)
|
||||
- sinceIndex:
|
||||
ref: baselineStartIndex
|
||||
- set: baselineLower
|
||||
value:
|
||||
expr: "normalizeLowercaseStringOrEmpty(baselineOutbound.text)"
|
||||
- if:
|
||||
expr: "Boolean(env.mock)"
|
||||
then:
|
||||
- set: baselineMockRequests
|
||||
value:
|
||||
expr: "(await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBeforeBaseline)"
|
||||
- set: baselineSessionStore
|
||||
value:
|
||||
expr: "await readRawQaSessionStore(env)"
|
||||
- assert:
|
||||
expr: "!Array.isArray(baselineSessionStore[baselineSessionKey]?.pluginDebugEntries) || !baselineSessionStore[baselineSessionKey].pluginDebugEntries.some((pluginEntry) => pluginEntry?.pluginId === 'active-memory')"
|
||||
message: baseline session unexpectedly recorded active-memory plugin activity
|
||||
- set: requestCountBeforeActive
|
||||
value:
|
||||
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
|
||||
- call: fs.writeFile
|
||||
args:
|
||||
- ref: toggleStorePath
|
||||
- expr: "'{}\\n'"
|
||||
- utf8
|
||||
- set: activeStartIndex
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.length"
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
- sessionKey:
|
||||
ref: activeSessionKey
|
||||
message:
|
||||
expr: config.prompt
|
||||
timeoutMs:
|
||||
expr: liveTurnTimeoutMs(env, 45000)
|
||||
- call: waitForOutboundMessage
|
||||
saveAs: activeOutbound
|
||||
args:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-operator'"
|
||||
- expr: liveTurnTimeoutMs(env, 30000)
|
||||
- sinceIndex:
|
||||
ref: activeStartIndex
|
||||
- set: activeLower
|
||||
value:
|
||||
expr: "normalizeLowercaseStringOrEmpty(activeOutbound.text)"
|
||||
- if:
|
||||
expr: "!env.mock"
|
||||
then:
|
||||
- assert:
|
||||
expr: "activeLower.includes(normalizeLowercaseStringOrEmpty(config.expectedNeedle))"
|
||||
message:
|
||||
expr: "`active memory reply missed the hidden preference: ${activeOutbound.text}`"
|
||||
- call: waitForCondition
|
||||
saveAs: transcriptPath
|
||||
args:
|
||||
- lambda:
|
||||
async: true
|
||||
expr: "await (async () => { const entries = (await fs.readdir(transcriptRoot).catch(() => [])).filter((entry) => entry.endsWith('.jsonl')).toSorted(); return entries.length > 0 ? path.join(transcriptRoot, entries.at(-1)) : undefined; })()"
|
||||
- 10000
|
||||
- call: fs.readFile
|
||||
saveAs: transcriptText
|
||||
args:
|
||||
- ref: transcriptPath
|
||||
- utf8
|
||||
- assert:
|
||||
expr: "transcriptText.includes('memory_search')"
|
||||
message: active memory transcript missing memory_search
|
||||
- assert:
|
||||
expr: "transcriptText.includes('memory_get')"
|
||||
message: active memory transcript missing memory_get
|
||||
- call: waitForCondition
|
||||
saveAs: activeSessionEntry
|
||||
args:
|
||||
- lambda:
|
||||
async: true
|
||||
expr: "await (async () => { const store = await readRawQaSessionStore(env); const entry = store[activeSessionKey]; if (!entry || !Array.isArray(entry.pluginDebugEntries)) return undefined; return entry.pluginDebugEntries.some((pluginEntry) => pluginEntry?.pluginId === 'active-memory' && Array.isArray(pluginEntry.lines) && pluginEntry.lines.some((line) => line.includes('Active Memory: ok'))) ? entry : undefined; })()"
|
||||
- 10000
|
||||
- if:
|
||||
expr: "Boolean(env.mock)"
|
||||
then:
|
||||
- set: mockRequests
|
||||
value:
|
||||
expr: "(await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBeforeActive)"
|
||||
- assert:
|
||||
expr: "mockRequests.some((request) => request.allInputText.includes('You are a memory search agent.') && request.plannedToolName === 'memory_search')"
|
||||
message: expected mock Active Memory search request
|
||||
- assert:
|
||||
expr: "mockRequests.some((request) => request.allInputText.includes('You are a memory search agent.') && request.plannedToolName === 'memory_get')"
|
||||
message: expected mock Active Memory memory_get request
|
||||
detailsExpr: "`${activeOutbound.text}\\n\\ntranscript=${transcriptPath}`"
|
||||
```
|
||||
Reference in New Issue
Block a user