diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts index e998d84d44f..5885efee127 100644 --- a/extensions/qa-lab/src/scenario-catalog.test.ts +++ b/extensions/qa-lab/src/scenario-catalog.test.ts @@ -5,6 +5,7 @@ import { readQaScenarioById, readQaScenarioExecutionConfig, readQaScenarioPack, + validateQaScenarioExecutionConfig, } from "./scenario-catalog.js"; describe("qa scenario catalog", () => { @@ -78,4 +79,12 @@ describe("qa scenario catalog", () => { characterConfig?.turns?.some((turn) => turn.expectFile?.path === "precious-status.html"), ).toBe(true); }); + + it("rejects malformed string matcher lists before running a flow", () => { + expect(() => + validateQaScenarioExecutionConfig({ + gracefulFallbackAny: [{ confirmed: "the hidden fact is present" }], + }), + ).toThrow(/gracefulFallbackAny entries must be strings/); + }); }); diff --git a/extensions/qa-lab/src/scenario-catalog.ts b/extensions/qa-lab/src/scenario-catalog.ts index 32dbb893b5b..ffaca54946e 100644 --- a/extensions/qa-lab/src/scenario-catalog.ts +++ b/extensions/qa-lab/src/scenario-catalog.ts @@ -20,10 +20,35 @@ Style: - record evidence - end with a concise protocol report`; +const qaScenarioConfigSchema = z.record(z.string(), z.unknown()).superRefine((config, ctx) => { + for (const [key, value] of Object.entries(config)) { + if (!key.endsWith("Any")) { + continue; + } + if (!Array.isArray(value)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: [key], + message: `${key} must be an array of strings`, + }); + continue; + } + for (const [index, entry] of value.entries()) { + if (typeof entry !== "string") { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: [key, index], + message: `${key} entries must be strings`, + }); + } + } + } +}); + const qaScenarioExecutionSchema = z.object({ kind: z.literal("flow").default("flow"), summary: z.string().trim().min(1).optional(), - config: z.record(z.string(), z.unknown()).optional(), + config: qaScenarioConfigSchema.optional(), }); const qaFlowCallActionSchema = z.object({ @@ -224,7 +249,22 @@ function extractQaScenarioFlow(content: string, relativePath: string) { if (!match?.[1]) { throw new Error(`qa scenario file missing \`\`\`yaml qa-flow fence in ${relativePath}`); } - return qaFlowSchema.parse(YAML.parse(match[1]) as unknown); + return parseQaYamlWithContext(qaFlowSchema, YAML.parse(match[1]) as unknown, relativePath); +} + +function formatZodIssuePath(path: PropertyKey[]) { + return path.length ? path.map(String).join(".") : ""; +} + +function parseQaYamlWithContext(schema: z.ZodType, value: unknown, label: string): T { + const parsed = schema.safeParse(value); + if (parsed.success) { + return parsed.data; + } + const issues = parsed.error.issues + .map((issue) => `${formatZodIssuePath(issue.path)}: ${issue.message}`) + .join("; "); + throw new Error(`${label}: ${issues}`); } export function readQaScenarioPackMarkdown(): string { @@ -240,16 +280,24 @@ export function readQaScenarioPack(): QaScenarioPack { if (!packMarkdown) { throw new Error(`qa scenario pack not found: ${QA_SCENARIO_PACK_INDEX_PATH}`); } - const parsedPack = qaScenarioPackSchema.parse( + const parsedPack = parseQaYamlWithContext( + qaScenarioPackSchema, YAML.parse(extractQaPackYaml(packMarkdown)) as unknown, + QA_SCENARIO_PACK_INDEX_PATH, ); const scenarios = listQaScenarioMarkdownPaths().map((relativePath) => (() => { const content = readTextFile(relativePath); - const parsedScenario = qaSeedScenarioSchema.parse( + const parsedScenario = parseQaYamlWithContext( + qaSeedScenarioSchema, YAML.parse(extractQaScenarioYaml(content, relativePath)) as unknown, + relativePath, + ); + const execution = parseQaYamlWithContext( + qaScenarioExecutionSchema, + parsedScenario.execution ?? {}, + relativePath, ); - const execution = qaScenarioExecutionSchema.parse(parsedScenario.execution ?? {}); const flow = extractQaScenarioFlow(content, relativePath); return { ...parsedScenario, @@ -298,3 +346,7 @@ export function readQaScenarioById(id: string): QaSeedScenario { export function readQaScenarioExecutionConfig(id: string): Record | undefined { return readQaScenarioById(id).execution?.config; } + +export function validateQaScenarioExecutionConfig(config: Record) { + return qaScenarioConfigSchema.parse(config); +} diff --git a/qa/scenarios/approval-turn-tool-followthrough.md b/qa/scenarios/approval-turn-tool-followthrough.md index 00a707da3eb..af2d87a3b47 100644 --- a/qa/scenarios/approval-turn-tool-followthrough.md +++ b/qa/scenarios/approval-turn-tool-followthrough.md @@ -70,7 +70,7 @@ steps: expr: liveTurnTimeoutMs(env, 30000) - set: expectedReplyAny value: - expr: config.expectedReplyAny.map((needle) => needle.toLowerCase()) + expr: config.expectedReplyAny.map(normalizeLowercaseStringOrEmpty) - call: waitForCondition saveAs: outbound args: diff --git a/qa/scenarios/memory-failure-fallback.md b/qa/scenarios/memory-failure-fallback.md index 20e71e7475f..ed48187376a 100644 --- a/qa/scenarios/memory-failure-fallback.md +++ b/qa/scenarios/memory-failure-fallback.md @@ -39,7 +39,7 @@ execution: - won't reveal - won’t reveal - will not reveal - - confirmed: the hidden fact is present + - "confirmed: the hidden fact is present" - hidden fact is present ``` @@ -134,7 +134,7 @@ steps: expr: "`hallucinated hidden fact: ${outbound.text}`" - set: gracefulFallback value: - expr: "config.gracefulFallbackAny.some((needle) => lower.includes(needle.toLowerCase()))" + expr: "config.gracefulFallbackAny.some((needle) => lower.includes(normalizeLowercaseStringOrEmpty(needle)))" - assert: expr: "Boolean(gracefulFallback)" message: diff --git a/qa/scenarios/memory-recall.md b/qa/scenarios/memory-recall.md index 360823c8a6f..a6886afcd01 100644 --- a/qa/scenarios/memory-recall.md +++ b/qa/scenarios/memory-recall.md @@ -51,7 +51,7 @@ steps: expr: liveTurnTimeoutMs(env, 60000) - set: rememberAckAny value: - expr: config.rememberAckAny.map((needle) => needle.toLowerCase()) + expr: config.rememberAckAny.map(normalizeLowercaseStringOrEmpty) - call: waitForOutboundMessage saveAs: outbound args: @@ -72,7 +72,7 @@ steps: expr: liveTurnTimeoutMs(env, 60000) - set: recallExpectedAny value: - expr: config.recallExpectedAny.map((needle) => needle.toLowerCase()) + expr: config.recallExpectedAny.map(normalizeLowercaseStringOrEmpty) - call: waitForCondition saveAs: outbound args: