diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts index fa25fc272bb..7918d285b29 100644 --- a/extensions/qa-lab/src/scenario-catalog.test.ts +++ b/extensions/qa-lab/src/scenario-catalog.test.ts @@ -149,6 +149,8 @@ describe("qa scenario catalog", () => { workspaceFiles?: Record; prompt?: string; expectedReplyAll?: string[]; + expectedArtifactAll?: string[]; + expectedArtifactAny?: string[]; } | undefined; @@ -159,6 +161,8 @@ describe("qa scenario catalog", () => { ); expect(config?.prompt).toContain("Repo contract followthrough check."); expect(config?.expectedReplyAll).toEqual(["read:", "wrote:", "status:"]); + expect(config?.expectedArtifactAll).toEqual(["repo contract"]); + expect(config?.expectedArtifactAny).toContain("evidence path"); expect(scenario.title).toBe("Instruction followthrough repo contract"); }); diff --git a/qa/scenarios/instruction-followthrough-repo-contract.md b/qa/scenarios/instruction-followthrough-repo-contract.md index 4bbee26e63f..0259f516ba9 100644 --- a/qa/scenarios/instruction-followthrough-repo-contract.md +++ b/qa/scenarios/instruction-followthrough-repo-contract.md @@ -51,6 +51,12 @@ execution: - "read:" - "wrote:" - "status:" + expectedArtifactAll: + - "repo contract" + expectedArtifactAny: + - "evidence path" + - "agent.md" + - "followthrough" forbiddenNeedles: - need permission - need your approval @@ -91,9 +97,16 @@ steps: args: - lambda: async: true - expr: "((await fs.readFile(artifactPath, 'utf8').catch(() => null))?.includes('Mission: prove you followed the repo contract.') ? await fs.readFile(artifactPath, 'utf8').catch(() => null) : undefined)" + expr: "((await fs.readFile(artifactPath, 'utf8').catch(() => null))?.trim() ? await fs.readFile(artifactPath, 'utf8').catch(() => null) : undefined)" - expr: liveTurnTimeoutMs(env, 30000) - expr: "env.providerMode === 'mock-openai' ? 100 : 250" + - set: normalizedArtifact + value: + expr: "normalizeLowercaseStringOrEmpty(artifact)" + - assert: + expr: "config.expectedArtifactAll.every((needle) => normalizedArtifact.includes(normalizeLowercaseStringOrEmpty(needle))) && config.expectedArtifactAny.some((needle) => normalizedArtifact.includes(normalizeLowercaseStringOrEmpty(needle)))" + message: + expr: "`repo contract artifact missing expected followthrough signals: ${artifact}`" - set: expectedReplyAll value: expr: config.expectedReplyAll.map(normalizeLowercaseStringOrEmpty)