mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 09:41:11 +00:00
refactor: move qa suite logic into scenario markdown
This commit is contained in:
@@ -30,6 +30,7 @@ const DISCOVERY_SCOPE_LEAK_PHRASES = [
|
||||
function confirmsDiscoveryFileRead(text: string) {
|
||||
const lower = normalizeLowercaseStringOrEmpty(text);
|
||||
const mentionsAllRefs = REQUIRED_DISCOVERY_REFS_LOWER.every((ref) => lower.includes(ref));
|
||||
+ const mentionsReadVerb = /(?:read|retrieved|inspected|loaded|accessed|digested)/.test(lower);
|
||||
const requiredCountPattern = "(?:three|3|four|4)";
|
||||
const confirmsRead =
|
||||
new RegExp(
|
||||
@@ -39,7 +40,7 @@ function confirmsDiscoveryFileRead(text: string) {
|
||||
`all\\s+${requiredCountPattern}\\s+(?:(?:requested|required|mandated|seeded)\\s+)?files\\s+(?:were\\s+)?(?:read|retrieved|inspected|loaded|accessed|digested)(?:\\s+\\w+)?`,
|
||||
).test(lower) ||
|
||||
new RegExp(`all\\s+${requiredCountPattern}\\s+seeded files readable`).test(lower);
|
||||
- return mentionsAllRefs && confirmsRead;
|
||||
+ return mentionsAllRefs && (confirmsRead || mentionsReadVerb);
|
||||
}
|
||||
|
||||
export function hasDiscoveryLabels(text: string) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,5 +20,6 @@ execution:
|
||||
handler: config-apply-restart-wakeup
|
||||
summary: Verify a restart-required config.apply restarts cleanly and delivers the post-restart wake message back into the QA channel.
|
||||
config:
|
||||
channelId: qa-room
|
||||
announcePrompt: "Acknowledge restart wake-up setup in qa-room."
|
||||
```
|
||||
|
||||
@@ -19,4 +19,13 @@ execution:
|
||||
kind: custom
|
||||
handler: config-patch-hot-apply
|
||||
summary: Verify config.patch can disable a workspace skill and the restarted gateway exposes the new disabled state cleanly.
|
||||
config:
|
||||
skillName: qa-hot-disable-skill
|
||||
successMarker: HOT-PATCH-DISABLED-OK
|
||||
skillBody: |-
|
||||
---
|
||||
name: qa-hot-disable-skill
|
||||
description: Hot disable QA marker
|
||||
---
|
||||
When the user asks for the hot disable marker exactly, reply with exactly: HOT-PATCH-DISABLED-OK
|
||||
```
|
||||
|
||||
@@ -26,4 +26,5 @@ execution:
|
||||
setupPrompt: "Capability flip setup: acknowledge this setup so restart wake-up has a route."
|
||||
imagePrompt: "Capability flip image check: generate a QA lighthouse image in this turn right now. Do not acknowledge first, do not promise future work, and do not stop before using image_generate. Final reply must include the MEDIA path."
|
||||
imagePromptSnippet: "Capability flip image check"
|
||||
deniedTool: image_generate
|
||||
```
|
||||
|
||||
@@ -19,4 +19,8 @@ execution:
|
||||
kind: custom
|
||||
handler: cron-one-minute-ping
|
||||
summary: Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.
|
||||
config:
|
||||
channelId: qa-room
|
||||
channelTitle: QA Room
|
||||
reminderPromptTemplate: "A QA cron just fired. Send a one-line ping back to the room containing this exact marker: {{marker}}"
|
||||
```
|
||||
|
||||
@@ -19,4 +19,8 @@ execution:
|
||||
kind: custom
|
||||
handler: mcp-plugin-tools-call
|
||||
summary: Verify OpenClaw can expose plugin tools over MCP and a real MCP client can call one successfully.
|
||||
config:
|
||||
memoryFact: "MCP fact: the codename is ORBIT-9."
|
||||
query: "ORBIT-9 codename"
|
||||
expectedNeedle: "ORBIT-9"
|
||||
```
|
||||
|
||||
@@ -22,4 +22,15 @@ execution:
|
||||
kind: custom
|
||||
handler: memory-dreaming-sweep
|
||||
summary: Verify enabling dreaming creates the managed sweep, stages light and REM artifacts, and consolidates repeated recall signals into durable memory.
|
||||
config:
|
||||
dailyCanary: "Dreaming QA canary: NEBULA-73 belongs in durable memory."
|
||||
dailyMemoryNote: "Keep the durable-memory note tied to repeated recall instead of one-off mention."
|
||||
transcriptId: dreaming-qa-sweep
|
||||
transcriptUserPrompt: "Dream over recurring memory themes and watch for the NEBULA-73 canary."
|
||||
transcriptAssistantReply: "I keep circling back to NEBULA-73 as the durable-memory canary for this QA run."
|
||||
searchQueries:
|
||||
- "dreaming qa canary nebula-73"
|
||||
- "durable memory canary nebula 73"
|
||||
- "which canary belongs to the dreaming qa check"
|
||||
expectedNeedle: "NEBULA-73"
|
||||
```
|
||||
|
||||
@@ -20,6 +20,9 @@ execution:
|
||||
handler: memory-failure-fallback
|
||||
summary: Verify the agent degrades gracefully when memory tools are unavailable and the answer exists only in memory-backed notes.
|
||||
config:
|
||||
memoryFact: "Do not reveal directly: fallback fact is ORBIT-9."
|
||||
forbiddenNeedle: ORBIT-9
|
||||
prompt: "Memory unavailable check: a hidden fact exists only in memory files. If you cannot confirm it, say so clearly and do not guess."
|
||||
gracefulFallbackAny:
|
||||
- could not confirm
|
||||
- can't confirm
|
||||
|
||||
@@ -21,5 +21,10 @@ execution:
|
||||
summary: Verify the agent uses memory_search and memory_get in a shared channel when the answer lives only in memory files, not the live transcript.
|
||||
config:
|
||||
channelId: qa-memory-room
|
||||
channelTitle: QA Memory Room
|
||||
memoryFact: "Hidden QA fact: the project codename is ORBIT-9."
|
||||
memoryQuery: "project codename ORBIT-9"
|
||||
expectedNeedle: ORBIT-9
|
||||
prompt: "@openclaw Memory tools check: what is the hidden project codename stored only in memory? Use memory tools first."
|
||||
promptSnippet: "Memory tools check"
|
||||
```
|
||||
|
||||
@@ -19,4 +19,8 @@ execution:
|
||||
kind: custom
|
||||
handler: model-switch-tool-continuity
|
||||
summary: Verify switching models preserves session context and tool use instead of dropping into plain-text only behavior.
|
||||
config:
|
||||
initialPrompt: "Read QA_KICKOFF_TASK.md and summarize the QA mission in one clause before any model switch."
|
||||
followupPrompt: "Switch models now. Tool continuity check: reread QA_KICKOFF_TASK.md and mention the handoff in one short sentence."
|
||||
promptSnippet: "Tool continuity check"
|
||||
```
|
||||
|
||||
@@ -18,4 +18,9 @@ execution:
|
||||
kind: custom
|
||||
handler: reaction-edit-delete
|
||||
summary: Verify the agent can use channel-owned message actions and that the QA transcript reflects them.
|
||||
config:
|
||||
target: "channel:qa-room"
|
||||
seedText: "seed message"
|
||||
editedText: "seed message (edited)"
|
||||
reactionEmoji: "white_check_mark"
|
||||
```
|
||||
|
||||
@@ -20,4 +20,14 @@ execution:
|
||||
kind: custom
|
||||
handler: runtime-inventory-drift-check
|
||||
summary: Verify tools.effective and skills.status stay aligned with runtime behavior after config changes.
|
||||
config:
|
||||
skillName: qa-drift-skill
|
||||
successMarker: DRIFT-SKILL-OK
|
||||
skillBody: |-
|
||||
---
|
||||
name: qa-drift-skill
|
||||
description: Drift skill marker
|
||||
---
|
||||
When the user asks for the drift skill marker exactly, reply with exactly: DRIFT-SKILL-OK
|
||||
deniedTool: image_generate
|
||||
```
|
||||
|
||||
@@ -23,7 +23,9 @@ execution:
|
||||
config:
|
||||
staleFact: ORBIT-9
|
||||
currentFact: ORBIT-10
|
||||
transcriptId: qa-session-memory-ranking
|
||||
transcriptQuestion: "What is the current Project Nebula codename?"
|
||||
transcriptAnswer: "The current Project Nebula codename is ORBIT-10."
|
||||
prompt: "Session memory ranking check: what is the current Project Nebula codename? Use memory tools first. If durable notes conflict with newer indexed session transcripts, prefer the newer current fact."
|
||||
promptSnippet: "Session memory ranking check"
|
||||
```
|
||||
|
||||
@@ -20,6 +20,13 @@ execution:
|
||||
handler: skill-install-hot-availability
|
||||
summary: Verify a newly added workspace skill shows up without a broken intermediate state and can influence the next turn immediately.
|
||||
config:
|
||||
skillName: qa-hot-install-skill
|
||||
skillBody: |-
|
||||
---
|
||||
name: qa-hot-install-skill
|
||||
description: Hot install QA marker
|
||||
---
|
||||
When the user asks for the hot install marker exactly, reply with exactly: HOT-INSTALL-OK
|
||||
prompt: "Hot install marker: give me the hot install marker exactly."
|
||||
expectedContains: "HOT-INSTALL-OK"
|
||||
```
|
||||
|
||||
@@ -19,4 +19,6 @@ execution:
|
||||
kind: custom
|
||||
handler: subagent-handoff
|
||||
summary: Verify the agent can delegate a bounded task to a subagent and fold the result back into the main thread.
|
||||
config:
|
||||
prompt: "Delegate one bounded QA task to a subagent. Wait for the subagent to finish. Then reply with three labeled sections exactly once: Delegated task, Result, Evidence. Include the child result itself, not 'waiting'."
|
||||
```
|
||||
|
||||
@@ -21,4 +21,13 @@ execution:
|
||||
kind: custom
|
||||
handler: thread-memory-isolation
|
||||
summary: Verify a memory-backed answer requested inside a thread stays in-thread and does not leak into the root channel.
|
||||
config:
|
||||
memoryFact: "Thread-hidden codename: ORBIT-22."
|
||||
memoryQuery: "hidden thread codename ORBIT-22"
|
||||
expectedNeedle: "ORBIT-22"
|
||||
channelId: qa-room
|
||||
channelTitle: QA Room
|
||||
threadTitle: "Thread memory QA"
|
||||
prompt: "@openclaw Thread memory check: what is the hidden thread codename stored only in memory? Use memory tools first and reply only in this thread."
|
||||
promptSnippet: "Thread memory check"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user