mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-16 03:31:10 +00:00
fix(qa-lab): address remaining review comments
This commit is contained in:
@@ -163,7 +163,7 @@ export function registerQaLabCli(program: Command) {
|
||||
.option(
|
||||
"--provider-mode <mode>",
|
||||
"Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
|
||||
"live-frontier",
|
||||
"mock-openai",
|
||||
)
|
||||
.option("--model <ref>", "Primary provider/model ref")
|
||||
.option("--alt-model <ref>", "Alternate provider/model ref")
|
||||
@@ -223,7 +223,7 @@ export function registerQaLabCli(program: Command) {
|
||||
.option(
|
||||
"--provider-mode <mode>",
|
||||
"Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
|
||||
"mock-openai",
|
||||
"live-frontier",
|
||||
)
|
||||
.option("--model <ref>", "Primary provider/model ref")
|
||||
.option("--alt-model <ref>", "Alternate provider/model ref")
|
||||
|
||||
@@ -123,6 +123,49 @@ describe("telegram live qa runtime", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// A non-empty SUT message in the QA group only counts as the canary response when it is
// threaded onto the driver's canary message; any other one is reported as "unthreaded".
it("ignores unrelated sut replies when matching the canary response", () => {
  // Canary context shared by both classifications below.
  const canaryContext = {
    groupId: "-100123",
    sutBotId: 88,
    driverMessageId: 55,
  };
  // Fields common to both observed SUT messages; each case adds what differs.
  const sutMessageBase = {
    chatId: -100123,
    senderId: 88,
    senderIsBot: true,
    senderUsername: "sut_bot",
    inlineButtons: [],
    mediaKinds: [],
  };

  // Replies to some other message (999), not to the driver's canary message (55).
  expect(
    __testing.classifyCanaryReply({
      ...canaryContext,
      message: {
        ...sutMessageBase,
        updateId: 1,
        messageId: 9,
        text: "other reply",
        replyToMessageId: 999,
        timestamp: 1_700_000_000_000,
      },
    }),
  ).toBe("unthreaded");

  // Replies directly to the driver's canary message.
  expect(
    __testing.classifyCanaryReply({
      ...canaryContext,
      message: {
        ...sutMessageBase,
        updateId: 2,
        messageId: 10,
        text: "canary reply",
        replyToMessageId: 55,
        timestamp: 1_700_000_001_000,
      },
    }),
  ).toBe("match");
});
|
||||
|
||||
it("formats phase-specific canary diagnostics with context", () => {
|
||||
const error = new Error(
|
||||
"SUT bot did not send any group reply after the canary command within 30s.",
|
||||
|
||||
@@ -436,6 +436,24 @@ function findScenario(ids?: string[]) {
|
||||
return selected;
|
||||
}
|
||||
|
||||
/**
 * Classifies an observed Telegram message relative to the driver's canary message.
 *
 * @param params.message - The observed message to classify.
 * @param params.groupId - Group chat id as a string; converted with `Number()` before
 *   being compared against `message.chatId`.
 * @param params.sutBotId - Sender id a message must carry to be considered at all.
 * @param params.driverMessageId - Id of the driver message a matching reply must reference.
 * @returns `"ignore"` when the message comes from a different chat, a different sender,
 *   or has whitespace-only text; `"match"` when its `replyToMessageId` equals
 *   `driverMessageId`; `"unthreaded"` otherwise.
 */
function classifyCanaryReply(params: {
  message: TelegramObservedMessage;
  groupId: string;
  sutBotId: number;
  driverMessageId: number;
}): "ignore" | "match" | "unthreaded" {
  const { message, groupId, sutBotId, driverMessageId } = params;

  // Chat and sender are checked before the text so `text` is only read for messages
  // that already passed both id checks.
  const isCandidate =
    message.chatId === Number(groupId) &&
    message.senderId === sutBotId &&
    message.text.trim().length > 0;
  if (!isCandidate) {
    return "ignore";
  }

  return message.replyToMessageId === driverMessageId ? "match" : "unthreaded";
}
|
||||
|
||||
async function runCanary(params: {
|
||||
driverToken: string;
|
||||
groupId: string;
|
||||
@@ -449,6 +467,9 @@ async function runCanary(params: {
|
||||
params.groupId,
|
||||
`/help@${params.sutUsername}`,
|
||||
);
|
||||
let firstUnthreadedReply:
|
||||
| Pick<TelegramObservedMessage, "messageId" | "replyToMessageId" | "text">
|
||||
| undefined;
|
||||
let sutObserved: Awaited<ReturnType<typeof waitForObservedMessage>>;
|
||||
try {
|
||||
sutObserved = await waitForObservedMessage({
|
||||
@@ -456,10 +477,41 @@ async function runCanary(params: {
|
||||
initialOffset: offset,
|
||||
timeoutMs: 30_000,
|
||||
observedMessages: params.observedMessages,
|
||||
predicate: (message) =>
|
||||
message.chatId === Number(params.groupId) && message.senderId === params.sutBotId,
|
||||
predicate: (message) => {
|
||||
const classification = classifyCanaryReply({
|
||||
message,
|
||||
groupId: params.groupId,
|
||||
sutBotId: params.sutBotId,
|
||||
driverMessageId: driverMessage.message_id,
|
||||
});
|
||||
if (classification === "ignore") {
|
||||
return false;
|
||||
}
|
||||
if (classification === "unthreaded") {
|
||||
firstUnthreadedReply ??= {
|
||||
messageId: message.messageId,
|
||||
replyToMessageId: message.replyToMessageId,
|
||||
text: message.text,
|
||||
};
|
||||
return false;
|
||||
}
|
||||
return classification === "match";
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
if (firstUnthreadedReply) {
|
||||
throw new TelegramQaCanaryError(
|
||||
"sut_reply_not_threaded",
|
||||
"SUT bot replied, but not as a reply to the canary driver message.",
|
||||
{
|
||||
groupId: params.groupId,
|
||||
sutBotId: params.sutBotId,
|
||||
driverMessageId: driverMessage.message_id,
|
||||
sutMessageId: firstUnthreadedReply.messageId,
|
||||
sutReplyToMessageId: firstUnthreadedReply.replyToMessageId,
|
||||
},
|
||||
);
|
||||
}
|
||||
throw new TelegramQaCanaryError(
|
||||
"sut_reply_timeout",
|
||||
"SUT bot did not send any group reply after the canary command within 30s.",
|
||||
@@ -471,19 +523,6 @@ async function runCanary(params: {
|
||||
},
|
||||
);
|
||||
}
|
||||
if (sutObserved.message.replyToMessageId !== driverMessage.message_id) {
|
||||
throw new TelegramQaCanaryError(
|
||||
"sut_reply_not_threaded",
|
||||
"SUT bot replied, but not as a reply to the canary driver message.",
|
||||
{
|
||||
groupId: params.groupId,
|
||||
sutBotId: params.sutBotId,
|
||||
driverMessageId: driverMessage.message_id,
|
||||
sutMessageId: sutObserved.message.messageId,
|
||||
sutReplyToMessageId: sutObserved.message.replyToMessageId,
|
||||
},
|
||||
);
|
||||
}
|
||||
if (!sutObserved.message.text.trim()) {
|
||||
throw new TelegramQaCanaryError(
|
||||
"sut_reply_empty",
|
||||
@@ -743,6 +782,7 @@ export const __testing = {
|
||||
TELEGRAM_QA_SCENARIOS,
|
||||
buildTelegramQaConfig,
|
||||
canaryFailureMessage,
|
||||
classifyCanaryReply,
|
||||
normalizeTelegramObservedMessage,
|
||||
resolveTelegramQaRuntimeEnv,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user