fix(qa-lab): address remaining review comments

This commit is contained in:
Ayaan Zaidi
2026-04-10 18:31:11 +05:30
parent 2aaf5a3baa
commit d69cc5da5c
3 changed files with 100 additions and 17 deletions

View File

@@ -163,7 +163,7 @@ export function registerQaLabCli(program: Command) {
.option(
"--provider-mode <mode>",
"Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
"live-frontier",
"mock-openai",
)
.option("--model <ref>", "Primary provider/model ref")
.option("--alt-model <ref>", "Alternate provider/model ref")
@@ -223,7 +223,7 @@ export function registerQaLabCli(program: Command) {
.option(
"--provider-mode <mode>",
"Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
"mock-openai",
"live-frontier",
)
.option("--model <ref>", "Primary provider/model ref")
.option("--alt-model <ref>", "Alternate provider/model ref")

View File

@@ -123,6 +123,49 @@ describe("telegram live qa runtime", () => {
});
});
it("ignores unrelated sut replies when matching the canary response", () => {
  type CanaryParams = Parameters<typeof __testing.classifyCanaryReply>[0];

  // Context shared by both classifications: the group, the SUT bot, and the
  // id of the driver message that a canary reply is expected to thread under.
  const canaryContext: Omit<CanaryParams, "message"> = {
    groupId: "-100123",
    sutBotId: 88,
    driverMessageId: 55,
  };

  // Non-empty SUT message in the right group, but threaded under message 999
  // rather than the driver message.
  const unrelatedReply: CanaryParams["message"] = {
    updateId: 1,
    messageId: 9,
    chatId: -100123,
    senderId: 88,
    senderIsBot: true,
    senderUsername: "sut_bot",
    text: "other reply",
    replyToMessageId: 999,
    timestamp: 1_700_000_000_000,
    inlineButtons: [],
    mediaKinds: [],
  };

  // Non-empty SUT message threaded under the driver message (id 55).
  const canaryReply: CanaryParams["message"] = {
    updateId: 2,
    messageId: 10,
    chatId: -100123,
    senderId: 88,
    senderIsBot: true,
    senderUsername: "sut_bot",
    text: "canary reply",
    replyToMessageId: 55,
    timestamp: 1_700_000_001_000,
    inlineButtons: [],
    mediaKinds: [],
  };

  expect(
    __testing.classifyCanaryReply({ ...canaryContext, message: unrelatedReply }),
  ).toBe("unthreaded");
  expect(
    __testing.classifyCanaryReply({ ...canaryContext, message: canaryReply }),
  ).toBe("match");
});
it("formats phase-specific canary diagnostics with context", () => {
const error = new Error(
"SUT bot did not send any group reply after the canary command within 30s.",

View File

@@ -436,6 +436,24 @@ function findScenario(ids?: string[]) {
return selected;
}
/**
 * Classifies an observed message relative to the canary driver message.
 *
 * @param params.message - The observed message to classify.
 * @param params.groupId - Group chat id as a string; compared numerically
 *   against `message.chatId`.
 * @param params.sutBotId - Sender id that the message must come from.
 * @param params.driverMessageId - Id of the driver message that a matching
 *   reply must reference via `replyToMessageId`.
 * @returns `"ignore"` when the message is from another chat, from another
 *   sender, or has only whitespace text; `"match"` when it replies to the
 *   driver message; `"unthreaded"` for any other non-empty message from the
 *   SUT bot in the group.
 */
function classifyCanaryReply(params: {
  message: TelegramObservedMessage;
  groupId: string;
  sutBotId: number;
  driverMessageId: number;
}) {
  const { message, groupId, sutBotId, driverMessageId } = params;

  // Chat and sender are checked before the text so that the text is only
  // inspected for messages that already come from the SUT bot in this group.
  const isFromSutInGroup =
    message.chatId === Number(groupId) && message.senderId === sutBotId;
  if (!isFromSutInGroup || message.text.trim().length === 0) {
    return "ignore" as const;
  }

  if (message.replyToMessageId === driverMessageId) {
    return "match" as const;
  }
  return "unthreaded" as const;
}
async function runCanary(params: {
driverToken: string;
groupId: string;
@@ -449,6 +467,9 @@ async function runCanary(params: {
params.groupId,
`/help@${params.sutUsername}`,
);
let firstUnthreadedReply:
| Pick<TelegramObservedMessage, "messageId" | "replyToMessageId" | "text">
| undefined;
let sutObserved: Awaited<ReturnType<typeof waitForObservedMessage>>;
try {
sutObserved = await waitForObservedMessage({
@@ -456,10 +477,41 @@ async function runCanary(params: {
initialOffset: offset,
timeoutMs: 30_000,
observedMessages: params.observedMessages,
predicate: (message) =>
message.chatId === Number(params.groupId) && message.senderId === params.sutBotId,
predicate: (message) => {
const classification = classifyCanaryReply({
message,
groupId: params.groupId,
sutBotId: params.sutBotId,
driverMessageId: driverMessage.message_id,
});
if (classification === "ignore") {
return false;
}
if (classification === "unthreaded") {
firstUnthreadedReply ??= {
messageId: message.messageId,
replyToMessageId: message.replyToMessageId,
text: message.text,
};
return false;
}
return classification === "match";
},
});
} catch (error) {
if (firstUnthreadedReply) {
throw new TelegramQaCanaryError(
"sut_reply_not_threaded",
"SUT bot replied, but not as a reply to the canary driver message.",
{
groupId: params.groupId,
sutBotId: params.sutBotId,
driverMessageId: driverMessage.message_id,
sutMessageId: firstUnthreadedReply.messageId,
sutReplyToMessageId: firstUnthreadedReply.replyToMessageId,
},
);
}
throw new TelegramQaCanaryError(
"sut_reply_timeout",
"SUT bot did not send any group reply after the canary command within 30s.",
@@ -471,19 +523,6 @@ async function runCanary(params: {
},
);
}
if (sutObserved.message.replyToMessageId !== driverMessage.message_id) {
throw new TelegramQaCanaryError(
"sut_reply_not_threaded",
"SUT bot replied, but not as a reply to the canary driver message.",
{
groupId: params.groupId,
sutBotId: params.sutBotId,
driverMessageId: driverMessage.message_id,
sutMessageId: sutObserved.message.messageId,
sutReplyToMessageId: sutObserved.message.replyToMessageId,
},
);
}
if (!sutObserved.message.text.trim()) {
throw new TelegramQaCanaryError(
"sut_reply_empty",
@@ -743,6 +782,7 @@ export const __testing = {
TELEGRAM_QA_SCENARIOS,
buildTelegramQaConfig,
canaryFailureMessage,
classifyCanaryReply,
normalizeTelegramObservedMessage,
resolveTelegramQaRuntimeEnv,
};