mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 17:54:47 +00:00
fix(qa-lab): add gateway log sentinels
This commit is contained in:
@@ -83,6 +83,7 @@ Docs: https://docs.openclaw.ai
|
||||
- CLI/setup: collapse raw gateway config keys in existing-config summaries into friendly `Model` and `Gateway` rows.
|
||||
- CLI/config: show concise human config-write output with an indented backup path instead of printing checksum-heavy overwrite audit details by default.
|
||||
- CLI/docs: call the canonical lowercase docs MCP search tool and surface MCP errors instead of returning empty search results. Fixes #82702. (#82704) Thanks @hclsys.
|
||||
- QA-Lab: add gateway log sentinels for plugin hook failures, Codex app-server stalls/timeouts, cron allowlist drift, live quota blockers, and direct-reply self-message transcripts so harness proof fails on self-health regressions. (#80323) Thanks @100yenadmin.
|
||||
- QA-Lab: ignore heartbeat-only operational transcripts when capturing runtime parity cells so background checks cannot replace the scenario reply. (#80323) Thanks @100yenadmin.
|
||||
- QA-Lab: pin threaded-memory parity runs to `memory-core`, keep bundled plugin resolution enabled for QA commands, and retry transient session-store lock reads. (#72045) Thanks @WuKongAI-CMU.
|
||||
- QA-Lab/qa-channel: keep mock memory ranking, inbound media notes, and opened-file realpath checks stable for mock OpenAI qa-channel runs. (#66826) Thanks @gumadeiras.
|
||||
|
||||
123
extensions/qa-lab/src/gateway-log-sentinel.test.ts
Normal file
123
extensions/qa-lab/src/gateway-log-sentinel.test.ts
Normal file
@@ -0,0 +1,123 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
assertNoGatewayLogSentinels,
|
||||
formatGatewayLogSentinelSummary,
|
||||
scanDirectReplyTranscriptSentinels,
|
||||
scanGatewayLogSentinels,
|
||||
} from "./gateway-log-sentinel.js";
|
||||
|
||||
describe("gateway log sentinels", () => {
|
||||
it("classifies May 13 beta.5 operational failure signatures", () => {
|
||||
const findings = scanGatewayLogSentinels(
|
||||
[
|
||||
"2026-05-13T00:00:01Z plugin before_prompt_build hook failed: TypeError: boom",
|
||||
"2026-05-13T00:00:02Z plugin before_tool_call crashed while evaluating policy",
|
||||
"2026-05-13T00:00:03Z plugin manifest invalid: missing contracts.tools registration",
|
||||
"2026-05-13T00:00:04Z codex app-server attempt timed out after 180000ms",
|
||||
"2026-05-13T00:00:05Z codex_app_server progress stalled for run abc123",
|
||||
"2026-05-13T00:00:06Z cron payload model openai/gpt-5.4 is not in model allowlist",
|
||||
"2026-05-13T00:00:07Z OpenAI quota exceeded for live-frontier request",
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
expect(findings.map((finding) => finding.kind)).toEqual([
|
||||
"plugin-hook-failure",
|
||||
"plugin-hook-failure",
|
||||
"plugin-contract-error",
|
||||
"codex-app-server-timeout",
|
||||
"stalled-agent-run",
|
||||
"cron-model-allowlist",
|
||||
"live-quota-or-subscription",
|
||||
]);
|
||||
expect(findings.find((finding) => finding.kind === "plugin-hook-failure")).toMatchObject({
|
||||
verdict: "qa-harness-bug",
|
||||
owner: "plugin",
|
||||
productImpact: "P1",
|
||||
});
|
||||
expect(findings.find((finding) => finding.kind === "live-quota-or-subscription")).toMatchObject(
|
||||
{
|
||||
verdict: "environment-blocked",
|
||||
owner: "environment",
|
||||
productImpact: "P4",
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("honors log cursors while preserving absolute line numbers", () => {
|
||||
const prefix = "safe line\n";
|
||||
const findings = scanGatewayLogSentinels(`${prefix}codex app-server attempt timed out`, {
|
||||
since: prefix.length,
|
||||
});
|
||||
|
||||
expect(findings).toHaveLength(1);
|
||||
expect(findings[0]).toMatchObject({
|
||||
kind: "codex-app-server-timeout",
|
||||
line: 2,
|
||||
});
|
||||
});
|
||||
|
||||
it("throws actionable summaries unless only environment blockers are allowed", () => {
|
||||
expect(() => assertNoGatewayLogSentinels("codex_app_server progress stalled")).toThrow(
|
||||
"stalled-agent-run",
|
||||
);
|
||||
expect(() =>
|
||||
assertNoGatewayLogSentinels("OpenAI quota exceeded", { allowEnvironmentBlocked: true }),
|
||||
).not.toThrow();
|
||||
expect(formatGatewayLogSentinelSummary(scanGatewayLogSentinels("OpenAI quota exceeded"))).toBe(
|
||||
"live-quota-or-subscription@1 environment-blocked owner=environment: OpenAI quota exceeded",
|
||||
);
|
||||
});
|
||||
|
||||
it("detects direct reply self-message transcripts separately from gateway logs", () => {
|
||||
const findings = scanDirectReplyTranscriptSentinels(
|
||||
[
|
||||
JSON.stringify({
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "tool_use",
|
||||
name: "message",
|
||||
input: { action: "send", conversationId: "qa-operator", text: "hello" },
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
JSON.stringify({ message: { role: "assistant", content: "Sent." } }),
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
expect(findings).toHaveLength(1);
|
||||
expect(findings[0]).toMatchObject({
|
||||
kind: "direct-reply-self-message",
|
||||
verdict: "product-bug",
|
||||
owner: "openclaw-routing",
|
||||
});
|
||||
});
|
||||
|
||||
it("detects OpenAI function_call-shaped direct reply transcripts", () => {
|
||||
const findings = scanDirectReplyTranscriptSentinels(
|
||||
[
|
||||
JSON.stringify({
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "function_call",
|
||||
name: "message",
|
||||
arguments: JSON.stringify({
|
||||
action: "send",
|
||||
target: "current",
|
||||
text: "hello",
|
||||
}),
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
JSON.stringify({ message: { role: "assistant", content: "Sent." } }),
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
expect(findings.map((finding) => finding.kind)).toEqual(["direct-reply-self-message"]);
|
||||
});
|
||||
});
|
||||
367
extensions/qa-lab/src/gateway-log-sentinel.ts
Normal file
367
extensions/qa-lab/src/gateway-log-sentinel.ts
Normal file
@@ -0,0 +1,367 @@
|
||||
/** Identifier of a sentinel signature that a scan can report. */
export type GatewayLogSentinelKind =
  | "plugin-hook-failure"
  | "plugin-contract-error"
  | "direct-reply-self-message"
  | "codex-app-server-timeout"
  | "stalled-agent-run"
  | "cron-model-allowlist"
  | "live-quota-or-subscription";

/** Triage verdict attached to a finding. */
export type GatewayLogSentinelVerdict =
  | "product-bug"
  | "qa-harness-bug"
  | "fixture-bug"
  | "environment-blocked";

/** Area named as the owner of a finding. */
export type GatewayLogSentinelOwner =
  | "plugin"
  | "openclaw-routing"
  | "codex-runtime"
  | "openclaw-cron"
  | "environment";

/** Priority label shared by the product and QA impact fields. */
type GatewayLogSentinelImpact = "P0" | "P1" | "P2" | "P3" | "P4";

/** One sentinel match: its classification plus where it was seen. */
export type GatewayLogSentinelFinding = {
  kind: GatewayLogSentinelKind;
  verdict: GatewayLogSentinelVerdict;
  owner: GatewayLogSentinelOwner;
  productImpact: GatewayLogSentinelImpact;
  qaImpact: GatewayLogSentinelImpact;
  /** 1-based line number within the full log text. */
  line: number;
  /** The matched line with surrounding whitespace trimmed. */
  text: string;
};

/** Options accepted by the log scanner. */
export type GatewayLogSentinelScanOptions = {
  /** Offset into the log text at which scanning starts. */
  since?: number;
  /** When non-empty, only findings of these kinds are returned. */
  kinds?: readonly GatewayLogSentinelKind[];
  /** Findings of these kinds are dropped from the result. */
  ignoreKinds?: readonly GatewayLogSentinelKind[];
};

/** Scan options plus the switches that only the asserting entry point reads. */
export type GatewayLogSentinelAssertOptions = GatewayLogSentinelScanOptions & {
  /** When true, a result made up solely of `environment-blocked` findings does not throw. */
  allowEnvironmentBlocked?: boolean;
};

/** A classification paired with the predicate that recognises it on a single log line. */
type GatewayLogSentinelRule = Omit<GatewayLogSentinelFinding, "line" | "text"> & {
  test: (line: string) => boolean;
};

/** A tool invocation lifted out of an assistant transcript message. */
type GatewayLogSentinelToolCall = {
  name: string;
  args: unknown;
};
|
||||
|
||||
// The rule predicates run once per scanned log line, so their patterns are built a single time
// here instead of inside each callback. None of the patterns carries the `g` or `y` flag, which
// means `.test()` keeps no `lastIndex` state and the shared instances are safe to reuse.
const PLUGIN_HOOK_NAME_PATTERN = /\bbefore_(?:prompt_build|tool_call)\b/iu;
const PLUGIN_HOOK_FAILURE_PATTERN = /\b(?:crash(?:ed)?|exception|failed|failure|error)\b/iu;
const PLUGIN_CONTRACT_NAME_PATTERN = /\bcontracts\.tools\b/iu;
const PLUGIN_CONTRACT_PROBLEM_PATTERN =
  /\b(?:missing|invalid|registration|register|manifest|contract|schema|error)\b/iu;
const CODEX_APP_SERVER_TIMEOUT_PATTERN =
  /\bcodex app-server\b.*\btimed out\b|\btimed out\b.*\bcodex app-server\b/iu;
const STALLED_AGENT_RUN_PATTERN =
  /\bcodex_app_server\b.*\b(?:stalled|no progress|progress stalled)\b|\b(?:stalled|no progress|progress stalled)\b.*\bcodex_app_server\b/iu;
const CRON_PATTERN = /\bcron\b/iu;
const CRON_MODEL_ALLOWLIST_PATTERNS: readonly RegExp[] = [
  /\bmodel allowlist\b/iu,
  /\ballowlist\b.*\bmodel\b/iu,
  /\bmodel\b.*\b(?:not in|outside|blocked by)\b.*\ballowlist\b/iu,
];
const LIVE_QUOTA_PATTERN =
  /\b(?:quota exceeded|insufficient_quota|subscription exhausted|no active subscription|billing hard limit|usage limit)\b/iu;

/**
 * Ordered table of sentinel rules. Each entry carries the classification copied onto a finding
 * and a `test` predicate that receives one trimmed log line. A line may satisfy several rules;
 * the scanner reports one finding per matching rule, in the order listed here.
 */
const GATEWAY_LOG_SENTINEL_RULES: GatewayLogSentinelRule[] = [
  {
    // A plugin hook name and a failure word on the same line.
    kind: "plugin-hook-failure",
    verdict: "qa-harness-bug",
    owner: "plugin",
    productImpact: "P1",
    qaImpact: "P0",
    test: (line) => PLUGIN_HOOK_NAME_PATTERN.test(line) && PLUGIN_HOOK_FAILURE_PATTERN.test(line),
  },
  {
    // `contracts.tools` mentioned together with a registration/schema problem word.
    kind: "plugin-contract-error",
    verdict: "qa-harness-bug",
    owner: "plugin",
    productImpact: "P1",
    qaImpact: "P0",
    test: (line) =>
      PLUGIN_CONTRACT_NAME_PATTERN.test(line) && PLUGIN_CONTRACT_PROBLEM_PATTERN.test(line),
  },
  {
    // "codex app-server" and "timed out" in either order.
    kind: "codex-app-server-timeout",
    verdict: "product-bug",
    owner: "codex-runtime",
    productImpact: "P1",
    qaImpact: "P0",
    test: (line) => CODEX_APP_SERVER_TIMEOUT_PATTERN.test(line),
  },
  {
    // "codex_app_server" and a stall phrase in either order.
    kind: "stalled-agent-run",
    verdict: "product-bug",
    owner: "codex-runtime",
    productImpact: "P1",
    qaImpact: "P0",
    test: (line) => STALLED_AGENT_RUN_PATTERN.test(line),
  },
  {
    // A cron line that also talks about the model allowlist in one of the known phrasings.
    kind: "cron-model-allowlist",
    verdict: "product-bug",
    owner: "openclaw-cron",
    productImpact: "P2",
    qaImpact: "P0",
    test: (line) =>
      CRON_PATTERN.test(line) &&
      CRON_MODEL_ALLOWLIST_PATTERNS.some((pattern) => pattern.test(line)),
  },
  {
    // Quota, subscription, or billing-limit phrases.
    kind: "live-quota-or-subscription",
    verdict: "environment-blocked",
    owner: "environment",
    productImpact: "P4",
    qaImpact: "P0",
    test: (line) => LIVE_QUOTA_PATTERN.test(line),
  },
];
|
||||
|
||||
/**
 * Applies the `kinds` / `ignoreKinds` options to a list of findings.
 *
 * An absent or empty `kinds` list means "no restriction"; `ignoreKinds` is always applied.
 * Returns a new array and leaves the input untouched.
 */
function filterGatewayLogSentinelFindings(
  findings: GatewayLogSentinelFinding[],
  options: GatewayLogSentinelScanOptions | undefined,
) {
  const allowed = new Set(options?.kinds ?? []);
  const ignored = new Set(options?.ignoreKinds ?? []);
  const restrictToAllowed = allowed.size > 0;
  return findings.filter(
    ({ kind }) => (!restrictToAllowed || allowed.has(kind)) && !ignored.has(kind),
  );
}
|
||||
|
||||
/**
 * Returns the 1-based number of the line that contains `offset` within `logs`.
 *
 * Offsets at or below zero map to line 1. Each `\n` before the offset (with or without a
 * preceding `\r`) advances the line number by one.
 */
function lineNumberForOffset(logs: string, offset: number) {
  if (offset <= 0) {
    return 1;
  }
  const head = logs.slice(0, offset);
  let lineNumber = 1;
  for (let at = head.indexOf("\n"); at !== -1; at = head.indexOf("\n", at + 1)) {
    lineNumber += 1;
  }
  return lineNumber;
}
|
||||
|
||||
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Returns `value` trimmed when it is a string with non-whitespace content,
 * and `undefined` for everything else.
 */
function readNonEmptyString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
||||
|
||||
/**
 * Collects the plain text carried by a transcript message.
 *
 * - String content is returned trimmed.
 * - Array content contributes one part per entry: non-blank strings, a record's non-blank
 *   `text`, or a record's non-blank string `content` when its `type` is `output_text`,
 *   `text`, or `message`. Parts are joined with newlines.
 * - Any other content shape yields the empty string.
 */
function extractMessageText(message: Record<string, unknown>) {
  const { content } = message;
  if (typeof content === "string") {
    return content.trim();
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const pieces = content.flatMap((entry: unknown): string[] => {
    if (typeof entry === "string") {
      const trimmed = entry.trim();
      return trimmed ? [trimmed] : [];
    }
    if (!isRecord(entry)) {
      return [];
    }
    const directText = readNonEmptyString(entry.text);
    if (directText) {
      return [directText];
    }
    // Only text-like block types may contribute their nested string content.
    const nestedText = readNonEmptyString(entry.content);
    const carriesNestedText =
      entry.type === "output_text" || entry.type === "text" || entry.type === "message";
    return nestedText && carriesNestedText ? [nestedText] : [];
  });
  return pieces.join("\n").trim();
}
|
||||
|
||||
/**
 * Decodes tool-call arguments that arrive as a JSON string.
 *
 * Non-string values pass through unchanged, and a string that is not valid JSON is returned
 * as-is rather than raising.
 */
function parseJsonArguments(value: unknown): unknown {
  if (typeof value === "string") {
    try {
      const decoded: unknown = JSON.parse(value);
      return decoded;
    } catch {
      // Not JSON: fall through and hand back the raw string.
    }
  }
  return value;
}
|
||||
|
||||
/**
 * Gathers every tool call recorded on an assistant message.
 *
 * Two shapes are read, in this order:
 * 1. content blocks whose `type` (case-insensitive) is `tool_use`, `toolcall`, `tool_call`,
 *    or `function_call`;
 * 2. the first non-nullish of the message-level `tool_calls`, `toolCalls`, `function_call`,
 *    `functionCall` fields, taken as a list or as a single call.
 *
 * Calls without a usable name are reported as `"unknown"`, and string arguments are
 * JSON-decoded when possible.
 */
function extractAssistantToolCalls(message: Record<string, unknown>): GatewayLogSentinelToolCall[] {
  const collected: GatewayLogSentinelToolCall[] = [];

  const contentBlocks: unknown[] = Array.isArray(message.content) ? message.content : [];
  for (const entry of contentBlocks) {
    if (!isRecord(entry)) {
      continue;
    }
    switch (readNonEmptyString(entry.type)?.toLowerCase()) {
      case "tool_use":
      case "toolcall":
      case "tool_call":
      case "function_call":
        collected.push({
          name: readNonEmptyString(entry.name) ?? "unknown",
          args: parseJsonArguments(entry.input ?? entry.arguments ?? entry.args ?? null),
        });
        break;
      default:
        break;
    }
  }

  const messageLevel =
    message.tool_calls ?? message.toolCalls ?? message.function_call ?? message.functionCall;
  let messageLevelCalls: unknown[];
  if (Array.isArray(messageLevel)) {
    messageLevelCalls = messageLevel;
  } else if (messageLevel) {
    messageLevelCalls = [messageLevel];
  } else {
    messageLevelCalls = [];
  }
  for (const entry of messageLevelCalls) {
    if (!isRecord(entry)) {
      continue;
    }
    // Name and arguments may sit on the call itself or on a nested `function` record.
    const nested = isRecord(entry.function) ? entry.function : undefined;
    const name = readNonEmptyString(entry.name) ?? readNonEmptyString(nested?.name) ?? "unknown";
    const rawArgs = entry.arguments ?? nested?.arguments ?? entry.input ?? nested?.input ?? null;
    collected.push({ name, args: parseJsonArguments(rawArgs) });
  }

  return collected;
}
|
||||
|
||||
/**
 * Tells whether a tool call is a `message` tool `send` aimed at the current chat.
 *
 * The first non-blank of `conversationId`, `conversation`, `to`, `target` is taken as the
 * explicit target. A send with no explicit target counts as the current chat; otherwise the
 * target must contain one of the current-chat markers matched below.
 */
function isCurrentChatMessageSend(call: GatewayLogSentinelToolCall) {
  const { name, args } = call;
  if (name !== "message" || !isRecord(args)) {
    return false;
  }
  if (readNonEmptyString(args.action)?.toLowerCase() !== "send") {
    return false;
  }

  let explicitTarget: string | undefined;
  for (const key of ["conversationId", "conversation", "to", "target"]) {
    explicitTarget = readNonEmptyString(args[key]);
    if (explicitTarget !== undefined) {
      break;
    }
  }

  return (
    explicitTarget === undefined ||
    /\b(?:current|same-chat|qa-operator|dm:qa-operator)\b/iu.test(explicitTarget)
  );
}
|
||||
|
||||
/** Collapses every whitespace run to a single space and strips leading/trailing whitespace. */
function normalizeTranscriptText(text: string) {
  const words = text.split(/\s+/u).filter((word) => word.length > 0);
  return words.join(" ");
}
|
||||
|
||||
/**
 * Detects the "direct reply self-message" pattern in a JSONL transcript.
 *
 * Returns true when some assistant row contains a current-chat `message` send tool call and
 * the last non-empty assistant text in the transcript, whitespace-normalized and lowercased,
 * is exactly `sent.`. Rows that are blank, not JSON, or not assistant messages are skipped.
 */
function transcriptHasDirectReplySelfMessage(transcriptBytes: string) {
  let finalAssistantText = "";
  let sawCurrentChatSend = false;

  const rows = transcriptBytes
    .split(/\r?\n/u)
    .map((row) => row.trim())
    .filter((row) => row.length > 0);

  for (const row of rows) {
    try {
      const record: unknown = JSON.parse(row);
      if (!isRecord(record) || !isRecord(record.message)) {
        continue;
      }
      const message = record.message;
      if (message.role !== "assistant") {
        continue;
      }
      const text = extractMessageText(message);
      if (text) {
        finalAssistantText = text;
      }
      if (extractAssistantToolCalls(message).some(isCurrentChatMessageSend)) {
        sawCurrentChatSend = true;
      }
    } catch {
      // Malformed QA transcript rows are skipped so sentinel scans stay deterministic.
    }
  }

  return sawCurrentChatSend && normalizeTranscriptText(finalAssistantText).toLowerCase() === "sent.";
}
|
||||
|
||||
/**
 * Scans gateway log text for sentinel signatures.
 *
 * @param logs Full log text; an absent or empty value yields no findings.
 * @param options `since` is an offset into `logs` where scanning starts (floored and clamped
 *   to the text); `kinds` / `ignoreKinds` filter the reported findings.
 * @returns One finding per (line, matching rule) pair, in log order. Line numbers are 1-based
 *   and relative to the start of `logs`, even when a `since` offset is given.
 */
export function scanGatewayLogSentinels(
  logs: string | undefined,
  options?: GatewayLogSentinelScanOptions,
): GatewayLogSentinelFinding[] {
  if (!logs) {
    return [];
  }

  const requestedOffset = Math.floor(options?.since ?? 0);
  const startOffset = Math.max(0, Math.min(logs.length, requestedOffset));
  // Number of the line the cursor sits on; the first scanned segment belongs to that line.
  const firstLineNumber = lineNumberForOffset(logs, startOffset);

  const findings: GatewayLogSentinelFinding[] = [];
  logs
    .slice(startOffset)
    .split(/\r?\n/u)
    .forEach((rawLine, index) => {
      const text = rawLine.trim();
      if (!text) {
        return;
      }
      for (const rule of GATEWAY_LOG_SENTINEL_RULES) {
        if (rule.test(text)) {
          const { kind, verdict, owner, productImpact, qaImpact } = rule;
          findings.push({
            kind,
            verdict,
            owner,
            productImpact,
            qaImpact,
            line: firstLineNumber + index,
            text,
          });
        }
      }
    });

  return filterGatewayLogSentinelFindings(findings, options);
}
|
||||
|
||||
/**
 * Scans a JSONL transcript for the direct-reply self-message pattern.
 *
 * @returns A single `direct-reply-self-message` finding (always reported at line 1) when the
 *   pattern is present, otherwise an empty array.
 */
export function scanDirectReplyTranscriptSentinels(
  transcriptBytes: string,
): GatewayLogSentinelFinding[] {
  const selfMessageFinding: GatewayLogSentinelFinding = {
    kind: "direct-reply-self-message",
    verdict: "product-bug",
    owner: "openclaw-routing",
    productImpact: "P1",
    qaImpact: "P0",
    line: 1,
    text: "assistant called message(action=send) and then produced final text Sent.",
  };
  return transcriptHasDirectReplySelfMessage(transcriptBytes) ? [selfMessageFinding] : [];
}
|
||||
|
||||
/**
 * Renders findings as a multi-line summary, one
 * `<kind>@<line> <verdict> owner=<owner>: <text>` row per finding.
 *
 * @returns The joined rows, or `no gateway log sentinels` for an empty list.
 */
export function formatGatewayLogSentinelSummary(findings: readonly GatewayLogSentinelFinding[]) {
  if (findings.length === 0) {
    return "no gateway log sentinels";
  }
  const rows: string[] = [];
  for (const { kind, line, verdict, owner, text } of findings) {
    rows.push(`${kind}@${line} ${verdict} owner=${owner}: ${text}`);
  }
  return rows.join("\n");
}
|
||||
|
||||
/**
 * Scans `logs` and throws when any sentinel finding is not tolerated.
 *
 * @param logs Full log text, forwarded to the scanner.
 * @param options Scan options plus `allowEnvironmentBlocked`; when that flag is `true`, a
 *   result consisting solely of `environment-blocked` findings is tolerated.
 * @returns The findings (possibly empty) when nothing intolerable was found.
 * @throws Error whose message contains the formatted finding summary.
 */
export function assertNoGatewayLogSentinels(
  logs: string | undefined,
  options?: GatewayLogSentinelAssertOptions,
) {
  const findings = scanGatewayLogSentinels(logs, options);
  const onlyEnvironmentBlocked = () =>
    findings.every((finding) => finding.verdict === "environment-blocked");
  const tolerated =
    findings.length === 0 ||
    (options?.allowEnvironmentBlocked === true && onlyEnvironmentBlocked());
  if (!tolerated) {
    throw new Error(
      `Gateway log sentinel(s) detected:\n${formatGatewayLogSentinelSummary(findings)}`,
    );
  }
  return findings;
}
|
||||
@@ -101,11 +101,7 @@ async function createRuntimeParityGatewayTempRoot(
|
||||
},
|
||||
]),
|
||||
);
|
||||
await fs.writeFile(
|
||||
path.join(sessionsDir, "sessions.json"),
|
||||
JSON.stringify(store),
|
||||
"utf8",
|
||||
);
|
||||
await fs.writeFile(path.join(sessionsDir, "sessions.json"), JSON.stringify(store), "utf8");
|
||||
await Promise.all(
|
||||
fixtures.map((entry) =>
|
||||
fs.writeFile(
|
||||
@@ -671,4 +667,72 @@ describe("runtime parity", () => {
|
||||
expect(cell.finalText).toBe("scenario final");
|
||||
expect(cell.transcriptBytes).not.toContain("deployment ok");
|
||||
});
|
||||
|
||||
it("marks captured cells failed when gateway logs contain QA sentinel signatures", async () => {
|
||||
const tempRoot = await createRuntimeParityGatewayTempRoot(
|
||||
JSON.stringify({
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: "scenario final",
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const cell = await captureRuntimeParityCell({
|
||||
runtime: "codex",
|
||||
gateway: {
|
||||
tempRoot,
|
||||
logs: () => "codex_app_server progress stalled for run abc123",
|
||||
},
|
||||
scenarioResult: {
|
||||
status: "pass",
|
||||
},
|
||||
wallClockMs: 42,
|
||||
});
|
||||
|
||||
expect(cell.runtimeErrorClass).toBe("sentinel:stalled-agent-run");
|
||||
expect(cell.sentinelFindings?.map((finding) => finding.kind)).toEqual(["stalled-agent-run"]);
|
||||
});
|
||||
|
||||
it("marks direct-reply self-message transcripts as captured cell failures", async () => {
|
||||
const tempRoot = await createRuntimeParityGatewayTempRoot(
|
||||
[
|
||||
JSON.stringify({
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "tool_use",
|
||||
name: "message",
|
||||
input: { action: "send", conversationId: "qa-operator", text: "hello" },
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
JSON.stringify({
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: "Sent.",
|
||||
},
|
||||
}),
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
const cell = await captureRuntimeParityCell({
|
||||
runtime: "pi",
|
||||
gateway: {
|
||||
tempRoot,
|
||||
},
|
||||
scenarioResult: {
|
||||
status: "pass",
|
||||
},
|
||||
wallClockMs: 42,
|
||||
});
|
||||
|
||||
expect(cell.finalText).toBe("Sent.");
|
||||
expect(cell.runtimeErrorClass).toBe("sentinel:direct-reply-self-message");
|
||||
expect(cell.sentinelFindings?.map((finding) => finding.kind)).toEqual([
|
||||
"direct-reply-self-message",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,6 +2,11 @@ import { createHash } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import {
|
||||
scanDirectReplyTranscriptSentinels,
|
||||
scanGatewayLogSentinels,
|
||||
type GatewayLogSentinelFinding,
|
||||
} from "./gateway-log-sentinel.js";
|
||||
|
||||
export type RuntimeId = "pi" | "codex";
|
||||
|
||||
@@ -30,6 +35,7 @@ export type RuntimeParityCell = {
|
||||
transportErrorClass?: string;
|
||||
runtimeErrorClass?: string;
|
||||
bootStateLines: string[];
|
||||
sentinelFindings?: GatewayLogSentinelFinding[];
|
||||
};
|
||||
|
||||
export type RuntimeParityDrift =
|
||||
@@ -725,10 +731,21 @@ function isHardFailureRuntimeError(errorClass: string | undefined) {
|
||||
errorClass === "failover" ||
|
||||
errorClass === "codex-app-server" ||
|
||||
errorClass === "auth" ||
|
||||
errorClass === "capture-missing"
|
||||
errorClass === "capture-missing" ||
|
||||
errorClass?.startsWith("sentinel:") === true
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds the `sentinel:<kinds>` runtime error class for a set of sentinel findings.
 *
 * @param findings Findings collected for one captured cell.
 * @returns `undefined` when there are no findings; otherwise `sentinel:` followed by the
 *   distinct finding kinds, sorted with `localeCompare` and joined by commas.
 */
function summarizeSentinelErrorClass(findings: readonly GatewayLogSentinelFinding[]) {
  if (findings.length === 0) {
    return undefined;
  }
  // Collapse repeated kinds: the class names which signatures fired, not how many log lines
  // matched, so two cells hitting the same signature a different number of times still get
  // the same class string.
  const distinctKinds = Array.from(new Set(findings.map((finding) => finding.kind)));
  distinctKinds.sort((left, right) => left.localeCompare(right));
  return `sentinel:${distinctKinds.join(",")}`;
}
|
||||
|
||||
function classifyRuntimeParityCells(params: {
|
||||
pi: RuntimeParityCell;
|
||||
codex: RuntimeParityCell;
|
||||
@@ -946,6 +963,13 @@ export async function captureRuntimeParityCell(
|
||||
});
|
||||
const transcriptRecords = buildTranscriptRecords(transcriptBytes);
|
||||
const mockToolCalls = await loadRuntimeParityMockToolCalls(params.mockBaseUrl);
|
||||
const gatewayLogs = params.gateway.logs?.();
|
||||
const sentinelFindings = [
|
||||
...scanGatewayLogSentinels(gatewayLogs),
|
||||
...scanDirectReplyTranscriptSentinels(transcriptBytes),
|
||||
];
|
||||
const scenarioErrorClass = classifyScenarioError(params.scenarioResult.details);
|
||||
const sentinelErrorClass = summarizeSentinelErrorClass(sentinelFindings);
|
||||
return {
|
||||
runtime: params.runtime,
|
||||
transcriptBytes,
|
||||
@@ -953,10 +977,11 @@ export async function captureRuntimeParityCell(
|
||||
finalText: extractFinalAssistantText(transcriptRecords),
|
||||
usage: aggregateUsage(transcriptRecords),
|
||||
wallClockMs: params.wallClockMs,
|
||||
...(classifyScenarioError(params.scenarioResult.details)
|
||||
? { runtimeErrorClass: classifyScenarioError(params.scenarioResult.details) }
|
||||
...(scenarioErrorClass || sentinelErrorClass
|
||||
? { runtimeErrorClass: scenarioErrorClass ?? sentinelErrorClass }
|
||||
: {}),
|
||||
bootStateLines: extractBootStateLines(params.gateway.logs?.()),
|
||||
bootStateLines: extractBootStateLines(gatewayLogs),
|
||||
...(sentinelFindings.length > 0 ? { sentinelFindings } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -281,6 +281,7 @@ function isRuntimeParityPass(result: RuntimeParityResult) {
|
||||
|
||||
function formatRuntimeParityCellDetails(cell: RuntimeParityCell) {
|
||||
const errors = [cell.transportErrorClass, cell.runtimeErrorClass].filter(Boolean).join(", ");
|
||||
const sentinels = cell.sentinelFindings?.map((finding) => finding.kind).join(", ");
|
||||
return [
|
||||
`runtime=${cell.runtime}`,
|
||||
`wallMs=${cell.wallClockMs}`,
|
||||
@@ -288,6 +289,7 @@ function formatRuntimeParityCellDetails(cell: RuntimeParityCell) {
|
||||
`finalChars=${cell.finalText.length}`,
|
||||
`tokens=${cell.usage.totalTokens}`,
|
||||
...(errors ? [`errors=${errors}`] : []),
|
||||
...(sentinels ? [`sentinels=${sentinels}`] : []),
|
||||
].join(" ");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user