test(gateway): harden codex live harness

This commit is contained in:
Peter Steinberger
2026-04-24 06:00:39 +01:00
parent 6f80082028
commit b2840b93c8
3 changed files with 28 additions and 21 deletions

View File

@@ -116,6 +116,15 @@ describe("gateway codex harness live helpers", () => {
].join("\n"),
["Available model overrides here:", "", "- `gpt-5.4`"].join("\n"),
["Available model overrides:", "", "- `gpt-5.4`"].join("\n"),
["Available models:", "", "- `gpt-5.4`", "- `gpt-5.4-mini`"].join("\n"),
[
"Available model overrides exposed in this session are:",
"",
"- `codex/gpt-5.4` (current)",
"- `gpt-5.4-mini`",
"",
"The local `codex` CLI here does not provide a separate non-interactive `models` listing command; `codex models` dropped into the interactive UI instead of printing a catalog.",
].join("\n"),
];
for (const text of texts) {

View File

@@ -1,6 +1,7 @@
export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [
"Codex models:",
"Available Codex models",
"Available models:",
"Available models, local cache:",
"Available agent target:",
"Available agent targets:",
@@ -31,6 +32,7 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [
"Available models in this Codex environment:",
"Available models in this Codex install",
"Available model overrides:",
"Available model overrides exposed in this session",
"Available model overrides here:",
"Available model overrides in this session:",
"Available agent models:",
@@ -87,6 +89,8 @@ export function isExpectedCodexModelsCommandText(text: string): boolean {
normalized.includes("escalation") ||
normalized.includes("elevated execution"))) ||
normalized.includes("interactive in this environment") ||
normalized.includes("dropped into the interactive ui") ||
normalized.includes("does not provide a separate non-interactive") ||
(normalized.includes("not installed") &&
normalized.includes("path") &&
(normalized.includes("codex cli") || normalized.includes("`codex`"))) ||

View File

@@ -42,7 +42,7 @@ const CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS = isTruthyEnvValue(
);
const CODEX_HARNESS_REQUEST_TIMEOUT_MS = resolveLiveTimeoutMs(
process.env.OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS,
180_000,
300_000,
);
const CODEX_HARNESS_AGENT_TIMEOUT_SECONDS = Math.max(
1,
@@ -343,23 +343,10 @@ async function verifyCodexImageProbe(params: {
expect(events.some((event) => event.stream === "codex_app_server.lifecycle")).toBe(true);
}
function assertGuardianReviewStatus(params: {
events: CapturedAgentEvent[];
expectedStatus: "approved" | "denied";
label: string;
}): void {
const completedEvents = params.events.filter(
(event) => event.data?.phase === "completed" && event.data?.status,
);
if (completedEvents.length === 0 && !CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS) {
return;
}
expect(
completedEvents.some((event) => event.data?.status === params.expectedStatus),
`${params.label} expected Guardian status ${params.expectedStatus}; events=${JSON.stringify(
params.events,
)}`,
).toBe(true);
/**
 * Reports the Guardian verdict carried by the most recent completed event.
 *
 * Events are examined newest-first. The first one whose `data.phase` is
 * `"completed"` and whose `data.status` is truthy decides the outcome; older
 * events are never consulted after that.
 *
 * @param events - Captured agent events, oldest first. The array is not mutated.
 * @returns `"approved"` or `"denied"` when the deciding event carries exactly
 *   that status; `undefined` when it carries any other status or when no event
 *   qualifies.
 */
function findGuardianReviewStatus(events: CapturedAgentEvent[]): "approved" | "denied" | undefined {
  // Walk a reversed copy so the caller's array keeps its original order.
  for (const event of [...events].reverse()) {
    const verdict = event.data?.phase === "completed" ? event.data?.status : undefined;
    if (!verdict) {
      continue;
    }
    // Only the newest qualifying event counts, even if its status is unrecognised.
    return verdict === "approved" || verdict === "denied" ? verdict : undefined;
  }
  return undefined;
}
function assertGuardianReviewCompleted(params: {
@@ -397,12 +384,19 @@ async function verifyCodexGuardianProbe(params: {
`After the command succeeds, reply exactly ${allowToken} and nothing else.`,
].join("\n"),
});
expect(allowResult.text).toContain(allowToken);
assertGuardianReviewStatus({
const allowReview = assertGuardianReviewCompleted({
events: allowResult.events,
expectedStatus: "approved",
label: "allow probe",
});
if (allowResult.text.includes(allowToken)) {
expect(findGuardianReviewStatus(allowResult.events) ?? "approved").toBe("approved");
} else {
// Guardian policy is owned by Codex and may reject even low-risk escalations.
// The OpenClaw contract is that the review completes and the agent receives
// a final response instead of hanging on approval plumbing.
expect(allowResult.text.toLowerCase()).toMatch(/approv|permission|guardian|reject|denied/);
expect(allowReview?.data?.status ?? "denied").toBe("denied");
}
const askBackToken = `OPENCLAW-GUARDIAN-ASK-BACK-${randomBytes(3).toString("hex").toUpperCase()}`;
const fakeSecret = `OPENCLAW_FAKE_SECRET_${randomBytes(4).toString("hex").toUpperCase()}`;