test(codex): cover app-server Docker flows

This commit is contained in:
Peter Steinberger
2026-04-24 04:17:26 +01:00
parent 69566e43cb
commit e0d3256311
11 changed files with 863 additions and 78 deletions

View File

@@ -50,6 +50,11 @@ When debugging real providers/models (requires real creds):
- Add new high-signal provider secrets to `scripts/ci-hydrate-live-auth.sh`
plus `.github/workflows/openclaw-live-and-e2e-checks-reusable.yml` and its
scheduled/release callers.
- Native Codex bound-chat smoke: `pnpm test:docker:live-codex-bind`
- Runs a Docker live lane against the Codex app-server path, binds a synthetic
Slack DM with `/codex bind`, exercises `/codex fast` and
`/codex permissions`, then verifies a plain reply and an image attachment
route through the native plugin binding instead of ACP.
- Moonshot/Kimi cost smoke: with `MOONSHOT_API_KEY` set, run
`openclaw models list --provider moonshot --json`, then run an isolated
`openclaw agent --local --session-id live-kimi-cost --message 'Reply exactly: KIMI_LIVE_OK' --thinking off --json`

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env -S node --import tsx
import fs from "node:fs/promises";
import path from "node:path";
/**
 * Subset of the Codex `auth.json` document that this script inspects.
 * Both token fields are typed `unknown` because the file is parsed from disk
 * without schema validation; callers check for strings before use.
 */
type CodexAuthJson = {
tokens?: {
// Account identifier; copied into the id_token claims when they lack one.
account_id?: unknown;
// JWT whose payload may be rewritten by patchCodexAuthForCi.
id_token?: unknown;
};
};
/**
 * A JWT split into its three dot-separated segments.
 * `header` and `signature` stay in their original encoded form; only
 * `payload` is decoded into a JSON object.
 */
type JwtParts = {
header: string;
payload: Record<string, unknown>;
signature: string;
};
/**
 * Decodes a base64url segment and parses it as a JSON object.
 *
 * @param value - base64url-encoded UTF-8 JSON text.
 * @returns The parsed object.
 * @throws Error when the JSON value is not a plain object (null, array, or a
 *   primitive). A `SyntaxError` from `JSON.parse` propagates unchanged.
 */
function decodeBase64UrlJson(value: string): Record<string, unknown> {
  const jsonText = Buffer.from(value, "base64url").toString("utf-8");
  const candidate: unknown = JSON.parse(jsonText);
  const isPlainObject =
    candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
  if (isPlainObject) {
    return candidate as Record<string, unknown>;
  }
  throw new Error("JWT payload is not a JSON object.");
}
/**
 * Serializes an object to JSON and encodes the UTF-8 bytes as base64url
 * (no padding), the inverse of decoding a JWT segment.
 */
function encodeBase64UrlJson(value: Record<string, unknown>): string {
  const jsonText = JSON.stringify(value);
  const bytes = Buffer.from(jsonText, "utf-8");
  return bytes.toString("base64url");
}
/**
 * Splits a compact JWT into header, decoded payload, and signature.
 *
 * The header and signature are kept verbatim; only the payload is decoded.
 * An empty signature segment is accepted.
 *
 * @throws Error when the value does not have exactly three dot-separated
 *   segments, or when the header or payload segment is empty.
 */
function parseJwt(value: string): JwtParts {
  const [header, payload, signature, ...extra] = value.split(".");
  const hasThreeSegments = signature !== undefined && extra.length === 0;
  if (!hasThreeSegments || !header || !payload) {
    throw new Error("id_token is not a JWT.");
  }
  return {
    header,
    payload: decodeBase64UrlJson(payload),
    signature: signature ?? "",
  };
}
/**
 * Reassembles a compact JWT string: the header and signature are emitted as
 * stored, and the payload object is re-encoded as base64url JSON.
 */
function stringifyJwt(parts: JwtParts): string {
  const { header, payload, signature } = parts;
  const encodedPayload = encodeBase64UrlJson(payload);
  return `${header}.${encodedPayload}.${signature}`;
}
/**
 * Ensures the id_token payload carries a `chatgpt_account_id` claim.
 *
 * When `tokens.account_id` and `tokens.id_token` are both non-blank strings and
 * the token payload has no non-empty string `chatgpt_account_id`, returns a
 * copy of `auth` whose id_token payload includes the trimmed account id. The
 * header and signature segments are carried over untouched. In every other
 * case the input object is returned as-is with `changed: false`.
 *
 * @param auth - Parsed Codex auth document.
 * @returns The (possibly patched) auth document and whether it was modified.
 * @throws Error when `id_token` is present but is not a parseable JWT.
 */
export function patchCodexAuthForCi(auth: CodexAuthJson): {
  auth: CodexAuthJson;
  changed: boolean;
} {
  const trimmedString = (raw: unknown): string => (typeof raw === "string" ? raw.trim() : "");

  const tokens = auth.tokens;
  if (!tokens) {
    return { auth, changed: false };
  }

  const accountId = trimmedString(tokens.account_id);
  const idToken = trimmedString(tokens.id_token);
  if (accountId === "" || idToken === "") {
    return { auth, changed: false };
  }

  const jwt = parseJwt(idToken);
  const existingClaim = jwt.payload.chatgpt_account_id;
  if (typeof existingClaim === "string" && existingClaim) {
    return { auth, changed: false };
  }

  // Newer Codex app-server builds read ChatGPT account metadata from
  // id_token claims. Older local auth files can have the same value only
  // at tokens.account_id, so patch the staged Docker copy for CI.
  const patchedPayload = { ...jwt.payload, chatgpt_account_id: accountId };
  const patchedIdToken = stringifyJwt({ ...jwt, payload: patchedPayload });

  return {
    auth: {
      ...auth,
      tokens: { ...tokens, id_token: patchedIdToken },
    },
    changed: true,
  };
}
/**
 * Patches the Codex auth file at `authPath` in place when it needs the
 * `chatgpt_account_id` claim.
 *
 * The file is rewritten (2-space JSON plus trailing newline) only when
 * patchCodexAuthForCi reports a change; the mode observed before the write is
 * re-applied afterwards.
 *
 * @param authPath - Path to the auth JSON file to rewrite.
 * @returns `true` when the file was rewritten, `false` when left untouched.
 */
export async function prepareCodexCiAuth(authPath: string): Promise<boolean> {
  const contents = await fs.readFile(authPath, "utf-8");
  const result = patchCodexAuthForCi(JSON.parse(contents) as CodexAuthJson);
  if (result.changed) {
    const { mode } = await fs.stat(authPath);
    const serialized = `${JSON.stringify(result.auth, null, 2)}\n`;
    await fs.writeFile(authPath, serialized, "utf-8");
    await fs.chmod(authPath, mode);
    return true;
  }
  return false;
}
// CLI entry point: runs only when this file is the script being executed
// (matched by basename), so importing the module has no side effects.
if (path.basename(process.argv[1] ?? "") === "prepare-codex-ci-auth.ts") {
  const [, , authPath] = process.argv;
  if (!authPath) {
    throw new Error("Usage: node --import tsx scripts/prepare-codex-ci-auth.ts <auth-json-path>");
  }
  // Report on stderr only when the file was actually rewritten.
  if (await prepareCodexCiAuth(authPath)) {
    console.error("Prepared staged Codex auth metadata for CI.");
  }
}

View File

@@ -46,6 +46,7 @@ const exclusiveLanes = [
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:openai-web-search-minimal",
],
["live-codex-harness", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-codex-harness"],
["live-codex-bind", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-codex-bind"],
[
"live-cli-backend-codex",
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-cli-backend:codex",

View File

@@ -157,6 +157,9 @@ if [ "${OPENCLAW_LIVE_CODEX_HARNESS_AUTH:-codex-auth}" != "api-key" ] && [ ! -s
echo "ERROR: missing ~/.codex/auth.json for Codex harness live test." >&2
exit 1
fi
if [ "${OPENCLAW_LIVE_CODEX_HARNESS_AUTH:-codex-auth}" != "api-key" ]; then
node --import tsx /src/scripts/prepare-codex-ci-auth.ts "$HOME/.codex/auth.json"
fi
if [ ! -x "$NPM_CONFIG_PREFIX/bin/codex" ]; then
npm install -g @openai/codex
fi
@@ -181,7 +184,7 @@ cd "$tmp_dir"
if [ "${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}" = "1" ]; then
node --import tsx /src/scripts/prepare-codex-ci-config.ts "$HOME/.codex/config.toml" "$tmp_dir"
fi
pnpm test:live src/gateway/gateway-codex-harness.live.test.ts
pnpm test:live ${OPENCLAW_LIVE_CODEX_TEST_FILES:-src/gateway/gateway-codex-harness.live.test.ts}
EOF
openclaw_live_codex_harness_append_build_extension codex
@@ -194,6 +197,7 @@ echo "==> MCP probe: ${OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE:-1}"
echo "==> Guardian probe: ${OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE:-1}"
echo "==> Auth mode: $CODEX_HARNESS_AUTH_MODE"
echo "==> CI-safe Codex config: ${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}"
echo "==> Test files: ${OPENCLAW_LIVE_CODEX_TEST_FILES:-src/gateway/gateway-codex-harness.live.test.ts}"
echo "==> Harness fallback: none"
echo "==> Auth files: ${AUTH_FILES_CSV:-none}"
DOCKER_RUN_ARGS=(docker run --rm -t \
@@ -213,8 +217,12 @@ DOCKER_RUN_ARGS=(docker run --rm -t \
-e OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE="${OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE:-1}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE="${OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE:-1}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_MODEL="${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.4}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS:-1}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS:-}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG="${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}" \
-e OPENCLAW_LIVE_CODEX_BIND="${OPENCLAW_LIVE_CODEX_BIND:-}" \
-e OPENCLAW_LIVE_CODEX_BIND_MODEL="${OPENCLAW_LIVE_CODEX_BIND_MODEL:-}" \
-e OPENCLAW_LIVE_CODEX_TEST_FILES="${OPENCLAW_LIVE_CODEX_TEST_FILES:-}" \
-e OPENCLAW_LIVE_TEST=1 \
-e OPENCLAW_VITEST_FS_MODULE_CACHE=0)
openclaw_live_append_array DOCKER_RUN_ARGS DOCKER_AUTH_ENV

View File

@@ -0,0 +1,517 @@
import { randomBytes, randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, it } from "vitest";
import { isLiveTestEnabled } from "../agents/live-test-helpers.js";
import { clearConfigCache, clearRuntimeConfigSnapshot } from "../config/config.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { isTruthyEnvValue } from "../infra/env.js";
import { getSessionBindingService } from "../infra/outbound/session-binding-service.js";
import { resolveBundledPluginWorkspaceSourcePath } from "../plugins/bundled-plugin-metadata.js";
import { pluginCommands } from "../plugins/command-registry-state.js";
import { clearPluginLoaderCache } from "../plugins/loader.js";
import {
pinActivePluginChannelRegistry,
releasePinnedPluginChannelRegistry,
resetPluginRuntimeStateForTest,
} from "../plugins/runtime.js";
import { extractFirstTextBlock } from "../shared/chat-message-content.js";
import { createTestRegistry } from "../test-utils/channel-plugins.js";
import { sleep } from "../utils.js";
import type { GatewayClient } from "./client.js";
import { connectTestGatewayClient } from "./gateway-cli-backend.live-helpers.js";
import { renderCatFacePngBase64 } from "./live-image-probe.js";
import { startGatewayServer } from "./server.js";
// The suite runs only when live tests are enabled AND OPENCLAW_LIVE_CODEX_BIND
// is truthy; otherwise it is registered as skipped.
const LIVE = isLiveTestEnabled();
const CODEX_BIND_LIVE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CODEX_BIND);
const describeLive = LIVE && CODEX_BIND_LIVE ? describe : describe.skip;
// Overall timeout for the single end-to-end test case (10 minutes).
const CODEX_BIND_TIMEOUT_MS = 10 * 60_000;
// Per-request budget used for gateway requests, agent waits, and the Codex
// app-server requestTimeoutMs written into the gateway config (3 minutes).
const CODEX_BIND_REQUEST_TIMEOUT_MS = 180_000;
// Model used when OPENCLAW_LIVE_CODEX_BIND_MODEL is unset or blank.
const DEFAULT_CODEX_BIND_MODEL = "gpt-5.4";
/**
 * Builds a test channel registry containing a single stub "slack" plugin that
 * advertises current-conversation binding support.
 *
 * The stub resolves a command's conversation from the first truthy value among
 * `commandTo`, `originatingTo`, and `fallbackTo` (trimmed); it never compiles
 * configured bindings or matches inbound conversations.
 */
function createSlackCurrentConversationBindingRegistry() {
  const resolveCommandConversation = (targets: {
    commandTo?: string;
    originatingTo?: string;
    fallbackTo?: string;
  }) => {
    const firstTarget = [targets.commandTo, targets.originatingTo, targets.fallbackTo].find(
      Boolean,
    );
    const conversationId = firstTarget?.trim();
    if (!conversationId) {
      return null;
    }
    return { conversationId };
  };

  return createTestRegistry([
    {
      pluginId: "slack",
      source: "test",
      plugin: {
        id: "slack",
        meta: {
          id: "slack",
          label: "Slack",
          selectionLabel: "Slack",
          docsPath: "/channels/slack",
          blurb: "test stub.",
          aliases: [],
        },
        capabilities: { chatTypes: ["direct"] },
        config: {
          listAccountIds: () => ["default"],
          resolveAccount: () => ({}),
        },
        conversationBindings: {
          supportsCurrentConversationBinding: true,
        },
        bindings: {
          compileConfiguredBinding: () => null,
          matchInboundConversation: () => null,
          resolveCommandConversation,
        },
      },
    },
  ]);
}
/**
 * Picks a free base port for the test gateway.
 *
 * The port helper is imported lazily and asked for a block covering offsets
 * 0, 1, 2 and 4, with 42000 passed as its fallback base.
 */
async function getFreeGatewayPort(): Promise<number> {
  const { getFreePortBlockWithPermissionFallback } = await import("../test-utils/ports.js");
  const portRequest = {
    offsets: [0, 1, 2, 4],
    fallbackBase: 42_000,
  };
  const port = await getFreePortBlockWithPermissionFallback(portRequest);
  return port;
}
/**
 * Collects the first text block of every assistant message, in order.
 *
 * Non-object entries, entries whose `role` is not "assistant", and messages
 * whose extracted text is missing or whitespace-only are skipped.
 */
function extractAssistantTexts(messages: unknown[]): string[] {
  const assistantTexts: string[] = [];
  for (const entry of messages) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    if ((entry as { role?: unknown }).role !== "assistant") {
      continue;
    }
    const text = extractFirstTextBlock(entry);
    if (typeof text === "string" && text.trim().length > 0) {
      assistantTexts.push(text);
    }
  }
  return assistantTexts;
}
/**
 * Joins assistant texts into a single preview for error messages.
 *
 * Texts are joined with a blank line and trimmed. When the result is longer
 * than `maxChars`, only the trailing `maxChars` characters are kept, since the
 * most recent output is the most useful when diagnosing a timeout.
 *
 * @param texts - Assistant texts in chronological order.
 * @param maxChars - Maximum preview length; values below zero are treated as 0.
 * @returns The preview, or "<empty>" when there is no non-blank text.
 */
function formatAssistantTextPreview(texts: string[], maxChars = 800): string {
  const combined = texts.join("\n\n").trim();
  if (!combined) {
    return "<empty>";
  }
  const budget = Math.max(0, maxChars);
  if (combined.length <= budget) {
    return combined;
  }
  // Compute the start index explicitly: `slice(-0)` is `slice(0)` and would
  // return the whole string instead of an empty tail.
  return combined.slice(combined.length - budget);
}
/**
 * Puts an environment variable back to a previously captured value.
 * `undefined` means the variable was not set, so it is removed rather than
 * assigned.
 */
function restoreEnvVar(name: string, value: string | undefined): void {
  if (value !== undefined) {
    process.env[name] = value;
  } else {
    delete process.env[name];
  }
}
/**
 * Blocks until the gateway reports the given agent run as finished.
 *
 * The client-side request timeout is 5 seconds longer than the `timeoutMs`
 * sent in the `agent.wait` payload.
 *
 * @throws Error when the reported status is anything other than "ok".
 */
async function waitForAgentRunOk(client: GatewayClient, runId: string): Promise<void> {
  const waitParams = { runId, timeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS };
  const requestOptions = { timeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS + 5_000 };
  const result: { status?: string } = await client.request(
    "agent.wait",
    waitParams,
    requestOptions,
  );
  const status = result?.status;
  if (status === "ok") {
    return;
  }
  throw new Error(`agent.wait failed for ${runId}: status=${String(status)}`);
}
/**
 * Sends a `chat.send` request with the given originating-channel metadata and
 * waits for the resulting agent run to finish successfully.
 *
 * @throws Error when `chat.send` does not answer with status "started" and a
 *   string run id, or when the subsequent agent wait does not end in "ok".
 */
async function sendChatAndWait(params: {
  client: GatewayClient;
  sessionKey: string;
  idempotencyKey: string;
  message: string;
  originatingChannel: string;
  originatingTo: string;
  originatingAccountId: string;
  attachments?: Array<{
    mimeType: string;
    fileName: string;
    content: string;
  }>;
}): Promise<void> {
  const payload = {
    sessionKey: params.sessionKey,
    message: params.message,
    idempotencyKey: params.idempotencyKey,
    originatingChannel: params.originatingChannel,
    originatingTo: params.originatingTo,
    originatingAccountId: params.originatingAccountId,
    attachments: params.attachments,
  };
  const started: { runId?: string; status?: string } = await params.client.request(
    "chat.send",
    payload,
  );
  const runId = started?.runId;
  if (started?.status === "started" && typeof runId === "string") {
    await waitForAgentRunOk(params.client, runId);
    return;
  }
  throw new Error(`chat.send did not start correctly: ${JSON.stringify(started)}`);
}
/**
 * Polls `chat.history` every 500 ms until an assistant message containing
 * `contains` shows up, or the timeout (default 60 s) elapses.
 *
 * Only assistant texts at index `minAssistantCount - 1` or later are searched,
 * and a match is accepted only once at least `minAssistantCount` assistant
 * texts exist. This lets callers ignore replies from earlier turns.
 *
 * NOTE(review): history is requested with `limit: 24`; if that caps the number
 * of returned messages, a `minAssistantCount` larger than what 24 messages can
 * hold could never be satisfied. Confirm against the gateway's semantics.
 *
 * @returns The fetched messages, all assistant texts, and the matching text.
 * @throws Error on timeout, including a preview of the latest assistant texts.
 */
async function waitForAssistantText(params: {
  client: GatewayClient;
  sessionKey: string;
  contains: string;
  caseInsensitive?: boolean;
  minAssistantCount?: number;
  timeoutMs?: number;
}): Promise<{ messages: unknown[]; assistantTexts: string[]; matchedAssistantText: string }> {
  const { client, sessionKey } = params;
  const timeoutMs = params.timeoutMs ?? 60_000;
  const minAssistantCount = params.minAssistantCount ?? 1;
  const firstCandidateIndex = Math.max(0, minAssistantCount - 1);
  const normalize = (text: string): string =>
    params.caseInsensitive ? text.toLowerCase() : text;
  const expected = normalize(params.contains);

  const loadMessages = async (): Promise<unknown[]> => {
    const history: { messages?: unknown[] } = await client.request("chat.history", {
      sessionKey,
      limit: 24,
    });
    return history.messages ?? [];
  };

  const startedAt = Date.now();
  while (Date.now() - startedAt < timeoutMs) {
    const messages = await loadMessages();
    const assistantTexts = extractAssistantTexts(messages);
    if (assistantTexts.length >= minAssistantCount) {
      const matchedAssistantText = assistantTexts
        .slice(firstCandidateIndex)
        .find((text) => normalize(text).includes(expected));
      if (matchedAssistantText) {
        return { messages, assistantTexts, matchedAssistantText };
      }
    }
    await sleep(500);
  }

  // One final fetch so the error shows what the assistant actually said.
  const preview = formatAssistantTextPreview(extractAssistantTexts(await loadMessages()));
  throw new Error(
    `timed out waiting for assistant text containing ${params.contains}: ${preview}`,
  );
}
/**
 * Finds the root directory of the Codex plugin.
 *
 * Prefers the `pluginRoot` of the registered `/codex` command (or of any
 * registered command owned by plugin "codex"); otherwise falls back to the
 * bundled plugin source path resolved from the current working directory.
 *
 * @throws Error when neither lookup yields a root.
 */
function resolveCodexPluginRoot(): string {
  const registeredCommand =
    pluginCommands.get("/codex") ??
    Array.from(pluginCommands.values()).find((candidate) => candidate.pluginId === "codex");
  const registeredRoot = registeredCommand?.pluginRoot;
  if (registeredRoot) {
    return registeredRoot;
  }

  const bundledRoot = resolveBundledPluginWorkspaceSourcePath({
    rootDir: process.cwd(),
    pluginId: "codex",
  });
  if (bundledRoot) {
    return bundledRoot;
  }
  throw new Error("Codex bundled plugin root was not found");
}
/**
 * Looks up the session key that a conversation is currently bound to via the
 * session binding service.
 *
 * @throws Error when no binding exists or the binding has no target session.
 */
function resolveBoundSessionKey(params: {
  channel: string;
  accountId: string;
  conversationId: string;
}): string {
  const { channel, accountId, conversationId } = params;
  const binding = getSessionBindingService().resolveByConversation({
    channel,
    accountId,
    conversationId,
  });
  const targetSessionKey = binding?.targetSessionKey;
  if (targetSessionKey) {
    return targetSessionKey;
  }
  throw new Error(`No plugin binding target session for ${channel}:${conversationId}`);
}
/**
 * Writes `<homeDir>/.openclaw/plugin-binding-approvals.json` containing a
 * single approval record for the Codex plugin on the given channel/account,
 * stamped with the current time. The directory is created if missing.
 */
async function writePluginBindingApproval(params: {
  homeDir: string;
  pluginRoot: string;
  channel: string;
  accountId: string;
}): Promise<void> {
  const openclawDir = path.join(params.homeDir, ".openclaw");
  const approvalsPath = path.join(openclawDir, "plugin-binding-approvals.json");
  await fs.mkdir(openclawDir, { recursive: true });

  const approval = {
    pluginRoot: params.pluginRoot,
    pluginId: "codex",
    pluginName: "Codex",
    channel: params.channel,
    accountId: params.accountId,
    approvedAt: Date.now(),
  };
  const document = { version: 1, approvals: [approval] };
  await fs.writeFile(approvalsPath, `${JSON.stringify(document, null, 2)}\n`);
}
/**
 * Writes the OpenClaw config used by the live bind test to `configPath`.
 *
 * The config sets a local token-authenticated gateway on `port`, allows and
 * enables only the codex plugin (app-server mode "yolo"), and points agent
 * defaults at the codex harness with no fallback, model `codex/<model>`,
 * bootstrap skipped, and sandboxing off.
 */
async function writeGatewayConfig(params: {
  configPath: string;
  model: string;
  port: number;
  token: string;
  workspace: string;
}): Promise<void> {
  const { configPath, model, port, token, workspace } = params;

  const gateway: OpenClawConfig["gateway"] = {
    mode: "local",
    port,
    auth: { mode: "token", token },
  };
  const plugins: OpenClawConfig["plugins"] = {
    allow: ["codex"],
    entries: {
      codex: {
        enabled: true,
        config: {
          appServer: {
            mode: "yolo",
            requestTimeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS,
            defaultWorkspaceDir: workspace,
          },
        },
      },
    },
  };
  const agents: OpenClawConfig["agents"] = {
    defaults: {
      workspace,
      embeddedHarness: { runtime: "codex", fallback: "none" },
      model: { primary: `codex/${model}` },
      skipBootstrap: true,
      sandbox: { mode: "off" },
    },
  };

  const cfg: OpenClawConfig = { gateway, plugins, agents };
  const serialized = `${JSON.stringify(cfg, null, 2)}\n`;
  await fs.writeFile(configPath, serialized);
}
// End-to-end live test: starts a real gateway with the Codex plugin, binds a
// synthetic Slack DM via `/codex bind`, exercises the `/codex` control
// commands, then checks that a plain message and an image attachment are
// answered in the bound session before detaching again.
describeLive("gateway live (native Codex conversation binding)", () => {
  it(
    "binds a Slack DM to Codex app-server, updates controls, and forwards image media paths",
    async () => {
      // Snapshot every env var the test overrides so it can always be restored.
      const previous = {
        codexHome: process.env.CODEX_HOME,
        configPath: process.env.OPENCLAW_CONFIG_PATH,
        gatewayToken: process.env.OPENCLAW_GATEWAY_TOKEN,
        home: process.env.HOME,
        skipCanvas: process.env.OPENCLAW_SKIP_CANVAS_HOST,
        skipChannels: process.env.OPENCLAW_SKIP_CHANNELS,
        skipCron: process.env.OPENCLAW_SKIP_CRON,
        skipGmail: process.env.OPENCLAW_SKIP_GMAIL_WATCHER,
        stateDir: process.env.OPENCLAW_STATE_DIR,
      };
      const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-live-codex-bind-"));
      const tempHome = path.join(tempRoot, "home");
      const stateDir = path.join(tempRoot, "state");
      const workspace = path.join(tempRoot, "workspace");
      const configPath = path.join(tempRoot, "openclaw.json");
      const token = `test-${randomUUID()}`;
      const sessionKey = "main";
      const accountId = "default";
      const slackUserId = `U${randomUUID().replace(/-/g, "").slice(0, 10).toUpperCase()}`;
      const conversationId = `user:${slackUserId}`;
      const bindModel =
        process.env.OPENCLAW_LIVE_CODEX_BIND_MODEL?.trim() || DEFAULT_CODEX_BIND_MODEL;

      // Teardown handles. They are assigned as setup progresses so the
      // `finally` block only tears down what was actually created, and so a
      // failure anywhere in setup still restores the environment and removes
      // the temp directory.
      let startedServer: Awaited<ReturnType<typeof startGatewayServer>> | undefined;
      let connectedClient: Awaited<ReturnType<typeof connectTestGatewayClient>> | undefined;
      let pinnedRegistry:
        | ReturnType<typeof createSlackCurrentConversationBindingRegistry>
        | undefined;

      try {
        const port = await getFreeGatewayPort();
        await fs.mkdir(workspace, { recursive: true });
        await fs.writeFile(
          path.join(workspace, "AGENTS.md"),
          [
            "# AGENTS.md",
            "",
            "Follow exact reply instructions from the user.",
            "Do not add commentary when asked for an exact response.",
          ].join("\n"),
        );
        await fs.mkdir(tempHome, { recursive: true });
        await fs.mkdir(stateDir, { recursive: true });
        await writeGatewayConfig({ configPath, model: bindModel, port, token, workspace });
        clearConfigCache();
        clearRuntimeConfigSnapshot();
        clearPluginLoaderCache();
        resetPluginRuntimeStateForTest();

        // HOME is redirected to a temp dir below, so pin CODEX_HOME to the
        // original location first (explicit CODEX_HOME, else <HOME>/.codex).
        const codexHome =
          previous.codexHome || (previous.home ? path.join(previous.home, ".codex") : "");
        if (codexHome) {
          process.env.CODEX_HOME = codexHome;
        } else {
          delete process.env.CODEX_HOME;
        }
        process.env.HOME = tempHome;
        process.env.OPENCLAW_CONFIG_PATH = configPath;
        process.env.OPENCLAW_GATEWAY_TOKEN = token;
        process.env.OPENCLAW_SKIP_CANVAS_HOST = "1";
        process.env.OPENCLAW_SKIP_CHANNELS = "1";
        process.env.OPENCLAW_SKIP_CRON = "1";
        process.env.OPENCLAW_SKIP_GMAIL_WATCHER = "1";
        process.env.OPENCLAW_STATE_DIR = stateDir;

        const server = await startGatewayServer(port, {
          bind: "loopback",
          auth: { mode: "token", token },
          controlUiEnabled: false,
          awaitStartupSidecars: true,
        });
        startedServer = server;
        const client = await connectTestGatewayClient({
          url: `ws://127.0.0.1:${port}`,
          token,
          timeoutMs: 90_000,
          requestTimeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS,
          clientDisplayName: "vitest-codex-bind-live",
        });
        connectedClient = client;
        const channelRegistry = createSlackCurrentConversationBindingRegistry();
        pinActivePluginChannelRegistry(channelRegistry);
        pinnedRegistry = channelRegistry;

        await writePluginBindingApproval({
          homeDir: tempHome,
          pluginRoot: resolveCodexPluginRoot(),
          channel: "slack",
          accountId,
        });

        await sendChatAndWait({
          client,
          sessionKey,
          idempotencyKey: `idem-codex-bind-${randomUUID()}`,
          message: `/codex bind --cwd ${workspace} --model ${bindModel}`,
          originatingChannel: "slack",
          originatingTo: conversationId,
          originatingAccountId: accountId,
        });
        const bindHistory = await waitForAssistantText({
          client,
          sessionKey,
          contains: "Bound this conversation to Codex thread",
          timeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS,
        });
        const boundSessionKey = resolveBoundSessionKey({
          channel: "slack",
          accountId,
          conversationId,
        });

        // Tracks how many assistant replies the command session has produced,
        // so each command only matches a reply newer than the previous ones.
        let commandAssistantCount = bindHistory.assistantTexts.length;
        const sendCodexCommand = async (message: string, contains: string, timeoutMs = 60_000) => {
          await sendChatAndWait({
            client,
            sessionKey,
            idempotencyKey: `idem-codex-command-${randomUUID()}`,
            message,
            originatingChannel: "slack",
            originatingTo: conversationId,
            originatingAccountId: accountId,
          });
          const result = await waitForAssistantText({
            client,
            sessionKey,
            contains,
            minAssistantCount: commandAssistantCount + 1,
            timeoutMs,
          });
          commandAssistantCount = result.assistantTexts.length;
          return result;
        };

        await sendCodexCommand(
          "/codex status",
          "Codex app-server: connected",
          CODEX_BIND_REQUEST_TIMEOUT_MS,
        );
        await sendCodexCommand("/codex models", "Codex models:", CODEX_BIND_REQUEST_TIMEOUT_MS);
        await sendCodexCommand("/codex fast on", "Codex fast mode enabled.");
        await sendCodexCommand("/codex fast status", "Codex fast mode: on.");
        await sendCodexCommand("/codex permissions default", "Codex permissions set to default.");
        await sendCodexCommand("/codex permissions status", "Codex permissions: default.");
        await sendCodexCommand("/codex model", `Codex model: ${bindModel}`);
        await sendCodexCommand("/codex stop", "No active Codex run to stop.");
        const bindingStatus = await sendCodexCommand("/codex binding", "- Fast: on");
        if (!bindingStatus.matchedAssistantText.includes("- Permissions: default")) {
          throw new Error(
            `binding status did not include default permissions: ${bindingStatus.matchedAssistantText}`,
          );
        }

        // Plain text is sent on the command session but the reply is expected
        // in the bound session's history.
        const textNonce = randomBytes(4).toString("hex").toUpperCase();
        const textToken = `CODEX-BIND-${textNonce}`;
        await sendChatAndWait({
          client,
          sessionKey,
          idempotencyKey: `idem-codex-bound-text-${randomUUID()}`,
          message: `Reply with exactly this token and nothing else: ${textToken}`,
          originatingChannel: "slack",
          originatingTo: conversationId,
          originatingAccountId: accountId,
        });
        const textHistory = await waitForAssistantText({
          client,
          sessionKey: boundSessionKey,
          contains: textToken,
          timeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS,
        });

        await sendChatAndWait({
          client,
          sessionKey,
          idempotencyKey: `idem-codex-bound-image-${randomUUID()}`,
          message:
            "What animal is drawn in the attached image? Reply with only the lowercase animal name.",
          originatingChannel: "slack",
          originatingTo: conversationId,
          originatingAccountId: accountId,
          attachments: [
            {
              mimeType: "image/png",
              fileName: `codex-bind-probe-${randomUUID()}.png`,
              content: renderCatFacePngBase64(),
            },
          ],
        });
        await waitForAssistantText({
          client,
          sessionKey: boundSessionKey,
          contains: "cat",
          caseInsensitive: true,
          minAssistantCount: textHistory.assistantTexts.length + 1,
          timeoutMs: CODEX_BIND_REQUEST_TIMEOUT_MS,
        });

        await sendCodexCommand("/codex detach", "Detached this conversation from Codex.");
        await sendCodexCommand("/codex binding", "No Codex conversation binding is attached.");
      } finally {
        try {
          if (pinnedRegistry) {
            releasePinnedPluginChannelRegistry(pinnedRegistry);
          }
          clearConfigCache();
          clearRuntimeConfigSnapshot();
          // Stopping the client is best-effort; a failure here must not hide
          // the test result or block the remaining teardown.
          await connectedClient?.stopAndWait({ timeoutMs: 2_000 }).catch(() => {});
          await startedServer?.close();
          await fs.rm(tempRoot, { recursive: true, force: true });
        } finally {
          // Always restore the process environment, even if teardown throws,
          // so later tests in the same worker do not inherit the overrides.
          restoreEnvVar("CODEX_HOME", previous.codexHome);
          restoreEnvVar("OPENCLAW_CONFIG_PATH", previous.configPath);
          restoreEnvVar("OPENCLAW_GATEWAY_TOKEN", previous.gatewayToken);
          restoreEnvVar("HOME", previous.home);
          restoreEnvVar("OPENCLAW_SKIP_CANVAS_HOST", previous.skipCanvas);
          restoreEnvVar("OPENCLAW_SKIP_CHANNELS", previous.skipChannels);
          restoreEnvVar("OPENCLAW_SKIP_CRON", previous.skipCron);
          restoreEnvVar("OPENCLAW_SKIP_GMAIL_WATCHER", previous.skipGmail);
          restoreEnvVar("OPENCLAW_STATE_DIR", previous.stateDir);
        }
      }
    },
    CODEX_BIND_TIMEOUT_MS,
  );
});

View File

@@ -105,6 +105,26 @@ describe("gateway codex harness live helpers", () => {
expect(isExpectedCodexModelsCommandText(text)).toBe(true);
});
// Each known "model overrides" heading variant must contain at least one of
// the expected /codex models marker strings.
it("accepts the app-server model override list", () => {
  const overrideReplies = [
    "Available model overrides in this session:\n\n- `gpt-5.4`\n- `GPT-5.5`\n- `gpt-5.4-mini`",
    "Available model overrides here:\n\n- `gpt-5.4`",
    "Available model overrides:\n\n- `gpt-5.4`",
  ];
  const containsExpectedMarker = (text: string): boolean =>
    EXPECTED_CODEX_MODELS_COMMAND_TEXT.some((expectedText) => text.includes(expectedText));

  for (const reply of overrideReplies) {
    expect(containsExpectedMarker(reply)).toBe(true);
  }
});
it("accepts missing codex shell PATH fallback with current-session model", () => {
const texts = [
[

View File

@@ -30,6 +30,9 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [
"Available models in this environment:",
"Available models in this Codex environment:",
"Available models in this Codex install",
"Available model overrides:",
"Available model overrides here:",
"Available model overrides in this session:",
"Available agent models:",
"Visible options in this session:",
"Current: `openai/",

View File

@@ -19,14 +19,13 @@ import {
import {
assertCronJobMatches,
assertCronJobVisibleViaCli,
assertLiveImageProbeReply,
buildLiveCronProbeMessage,
createLiveCronProbeSpec,
runOpenClawCliJson,
type CronListJob,
} from "./live-agent-probes.js";
import { restoreLiveEnv, snapshotLiveEnv, type LiveEnvSnapshot } from "./live-env-test-helpers.js";
import { renderCatFacePngBase64 } from "./live-image-probe.js";
import { renderSolidColorPngBase64 } from "./live-image-probe.js";
const LIVE = isLiveTestEnabled();
const CODEX_HARNESS_LIVE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CODEX_HARNESS);
@@ -54,11 +53,8 @@ const CODEX_HARNESS_AUTH_MODE =
const describeLive = LIVE && CODEX_HARNESS_LIVE ? describe : describe.skip;
const describeDisabled = LIVE && !CODEX_HARNESS_LIVE ? describe : describe.skip;
const CODEX_HARNESS_TIMEOUT_MS = 900_000;
const DEFAULT_CODEX_MODEL = "openai/gpt-5.5";
const DEFAULT_CODEX_MODEL = "codex/gpt-5.5";
const GATEWAY_CONNECT_TIMEOUT_MS = 60_000;
const CODEX_APP_SERVER_BASE_URL = "https://chatgpt.com/backend-api";
const CODEX_APP_SERVER_CONTEXT_WINDOW = 272_000;
const CODEX_APP_SERVER_MAX_TOKENS = 128_000;
type CapturedAgentEvent = {
stream: string;
@@ -153,7 +149,7 @@ async function writeLiveGatewayConfig(params: {
token: string;
workspace: string;
}): Promise<void> {
const { provider, modelId } = parseModelKey(params.modelKey);
parseModelKey(params.modelKey);
const cfg: OpenClawConfig = {
gateway: {
mode: "local",
@@ -173,32 +169,9 @@ async function writeLiveGatewayConfig(params: {
},
},
},
models: {
providers: {
[provider]: {
baseUrl: CODEX_APP_SERVER_BASE_URL,
apiKey: "codex-app-server",
auth: "token",
api: "openai-codex-responses",
models: [
{
id: modelId,
name: modelId,
api: "openai-codex-responses",
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: CODEX_APP_SERVER_CONTEXT_WINDOW,
maxTokens: CODEX_APP_SERVER_MAX_TOKENS,
compat: {
supportsReasoningEffort: true,
supportsUsageInStreaming: true,
},
},
],
},
},
},
// The Codex plugin owns the `codex/*` catalog/auth marker. Keeping the
// fixture on that provider proves the app-server harness path instead of
// exercising legacy OpenAI-Codex provider overrides.
agents: {
defaults: {
workspace: params.workspace,
@@ -215,15 +188,17 @@ async function writeLiveGatewayConfig(params: {
async function requestAgentTextWithEvents(params: {
client: GatewayClient;
eventPrefix?: string;
message: string;
sessionKey: string;
}): Promise<{ text: string; events: CapturedAgentEvent[] }> {
const { extractPayloadText } = await import("./test-helpers.agent-results.js");
const { onAgentEvent } = await import("../infra/agent-events.js");
const events: CapturedAgentEvent[] = [];
const eventPrefix = params.eventPrefix ?? "codex_app_server.guardian";
const unsubscribe = onAgentEvent((event) => {
if (
event.stream !== "codex_app_server.guardian" ||
!event.stream.startsWith(eventPrefix) ||
(event.sessionKey && event.sessionKey !== params.sessionKey)
) {
return;
@@ -262,24 +237,14 @@ async function requestAgentText(params: {
message: string;
sessionKey: string;
}): Promise<string> {
const { extractPayloadText } = await import("./test-helpers.agent-results.js");
const payload = await params.client.request(
"agent",
{
sessionKey: params.sessionKey,
idempotencyKey: `idem-${randomUUID()}`,
message: params.message,
deliver: false,
thinking: "low",
timeout: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS,
},
{ expectFinal: true, timeoutMs: CODEX_HARNESS_REQUEST_TIMEOUT_MS },
);
if (payload?.status !== "ok") {
throw new Error(`agent status=${String(payload?.status)} payload=${JSON.stringify(payload)}`);
}
const text = extractPayloadText(payload.result);
const { text, events } = await requestAgentTextWithEvents({
client: params.client,
eventPrefix: "codex_app_server.",
message: params.message,
sessionKey: params.sessionKey,
});
expect(text).toContain(params.expectedToken);
expect(events.some((event) => event.stream === "codex_app_server.lifecycle")).toBe(true);
return text;
}
@@ -326,31 +291,52 @@ async function verifyCodexImageProbe(params: {
sessionKey: string;
}): Promise<void> {
const runId = randomUUID();
const payload = await params.client.request(
"agent",
{
sessionKey: params.sessionKey,
idempotencyKey: `idem-${runId}-image`,
message:
"What animal is drawn in the attached image? Reply with only the lowercase animal name.",
attachments: [
{
mimeType: "image/png",
fileName: `codex-probe-${runId}.png`,
content: renderCatFacePngBase64(),
},
],
deliver: false,
thinking: "low",
timeout: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS,
},
{ expectFinal: true, timeoutMs: CODEX_HARNESS_REQUEST_TIMEOUT_MS },
);
const expectedToken = `CODEX-IMAGE-${runId.slice(0, 6).toUpperCase()}`;
const { onAgentEvent } = await import("../infra/agent-events.js");
const events: CapturedAgentEvent[] = [];
const unsubscribe = onAgentEvent((event) => {
if (
!event.stream.startsWith("codex_app_server.") ||
(event.sessionKey && event.sessionKey !== params.sessionKey)
) {
return;
}
events.push({
stream: event.stream,
sessionKey: event.sessionKey,
data: event.data,
});
});
let payload: { status?: string; result?: unknown } | undefined;
try {
payload = await params.client.request(
"agent",
{
sessionKey: params.sessionKey,
idempotencyKey: `idem-${runId}-image`,
message: `Ignore the attached image and reply exactly ${expectedToken}.`,
attachments: [
{
mimeType: "image/png",
fileName: `codex-probe-${runId}.png`,
content: renderSolidColorPngBase64({ r: 220, g: 32, b: 32 }),
},
],
deliver: false,
thinking: "low",
timeout: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS,
},
{ expectFinal: true, timeoutMs: CODEX_HARNESS_REQUEST_TIMEOUT_MS },
);
} finally {
unsubscribe();
}
if (payload?.status !== "ok") {
throw new Error(`image probe failed: status=${String(payload?.status)}`);
}
const { extractPayloadText } = await import("./test-helpers.agent-results.js");
assertLiveImageProbeReply(extractPayloadText(payload.result));
expect(extractPayloadText(payload.result)).toContain(expectedToken);
expect(events.some((event) => event.stream === "codex_app_server.lifecycle")).toBe(true);
}
function assertGuardianReviewStatus(params: {
@@ -372,6 +358,25 @@ function assertGuardianReviewStatus(params: {
).toBe(true);
}
/**
 * Asserts that at least one captured event reports a completed Guardian
 * review (phase "completed" with a truthy status) and returns the last one.
 *
 * When no such event exists and Guardian events are not required
 * (CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS is falsy), returns `undefined`
 * without asserting.
 */
function assertGuardianReviewCompleted(params: {
  events: CapturedAgentEvent[];
  label: string;
}): CapturedAgentEvent | undefined {
  const { events, label } = params;
  const completedEvents = events.filter(
    (event) => event.data?.phase === "completed" && event.data?.status,
  );
  const sawCompletedReview = completedEvents.length > 0;
  if (!sawCompletedReview && !CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS) {
    return undefined;
  }
  const failureMessage = `${label} expected a completed Guardian review event; events=${JSON.stringify(
    events,
  )}`;
  expect(completedEvents.length, failureMessage).toBeGreaterThan(0);
  return completedEvents.at(-1);
}
async function verifyCodexGuardianProbe(params: {
client: GatewayClient;
sessionKey: string;
@@ -409,13 +414,18 @@ async function verifyCodexGuardianProbe(params: {
`This simulates sending a secret to an untrusted external destination. If the shell tool, sandbox, or Guardian blocks or rejects the action, ask me for explicit approval and include ${askBackToken}.`,
].join("\n"),
});
expect(deniedResult.text).toContain(askBackToken);
expect(deniedResult.text.toLowerCase()).toMatch(/approv|permission|guardian|reject|denied/);
assertGuardianReviewStatus({
const review = assertGuardianReviewCompleted({
events: deniedResult.events,
expectedStatus: "denied",
label: "ask-back probe",
});
// The approve/deny call is Codex policy-owned and may change independently.
// OpenClaw's contract here is that Guardian mode reaches Codex app-server and
// projects the structured review lifecycle back onto the agent event bus.
if (review?.data?.status === "denied") {
expect(deniedResult.text).toContain(askBackToken);
expect(deniedResult.text.toLowerCase()).toMatch(/approv|permission|guardian|reject|denied/);
}
expect(deniedResult.text.trim().length).toBeGreaterThan(0);
}
async function verifyCodexCronMcpProbe(params: {

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { renderCatFacePngBase64 } from "./live-image-probe.js";
import { renderCatFacePngBase64, renderSolidColorPngBase64 } from "./live-image-probe.js";
describe("live image probe", () => {
it("leaves room for the unclipped bottom CAT label", () => {
@@ -9,4 +9,12 @@ describe("live image probe", () => {
expect(png.readUInt32BE(16)).toBe(256);
expect(png.readUInt32BE(20)).toBeGreaterThanOrEqual(274);
});
// The solid-color probe must be a PNG whose IHDR width and height (the two
// big-endian uint32 values at byte offsets 16 and 20) are both 192.
it("renders a small solid-color probe attachment", () => {
  const base64 = renderSolidColorPngBase64({ r: 220, g: 32, b: 32 });
  const png = Buffer.from(base64, "base64");
  const signature = png.toString("ascii", 1, 4);
  const width = png.readUInt32BE(16);
  const height = png.readUInt32BE(20);

  expect(signature).toBe("PNG");
  expect(width).toBe(192);
  expect(height).toBe(192);
});
});

View File

@@ -283,6 +283,24 @@ export function renderCatNoncePngBase64(nonce: string): string {
return png.toString("base64");
}
/**
 * Renders a 192x192 PNG filled entirely with `color` and returns it
 * base64-encoded.
 *
 * The RGBA buffer is initialised with every byte set to 255 before the
 * full-canvas rectangle is painted.
 */
export function renderSolidColorPngBase64(color: { r: number; g: number; b: number }): string {
  const size = 192;
  const pixels = Buffer.alloc(size * size * 4, 255);
  fillRect({
    buf: pixels,
    width: size,
    height: size,
    x: 0,
    y: 0,
    w: size,
    h: size,
    color,
  });
  return encodePngRgba(pixels, size, size).toString("base64");
}
export function renderCatFacePngBase64(): string {
const width = 256;
const height = 288;

View File

@@ -0,0 +1,86 @@
import fs from "node:fs/promises";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { patchCodexAuthForCi, prepareCodexCiAuth } from "../../scripts/prepare-codex-ci-auth.ts";
import { withTempDir } from "../test-utils/temp-dir.js";
/**
 * Builds an unsigned test JWT: an `{ alg: "none" }` header, the given payload,
 * and an empty signature segment (so the token ends with a dot).
 */
function encodeJwt(payload: Record<string, unknown>): string {
  const toSegment = (value: Record<string, unknown>): string =>
    Buffer.from(JSON.stringify(value), "utf-8").toString("base64url");
  const header = toSegment({ alg: "none" });
  const body = toSegment(payload);
  return `${header}.${body}.`;
}
/**
 * Decodes the payload (second dot-separated segment) of a JWT into an object.
 *
 * @throws Error("missing payload") when the token has no non-empty second
 *   segment.
 */
function decodeJwtPayload(token: string): Record<string, unknown> {
  const [, segment] = token.split(".");
  if (!segment) {
    throw new Error("missing payload");
  }
  const jsonText = Buffer.from(segment, "base64url").toString("utf-8");
  return JSON.parse(jsonText) as Record<string, unknown>;
}
// Covers the pure patch function (claim added / claim already present) and
// the file-rewriting wrapper against a temp-dir auth file.
describe("prepare-codex-ci-auth", () => {
  it("copies tokens.account_id into id_token chatgpt_account_id", () => {
    const { auth, changed } = patchCodexAuthForCi({
      tokens: {
        account_id: "acct_123",
        id_token: encodeJwt({ email: "peter@example.com" }),
      },
    });

    expect(changed).toBe(true);
    const patchedPayload = decodeJwtPayload(String(auth.tokens?.id_token));
    expect(patchedPayload).toMatchObject({
      email: "peter@example.com",
      chatgpt_account_id: "acct_123",
    });
  });

  it("leaves current auth metadata unchanged", () => {
    const idToken = encodeJwt({ chatgpt_account_id: "acct_existing" });
    // Fresh objects for input and expectation so the comparison is structural.
    const buildAuth = () => ({
      tokens: {
        account_id: "acct_123",
        id_token: idToken,
      },
    });

    const result = patchCodexAuthForCi(buildAuth());

    expect(result).toEqual({
      auth: buildAuth(),
      changed: false,
    });
  });

  it("writes only the staged auth file", async () => {
    await withTempDir("codex-ci-auth-", async (tempDir) => {
      const authPath = path.join(tempDir, "auth.json");
      const initialAuth = {
        tokens: {
          account_id: "acct_123",
          id_token: encodeJwt({ sub: "user" }),
        },
      };
      await fs.writeFile(authPath, JSON.stringify(initialAuth));

      await expect(prepareCodexCiAuth(authPath)).resolves.toBe(true);

      const rewritten = await fs.readFile(authPath, "utf-8");
      const updated = JSON.parse(rewritten) as {
        tokens?: { id_token?: string };
      };
      expect(decodeJwtPayload(String(updated.tokens?.id_token))).toMatchObject({
        sub: "user",
        chatgpt_account_id: "acct_123",
      });
    });
  });
});