Files
openclaw/extensions/codex/src/app-server/run-attempt.test.ts
2026-05-30 07:33:14 +02:00

4233 lines
156 KiB
TypeScript

import fs from "node:fs/promises";
import path from "node:path";
import {
abortAgentHarnessRun,
embeddedAgentLog,
onAgentEvent,
type AgentEventPayload,
type EmbeddedRunAttemptParams,
} from "openclaw/plugin-sdk/agent-harness-runtime";
import { SessionManager } from "openclaw/plugin-sdk/agent-sessions";
import {
onInternalDiagnosticEvent,
waitForDiagnosticEventsDrained,
type DiagnosticEventPayload,
type DiagnosticEventPrivateData,
} from "openclaw/plugin-sdk/diagnostic-runtime";
import { initializeGlobalHookRunner, registerInternalHook } from "openclaw/plugin-sdk/hook-runtime";
import { registerPluginCommand } from "openclaw/plugin-sdk/plugin-runtime";
import {
createMockPluginRegistry,
onTrustedInternalDiagnosticEvent,
} from "openclaw/plugin-sdk/plugin-test-runtime";
import { registerSandboxBackend } from "openclaw/plugin-sdk/sandbox";
import { describe, expect, it, vi } from "vitest";
import WebSocket from "ws";
import { CODEX_GPT5_BEHAVIOR_CONTRACT } from "../../prompt-overlay.js";
import { defaultCodexAppInventoryCache } from "./app-inventory-cache.js";
import {
buildCodexOpenClawPromptContext,
buildCodexSystemPromptReport,
buildCodexWorkspaceBootstrapContext,
getCodexWorkspaceMemoryToolNames,
prependCodexOpenClawPromptContext,
renderCodexWorkspaceMemoryReference,
} from "./attempt-context.js";
import * as authBridge from "./auth-bridge.js";
import { resolveCodexAppServerEnvApiKeyCacheKey } from "./auth-bridge.js";
import { CodexAppServerRpcError } from "./client.js";
import { readCodexPluginConfig, resolveCodexAppServerRuntimeOptions } from "./config.js";
import {
CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE,
createCodexDynamicToolBridge,
} from "./dynamic-tools.js";
import * as elicitationBridge from "./elicitation-bridge.js";
import {
CodexAppServerEventProjector,
type CodexAppServerToolTelemetry,
} from "./event-projector.js";
import { buildCodexPluginAppCacheKey } from "./plugin-app-cache-key.js";
import { buildCodexPluginThreadConfig } from "./plugin-thread-config.js";
import type { CodexServerNotification } from "./protocol.js";
import {
createAppServerHarness,
assistantMessage,
createParams,
createCodexRuntimePlanFixture,
createResumeHarness,
createStartedThreadHarness,
fastWait,
mockCall,
queueActiveRunMessageForTest,
runCodexAppServerAttempt,
setCodexAppServerClientFactoryForTest,
setupRunAttemptTestHooks,
tempDir,
threadStartResult,
turnStartResult,
userMessage,
} from "./run-attempt-test-harness.js";
import { testing } from "./run-attempt.js";
import {
ensureCodexSandboxExecServerEnvironment,
releaseCodexSandboxExecServerEnvironment,
} from "./sandbox-exec-server.js";
import { createSandboxContext } from "./sandbox-exec-server.test-helpers.js";
import { readCodexAppServerBinding, writeCodexAppServerBinding } from "./session-binding.js";
import * as sharedClientModule from "./shared-client.js";
import { createCodexTestModel } from "./test-support.js";
import { buildTurnStartParams, startOrResumeThread } from "./thread-lifecycle.js";
/**
 * Test convenience wrapper around `waitForDiagnosticEventsDrained()`.
 *
 * @returns Whatever `waitForDiagnosticEventsDrained()` returns — presumably a
 * promise that settles once pending diagnostic events have been delivered
 * (confirm against the diagnostic-runtime SDK).
 */
function flushDiagnosticEvents() {
  const drained = waitForDiagnosticEventsDrained();
  return drained;
}
/**
 * Opens a WebSocket to `url` and resolves with the socket once it emits "open".
 *
 * The promise rejects when the socket emits "error" or "close" first, or when
 * no "open" event arrives within one second (the socket is closed before the
 * timeout rejection is issued).
 *
 * @param url WebSocket URL to connect to.
 * @returns The connected socket.
 */
function openSocket(url: string): Promise<WebSocket> {
  return new Promise<WebSocket>((resolve, reject) => {
    const socket = new WebSocket(url);
    const openDeadline = setTimeout(() => {
      socket.close();
      reject(new Error("timed out opening WebSocket"));
    }, 1_000);

    // Shared failure path: cancel the deadline, then reject with the cause.
    function failBeforeOpen(cause: Error): void {
      clearTimeout(openDeadline);
      reject(cause);
    }

    socket.once("open", () => {
      clearTimeout(openDeadline);
      resolve(socket);
    });
    socket.once("error", failBeforeOpen);
    socket.once("close", () => {
      failBeforeOpen(new Error("WebSocket closed before open"));
    });
  });
}
/**
 * Asserts that a `thread/resume` request was recorded and that every entry of
 * `params` equals (via `toEqual`) the same-named field of that request's params.
 *
 * Only the first `thread/resume` entry is inspected, and request fields that
 * are not listed in `params` are ignored.
 *
 * @param requests Recorded requests, in the order they were sent.
 * @param params Expected subset of the resume request's params.
 * @throws Error when `requests` contains no `thread/resume` entry.
 */
function expectResumeRequest(
  requests: Array<{ method: string; params: unknown }>,
  params: Record<string, unknown>,
) {
  const resumeEntry = requests.find(({ method }) => method === "thread/resume");
  if (resumeEntry === undefined) {
    throw new Error("Expected thread/resume request");
  }
  const actual = resumeEntry.params as Record<string, unknown> | undefined;
  Object.entries(params).forEach(([field, expected]) => {
    expect(actual?.[field]).toEqual(expected);
  });
}
/**
 * Persists a Codex app-server binding for `sessionFile` so a test can start
 * from an "already bound" session.
 *
 * Defaults: thread id "thread-existing", cwd `workspaceDir`, model
 * "gpt-5.4-codex", provider "openai". Any field in `overrides` wins over the
 * corresponding default.
 *
 * @param sessionFile Session file the binding is written for.
 * @param workspaceDir Value stored as the binding's `cwd`.
 * @param overrides Binding fields that replace or extend the defaults.
 */
async function writeExistingBinding(
  sessionFile: string,
  workspaceDir: string,
  overrides: Partial<Parameters<typeof writeCodexAppServerBinding>[1]> = {},
) {
  const binding: Parameters<typeof writeCodexAppServerBinding>[1] = {
    threadId: "thread-existing",
    cwd: workspaceDir,
    model: "gpt-5.4-codex",
    modelProvider: "openai",
    ...overrides,
  };
  await writeCodexAppServerBinding(sessionFile, binding);
}
/**
 * Builds a fresh `appServer` options object for `startOrResumeThread` tests:
 * stdio transport running `codex app-server`, 60 s request and idle timeouts,
 * approval policy "never", reviewer "user", "workspace-write" sandbox, and
 * code-mode-only disabled.
 *
 * A new object (including a new `start` section) is returned on every call,
 * so callers may spread or mutate it freely.
 */
function createThreadLifecycleAppServerOptions(): Parameters<
  typeof startOrResumeThread
>[0]["appServer"] {
  type AppServerOptions = Parameters<typeof startOrResumeThread>[0]["appServer"];
  const oneMinuteMs = 60_000;
  const options: AppServerOptions = {
    start: {
      transport: "stdio",
      command: "codex",
      args: ["app-server"],
      headers: {},
    },
    requestTimeoutMs: oneMinuteMs,
    turnCompletionIdleTimeoutMs: oneMinuteMs,
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: "workspace-write",
    codeModeOnly: false,
  };
  return options;
}
/**
 * Creates a dynamic tool spec named "message" whose input schema requires a
 * single string `action` restricted to `actions`.
 *
 * @param description Description stored on the tool spec.
 * @param actions Allowed `action` values; defaults to `["send"]`. The array is
 * placed in the schema by reference, not copied.
 * @returns A spec shaped like an element of `startOrResumeThread`'s `dynamicTools`.
 */
function createMessageDynamicTool(
  description: string,
  actions: string[] = ["send"],
): Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] {
  type DynamicToolSpec = Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number];
  const messageTool: DynamicToolSpec = {
    name: "message",
    description,
    inputSchema: {
      type: "object",
      properties: {
        action: { type: "string", enum: actions },
      },
      required: ["action"],
      additionalProperties: false,
    },
  };
  return messageTool;
}
/**
 * Creates a minimal dynamic tool spec with the given name, a
 * "<name> test tool" description, and an input schema that accepts an empty
 * object only (no properties, no additional properties).
 *
 * @param name Tool name; also used to build the description.
 */
function createNamedDynamicTool(
  name: string,
): Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] {
  type DynamicToolSpec = Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number];
  const description = `${name} test tool`;
  const namedTool: DynamicToolSpec = {
    name,
    description,
    inputSchema: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
  };
  return namedTool;
}
/**
 * Replaces `params.config` with a copy whose `agents.defaults.workspace` is
 * `workspaceDir`.
 *
 * Existing top-level config keys, existing `agents` keys, and existing
 * `agents.defaults` keys are carried over by shallow copy; the previous config
 * object itself is not mutated.
 *
 * @param params Attempt params whose `config` property is reassigned.
 * @param workspaceDir Workspace path to store under `agents.defaults.workspace`.
 */
function setAgentWorkspaceForTest(params: EmbeddedRunAttemptParams, workspaceDir: string): void {
  const previousConfig = params.config;
  const previousAgents = previousConfig?.agents;
  const nextDefaults = { ...previousAgents?.defaults, workspace: workspaceDir };
  const nextAgents = { ...previousAgents, defaults: nextDefaults };
  params.config = {
    ...previousConfig,
    agents: nextAgents,
  } as EmbeddedRunAttemptParams["config"];
}
/**
 * Calls `testing.buildDynamicTools` with the defaults most tests need:
 * `workspaceDir` for both the resolved and effective workspace, `params.cwd`
 * (or `workspaceDir` when unset) as the effective cwd, a disabled "docker"
 * sandbox stub, the native tool surface enabled, agent id "main", and an empty
 * plugin config.
 *
 * @param params Attempt params; must carry a `sessionKey`.
 * @param workspaceDir Workspace directory used for every workspace argument.
 * @param options Optional `forceHeartbeatTool` / `ignoreRuntimePlan` flags,
 * spread last so they override the defaults.
 * @throws Error when `params.sessionKey` is missing.
 */
async function buildDynamicToolsForTest(
  params: EmbeddedRunAttemptParams,
  workspaceDir: string,
  options: Partial<
    Pick<
      Parameters<typeof testing.buildDynamicTools>[0],
      "forceHeartbeatTool" | "ignoreRuntimePlan"
    >
  > = {},
) {
  const { sessionKey } = params;
  if (!sessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }
  const effectiveCwd = params.cwd ?? workspaceDir;
  return testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    effectiveCwd,
    sandboxSessionKey: sessionKey,
    sandbox: { enabled: false, backendId: "docker" } as never,
    nativeToolSurfaceEnabled: true,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
    ...options,
  });
}
/**
 * Assembles the prompt-related values that tests inspect for a Codex turn,
 * using the builders imported from `./attempt-context.js`,
 * `./dynamic-tools.js` and `./thread-lifecycle.js`.
 *
 * @param params Attempt params used for every builder call.
 * @param workspaceDir Directory passed as both the resolved and the effective workspace.
 * @returns The collaboration instructions, the turn's text input, the system
 * prompt report, and the joined thread developer instructions.
 */
async function buildCodexTurnContextForTest(
params: EmbeddedRunAttemptParams,
workspaceDir: string,
) {
const sessionAgentId = "main";
const agentTools = await buildDynamicToolsForTest(params, workspaceDir);
// The bridge is only used to obtain the tool specs; its abort signal is never triggered here.
const toolBridge = createCodexDynamicToolBridge({
tools: agentTools,
signal: new AbortController().signal,
});
const dynamicTools = toolBridge.availableSpecs;
const memoryToolNames = getCodexWorkspaceMemoryToolNames(dynamicTools);
const workspaceBootstrapContext = await buildCodexWorkspaceBootstrapContext({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
// Falls back to the session id when no session key is set.
sessionKey: params.sessionKey ?? params.sessionId,
sessionAgentId,
memoryToolNames,
});
// Thread-level developer instructions: the base instructions followed by the
// workspace bootstrap instructions, skipping undefined or blank sections and
// separating the rest with a blank line.
const threadDeveloperInstructions = [
testing.buildDeveloperInstructions(params, { dynamicTools }),
workspaceBootstrapContext.developerInstructions,
]
.filter((section) => section?.trim())
.join("\n\n");
const openClawPromptContext = buildCodexOpenClawPromptContext({
params,
workspacePromptContext: workspaceBootstrapContext.promptContext,
workspaceMemoryReference: renderCodexWorkspaceMemoryReference({
// An absent file list is treated as empty.
files: workspaceBootstrapContext.memoryReferenceFiles ?? [],
toolNames: workspaceBootstrapContext.memoryToolNames,
}),
});
const codexTurnPromptText = prependCodexOpenClawPromptContext(
params.prompt,
openClawPromptContext,
);
// "thread-1" is a fixed placeholder id; the app-server options are resolved from an empty config object.
const turnStartParams = buildTurnStartParams(params, {
threadId: "thread-1",
cwd: workspaceDir,
appServer: resolveCodexAppServerRuntimeOptions({}),
promptText: codexTurnPromptText,
turnScopedDeveloperInstructions: workspaceBootstrapContext.turnScopedDeveloperInstructions,
heartbeatCollaborationInstructions:
workspaceBootstrapContext.heartbeatCollaborationInstructions,
});
// Empty string when the turn carries no collaboration-mode developer instructions.
const collaborationInstructions =
turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";
// Text of the first input item of type "text", or an empty string when there is none.
const inputText = turnStartParams.input?.find((item) => item.type === "text")?.text ?? "";
const systemPromptReport = buildCodexSystemPromptReport({
attempt: params,
sessionKey: params.sessionKey ?? params.sessionId,
workspaceDir,
// The report receives thread and collaboration instructions joined by a blank line.
developerInstructions: [threadDeveloperInstructions, collaborationInstructions].join("\n\n"),
workspaceBootstrapContext,
skillsPrompt: "",
tools: dynamicTools,
});
return {
collaborationInstructions,
inputText,
systemPromptReport,
threadDeveloperInstructions,
};
}
/**
 * Creates a Codex dynamic tool bridge for tests.
 *
 * "message" is listed as a direct tool exactly when
 * `testing.shouldForceMessageTool(params)` is truthy; otherwise no direct
 * tools are configured. The bridge gets an abort signal that is never fired.
 *
 * @param params Attempt params consulted for the forced-message-tool decision.
 * @param tools Tools exposed through the bridge.
 * @param registeredTools Tools reported as registered; defaults to `tools`.
 */
function createCodexToolBridgeForTest(
  params: EmbeddedRunAttemptParams,
  tools: RuntimeDynamicToolForTest[],
  registeredTools: RuntimeDynamicToolForTest[] = tools,
) {
  return createCodexDynamicToolBridge({
    tools,
    registeredTools,
    signal: new AbortController().signal,
    directToolNames: testing.shouldForceMessageTool(params) ? ["message"] : [],
  });
}
/**
 * Builds dynamic tools for `params`, then starts a thread through
 * `startOrResumeThread` against a mocked client and returns that mock together
 * with the computed native-tool-surface flag.
 *
 * The mocked client answers `thread/start`, fails loudly on `app/list`, and
 * rejects every other method. Codex plugins are forced to `enabled: false` in
 * the plugin config handed to the plugin thread-config builder.
 *
 * @param params Attempt params; must carry `workspaceDir` and `sessionKey`.
 * @param options Optional plugin config and developer instructions.
 * @returns `{ request, nativeToolSurfaceEnabled }`.
 * @throws Error when `workspaceDir` or `sessionKey` is missing from `params`.
 */
async function startThreadWithDisabledNativeSurfaceForTest(
  params: EmbeddedRunAttemptParams,
  options: {
    pluginConfig?: Record<string, unknown>;
    developerInstructions?: string;
  } = {},
) {
  const { workspaceDir, sessionKey } = params;
  if (!workspaceDir) {
    throw new Error("createParams must provide a workspaceDir for Codex thread tests.");
  }
  if (!sessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const nativeToolSurfaceEnabled = testing.shouldEnableCodexAppServerNativeToolSurface(params);
  const dynamicTools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: sessionKey,
    sandbox: { enabled: false, backendId: "docker" } as never,
    nativeToolSurfaceEnabled,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: options.pluginConfig ?? {},
    onYieldDetected: () => undefined,
  });

  const request = vi.fn(async (method: string, _requestParams?: unknown) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "app/list":
        throw new Error("app/list should not run when runtime toolsAllow is empty.");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });

  // Caller-supplied plugin config is kept, but codexPlugins.enabled is always false.
  const callerCodexPlugins = options.pluginConfig?.codexPlugins as
    | Record<string, unknown>
    | undefined;
  const pluginConfig = {
    ...options.pluginConfig,
    codexPlugins: {
      ...callerCodexPlugins,
      enabled: false,
    },
  };

  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: dynamicTools as never,
    appServer: createThreadLifecycleAppServerOptions(),
    developerInstructions: options.developerInstructions,
    nativeCodeModeEnabled: nativeToolSurfaceEnabled,
    nativeCodeModeOnlyEnabled: false,
    userMcpServersEnabled: false,
    environmentSelection: [],
    pluginThreadConfig: {
      enabled: true,
      build: () =>
        buildCodexPluginThreadConfig({
          pluginConfig,
          request: request as never,
          appCacheKey: "test-app-cache-key",
        }),
    },
  });

  return { request, nativeToolSurfaceEnabled };
}
/**
 * Returns the names of the tools that remain after applying the run's tool
 * allowlist.
 *
 * The allowlist is `params.toolsAllow` passed through
 * `testing.includeForcedCodexDynamicToolAllow`, and the filtering is done by
 * `testing.filterCodexDynamicToolsForAllowlist`.
 *
 * @param params Attempt params providing `toolsAllow`.
 * @param tools Candidate runtime tools.
 */
function filterAllowedRuntimeToolNamesForTest(
  params: EmbeddedRunAttemptParams,
  tools: RuntimeDynamicToolForTest[],
) {
  const effectiveAllow = testing.includeForcedCodexDynamicToolAllow(params.toolsAllow, params);
  const permitted = testing.filterCodexDynamicToolsForAllowlist(tools, effectiveAllow);
  return permitted.map(({ name }) => name);
}
/** Element type of the `tools` array accepted by `createCodexDynamicToolBridge`. */
type RuntimeDynamicToolForTest = Parameters<
typeof createCodexDynamicToolBridge
>[0]["tools"][number];
/**
 * Creates a runtime tool stub for tests.
 *
 * The tool uses `name` as both name and label, accepts an empty object as
 * parameters, and its `execute` is a `vi.fn` mock resolving to a single text
 * content item "<name> done" with empty `details`.
 *
 * @param name Tool name.
 */
function createRuntimeDynamicTool(name: string): RuntimeDynamicToolForTest {
  const stubTool: RuntimeDynamicToolForTest = {
    name,
    label: name,
    description: `${name} test tool`,
    parameters: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
    execute: vi.fn(async () => {
      return {
        content: [{ type: "text" as const, text: `${name} done` }],
        details: {},
      };
    }),
  };
  return stubTool;
}
/**
 * Returns tool telemetry describing a run in which nothing was sent through
 * the messaging tool: the sent flag is false and every list is empty.
 *
 * Each call returns a new object with new arrays.
 */
function buildEmptyCodexToolTelemetry(): CodexAppServerToolTelemetry {
  const telemetry: CodexAppServerToolTelemetry = {
    didSendViaMessagingTool: false,
    messagingToolSentTexts: [],
    messagingToolSentMediaUrls: [],
    messagingToolSentTargets: [],
  };
  return telemetry;
}
// Runs once at module load, before the suite is declared. Presumably registers
// the shared setup/teardown hooks (such as `tempDir` handling) for these tests —
// the implementation lives in ./run-attempt-test-harness.js; confirm there.
setupRunAttemptTestHooks();
describe("runCodexAppServerAttempt", () => {
it("recreates cached Codex workspace directories after cleanup removes them", async () => {
  const cachedWorkspace = path.join(tempDir, "cached-workspace");

  // Ensure once, delete the directory behind the helper's back, then ensure again.
  await testing.ensureCodexWorkspaceDirOnceForTests(cachedWorkspace);
  await fs.rm(cachedWorkspace, { recursive: true, force: true });
  await testing.ensureCodexWorkspaceDirOnceForTests(cachedWorkspace);

  // The second call must have put the directory back.
  const stats = await fs.stat(cachedWorkspace);
  expect(stats.isDirectory()).toBe(true);
});
// With an enabled OpenClaw sandbox (and no exec-server opt-in), thread/start
// must disable Codex code mode, pass no environments, and expose the
// exec/process tools under their sandbox_* names alongside "message".
it("starts active OpenClaw sandbox threads with Codex native execution disabled", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("exec"),
    createRuntimeDynamicTool("process"),
    createRuntimeDynamicTool("message"),
  ]);
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  // Fail with a clear message instead of forwarding `undefined` through a
  // non-null assertion if the fixture ever stops providing a session key.
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }
  const sandbox = {
    enabled: true,
    backendId: "codex-test-sandbox",
    workspaceAccess: "rw",
  } as never;
  const nativeToolSurfaceEnabled = testing.shouldEnableCodexAppServerNativeToolSurface(
    params,
    sandbox,
  );
  const dynamicTools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox,
    nativeToolSurfaceEnabled,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });
  // Only thread/start is expected; any other method fails the test.
  const request = vi.fn(async (method: string, _requestParams?: unknown) => {
    if (method === "thread/start") {
      return threadStartResult();
    }
    throw new Error(`unexpected method: ${method}`);
  });
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: dynamicTools as never,
    appServer: createThreadLifecycleAppServerOptions(),
    nativeCodeModeEnabled: nativeToolSurfaceEnabled,
    nativeCodeModeOnlyEnabled: false,
    userMcpServersEnabled: nativeToolSurfaceEnabled,
    environmentSelection: [],
  });
  const startRequest = request.mock.calls.find(([method]) => method === "thread/start");
  const startParams = startRequest?.[1] as Record<string, unknown> | undefined;
  const startConfig = startParams?.config as Record<string, unknown> | undefined;
  const startDynamicTools = startParams?.dynamicTools as Array<{ name: string }> | undefined;
  expect(startConfig?.["features.code_mode"]).toBe(false);
  expect(startConfig?.["features.code_mode_only"]).toBe(false);
  expect(startParams?.environments).toEqual([]);
  expect(startDynamicTools?.map((tool) => tool.name)).toEqual([
    "message",
    "sandbox_exec",
    "sandbox_process",
  ]);
});
// Exercises the opt-in path where native Codex execution stays enabled and is
// pointed at a sandbox exec-server environment registered via "environment/add".
it("routes native Codex execution through an OpenClaw sandbox exec-server when opted in", async () => {
// Lifecycle options with the sandbox mode overridden to "danger-full-access".
const appServer = {
...createThreadLifecycleAppServerOptions(),
sandbox: "danger-full-access" as const,
};
// Sandbox context whose shell command stub always reports exit code 0 with empty
// output; the runtime id is derived from the temp directory's base name.
const sandbox = {
...createSandboxContext({
runShellCommand: async () => ({
stdout: Buffer.alloc(0),
stderr: Buffer.alloc(0),
code: 0,
}),
}),
backendId: "codex-test-sandbox",
runtimeId: `codex-test-runtime-${path.basename(tempDir)}`,
runtimeLabel: "Codex Test Sandbox",
};
// Mock client: accepts environment/add and thread/start, rejects everything else.
const request = vi.fn(async (method: string, _requestParams?: unknown) => {
if (method === "environment/add") {
return {};
}
if (method === "thread/start") {
return threadStartResult();
}
throw new Error(`unexpected method: ${method}`);
});
const client = {
getServerVersion: () => "0.132.0",
request,
};
// Everything below runs inside try/finally so the exec-server environment is
// released even when an assertion fails.
try {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("process"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
params.config = {
agents: {
defaults: {
sandbox: {
mode: "all",
backend: "codex-test-sandbox",
scope: "session",
workspaceAccess: "rw",
prune: { idleHours: 0, maxAgeDays: 0 },
},
},
},
} as never;
// Third argument opts in to the sandbox exec-server.
const nativeToolSurfaceEnabled = testing.shouldEnableCodexAppServerNativeToolSurface(
params,
sandbox as never,
{ sandboxExecServerEnabled: true },
);
// NOTE(review): "/workspace" is presumably the workspace path as seen inside the
// sandbox — confirm against createSandboxContext.
const dynamicTools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: "/workspace",
sandboxSessionKey: params.sessionKey!,
sandbox: sandbox as never,
nativeToolSurfaceEnabled,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {
appServer: {
mode: "yolo",
experimental: { sandboxExecServer: true },
},
},
onYieldDetected: () => undefined,
});
const environment = await ensureCodexSandboxExecServerEnvironment({
client: client as never,
sandbox: sandbox as never,
appServerStartOptions: appServer.start,
});
if (!environment) {
throw new Error("expected sandbox exec-server environment");
}
// The same environment selection is passed to both thread/start and the turn params.
const environmentSelection = [environment];
await startOrResumeThread({
client: client as never,
params,
cwd: environment.cwd,
dynamicTools: dynamicTools as never,
appServer,
nativeCodeModeEnabled: nativeToolSurfaceEnabled,
nativeCodeModeOnlyEnabled: false,
userMcpServersEnabled: nativeToolSurfaceEnabled,
environmentSelection,
});
const turnParams = buildTurnStartParams(params, {
threadId: "thread-1",
cwd: environment.cwd,
appServer,
sandboxPolicy: { type: "externalSandbox", networkAccess: "enabled" },
environmentSelection,
});
// Pull the recorded environment/add and thread/start params out of the mock.
const environmentAdd = request.mock.calls.find(([method]) => method === "environment/add");
const environmentAddParams = environmentAdd?.[1] as
| { environmentId?: string; execServerUrl?: string }
| undefined;
const startRequest = request.mock.calls.find(([method]) => method === "thread/start");
const startParams = startRequest?.[1] as
| {
cwd?: string;
dynamicTools?: Array<{ name: string }>;
environments?: Array<{ environmentId?: string; cwd?: string }>;
sandbox?: string;
config?: {
"features.code_mode"?: boolean;
"features.code_mode_only"?: boolean;
};
}
| undefined;
expect(nativeToolSurfaceEnabled).toBe(true);
// The registered environment is an OpenClaw sandbox id served over loopback WebSocket.
expect(environmentAddParams?.environmentId).toMatch(/^openclaw-sandbox-/);
expect(environmentAddParams?.execServerUrl).toMatch(/^ws:\/\/127\.0\.0\.1:/);
expect(startParams?.cwd).toBe("/workspace");
expect(startParams?.config?.["features.code_mode"]).toBe(true);
expect(startParams?.config?.["features.code_mode_only"]).toBe(false);
// Only "message" remains as a dynamic tool; exec/process are not exposed as dynamic tools here.
expect(startParams?.dynamicTools?.map((tool) => tool.name)).toEqual(["message"]);
expect(startParams?.environments).toEqual([
{ environmentId: environmentAddParams?.environmentId, cwd: "/workspace" },
]);
expect(startParams?.sandbox).toBe("danger-full-access");
expect(turnParams.sandboxPolicy).toEqual({
type: "externalSandbox",
networkAccess: "enabled",
});
expect(turnParams.cwd).toBe("/workspace");
// Turn params must carry the same environments as thread/start.
expect(turnParams.environments).toEqual(startParams?.environments);
} finally {
await releaseCodexSandboxExecServerEnvironment(sandbox as never);
}
});
// Verifies that releasing the sandbox exec-server environment after a failed
// turn/start leaves nothing accepting connections on the advertised URL. The
// release call is issued by this test's own catch handler, standing in for the
// cleanup named in the title.
it("closes the sandbox exec-server release path used by turn/start failure cleanup", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const appServer = {
...createThreadLifecycleAppServerOptions(),
sandbox: "danger-full-access",
};
// Shell command stub: always exit code 0 with empty stdout/stderr.
const sandbox = createSandboxContext({
runShellCommand: async () => ({
stdout: Buffer.alloc(0),
stderr: Buffer.alloc(0),
code: 0,
}),
});
// Mock client: environment/add and thread/start succeed, turn/start always fails.
const request = vi.fn(async (method: string, _params?: unknown) => {
if (method === "environment/add") {
return {};
}
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
throw new Error("turn start failed");
}
throw new Error(`unexpected method: ${method}`);
});
const client = {
getServerVersion: () => "0.132.0",
request,
};
try {
const environment = await ensureCodexSandboxExecServerEnvironment({
client: client as never,
sandbox,
appServerStartOptions: appServer.start,
});
if (!environment) {
throw new Error("expected sandbox exec-server environment");
}
const environmentSelection = [environment];
const thread = await startOrResumeThread({
client: client as never,
params,
cwd: environment.cwd,
dynamicTools: [createNamedDynamicTool("message")] as never,
appServer: appServer as never,
nativeCodeModeEnabled: true,
nativeCodeModeOnlyEnabled: false,
userMcpServersEnabled: false,
environmentSelection,
});
const turnParams = buildTurnStartParams(params, {
threadId: thread.threadId,
cwd: environment.cwd,
appServer: appServer as never,
sandboxPolicy: { type: "externalSandbox", networkAccess: "enabled" },
environmentSelection,
});
// On failure: release the environment first, then rethrow so the original error is what surfaces.
await expect(
client.request("turn/start", turnParams).catch(async (error) => {
await releaseCodexSandboxExecServerEnvironment(sandbox);
throw error;
}),
).rejects.toThrow("turn start failed");
const environmentAdd = request.mock.calls.find(([method]) => method === "environment/add");
const environmentAddParams = environmentAdd?.[1] as { execServerUrl?: string } | undefined;
expect(environmentAddParams?.execServerUrl).toMatch(/^ws:\/\/127\.0\.0\.1:/);
// After the release, connecting to the advertised exec-server URL must fail.
await expect(openSocket(environmentAddParams!.execServerUrl!)).rejects.toThrow();
} finally {
// Second release after the one in the catch handler; presumably a no-op once
// the environment is already released — confirm in sandbox-exec-server.
await releaseCodexSandboxExecServerEnvironment(sandbox);
}
});
// A failing thread/start followed by an explicit release must leave the
// sandbox exec-server URL unreachable.
it("closes the sandbox exec-server release path used by context-engine retry setup cleanup", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const appServer = {
    ...createThreadLifecycleAppServerOptions(),
    sandbox: "danger-full-access",
  };
  const sandbox = createSandboxContext({
    runShellCommand: async () => ({
      stdout: Buffer.alloc(0),
      stderr: Buffer.alloc(0),
      code: 0,
    }),
  });
  // environment/add succeeds; thread/start is the simulated setup failure.
  const request = vi.fn(async (method: string, _params?: unknown) => {
    switch (method) {
      case "environment/add":
        return {};
      case "thread/start":
        throw new Error("retry setup failed");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });
  const client = { getServerVersion: () => "0.132.0", request };
  // Cleanup handler: release the environment, then surface the original error.
  const releaseThenRethrow = async (error: unknown) => {
    await releaseCodexSandboxExecServerEnvironment(sandbox);
    throw error;
  };

  try {
    const environment = await ensureCodexSandboxExecServerEnvironment({
      client: client as never,
      sandbox,
      appServerStartOptions: appServer.start,
    });
    if (!environment) {
      throw new Error("expected sandbox exec-server environment");
    }
    const environmentSelection = [environment];

    await expect(
      startOrResumeThread({
        client: client as never,
        params,
        cwd: environment.cwd,
        dynamicTools: [createNamedDynamicTool("message")] as never,
        appServer: appServer as never,
        nativeCodeModeEnabled: true,
        nativeCodeModeOnlyEnabled: false,
        userMcpServersEnabled: false,
        environmentSelection,
      }).catch(releaseThenRethrow),
    ).rejects.toThrow("retry setup failed");

    const addCall = request.mock.calls.find(([method]) => method === "environment/add");
    const addParams = addCall?.[1] as { execServerUrl?: string } | undefined;
    expect(addParams?.execServerUrl).toMatch(/^ws:\/\/127\.0\.0\.1:/);
    // The released exec-server must no longer accept connections.
    await expect(openSocket(addParams!.execServerUrl!)).rejects.toThrow();
  } finally {
    await releaseCodexSandboxExecServerEnvironment(sandbox);
  }
});
// A startup operation that never settles must hit the timeout, run the
// onTimeout release, and leave the sandbox exec-server URL unreachable.
it("closes the sandbox exec-server release path used by startup timeout cleanup", async () => {
  const appServer = {
    ...createThreadLifecycleAppServerOptions(),
    sandbox: "danger-full-access",
  };
  const sandbox = createSandboxContext({
    runShellCommand: async () => ({
      stdout: Buffer.alloc(0),
      stderr: Buffer.alloc(0),
      code: 0,
    }),
  });
  // Only environment/add is expected in this scenario.
  const request = vi.fn(async (method: string, _params?: unknown) => {
    if (method !== "environment/add") {
      throw new Error(`unexpected method: ${method}`);
    }
    return {};
  });
  const client = { getServerVersion: () => "0.132.0", request };

  try {
    const environment = await ensureCodexSandboxExecServerEnvironment({
      client: client as never,
      sandbox,
      appServerStartOptions: appServer.start,
    });
    if (!environment) {
      throw new Error("expected sandbox exec-server environment");
    }

    // The operation's promise never resolves, so only the 5 ms timeout can end the wait.
    const stalledStartup = testing.withCodexStartupTimeout({
      timeoutMs: 5,
      signal: new AbortController().signal,
      onTimeout: async () => {
        await releaseCodexSandboxExecServerEnvironment(sandbox);
      },
      operation: async () => new Promise<never>(() => undefined),
    });
    await expect(stalledStartup).rejects.toThrow("codex app-server startup timed out");

    const addCall = request.mock.calls.find(([method]) => method === "environment/add");
    const addParams = addCall?.[1] as { execServerUrl?: string } | undefined;
    expect(addParams?.execServerUrl).toMatch(/^ws:\/\/127\.0\.0\.1:/);
    // The released exec-server must no longer accept connections.
    await expect(openSocket(addParams!.execServerUrl!)).rejects.toThrow();
  } finally {
    await releaseCodexSandboxExecServerEnvironment(sandbox);
  }
});
// With an empty filter config, the default dynamic-tool filter must drop the
// workspace/tooling names listed below while keeping web_search and message.
it("starts Codex threads without duplicate OpenClaw workspace tools by default", async () => {
  // Names that must NOT reach thread/start.
  const droppedToolNames = [
    "read",
    "write",
    "edit",
    "apply_patch",
    "exec",
    "process",
    "update_plan",
    "tool_call",
    "tool_describe",
    "tool_search",
    "tool_search_code",
  ];
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const appServer = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult();
  });

  const candidateTools = [...droppedToolNames, "web_search", "message"].map(
    createNamedDynamicTool,
  );
  const dynamicTools = testing.filterCodexDynamicTools(candidateTools, {});

  await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools,
    appServer,
  });

  const startCall = request.mock.calls.find(([method]) => method === "thread/start");
  const startParams = startCall?.[1] as { dynamicTools?: Array<{ name: string }> } | undefined;
  const dynamicToolNames = (startParams?.dynamicTools ?? []).map((tool) => tool.name);

  expect(dynamicToolNames).toContain("message");
  expect(dynamicToolNames).toContain("web_search");
  droppedToolNames.forEach((toolName) => {
    expect(dynamicToolNames).not.toContain(toolName);
  });
});
// The caller-supplied MCP server config must appear in the thread/start config
// next to the code-mode feature flags, and the fingerprint must be persisted
// in the session binding.
it("passes MCP server config through to Codex thread/start", async () => {
  const mcpUrl = "https://mcp.example.com/mcp";
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult();
  });

  await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: createThreadLifecycleAppServerOptions(),
    config: {
      mcp_servers: {
        search: { url: mcpUrl },
      },
    },
    mcpServersFingerprint: "mcp-v1",
    mcpServersFingerprintEvaluated: true,
  });

  const startCall = request.mock.calls.find(([method]) => method === "thread/start");
  const startParams = startCall?.[1] as { config?: unknown } | undefined;
  expect(startParams?.config).toMatchObject({
    mcp_servers: {
      search: { url: mcpUrl },
    },
    "features.code_mode": true,
    "features.code_mode_only": false,
  });

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.mcpServersFingerprint).toBe("mcp-v1");
});
// An existing binding with fingerprint "mcp-v1" must not be resumed when the
// run evaluates to "mcp-v2": exactly one thread/start is sent instead.
it("starts a new Codex thread when the MCP server fingerprint changes", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeCodexAppServerBinding(sessionFile, {
    threadId: "old-thread",
    cwd: workspaceDir,
    dynamicToolsFingerprint: JSON.stringify([]),
    mcpServersFingerprint: "mcp-v1",
  });
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("new-thread");
  });

  const binding = await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: createThreadLifecycleAppServerOptions(),
    mcpServersFingerprint: "mcp-v2",
    mcpServersFingerprintEvaluated: true,
  });

  const sentMethods = request.mock.calls.map(([method]) => method);
  expect(sentMethods).toEqual(["thread/start"]);
  expect(binding.threadId).toBe("new-thread");
  expect(binding.mcpServersFingerprint).toBe("mcp-v2");
});
// The cwd handed to startOrResumeThread / buildTurnStartParams (the task
// directory) is what must be sent, even though the params were created for a
// different workspace directory.
it("uses task cwd for Codex app-server requests while keeping bootstrap workspace separate", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const taskCwd = path.join(tempDir, "task-repo");
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.mkdir(taskCwd, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "SOUL.md"), "workspace bootstrap", "utf8");
  await fs.writeFile(path.join(taskCwd, "task-marker.txt"), "task marker", "utf8");

  const appServer = createThreadLifecycleAppServerOptions();
  const params = createParams(sessionFile, workspaceDir);
  // Every request is recorded; thread/start gets a real result, anything else an empty object.
  const recorded: Array<{ method: string; params: unknown }> = [];
  const client = {
    getServerVersion: () => "0.132.0",
    request: async (method: string, requestParams?: unknown) => {
      recorded.push({ method, params: requestParams });
      if (method === "thread/start") {
        return threadStartResult();
      }
      return {};
    },
  };

  await startOrResumeThread({
    client: client as never,
    params,
    cwd: taskCwd,
    dynamicTools: [],
    appServer,
    developerInstructions: "workspace bootstrap",
  });

  const threadStart = recorded.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStart?.params as { cwd?: string } | undefined;
  expect(threadStartParams?.cwd).toBe(taskCwd);

  const turnStart = buildTurnStartParams(params, {
    threadId: "thread-1",
    cwd: taskCwd,
    appServer,
  });
  expect(turnStart.cwd).toBe(taskCwd);
});
// When MCP config was evaluated but produced no fingerprint, a binding that
// still carries "mcp-v1" must be replaced by a new thread whose binding has no
// MCP fingerprint, both in the returned value and on disk.
it("starts a no-MCP Codex thread when MCP config is evaluated empty", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeCodexAppServerBinding(sessionFile, {
    threadId: "old-thread",
    cwd: workspaceDir,
    dynamicToolsFingerprint: JSON.stringify([]),
    mcpServersFingerprint: "mcp-v1",
  });
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("new-thread");
  });

  const binding = await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: createThreadLifecycleAppServerOptions(),
    mcpServersFingerprintEvaluated: true,
  });

  const sentMethods = request.mock.calls.map(([method]) => method);
  expect(sentMethods).toEqual(["thread/start"]);
  expect(binding.threadId).toBe("new-thread");
  expect(binding.mcpServersFingerprint).toBeUndefined();

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.mcpServersFingerprint).toBeUndefined();
});
// Message-tool delivery guidance must appear only when the run is in
// "message_tool_only" mode AND a message tool is actually available.
it("scopes Codex developer reply instructions to message-tool-only delivery", () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  const normalReplyHint = "reply normally in your final assistant message";

  // message_tool_only with a message tool available.
  params.sourceReplyDeliveryMode = "message_tool_only";
  const withMessageTool = testing.buildDeveloperInstructions(params, {
    dynamicTools: [createMessageDynamicTool("Message test tool")],
  });
  expect(withMessageTool).toContain(
    "Visible source replies are not automatically delivered for this run.",
  );

  // message_tool_only without any dynamic tools.
  const withoutMessageToolInstructions = testing.buildDeveloperInstructions(params, {
    dynamicTools: [],
  });
  expect(withoutMessageToolInstructions).toContain(normalReplyHint);
  expect(withoutMessageToolInstructions).not.toContain("message(action=send)");
  expect(withoutMessageToolInstructions).not.toContain("Use `message`");

  // Automatic delivery.
  params.sourceReplyDeliveryMode = "automatic";
  const automaticInstructions = testing.buildDeveloperInstructions(params);
  expect(automaticInstructions).toContain(normalReplyHint);
  expect(automaticInstructions).not.toContain("message(action=send)");
});
it("includes Codex app-server scoped plugin command guidance in developer instructions", () => {
  // Register one command whose guidance entries cover each scoping shape:
  // a plain string, a codex_app_server surface, no surface, and an openclaw_main surface.
  registerPluginCommand("demo-plugin", {
    name: "codex_demo",
    description: "Codex demo command",
    agentPromptGuidance: [
      "Legacy global command guidance.",
      {
        text: "Codex app-server command guidance.",
        surfaces: ["codex_app_server"],
      },
      {
        text: "Unscoped structured command guidance.",
      },
      {
        text: "OpenClaw main command guidance.",
        surfaces: ["openclaw_main"],
      },
    ],
    handler: async () => ({ text: "ok" }),
  });

  const instructions = testing.buildDeveloperInstructions(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );

  // Only the codex_app_server-scoped entry is expected in the instructions.
  expect(instructions).toContain("Codex app-server command guidance.");
  const excludedGuidance = [
    "Legacy global command guidance.",
    "Unscoped structured command guidance.",
    "OpenClaw main command guidance.",
  ];
  for (const guidance of excludedGuidance) {
    expect(instructions).not.toContain(guidance);
  }
});
it("keeps OpenClaw skills out of Codex developer instructions", async () => {
  // Capture llm_input hook payloads and point trajectory output at tempDir.
  const llmInput = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "llm_input", handler: llmInput }]),
  );
  const trajectoryDir = path.join(tempDir, "trajectory");
  vi.stubEnv("OPENCLAW_TRAJECTORY", "1");
  vi.stubEnv("OPENCLAW_TRAJECTORY_DIR", trajectoryDir);

  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.skillsSnapshot = {
    prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
    skills: [],
  };

  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await run;

  // Params of the first recorded request for a given method.
  const requestParamsFor = (method: string) =>
    harness.requests.find((entry) => entry.method === method)?.params;

  // The skills block must not appear in the thread-level developer instructions.
  const threadStartParams = requestParamsFor("thread/start") as {
    developerInstructions?: string;
  };
  expect(threadStartParams.developerInstructions).not.toContain("<available_skills>");

  // It is carried in the turn input, ahead of the user request.
  const turnStartParams = requestParamsFor("turn/start") as {
    input?: Array<{ text?: string }>;
  };
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  expect(inputText).toContain("## OpenClaw Skills");
  expect(inputText).toContain("<available_skills>");
  expect(inputText).toContain("Current user request:\nhello");

  // The llm_input hook and the trajectory events report the same prompt text.
  const [llmInputPayload] = mockCall(llmInput, "llm_input") as [{ prompt?: string }, unknown];
  expect(llmInputPayload.prompt).toBe(inputText);
  const trajectoryRaw = await fs.readFile(path.join(trajectoryDir, "session-1.jsonl"), "utf8");
  const trajectoryEvents = trajectoryRaw
    .trim()
    .split("\n")
    .map((line) => JSON.parse(line) as { data?: { prompt?: string }; type?: string });
  const promptOfEvent = (type: string) =>
    trajectoryEvents.find((event) => event.type === type)?.data?.prompt;
  expect(promptOfEvent("context.compiled")).toBe(inputText);
  expect(promptOfEvent("prompt.submitted")).toBe(inputText);

  // The system prompt report accounts for the skills prompt and its single entry.
  const skillsReport = result.systemPromptReport?.skills;
  expect(skillsReport?.promptChars).toBe(params.skillsSnapshot.prompt.length);
  expect(skillsReport?.entries).toEqual([
    { name: "demo", blockChars: "<skill><name>demo</name></skill>".length },
  ]);
});
it("keeps leading delivery hints out of the Codex current user request", async () => {
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session-delivery-hint.jsonl"),
    path.join(tempDir, "workspace-delivery-hint"),
  );
  // The prompt opens with a delivery hint followed by the actual request text.
  params.prompt = "Delivery: to send a message, use the `message` tool.\n\nhello";
  params.skillsSnapshot = {
    prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
    skills: [],
  };

  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const turnStartRequest = harness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStartRequest?.params as {
    input?: Array<{ text?: string }>;
  };
  const inputText = turnStartParams.input?.[0]?.text ?? "";

  // The hint is present as delivery metadata, and the user request is only "hello".
  const expectedFragments = [
    "OpenClaw delivery metadata:",
    "This delivery metadata is runtime routing guidance, not the user's request.",
    "Delivery: to send a message, use the `message` tool.",
    "Current user request:\nhello",
  ];
  for (const fragment of expectedFragments) {
    expect(inputText).toContain(fragment);
  }
  expect(inputText).not.toContain("Current user request:\nDelivery:");
});
it("mirrors the Codex prompt into the transcript when the turn starts", async () => {
  const sessionFile = path.join(tempDir, "session-early-prompt.jsonl");
  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, path.join(tempDir, "workspace-early-prompt"));
  params.prompt = "external channel prompt";
  const onUserMessagePersisted = vi.fn();
  params.onUserMessagePersisted = onUserMessagePersisted;
  const readTranscript = () => fs.readFile(sessionFile, "utf8");

  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");

  // The user prompt lands in the transcript while the turn is still open.
  await vi.waitFor(async () => {
    const transcript = await readTranscript();
    expect(transcript).toContain('"role":"user"');
    expect(transcript).toContain('"content":"external channel prompt"');
    expect(transcript).toContain('"idempotencyKey":"codex-app-server:thread-1:turn-1:prompt"');
  });
  await vi.waitFor(() => {
    expect(onUserMessagePersisted).toHaveBeenCalledWith(
      expect.objectContaining({
        role: "user",
        content: "external channel prompt",
        idempotencyKey: "codex-app-server:thread-1:turn-1:prompt",
      }),
    );
  });
  const transcriptBeforeCompletion = await readTranscript();
  expect(transcriptBeforeCompletion).not.toContain('"role":"assistant"');

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  // After completion there is still exactly one user entry and one callback call.
  const transcriptAfterCompletion = await readTranscript();
  const userRoleMatches = transcriptAfterCompletion.match(/"role":"user"/gu);
  expect(userRoleMatches).toHaveLength(1);
  expect(onUserMessagePersisted).toHaveBeenCalledTimes(1);
});
it("does not mirror the Codex prompt early when user message persistence is suppressed", async () => {
  const sessionFile = path.join(tempDir, "session-suppressed-early-prompt.jsonl");
  const harness = createStartedThreadHarness();
  const params = createParams(
    sessionFile,
    path.join(tempDir, "workspace-suppressed-early-prompt"),
  );
  params.prompt = "already persisted prompt";
  params.suppressNextUserMessagePersistence = true;

  // Reads the transcript, treating a missing file as an empty transcript.
  const readTranscript = async () => {
    try {
      return await fs.readFile(sessionFile, "utf8");
    } catch (error) {
      if ((error as NodeJS.ErrnoException).code === "ENOENT") {
        return "";
      }
      throw error;
    }
  };
  // Asserts that neither the prompt text nor its idempotency key was written.
  const expectPromptNotMirrored = (transcript: string) => {
    expect(transcript).not.toContain("already persisted prompt");
    expect(transcript).not.toContain(
      '"idempotencyKey":"codex-app-server:thread-1:turn-1:prompt"',
    );
  };

  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");

  // Poll briefly for the prompt; the poll is expected to time out and reject.
  const promptAppears = vi.waitFor(
    async () => {
      const transcript = await readTranscript();
      expect(transcript).toContain("already persisted prompt");
    },
    { interval: 1, timeout: 100 },
  );
  await expect(promptAppears).rejects.toThrow();
  expectPromptNotMirrored(await readTranscript());

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;
  expectPromptNotMirrored(await readTranscript());
});
it("accepts turn completions scoped by nested turn thread id", async () => {
  const harness = createStartedThreadHarness();
  const run = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await harness.waitForMethod("turn/start");

  // The outer threadId names a different thread; only turn.threadId is "thread-1".
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "parent-thread",
      turn: {
        id: "turn-1",
        threadId: "thread-1",
        status: "completed",
        items: [{ id: "agent-1", type: "agentMessage", text: "Nested done." }],
        error: null,
        startedAt: null,
        completedAt: null,
        durationMs: null,
      },
    },
  });

  const { promptError, assistantTexts } = await run;
  expect(promptError).toBeNull();
  expect(assistantTexts).toEqual(["Nested done."]);
});
it("keeps forced message dynamic tool when toolsAllow omits it", () => {
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  // The allow-list names only music_generate; message is not listed.
  params.toolsAllow = ["music_generate"];

  const candidateTools = [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("music_generate"),
  ];
  const dynamicToolNames = filterAllowedRuntimeToolNamesForTest(params, candidateTools);

  for (const expectedName of ["message", "music_generate"]) {
    expect(dynamicToolNames).toContain(expectedName);
  }
});
it("keeps forced message dynamic tool when toolsAllow is empty", () => {
  const candidateTools = [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("music_generate"),
    createRuntimeDynamicTool("heartbeat_respond"),
  ];
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  // An empty allow-list combined with message_tool_only delivery.
  params.toolsAllow = [];

  expect(filterAllowedRuntimeToolNamesForTest(params, candidateTools)).toEqual(["message"]);
});
it("keeps forced heartbeat registration inside narrow toolsAllow policy", () => {
  const candidateTools = [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("heartbeat_respond"),
  ];
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  // The allow-list names message only, so heartbeat_respond must be filtered out.
  params.toolsAllow = ["message"];

  expect(filterAllowedRuntimeToolNamesForTest(params, candidateTools)).toEqual(["message"]);
});
it("keeps searchable OpenClaw dynamic tools when code-mode-only is enabled", () => {
  const toolBridge = createCodexDynamicToolBridge({
    tools: [
      createRuntimeDynamicTool("message"),
      createRuntimeDynamicTool("web_search"),
      createRuntimeDynamicTool("heartbeat_respond"),
      createRuntimeDynamicTool("sessions_spawn"),
      createRuntimeDynamicTool("sessions_yield"),
    ],
    signal: new AbortController().signal,
    directToolNames: ["message"],
  });
  const specNamed = (name: string) => toolBridge.specs.find((spec) => spec.name === name);

  // message and sessions_yield carry neither a namespace nor a deferLoading flag.
  for (const directName of ["message", "sessions_yield"]) {
    const directSpec = specNamed(directName);
    expect(directSpec).not.toHaveProperty("namespace");
    expect(directSpec).not.toHaveProperty("deferLoading");
  }

  // The remaining tools are namespaced under the OpenClaw namespace and deferred.
  for (const deferredName of ["web_search", "heartbeat_respond", "sessions_spawn"]) {
    const deferredSpec = specNamed(deferredName);
    expect(deferredSpec?.namespace).toBe(CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE);
    expect(deferredSpec?.deferLoading).toBe(true);
  }
});
it("registers heartbeat response durably without advertising it on normal turns", async () => {
  // The coding-tools factory adds heartbeat_respond only when enableHeartbeatTool is true.
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    const heartbeatTools =
      options?.enableHeartbeatTool === true ? [createRuntimeDynamicTool("heartbeat_respond")] : [];
    return [createRuntimeDynamicTool("message"), ...heartbeatTools];
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  // Builds run params; heartbeat-triggered runs also use message_tool_only delivery.
  const createRunParams = (trigger?: EmbeddedRunAttemptParams["trigger"]) => {
    const runParams = createParams(sessionFile, workspaceDir);
    runParams.disableTools = false;
    runParams.runtimePlan = createCodexRuntimePlanFixture();
    if (trigger) {
      runParams.trigger = trigger;
      if (trigger === "heartbeat") {
        runParams.sourceReplyDeliveryMode = "message_tool_only";
      }
    }
    return runParams;
  };
  const namesOf = (specs: ReadonlyArray<{ name: string }>) => specs.map((spec) => spec.name);

  const registeredTools = [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("heartbeat_respond"),
  ];

  // Normal turn: both tools are registered, only message is advertised.
  const normalBridge = createCodexToolBridgeForTest(
    createRunParams(),
    [createRuntimeDynamicTool("message")],
    registeredTools,
  );
  const normalInstructions = testing.buildDeveloperInstructions(createRunParams(), {
    dynamicTools: normalBridge.availableSpecs,
  });
  const registeredToolNames = namesOf(normalBridge.specs);
  expect(registeredToolNames).toContain("message");
  expect(registeredToolNames).toContain("heartbeat_respond");
  expect(normalInstructions).toContain(
    "Deferred searchable OpenClaw dynamic tools available: message.",
  );
  expect(normalInstructions).not.toContain(
    "Deferred searchable OpenClaw dynamic tools available: heartbeat_respond",
  );

  // A heartbeat turn and the following normal turn keep the same registered names.
  const heartbeatBridge = createCodexToolBridgeForTest(
    createRunParams("heartbeat"),
    [createRuntimeDynamicTool("message"), createRuntimeDynamicTool("heartbeat_respond")],
    registeredTools,
  );
  const nextNormalBridge = createCodexToolBridgeForTest(
    createRunParams(),
    [createRuntimeDynamicTool("message")],
    registeredTools,
  );
  expect(namesOf(heartbeatBridge.specs)).toEqual(registeredToolNames);
  expect(namesOf(nextNormalBridge.specs)).toEqual(registeredToolNames);
});
it("keeps the persistent dynamic schema stable across heartbeat-only turns", async () => {
  // The coding-tools factory adds heartbeat_respond only when enableHeartbeatTool is true.
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    const heartbeatTools =
      options?.enableHeartbeatTool === true ? [createRuntimeDynamicTool("heartbeat_respond")] : [];
    return [
      createRuntimeDynamicTool("message"),
      createRuntimeDynamicTool("web_search"),
      ...heartbeatTools,
    ];
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  // Builds run params whose runtime plan narrows tools to heartbeat_respond on heartbeat turns.
  const createRunParams = (trigger?: EmbeddedRunAttemptParams["trigger"]) => {
    const runParams = createParams(sessionFile, workspaceDir);
    runParams.disableTools = false;
    const basePlan = createCodexRuntimePlanFixture();
    const normalize = (tools: Array<{ name: string }>) => {
      if (trigger === "heartbeat") {
        return tools.filter((tool) => tool.name === "heartbeat_respond");
      }
      return tools;
    };
    runParams.runtimePlan = {
      ...basePlan,
      tools: {
        normalize,
        logDiagnostics: () => undefined,
      },
    } as unknown as NonNullable<EmbeddedRunAttemptParams["runtimePlan"]>;
    if (trigger) {
      runParams.trigger = trigger;
    }
    return runParams;
  };
  const namesOf = (specs: ReadonlyArray<{ name: string }>) => specs.map((spec) => spec.name);

  const registeredTools = [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("web_search"),
    createRuntimeDynamicTool("heartbeat_respond"),
  ];
  const normalBridge = createCodexToolBridgeForTest(
    createRunParams(),
    registeredTools,
    registeredTools,
  );
  const heartbeatBridge = createCodexToolBridgeForTest(
    createRunParams("heartbeat"),
    [createRuntimeDynamicTool("heartbeat_respond")],
    registeredTools,
  );
  const nextNormalBridge = createCodexToolBridgeForTest(
    createRunParams(),
    registeredTools,
    registeredTools,
  );

  // The heartbeat turn exposes only heartbeat_respond, but the registered specs match
  // the normal turn both during and after it.
  expect(namesOf(heartbeatBridge.availableSpecs)).toEqual(["heartbeat_respond"]);
  expect(namesOf(heartbeatBridge.specs)).toEqual(namesOf(normalBridge.specs));
  expect(namesOf(nextNormalBridge.specs)).toEqual(namesOf(normalBridge.specs));
});
it("disables Codex native tool surfaces when runtime toolsAllow is empty", async () => {
  // Shape of the thread/start params fields inspected below.
  type AppPolicy = {
    enabled?: boolean;
    destructive_enabled?: boolean;
    open_world_enabled?: boolean;
  };
  type ThreadStartParams = {
    dynamicTools?: Array<{ name?: string }>;
    environments?: unknown[];
    developerInstructions?: string;
    config?: {
      "features.code_mode"?: boolean;
      "features.code_mode_only"?: boolean;
      apps?: Record<string, AppPolicy>;
    };
  };

  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("web_search"),
  ]);
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.toolsAllow = [];
  params.extraSystemPrompt = "Tool and file actions are disabled for this sender by chat policy.";

  // A Codex plugin is configured so the test can check its app entry is not enabled.
  const { request, nativeToolSurfaceEnabled } = await startThreadWithDisabledNativeSurfaceForTest(
    params,
    {
      pluginConfig: {
        appServer: { mode: "yolo" },
        codexPlugins: {
          enabled: true,
          plugins: {
            "google-calendar": {
              marketplaceName: "openai-curated",
              pluginName: "google-calendar",
            },
          },
        },
      },
      developerInstructions: params.extraSystemPrompt,
    },
  );

  const startCall = request.mock.calls.find((call) => call[0] === "thread/start");
  const startParams = startCall?.[1] as ThreadStartParams | undefined;
  const startConfig = startParams?.config;

  expect(nativeToolSurfaceEnabled).toBe(false);
  expect(startParams?.dynamicTools).toEqual([]);
  expect(startParams?.environments).toEqual([]);
  expect(startParams?.developerInstructions).toContain(
    "Tool and file actions are disabled for this sender by chat policy.",
  );
  expect(startConfig?.["features.code_mode"]).toBe(false);
  expect(startConfig?.["features.code_mode_only"]).toBe(false);
  expect(startConfig?.apps?.["_default"]).toEqual({
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  });
  expect(startConfig?.apps?.["google-calendar-app"]?.enabled).toBeUndefined();

  // No app/list request is issued.
  const requestedMethods = request.mock.calls.map((call) => call[0]);
  expect(requestedMethods).not.toContain("app/list");
});
it("fails closed for Codex app defaults when restricted native tools have no plugin config", async () => {
  // Shape of the thread/start params fields inspected below.
  type AppPolicy = {
    enabled?: boolean;
    destructive_enabled?: boolean;
    open_world_enabled?: boolean;
  };
  type ThreadStartParams = {
    config?: {
      apps?: Record<string, AppPolicy>;
    };
  };

  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("message")]);
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.toolsAllow = [];

  // The plugin config has an appServer mode but no codexPlugins section.
  const { request } = await startThreadWithDisabledNativeSurfaceForTest(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });

  const startCall = request.mock.calls.find((call) => call[0] === "thread/start");
  const startParams = startCall?.[1] as ThreadStartParams | undefined;
  expect(startParams?.config?.apps?.["_default"]).toEqual({
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  });

  const requestedMethods = request.mock.calls.map((call) => call[0]);
  expect(requestedMethods).not.toContain("app/list");
});
it("keeps searchable Codex dynamic tools canonical in mirrored transcript snapshots", async () => {
  const callId = "call-wiki-status-1";
  const toolName = "wiki_status";
  const projector = new CodexAppServerEventProjector(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    "thread-1",
    "turn-1",
  );

  // Record one dynamic tool call followed by its successful result.
  projector.recordDynamicToolCall({
    callId,
    tool: toolName,
    arguments: { topic: "README.md" },
  });
  projector.recordDynamicToolResult({
    callId,
    tool: toolName,
    success: true,
    terminalType: "completed",
    contentItems: [{ type: "inputText", text: "wiki_status done" }],
  });

  const { messagesSnapshot } = projector.buildResult(buildEmptyCodexToolTelemetry());
  const snapshotRoles = messagesSnapshot.map((message) => message.role);
  expect(snapshotRoles).toEqual(["user", "assistant", "toolResult"]);

  // The assistant entry mirrors the call under the tool's own name.
  const assistantMessage = messagesSnapshot[1];
  if (assistantMessage?.role !== "assistant") {
    throw new Error("expected mirrored assistant tool-call message");
  }
  expect(assistantMessage.content).toStrictEqual([
    {
      type: "toolCall",
      id: callId,
      name: toolName,
      arguments: { topic: "README.md" },
      input: { topic: "README.md" },
    },
  ]);

  // The tool-result entry carries the call id under each of its id aliases.
  const toolResultMessage = messagesSnapshot[2];
  if (toolResultMessage?.role !== "toolResult") {
    throw new Error("expected mirrored tool-result message");
  }
  expect(toolResultMessage.toolCallId).toBe(callId);
  expect(toolResultMessage.toolName).toBe(toolName);
  expect(toolResultMessage.isError).toBe(false);
  expect(toolResultMessage.content).toStrictEqual([
    {
      type: "toolResult",
      id: callId,
      name: toolName,
      toolName,
      toolCallId: callId,
      toolUseId: callId,
      tool_use_id: callId,
      content: "wiki_status done",
      text: "wiki_status done",
    },
  ]);

  // Neither of these item names appears anywhere in the serialized snapshot.
  const serializedSnapshot = JSON.stringify(messagesSnapshot);
  for (const forbidden of ["tool_search", "function_call_output"]) {
    expect(serializedSnapshot).not.toContain(forbidden);
  }
});
it("applies before_prompt_build to Codex developer instructions and turn input", async () => {
  // The hook replaces the system prompt and adds system and turn context around it.
  const beforePromptBuild = vi.fn(async () => ({
    systemPrompt: "custom codex system",
    prependSystemContext: "pre system",
    appendSystemContext: "post system",
    prependContext: "queued context",
  }));
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "before_prompt_build", handler: beforePromptBuild }]),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed one prior assistant message so the hook receives session history.
  SessionManager.open(sessionFile).appendMessage(assistantMessage("previous turn", Date.now()));

  const harness = createStartedThreadHarness();
  const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  // The hook ran once with the prompt, the seeded history, and the run identifiers.
  expect(beforePromptBuild).toHaveBeenCalledOnce();
  const [hookInput, hookContext] = mockCall(beforePromptBuild, "before_prompt_build") as [
    { messages?: Array<{ role?: string }>; prompt?: string },
    { runId?: string; sessionId?: string },
  ];
  expect(hookInput.prompt).toBe("hello");
  expect(hookInput.messages?.[0]?.role).toBe("assistant");
  expect(hookContext.runId).toBe("run-1");
  expect(hookContext.sessionId).toBe("session-1");

  // Plugin system context is wrapped in a delimited block on both sides of the prompt.
  const wrapPluginSystemContext = (text: string) =>
    [
      "---",
      "OpenClaw plugin-injected system context. This block is not workspace file content.",
      text,
      "---",
    ].join("\n\n");
  const expectedSystemSection = [
    wrapPluginSystemContext("pre system"),
    "custom codex system",
    wrapPluginSystemContext("post system"),
  ].join("\n\n");
  const threadStartRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStartRequest?.params as
    | { developerInstructions?: string }
    | undefined;
  expect(threadStartParams?.developerInstructions).toContain(expectedSystemSection);

  // prependContext is placed ahead of the user prompt in the turn input.
  const turnStartRequest = harness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStartRequest?.params as
    | { input?: Array<{ text?: string; text_elements?: unknown[]; type?: string }> }
    | undefined;
  expect(turnStartParams?.input).toEqual([
    { type: "text", text: "queued context\n\nhello", text_elements: [] },
  ]);
});
it("projects mirrored history when starting Codex without a native thread binding", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed a user/assistant exchange in the session before any Codex thread exists.
  const sessionManager = SessionManager.open(sessionFile);
  sessionManager.appendMessage(userMessage("we are fixing the Opik default project", Date.now()));
  sessionManager.appendMessage(assistantMessage("Opik default project context", Date.now() + 1));

  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.prompt = "make the default webpage openclaw";
  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const turnStartRequest = harness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStartRequest?.params as
    | { input?: Array<{ text?: string }> }
    | undefined;
  const inputText = turnStartParams?.input?.[0]?.text ?? "";

  // The turn input carries the seeded history plus the new request.
  const expectedFragments = [
    "OpenClaw assembled context for this turn:",
    "we are fixing the Opik default project",
    "Opik default project context",
    "Current user request:",
    "make the default webpage openclaw",
  ];
  for (const fragment of expectedFragments) {
    expect(inputText).toContain(fragment);
  }
});
it("projects newer mirrored history when resuming an existing Codex thread binding", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // Timestamp the seeded messages after the binding's updatedAt.
  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const bindingUpdatedAt = Date.parse(existingBinding?.updatedAt ?? "");
  if (!Number.isFinite(bindingUpdatedAt)) {
    throw new Error("expected valid Codex binding timestamp");
  }
  const sessionManager = SessionManager.open(sessionFile);
  sessionManager.appendMessage(
    userMessage("we were discussing the Sonnet leak screenshots", bindingUpdatedAt + 1_000),
  );
  sessionManager.appendMessage(
    assistantMessage("David Ondrej was mentioned in that prior thread", bindingUpdatedAt + 2_000),
  );

  const harness = createResumeHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.prompt = "is the previous message trustworthy?";
  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await run;

  // The thread is resumed and the newer messages are included in the turn input.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/resume");
  const turnStartRequest = harness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStartRequest?.params as
    | { input?: Array<{ text?: string }> }
    | undefined;
  const inputText = turnStartParams?.input?.[0]?.text ?? "";
  const expectedFragments = [
    "OpenClaw assembled context for this turn:",
    "we were discussing the Sonnet leak screenshots",
    "David Ondrej was mentioned in that prior thread",
    "Current user request:",
    "is the previous message trustworthy?",
  ];
  for (const fragment of expectedFragments) {
    expect(inputText).toContain(fragment);
  }
});
it("does not reproject Codex-owned mirrored messages on consecutive resumes", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // Rewrite the binding file so its updatedAt is one minute in the past.
  const oldBindingUpdatedAt = Date.now() - 60_000;
  const bindingPath = `${sessionFile}.codex-app-server.json`;
  const storedBinding = JSON.parse(await fs.readFile(bindingPath, "utf8")) as Record<
    string,
    unknown
  >;
  const backdatedBinding = {
    ...storedBinding,
    updatedAt: new Date(oldBindingUpdatedAt).toISOString(),
  };
  await fs.writeFile(bindingPath, JSON.stringify(backdatedBinding, null, 2) + "\n");

  // Seed messages timestamped after the backdated binding.
  const sessionManager = SessionManager.open(sessionFile);
  sessionManager.appendMessage(
    userMessage("we were discussing the Sonnet leak screenshots", oldBindingUpdatedAt + 1_000),
  );
  sessionManager.appendMessage(
    assistantMessage(
      "David Ondrej was mentioned in that prior thread",
      oldBindingUpdatedAt + 2_000,
    ),
  );

  // Text of the first input item sent with turn/start, or "" when absent.
  const turnInputText = (harness: ReturnType<typeof createResumeHarness>) => {
    const turnStartRequest = harness.requests.find((entry) => entry.method === "turn/start");
    const turnStartParams = turnStartRequest?.params as
      | { input?: Array<{ text?: string }> }
      | undefined;
    return turnStartParams?.input?.[0]?.text ?? "";
  };

  // First resume: the seeded history is projected into the turn input.
  const firstHarness = createResumeHarness();
  const firstParams = createParams(sessionFile, workspaceDir);
  firstParams.prompt = "is the previous message trustworthy?";
  const firstRun = runCodexAppServerAttempt(firstParams);
  await firstHarness.waitForMethod("turn/start");
  await firstHarness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await firstRun;
  const firstInputText = turnInputText(firstHarness);
  expect(firstInputText).toContain("OpenClaw assembled context for this turn:");
  expect(firstInputText).toContain("we were discussing the Sonnet leak screenshots");
  expect(firstInputText).toContain("is the previous message trustworthy?");

  // Second resume: nothing from the earlier turn is projected again.
  const secondHarness = createResumeHarness();
  const secondParams = createParams(sessionFile, workspaceDir);
  secondParams.prompt = "continue from there";
  const secondRun = runCodexAppServerAttempt(secondParams);
  await secondHarness.waitForMethod("turn/start");
  await secondHarness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await secondRun;
  const secondInputText = turnInputText(secondHarness);
  const notReprojected = [
    "OpenClaw assembled context for this turn:",
    "we were discussing the Sonnet leak screenshots",
    "is the previous message trustworthy?",
  ];
  for (const fragment of notReprojected) {
    expect(secondInputText).not.toContain(fragment);
  }
  expect(secondInputText).toContain("continue from there");
});
it("passes stable workspace files as Codex developer instructions and routes MEMORY.md through tools", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentsGuidance = "Follow AGENTS guidance.";
  const soulGuidance = "Soul voice goes here.";
  const identityGuidance = "Identity guidance goes here.";
  const toolGuidance = "Tool guidance goes here.";
  const userProfile = "User profile goes here.";
  const memorySummary = "Memory summary goes here.";

  // Write each workspace file in turn, in a fixed order.
  await fs.mkdir(workspaceDir, { recursive: true });
  const workspaceFiles: Array<[string, string]> = [
    ["AGENTS.md", agentsGuidance],
    ["SOUL.md", soulGuidance],
    ["IDENTITY.md", identityGuidance],
    ["TOOLS.md", toolGuidance],
    ["USER.md", userProfile],
    ["MEMORY.md", memorySummary],
  ];
  for (const [fileName, contents] of workspaceFiles) {
    await fs.writeFile(path.join(workspaceDir, fileName), contents);
  }

  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("memory_search"),
    createRuntimeDynamicTool("memory_get"),
  ]);
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  setAgentWorkspaceForTest(params, workspaceDir);
  const {
    collaborationInstructions,
    inputText,
    systemPromptReport,
    threadDeveloperInstructions,
  } = await buildCodexTurnContextForTest(params, workspaceDir);

  // Assertion helpers: every fragment present / every fragment absent.
  const expectIncludesAll = (text: unknown, fragments: string[]) => {
    for (const fragment of fragments) {
      expect(text).toContain(fragment);
    }
  };
  const expectIncludesNone = (text: unknown, fragments: string[]) => {
    for (const fragment of fragments) {
      expect(text).not.toContain(fragment);
    }
  };

  // Thread developer instructions: of the workspace file contents, only TOOLS.md.
  expectIncludesAll(threadDeveloperInstructions, [
    "OpenClaw Workspace Instructions",
    toolGuidance,
  ]);
  expectIncludesNone(threadDeveloperInstructions, [
    soulGuidance,
    identityGuidance,
    userProfile,
    memorySummary,
    "Codex loads AGENTS.md natively",
    agentsGuidance,
  ]);

  // Collaboration instructions: SOUL.md, IDENTITY.md and USER.md contents.
  expectIncludesAll(collaborationInstructions, [
    "# Collaboration Mode: Default",
    "request_user_input availability",
    "OpenClaw Agent Soul",
    soulGuidance,
    identityGuidance,
    userProfile,
  ]);
  expectIncludesNone(collaborationInstructions, [toolGuidance, memorySummary]);

  // Turn input: no file contents, only a MEMORY.md pointer naming the memory tools.
  expectIncludesAll(inputText, [
    "OpenClaw runtime context for this turn:",
    "OpenClaw Workspace Memory",
    "MEMORY.md exists in the active agent workspace",
    "memory_search",
    "memory_get",
    "Current user request:\nhello",
  ]);
  expectIncludesNone(inputText, [
    "does not override Codex system/developer instructions",
    "not developer policy",
    soulGuidance,
    identityGuidance,
    toolGuidance,
    userProfile,
    memorySummary,
    "Codex loads AGENTS.md natively",
    agentsGuidance,
  ]);

  // The reported system prompt size covers both instruction blocks joined by a blank line.
  expect(systemPromptReport.systemPrompt.chars).toBe(
    [threadDeveloperInstructions, collaborationInstructions].join("\n\n").length,
  );

  // Per-file stats: everything is reported at full length except MEMORY.md (0 injected).
  const fileStats = new Map(
    systemPromptReport.injectedWorkspaceFiles.map((file) => [file.name, file]),
  );
  const fullyInjectedFiles: Array<[string, string]> = [
    ["SOUL.md", soulGuidance],
    ["IDENTITY.md", identityGuidance],
    ["TOOLS.md", toolGuidance],
    ["USER.md", userProfile],
    ["AGENTS.md", agentsGuidance],
  ];
  for (const [fileName, contents] of fullyInjectedFiles) {
    expect(fileStats.get(fileName)).toMatchObject({
      rawChars: contents.length,
      injectedChars: contents.length,
      truncated: false,
    });
  }
  expect(fileStats.get("MEMORY.md")).toMatchObject({
    rawChars: memorySummary.length,
    injectedChars: 0,
    truncated: false,
  });
});
it("sends workspace bootstrap instructions through Codex app-server payloads", async () => {
  // Every bootstrap file carries a unique marker string so the assertions can
  // tell which request payload (thread/start vs. turn/start) contained it.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentsGuidance = "Follow AGENTS guidance.";
  const soulGuidance = "Soul voice goes here.";
  const identityGuidance = "Identity guidance goes here.";
  const toolGuidance = "Tool guidance goes here.";
  const userProfile = "User profile goes here.";
  const workspaceFixtures: Array<[string, string]> = [
    ["AGENTS.md", agentsGuidance],
    ["SOUL.md", soulGuidance],
    ["IDENTITY.md", identityGuidance],
    ["TOOLS.md", toolGuidance],
    ["USER.md", userProfile],
  ];
  await fs.mkdir(workspaceDir, { recursive: true });
  for (const [fileName, contents] of workspaceFixtures) {
    await fs.writeFile(path.join(workspaceDir, fileName), contents);
  }

  // Drive one full attempt: wait for turn/start, yield a tick, complete the turn.
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  setAgentWorkspaceForTest(attemptParams, workspaceDir);
  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await pendingRun;

  // thread/start: only the TOOLS.md marker is expected in developer instructions.
  type ThreadStartPayload = {
    config?: { instructions?: string };
    developerInstructions?: string;
  };
  const threadStartParams = requests.find((entry) => entry.method === "thread/start")
    ?.params as ThreadStartPayload;
  expect(threadStartParams.config?.instructions).toBeUndefined();
  expect(threadStartParams.developerInstructions).toContain("OpenClaw Workspace Instructions");
  expect(threadStartParams.developerInstructions).toContain(toolGuidance);
  for (const excluded of [agentsGuidance, soulGuidance, identityGuidance, userProfile]) {
    expect(threadStartParams.developerInstructions).not.toContain(excluded);
  }

  // turn/start: soul, identity and user profile are expected in the
  // collaboration-mode developer instructions; the TOOLS.md marker is not.
  type TurnStartPayload = {
    input?: Array<{ text?: string }>;
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };
  const turnStartParams = requests.find((entry) => entry.method === "turn/start")
    ?.params as TurnStartPayload;
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";
  for (const included of ["OpenClaw Agent Soul", soulGuidance, identityGuidance, userProfile]) {
    expect(collaborationInstructions).toContain(included);
  }
  expect(collaborationInstructions).not.toContain(toolGuidance);

  // The user-visible input is the bare prompt with nothing prepended.
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  expect(inputText).toBe("hello");
  expect(inputText).not.toContain(agentsGuidance);

  // Reported prompt size equals both instruction payloads joined by a blank line.
  const combinedInstructions = `${threadStartParams.developerInstructions ?? ""}\n\n${collaborationInstructions}`;
  expect(result.systemPromptReport?.systemPrompt.chars).toBe(combinedInstructions.length);
});
it("injects bounded MEMORY.md when memory tools are unavailable", async () => {
  // This test installs no tool-factory override, so the MEMORY.md body is
  // expected inline in the turn input rather than behind a memory-tool pointer.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const memorySummary = "Memory summary goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), memorySummary);

  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const pendingRun = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await pendingRun;

  type TurnStartPayload = { input?: Array<{ text?: string }> };
  const turnStartParams = requests.find((entry) => entry.method === "turn/start")
    ?.params as TurnStartPayload;
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  expect(inputText).not.toContain("OpenClaw Workspace Memory");
  expect(inputText).not.toContain("memory_search");
  expect(inputText).toContain(memorySummary);

  // The report must show the whole file as injected and not truncated.
  const statsByName = new Map(
    result.systemPromptReport?.injectedWorkspaceFiles.map((file) => [file.name, file]) ?? [],
  );
  expect(statsByName.get("MEMORY.md")).toMatchObject({
    rawChars: memorySummary.length,
    injectedChars: memorySummary.length,
    truncated: false,
  });
});
it("routes MEMORY.md through memory_get when search is unavailable", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const memorySummary = "Memory summary goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), memorySummary);

  // Only memory_get is offered; memory_search is deliberately left out.
  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("memory_get")]);
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  setAgentWorkspaceForTest(attemptParams, workspaceDir);

  const turnContext = await buildCodexTurnContextForTest(attemptParams, workspaceDir);
  const { inputText } = turnContext;

  // The prompt points at memory_get and never inlines the file body.
  expect(inputText).toContain("OpenClaw Workspace Memory");
  expect(inputText).toContain("memory_get");
  expect(inputText).not.toContain("memory_search");
  expect(inputText).not.toContain(memorySummary);

  const statsByName = new Map(
    turnContext.systemPromptReport.injectedWorkspaceFiles.map((file) => [file.name, file]),
  );
  expect(statsByName.get("MEMORY.md")).toMatchObject({
    rawChars: memorySummary.length,
    injectedChars: 0,
    truncated: false,
  });
});
it("reports MEMORY.md as truncated when no-tool fallback exceeds the bootstrap budget", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // SOUL.md alone (14 chars x 80) is longer than the 1000-char limits set below.
  const soulGuidance = "Soul guidance ".repeat(80);
  const memorySummary = "Memory summary goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "SOUL.md"), soulGuidance);
  await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), memorySummary);

  const { waitForMethod, completeTurn } = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  const bootstrapLimits = { bootstrapMaxChars: 1000, bootstrapTotalMaxChars: 1000 };
  attemptParams.config = {
    agents: { defaults: bootstrapLimits },
  } as EmbeddedRunAttemptParams["config"];

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await pendingRun;

  // MEMORY.md is reported with zero injected chars and flagged as truncated.
  const statsByName = new Map(
    result.systemPromptReport?.injectedWorkspaceFiles.map((file) => [file.name, file]) ?? [],
  );
  expect(statsByName.get("MEMORY.md")).toMatchObject({
    rawChars: memorySummary.length,
    injectedChars: 0,
    truncated: true,
  });
});
it("keeps MEMORY.md out of the Codex workspace context budget", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // 15 chars x 300 = 4500 chars, well above both limits configured below.
  const memorySummary = "Memory summary ".repeat(300);
  const hookContext = "Hook context survives the memory budget.";
  const hookPath = path.join(workspaceDir, "ZZZ.md");
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), memorySummary);

  // Append a hook-supplied file after the files already in the bootstrap context.
  type BootstrapFileEntry = { content: string; missing: boolean; name?: string; path: string };
  registerInternalHook("agent:bootstrap", (event) => {
    const bootstrapContext = event.context as { bootstrapFiles: BootstrapFileEntry[] };
    const hookFile: BootstrapFileEntry = {
      name: "ZZZ.md",
      path: hookPath,
      content: hookContext,
      missing: false,
    };
    bootstrapContext.bootstrapFiles = [...bootstrapContext.bootstrapFiles, hookFile];
  });

  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  attemptParams.config = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        bootstrapMaxChars: 1000,
        bootstrapTotalMaxChars: 2000,
      },
    },
  } as EmbeddedRunAttemptParams["config"];
  const memoryToolNames = ["memory_search", "memory_get"];
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    memoryToolNames.map((toolName) => createRuntimeDynamicTool(toolName)),
  );

  const turnContext = await buildCodexTurnContextForTest(attemptParams, workspaceDir);
  const { inputText } = turnContext;
  expect(inputText).toContain("OpenClaw Workspace Memory");
  expect(inputText).not.toContain(memorySummary);
  expect(inputText).toContain(hookContext);

  const statsByName = new Map(
    turnContext.systemPromptReport.injectedWorkspaceFiles.map((file) => [file.name, file]),
  );
  // rawChars is compared against the content without its trailing whitespace.
  expect(statsByName.get("MEMORY.md")).toMatchObject({
    rawChars: memorySummary.trimEnd().length,
    injectedChars: 0,
    truncated: false,
  });
  expect(statsByName.get("ZZZ.md")).toMatchObject({
    rawChars: hookContext.length,
    injectedChars: hookContext.length,
    truncated: false,
  });
});
it("keeps extra MEMORY.md bootstrap files in Codex workspace context", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const rootMemory = "Root memory should stay tool-routed.";
  const nestedMemory = "Nested package memory remains prompt context.";
  const rootMemoryPath = path.join(workspaceDir, "MEMORY.md");
  const nestedMemoryPath = path.join(workspaceDir, "packages/pkg/MEMORY.md");
  // The recursive mkdir for the nested directory also creates the workspace root.
  await fs.mkdir(path.dirname(nestedMemoryPath), { recursive: true });
  await fs.writeFile(rootMemoryPath, rootMemory);
  await fs.writeFile(nestedMemoryPath, nestedMemory);

  // The hook adds a second file that is also named MEMORY.md but lives at a
  // nested path, so the two entries are only distinguishable by path.
  type BootstrapFileEntry = { content: string; missing: boolean; name?: string; path: string };
  registerInternalHook("agent:bootstrap", (event) => {
    const bootstrapContext = event.context as { bootstrapFiles: BootstrapFileEntry[] };
    const nestedEntry: BootstrapFileEntry = {
      name: "MEMORY.md",
      path: nestedMemoryPath,
      content: nestedMemory,
      missing: false,
    };
    bootstrapContext.bootstrapFiles = [...bootstrapContext.bootstrapFiles, nestedEntry];
  });

  const memoryToolNames = ["memory_search", "memory_get"];
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    memoryToolNames.map((toolName) => createRuntimeDynamicTool(toolName)),
  );
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  setAgentWorkspaceForTest(attemptParams, workspaceDir);

  const turnContext = await buildCodexTurnContextForTest(attemptParams, workspaceDir);
  const { inputText } = turnContext;
  expect(inputText).toContain("OpenClaw Workspace Memory");
  expect(inputText).not.toContain(rootMemory);
  expect(inputText).toContain(nestedMemory);

  // Look the entries up by path because both share the same file name.
  const reportedFiles = turnContext.systemPromptReport.injectedWorkspaceFiles;
  const rootMemoryStats = reportedFiles.find((file) => file.path === rootMemoryPath);
  const nestedMemoryStats = reportedFiles.find((file) => file.path === nestedMemoryPath);
  expect(rootMemoryStats).toMatchObject({
    rawChars: rootMemory.length,
    injectedChars: 0,
    truncated: false,
  });
  expect(nestedMemoryStats).toMatchObject({
    rawChars: nestedMemory.length,
    injectedChars: nestedMemory.length,
    truncated: false,
  });
});
it("injects MEMORY.md when active workspace is not the memory tool workspace", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const memorySummary = "Memory summary goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), memorySummary);

  const memoryToolNames = ["memory_search", "memory_get"];
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    memoryToolNames.map((toolName) => createRuntimeDynamicTool(toolName)),
  );
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  // The agent workspace is set to a different directory than the one that
  // holds MEMORY.md and is passed to the context builder below.
  const agentWorkspaceDir = path.join(tempDir, "memory-workspace");
  setAgentWorkspaceForTest(attemptParams, agentWorkspaceDir);

  const turnContext = await buildCodexTurnContextForTest(attemptParams, workspaceDir);
  const { inputText } = turnContext;
  // Both memory tools exist, yet the file body is inlined and no pointer appears.
  expect(inputText).not.toContain("OpenClaw Workspace Memory");
  expect(inputText).toContain(memorySummary);

  const statsByName = new Map(
    turnContext.systemPromptReport.injectedWorkspaceFiles.map((file) => [file.name, file]),
  );
  expect(statsByName.get("MEMORY.md")).toMatchObject({
    rawChars: memorySummary.length,
    injectedChars: memorySummary.length,
    truncated: false,
  });
});
it("reports hook-supplied bootstrap files that only expose path and content", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const soulPath = path.join(workspaceDir, "SOUL.md");
  const soulGuidance = "Hook supplied soul guidance.";
  // SOUL.md is never written to disk; only the hook supplies it.
  await fs.mkdir(workspaceDir, { recursive: true });

  // Replace the bootstrap file list with one entry that has no `name` field.
  type NamelessBootstrapFile = { content: string; missing: boolean; path: string };
  registerInternalHook("agent:bootstrap", (event) => {
    const bootstrapContext = event.context as { bootstrapFiles: NamelessBootstrapFile[] };
    const hookFile: NamelessBootstrapFile = {
      path: soulPath,
      content: soulGuidance,
      missing: false,
    };
    bootstrapContext.bootstrapFiles = [hookFile];
  });

  const { waitForMethod, completeTurn } = createStartedThreadHarness();
  const pendingRun = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await pendingRun;

  // The report is expected to contain exactly one entry, named "SOUL.md".
  const expectedSoulEntry = expect.objectContaining({
    name: "SOUL.md",
    path: soulPath,
    rawChars: soulGuidance.length,
    injectedChars: soulGuidance.length,
    truncated: false,
  });
  expect(result.systemPromptReport?.injectedWorkspaceFiles).toEqual([expectedSoulEntry]);
});
it("points heartbeat Codex turns at HEARTBEAT.md without injecting its contents", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const heartbeatPath = path.join(workspaceDir, "HEARTBEAT.md");
  const heartbeatChecklist = "Heartbeat checklist goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(heartbeatPath, heartbeatChecklist);

  // Configure the attempt as a lightweight heartbeat run.
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "heartbeat";
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "heartbeat";
  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  // The checklist body must not appear in any of the three payloads.
  const threadStartParams = requests.find((entry) => entry.method === "thread/start")
    ?.params as { developerInstructions?: string };
  expect(threadStartParams.developerInstructions).not.toContain(heartbeatChecklist);

  type TurnStartPayload = {
    input?: Array<{ text?: string }>;
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };
  const turnStartParams = requests.find((entry) => entry.method === "turn/start")
    ?.params as TurnStartPayload;
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";
  expect(inputText).not.toContain(heartbeatChecklist);

  // The collaboration instructions carry a pointer to the file, including its path.
  const pointerFragments = [
    "HEARTBEAT.md exists",
    "Read it before proceeding with this heartbeat",
    heartbeatPath,
  ];
  for (const fragment of pointerFragments) {
    expect(collaborationInstructions).toContain(fragment);
  }
  expect(collaborationInstructions).not.toContain(heartbeatChecklist);
});
it("omits heartbeat Codex workspace pointers for empty HEARTBEAT.md files", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await fs.mkdir(workspaceDir, { recursive: true });
  // The file exists but holds only newlines.
  await fs.writeFile(path.join(workspaceDir, "HEARTBEAT.md"), "\n\n");

  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "heartbeat";
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "heartbeat";
  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  type TurnStartPayload = {
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };
  const turnStartParams = requests.find((entry) => entry.method === "turn/start")
    ?.params as TurnStartPayload;
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";
  // The heartbeat framing is still present, but there is no file pointer.
  expect(collaborationInstructions).toContain("This is an OpenClaw heartbeat turn");
  expect(collaborationInstructions).not.toContain("HEARTBEAT.md exists");
});
it("keeps lightweight cron Codex turns out of OpenClaw bootstrap context", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const exactCommand =
    "cd /Users/phaedrus/Projects/openclaw && /Users/phaedrus/clawd/scripts/clawsweeper-related-scan.py";
  const agentsGuidance = "Follow AGENTS guidance.";
  const soulGuidance = "Soul voice goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "AGENTS.md"), agentsGuidance);
  await fs.writeFile(path.join(workspaceDir, "SOUL.md"), soulGuidance);

  // Lightweight cron run whose prompt is the exact command, plus a skills
  // snapshot that must not reach the developer instructions.
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "cron";
  attemptParams.prompt = exactCommand;
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "cron";
  attemptParams.skillsSnapshot = {
    prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
    skills: [],
  };
  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await waitForMethod("turn/start");
  await new Promise<void>((done) => {
    setImmediate(done);
  });
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await pendingRun;

  type ThreadStartPayload = {
    developerInstructions?: string;
    config?: Record<string, unknown>;
  };
  const threadStartParams = requests.find((entry) => entry.method === "thread/start")
    ?.params as ThreadStartPayload;
  expect(threadStartParams.config?.project_doc_max_bytes).toBe(0);
  for (const excluded of [soulGuidance, agentsGuidance, "<available_skills>"]) {
    expect(threadStartParams.developerInstructions).not.toContain(excluded);
  }

  // The turn input is the command verbatim.
  const turnStartParams = requests.find((entry) => entry.method === "turn/start")
    ?.params as { input?: Array<{ text?: string }> };
  expect(turnStartParams.input?.[0]?.text).toBe(exactCommand);

  // The skills report is empty; its hash is 64 lowercase hex characters.
  expect(result.systemPromptReport?.skills).toMatchObject({ promptChars: 0, entries: [] });
  expect(result.systemPromptReport?.skills.hash).toMatch(/^[a-f0-9]{64}$/u);
});
it("forwards Codex app-server verbose tool summaries and completed output", async () => {
  const onToolResult = vi.fn();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.verboseLevel = "full";
  attemptParams.onToolResult = onToolResult;
  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");

  // Builds a fresh notification for the same "read" tool call; only the
  // lifecycle-specific fields differ between item/started and item/completed.
  const readToolNotification = (
    method: "item/started" | "item/completed",
    lifecycle: {
      status: string;
      contentItems: Array<{ type: string; text: string }> | null;
      success: boolean | null;
      durationMs: number | null;
    },
  ) => ({
    method,
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        namespace: null,
        tool: "read",
        arguments: { path: "README.md" },
        ...lifecycle,
      },
    },
  });

  await harness.notify(
    readToolNotification("item/started", {
      status: "inProgress",
      contentItems: null,
      success: null,
      durationMs: null,
    }),
  );
  await harness.notify(
    readToolNotification("item/completed", {
      status: "completed",
      contentItems: [{ type: "inputText", text: "file contents" }],
      success: true,
      durationMs: 12,
    }),
  );
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  // First call carries the summary only; second call appends the fenced output.
  const summaryText = "📖 Read: `from README.md`";
  expect(onToolResult).toHaveBeenCalledTimes(2);
  expect(onToolResult).toHaveBeenNthCalledWith(1, { text: summaryText });
  expect(onToolResult).toHaveBeenNthCalledWith(2, {
    text: summaryText + "\n```txt\nfile contents\n```",
  });
});
it("promotes implicit Codex yolo approval policy when OpenClaw tool policy exists", async () => {
  // Register a before_tool_call hook; the log payload asserted below reports it.
  const hookRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(hookRegistry);
  const infoSpy = vi.spyOn(embeddedAgentLog, "info").mockImplementation(() => undefined);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const pendingRun = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  // No explicit mode is configured: approval is promoted, the sandbox is not.
  const threadStartParams = requests.find((entry) => entry.method === "thread/start")?.params as
    | Record<string, unknown>
    | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("untrusted");
  expect(threadStartParams?.sandbox).toBe("danger-full-access");

  const expectedPromotion = {
    from: "never",
    to: "untrusted",
    beforeToolCallHook: true,
    trustedToolPolicies: [],
  };
  expect(infoSpy).toHaveBeenCalledWith(
    "codex app-server approval policy promoted for OpenClaw tool policy",
    expectedPromotion,
  );
});
it("keeps explicit Codex yolo mode unpromoted when OpenClaw tool policy exists", async () => {
  const hookRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(hookRegistry);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  // Yolo mode is requested explicitly through the plugin config.
  const explicitYolo = { pluginConfig: { appServer: { mode: "yolo" } } };
  const pendingRun = runCodexAppServerAttempt(
    createParams(sessionFile, workspaceDir),
    explicitYolo,
  );
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  // The approval policy stays "never" even though a tool-policy hook exists.
  const threadStartParams = requests.find((entry) => entry.method === "thread/start")?.params as
    | Record<string, unknown>
    | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("never");
  expect(threadStartParams?.sandbox).toBe("danger-full-access");
});
it("keeps normalized full exec mode unpromoted when OpenClaw tool policy exists", async () => {
  const hookRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(hookRegistry);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  // Full exec mode is set through the OpenClaw tools config rather than the
  // Codex plugin config.
  attemptParams.config = { tools: { exec: { mode: "full" } } } as never;
  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  const threadStartParams = requests.find((entry) => entry.method === "thread/start")?.params as
    | Record<string, unknown>
    | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("never");
  expect(threadStartParams?.sandbox).toBe("danger-full-access");
});
it("ignores invalid Codex app-server env overrides when promoting tool policy approval", async () => {
  const hookRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(hookRegistry);
  // A whitespace-only mode and the approval policy value "always" are both
  // treated as invalid overrides by this test.
  vi.stubEnv("OPENCLAW_CODEX_APP_SERVER_MODE", " ");
  vi.stubEnv("OPENCLAW_CODEX_APP_SERVER_APPROVAL_POLICY", "always");

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const pendingRun = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  // Promotion still happens, exactly as when no override is set.
  const threadStartParams = requests.find((entry) => entry.method === "thread/start")?.params as
    | Record<string, unknown>
    | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("untrusted");
});
it("preserves a healthy binding when invalid image cleanup hits a transient thread", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: JSON.stringify([{ name: "message" }]),
  });

  // thread/start yields a new transient thread; turn/start then fails with an
  // invalid-image error. Any other method resolves to undefined.
  const imageError = "invalid image_url base64 payload";
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-transient");
      case "turn/start":
        throw new Error(imageError);
      default:
        return undefined;
    }
  });

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await expect(attempt).rejects.toThrow(imageError);

  // The transient thread is unsubscribed after the failure.
  const observedMethods = harness.requests.map((entry) => entry.method);
  expect(observedMethods).toEqual(["thread/start", "turn/start", "thread/unsubscribe"]);

  // The stored binding still points at the pre-existing thread.
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.threadId).toBe("thread-existing");
});
it("preserves a healthy binding when the server rejects unsupported image input", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // The existing thread resumes successfully, then turn/start is rejected.
  const rejection = "unsupported image input";
  const harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/resume":
        return threadStartResult("thread-existing");
      case "turn/start":
        throw new Error(rejection);
      default:
        return {};
    }
  });

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await expect(attempt).rejects.toThrow(rejection);

  const observedMethods = harness.requests.map((entry) => entry.method);
  expect(observedMethods).toEqual(["thread/resume", "turn/start", "thread/unsubscribe"]);

  // The binding keeps the existing thread id after the failed turn.
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.threadId).toBe("thread-existing");
});
// Verifies that aborting an attempt whose turn/interrupt request fails (the
// handler throws a "client is closed" error) still resolves the run as aborted
// and emits no process-level unhandledRejection.
it("does not leak unhandled rejections when shutdown closes before interrupt", async () => {
// Collect every unhandledRejection reason raised while the test body runs.
const unhandledRejections: unknown[] = [];
const onUnhandledRejection = (reason: unknown) => {
unhandledRejections.push(reason);
};
process.on("unhandledRejection", onUnhandledRejection);
try {
// Only turn/interrupt is overridden; it always throws.
const { waitForMethod } = createStartedThreadHarness(async (method) => {
if (method === "turn/interrupt") {
throw new Error("codex app-server client is closed");
}
});
const abortController = new AbortController();
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.abortSignal = abortController.signal;
const run = runCodexAppServerAttempt(params);
// Abort only after the turn has been started.
await waitForMethod("turn/start");
abortController.abort("shutdown");
const result = await run;
expect(result.aborted).toBe(true);
// Yield one setImmediate tick so a late rejection, if any, would be recorded
// before the assertion below.
await new Promise((resolve) => setImmediate(resolve));
expect(unhandledRejections).toStrictEqual([]);
} finally {
// Always detach the listener, even when an assertion above fails.
process.off("unhandledRejection", onUnhandledRejection);
}
});
it("forwards image attachments to the app-server turn input", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  // Base64 payload attached to the attempt as an image/png.
  const pngBase64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=";
  attemptParams.model = createCodexTestModel("codex", ["text", "image"]);
  attemptParams.images = [{ type: "image", mimeType: "image/png", data: pngBase64 }];

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  type TurnInputItem = { text?: string; text_elements?: unknown[]; type?: string; url?: string };
  const turnStartParams = harness.requests.find((entry) => entry.method === "turn/start")
    ?.params as { input?: TurnInputItem[] } | undefined;

  // The prompt text comes first, followed by the image encoded as a data URL.
  const expectedInput: TurnInputItem[] = [
    { type: "text", text: "hello", text_elements: [] },
    { type: "image", url: `data:image/png;base64,${pngBase64}` },
  ];
  expect(turnStartParams?.input).toEqual(expectedInput);
});
// Verifies that a turn completion delivered while the turn/start request is
// still pending is not lost: the attempt resolves without being aborted or
// timed out.
it("does not drop turn completion notifications emitted while turn/start is in flight", async () => {
// Declared before assignment so the request handler closure can refer to the
// harness it belongs to.
let harness: ReturnType<typeof createAppServerHarness>;
harness = createAppServerHarness(async (method) => {
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
// Deliver the completion first, then let the turn/start response resolve.
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
return turnStartResult("turn-1", "completed");
}
return {};
});
// No external completion is triggered; the run must finish on its own.
const result = await runCodexAppServerAttempt(
createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
);
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
});
// Verifies that closing the client after a terminal notification was already
// delivered during turn/start does not surface as a prompt error, abort, or
// timeout.
it("does not fail when a buffered terminal notification is followed by client close", async () => {
// Declared before assignment so the request handler closure can refer to the
// harness it belongs to.
let harness: ReturnType<typeof createAppServerHarness>;
// Resolved by the handler once the terminal notification has been sent, so the
// test body knows when it may close the client.
let resolveBufferedTerminal!: () => void;
const bufferedTerminal = new Promise<void>((resolve) => {
resolveBufferedTerminal = resolve;
});
harness = createAppServerHarness(async (method) => {
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
// Both notifications are sent before the turn/start response resolves.
await harness.notify({
method: "item/started",
params: {
threadId: "thread-1",
turnId: "turn-1",
item: { id: "tool-1", type: "commandExecution" },
},
});
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
resolveBufferedTerminal();
// The response itself still reports the turn as in progress.
return turnStartResult("turn-1", "inProgress");
}
return {};
});
const run = runCodexAppServerAttempt(
createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
// NOTE(review): presumably large enough that the terminal idle timeout never
// fires during this test; confirm against the option's semantics.
{ turnTerminalIdleTimeoutMs: 60_000 },
);
await bufferedTerminal;
// Yield one setImmediate tick before closing the client.
await new Promise<void>((resolve) => setImmediate(resolve));
harness.close();
const result = await run;
expect(result.promptError ?? undefined).toBeUndefined();
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
});
// Verifies that a turn/started notification delivered before the turn/start
// response returns does not lead to a turn/interrupt request or a timed-out
// result, even with a 5 ms completion idle timeout.
it("does not time out when turn progress arrives before turn/start returns", async () => {
// Declared before assignment so the request handler closure can refer to the
// harness it belongs to.
let harness: ReturnType<typeof createAppServerHarness>;
harness = createAppServerHarness(async (method) => {
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
// Progress notification is sent before the turn/start response resolves.
await harness.notify({
method: "turn/started",
params: {
threadId: "thread-1",
turnId: "turn-1",
turn: { id: "turn-1", status: "inProgress" },
},
});
return turnStartResult("turn-1", "inProgress");
}
return {};
});
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 60_000;
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 5,
turnTerminalIdleTimeoutMs: 60_000,
});
await harness.waitForMethod("turn/start");
// Wait 20 ms, four times the 5 ms completion idle timeout configured above.
await new Promise((resolve) => setTimeout(resolve, 20));
// No interrupt may have been requested during that wait.
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
});
it("completes when turn/start returns a terminal turn without a follow-up notification", async () => {
  // The turn/start response already contains a completed turn with its final
  // agent message; this test sends no turn/completed notification.
  const terminalTurnResponse = {
    turn: {
      id: "turn-1",
      status: "completed",
      items: [{ type: "agentMessage", id: "msg-1", text: "done from response" }],
    },
  };
  const harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        return terminalTurnResponse;
      default:
        return {};
    }
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const result = await runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));

  const observedMethods = harness.requests.map((entry) => entry.method);
  expect(observedMethods).toContain("turn/start");
  // The assistant text is taken from the response payload itself.
  expect(result.assistantTexts).toEqual(["done from response"]);
  expect(result.aborted).toBe(false);
  expect(result.timedOut).toBe(false);
});
it("surfaces Codex-native image generation saved paths as reply media", async () => {
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  // Complete the turn with a single imageGeneration item and no assistant message.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [
          {
            type: "imageGeneration",
            id: "ig_123",
            status: "completed",
            revisedPrompt: "A tiny blue square",
            result: "Zm9v",
            savedPath: "/tmp/codex-home/generated_images/session-1/ig_123.png",
          },
        ],
      },
    },
  });
  const outcome = await attempt;
  expect(outcome.assistantTexts).toEqual([]);
  expect(outcome.toolMediaUrls).toEqual(["/tmp/codex-home/generated_images/session-1/ig_123.png"]);
});
it("does not complete on unscoped turn/completed notifications", async () => {
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  // Track settlement without awaiting so the test can observe "still pending".
  let settled = false;
  void attempt.then(() => {
    settled = true;
  });
  await harness.waitForMethod("turn/start");
  // First completion carries no threadId: the attempt is expected to stay pending.
  await harness.notify({
    method: "turn/completed",
    params: {
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-wrong", text: "wrong completion" }],
      },
    },
  });
  await new Promise<void>((resolve) => setImmediate(resolve));
  expect(settled).toBe(false);
  // Second completion is scoped to thread-1 and should finish the attempt.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-right", text: "final completion" }],
      },
    },
  });
  const outcome = await attempt;
  expect(outcome.assistantTexts).toEqual(["final completion"]);
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
it("ignores turn/completed notifications for other subscribed threads", async () => {
  // Silence and capture warnings so the test can assert none mention a turn mismatch.
  const warnSpy = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  let settled = false;
  void attempt.then(() => {
    settled = true;
  });
  await harness.waitForMethod("turn/start");
  // A completion for a different thread must neither settle the attempt nor warn.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-other",
      turn: {
        id: "turn-other",
        status: "completed",
        items: [],
      },
    },
  });
  await new Promise<void>((resolve) => setImmediate(resolve));
  expect(settled).toBe(false);
  const warnedAboutMismatch = warnSpy.mock.calls.some(([message]) =>
    message.includes("turn/completed did not match active turn"),
  );
  expect(warnedAboutMismatch).toBe(false);
  // The completion for the active thread finishes the attempt.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-right", text: "final completion" }],
      },
    },
  });
  const outcome = await attempt;
  expect(outcome.assistantTexts).toEqual(["final completion"]);
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
it("routes Computer Use MCP elicitations through the native bridge", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  // Both handlers are captured from the fake client once the attempt registers them.
  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ((request: ServerRequest) => Promise<unknown>) | undefined;
  const bridgeSpy = vi
    .spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest")
    .mockResolvedValue({
      action: "accept",
      content: { approve: true },
      _meta: null,
    });
  // Fake app-server: one installed computer-use plugin plus a "desktop-control" MCP server.
  const requestMock = vi.fn(async (method: string) => {
    switch (method) {
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-bundled",
              path: "/marketplaces/openai-bundled",
              plugins: [
                {
                  id: "computer-use@openai-bundled",
                  name: "computer-use",
                  source: {
                    type: "local",
                    path: "/marketplaces/openai-bundled/plugins/computer-use",
                  },
                  installed: true,
                  enabled: true,
                },
              ],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-bundled",
            marketplacePath: "/marketplaces/openai-bundled",
            summary: {
              id: "computer-use@openai-bundled",
              name: "computer-use",
              source: {
                type: "local",
                path: "/marketplaces/openai-bundled/plugins/computer-use",
              },
              installed: true,
              enabled: true,
            },
            description: null,
            skills: [],
            apps: [],
            mcpServers: ["computer-use"],
          },
        };
      case "mcpServerStatus/list":
        return {
          data: [
            {
              name: "desktop-control",
              tools: {
                "computer-use.get_app_state": {},
              },
            },
          ],
          nextCursor: null,
        };
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: (request: ServerRequest) => Promise<unknown>) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    {
      pluginConfig: {
        computerUse: {
          enabled: true,
          marketplaceName: "openai-bundled",
          mcpServerName: "desktop-control",
        },
      },
    },
  );
  await vi.waitFor(() => expect(serverRequestHandler).toBeTypeOf("function"));
  // Simulate the app-server asking for an elicitation from the configured MCP server.
  const elicitationResponse = await serverRequestHandler?.({
    id: "request-elicitation-1",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      serverName: "desktop-control",
      mode: "form",
    },
  });
  expect(elicitationResponse).toEqual({
    action: "accept",
    content: { approve: true },
    _meta: null,
  });
  const [bridgeArgs] = mockCall(bridgeSpy, "elicitation bridge") as [
    {
      requestParams?: { serverName?: string };
      computerUseMcpServerName?: string;
      threadId?: string;
      turnId?: string;
    },
  ];
  expect(bridgeArgs.threadId).toBe("thread-1");
  expect(bridgeArgs.turnId).toBe("turn-1");
  expect(bridgeArgs.requestParams?.serverName).toBe("desktop-control");
  expect(bridgeArgs.computerUseMcpServerName).toBe("desktop-control");
  // thread/start must have opted into granular MCP elicitation approvals.
  const recordedCalls = requestMock.mock.calls as unknown as Array<[string, unknown, unknown?]>;
  const threadStartCall = recordedCalls.find(([method]) => method === "thread/start");
  const threadStartParams = threadStartCall?.[1] as
    | { approvalPolicy?: { granular?: { mcp_elicitations?: boolean } } }
    | undefined;
  expect(threadStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);
  await emitNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await attempt;
});
it("passes session plugin app policy context to elicitation handling", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  type GranularApprovalParams =
    | { approvalPolicy?: { granular?: { mcp_elicitations?: boolean } } }
    | undefined;
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });
  // Seed the app inventory cache with the calendar app before the attempt runs.
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });
  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ((request: ServerRequest) => Promise<unknown>) | undefined;
  const bridgeSpy = vi
    .spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest")
    .mockResolvedValue({
      action: "decline",
      content: null,
      _meta: null,
    });
  const requestMock = vi.fn(async (method: string) => {
    switch (method) {
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [
                {
                  id: "google-calendar",
                  name: "google-calendar",
                  source: { type: "remote" },
                  installed: true,
                  enabled: true,
                  installPolicy: "AVAILABLE",
                  authPolicy: "ON_USE",
                  availability: "AVAILABLE",
                  interface: null,
                },
              ],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: {
              id: "google-calendar",
              name: "google-calendar",
              source: { type: "remote" },
              installed: true,
              enabled: true,
              installPolicy: "AVAILABLE",
              authPolicy: "ON_USE",
              availability: "AVAILABLE",
              interface: null,
            },
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: (request: ServerRequest) => Promise<unknown>) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.agentDir = agentDir;
  const attempt = runCodexAppServerAttempt(attemptParams, { pluginConfig });
  await vi.waitFor(() => expect(serverRequestHandler).toBeTypeOf("function"));
  const elicitationResponse = await serverRequestHandler?.({
    id: "request-elicitation-1",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      serverName: "google-calendar",
      mode: "form",
    },
  });
  expect(elicitationResponse).toEqual({
    action: "decline",
    content: null,
    _meta: null,
  });
  // The bridge should receive the plugin app policy derived for this session.
  const [bridgeArgs] = mockCall(bridgeSpy, "elicitation bridge") as [
    {
      pluginAppPolicyContext?: {
        apps?: Record<string, { mcpServerNames?: string[]; pluginName?: string }>;
      };
      threadId?: string;
      turnId?: string;
    },
  ];
  expect(bridgeArgs.threadId).toBe("thread-1");
  expect(bridgeArgs.turnId).toBe("turn-1");
  const calendarPolicy = bridgeArgs.pluginAppPolicyContext?.apps?.["google-calendar-app"];
  expect(calendarPolicy?.pluginName).toBe("google-calendar");
  expect(calendarPolicy?.mcpServerNames).toEqual(["google-calendar"]);
  // Both thread/start and turn/start must enable granular MCP elicitation approvals.
  const recordedCalls = requestMock.mock.calls as unknown as Array<[string, unknown, unknown?]>;
  const threadStartCall = recordedCalls.find(([method]) => method === "thread/start");
  const threadStartParams = threadStartCall?.[1] as GranularApprovalParams;
  expect(threadStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);
  const turnStartCall = recordedCalls.find(([method]) => method === "turn/start");
  const turnStartParams = turnStartCall?.[1] as GranularApprovalParams;
  expect(turnStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);
  await emitNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await attempt;
});
it("keys plugin app inventory by the resolved Codex account", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const authProfileId = "openai-codex:work";
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });
  // Seed the cache under a key that includes the auth profile and account id.
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
      authProfileId,
      accountId: "account-work",
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [
                {
                  id: "google-calendar",
                  name: "google-calendar",
                  source: { type: "remote" },
                  installed: true,
                  enabled: true,
                  installPolicy: "AVAILABLE",
                  authPolicy: "ON_USE",
                  availability: "AVAILABLE",
                  interface: null,
                },
              ],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: {
              id: "google-calendar",
              name: "google-calendar",
              source: { type: "remote" },
              installed: true,
              enabled: true,
              installPolicy: "AVAILABLE",
              authPolicy: "ON_USE",
              availability: "AVAILABLE",
              interface: null,
            },
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      case "app/list":
        // A live app/list call means the seeded cache entry was not found.
        throw new Error("app/list should use the account-keyed cache entry");
      default:
        return undefined;
    }
  });
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.agentDir = agentDir;
  attemptParams.authProfileId = authProfileId;
  // OAuth profile whose accountId matches the one used in the seeded cache key.
  attemptParams.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access-token",
        refresh: "refresh-token",
        expires: Date.now() + 60_000,
        accountId: "account-work",
        email: "work@example.test",
      },
    },
  };
  const attempt = runCodexAppServerAttempt(attemptParams, { pluginConfig });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  const threadStartEntry = harness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStartEntry?.params as
    | { config?: { apps?: Record<string, { enabled?: boolean }> } }
    | undefined;
  expect(threadStartParams?.config?.apps?.["google-calendar-app"]?.enabled).toBe(true);
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).not.toContain("app/list");
});
it("keys plugin app inventory by inherited API key fallback credentials", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });
  // Seed the cache under a fingerprint computed from the OLD env API key.
  const staleEnvFingerprint = resolveCodexAppServerEnvApiKeyCacheKey({
    startOptions: appServer.start,
    baseEnv: { CODEX_API_KEY: "old-codex-env-key" },
  });
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
      envApiKeyFingerprint: staleEnvFingerprint,
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });
  // Run the attempt with a different env key so the seeded entry should not match.
  vi.stubEnv("CODEX_API_KEY", "new-codex-env-key");
  vi.stubEnv("OPENAI_API_KEY", "");
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "app/list":
        return {
          data: [
            {
              id: "google-calendar-app",
              name: "Google Calendar",
              description: null,
              logoUrl: null,
              logoUrlDark: null,
              distributionChannel: null,
              branding: null,
              appMetadata: null,
              labels: null,
              installUrl: null,
              isAccessible: true,
              isEnabled: true,
              pluginDisplayNames: [],
            },
          ],
          nextCursor: null,
        };
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [
                {
                  id: "google-calendar",
                  name: "google-calendar",
                  source: { type: "remote" },
                  installed: true,
                  enabled: true,
                  installPolicy: "AVAILABLE",
                  authPolicy: "ON_USE",
                  availability: "AVAILABLE",
                  interface: null,
                },
              ],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: {
              id: "google-calendar",
              name: "google-calendar",
              source: { type: "remote" },
              installed: true,
              enabled: true,
              installPolicy: "AVAILABLE",
              authPolicy: "ON_USE",
              availability: "AVAILABLE",
              interface: null,
            },
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      default:
        return undefined;
    }
  });
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.agentDir = agentDir;
  const attempt = runCodexAppServerAttempt(attemptParams, { pluginConfig });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  // A live app/list request is expected because the cached key used another fingerprint.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("app/list");
  const threadStartEntry = harness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStartEntry?.params as
    | { config?: { apps?: Record<string, { enabled?: boolean }> } }
    | undefined;
  expect(threadStartParams?.config?.apps?.["google-calendar-app"]?.enabled).toBe(true);
});
it("times out app-server startup before thread setup can hang forever", async () => {
  // The client factory never settles, so only the startup timeout can end the attempt.
  const neverSettles = () => new Promise<never>(() => undefined);
  setCodexAppServerClientFactoryForTest(neverSettles);
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 1;
  const attempt = runCodexAppServerAttempt(attemptParams, { startupTimeoutFloorMs: 1 });
  await expect(attempt).rejects.toThrow("codex app-server startup timed out");
  // Queueing a message for the session is expected to be refused after the failure.
  expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
it("passes the selected auth profile into app-server startup", async () => {
  // Record the arguments the harness receives each time the app-server is started.
  const startedAuthProfileIds: Array<string | undefined> = [];
  const startedAgentDirs: Array<string | undefined> = [];
  const harness = createStartedThreadHarness(undefined, {
    onStart: (authProfileId, agentDir) => {
      startedAuthProfileIds.push(authProfileId);
      startedAgentDirs.push(agentDir);
    },
  });
  const expectedAgentDir = path.join(tempDir, "agent");
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.authProfileId = "openai-codex:work";
  attemptParams.agentDir = expectedAgentDir;
  const attempt = runCodexAppServerAttempt(attemptParams);
  await vi.waitFor(() => expect(startedAuthProfileIds).toEqual(["openai-codex:work"]), {
    interval: 1,
  });
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  // Exactly one start, with the selected profile and agent directory.
  expect(startedAuthProfileIds).toEqual(["openai-codex:work"]);
  expect(startedAgentDirs).toEqual([expectedAgentDir]);
  expect(harness.requests.map((entry) => entry.method)).toContain("turn/start");
});
it("times out turn start before the active run handle is installed", async () => {
  // Collect only model.call.* diagnostics emitted during the attempt.
  const modelCallEvents: DiagnosticEventPayload[] = [];
  const stopDiagnostics = onInternalDiagnosticEvent((event) => {
    if (!event.type.startsWith("model.call.")) {
      return;
    }
    modelCallEvents.push(event);
  });
  const requestMock = vi.fn(
    async (method: string, _params?: unknown, options?: { timeoutMs?: number }) => {
      switch (method) {
        case "thread/start":
          return threadStartResult("thread-1");
        case "turn/start":
          // Reject after the timeout the caller passed for this request (0 ms if none).
          return await new Promise<never>((_, reject) => {
            setTimeout(() => reject(new Error("turn/start timed out")), options?.timeoutMs ?? 0);
          });
        default:
          return {};
      }
    },
  );
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: () => () => undefined,
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 1;
  attemptParams.config = {
    diagnostics: { enabled: true, otel: { enabled: true, traces: true } },
  } as never;
  try {
    await expect(runCodexAppServerAttempt(attemptParams)).rejects.toThrow("turn/start timed out");
    await flushDiagnosticEvents();
    const failureEvent = modelCallEvents.find((event) => event.type === "model.call.error") as
      | ({ failureKind?: string; errorCategory?: string } & DiagnosticEventPayload)
      | undefined;
    expect(failureEvent?.failureKind).toBe("timeout");
    expect(failureEvent?.errorCategory).toBe("timeout");
    expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
  } finally {
    // Always detach the diagnostics listener, even when an assertion fails.
    stopDiagnostics();
  }
});
it("keeps extended history enabled when resuming a bound Codex thread", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Pre-existing binding so the attempt resumes instead of starting a new thread.
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const harness = createResumeHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;
  expectResumeRequest(harness.requests, {
    threadId: "thread-existing",
    model: "gpt-5.4-codex",
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: "danger-full-access",
    persistExtendedHistory: true,
  });
  // The resume request must not carry the GPT-5 behavior contract text.
  const resumeEntry = harness.requests.find((request) => request.method === "thread/resume");
  const resumeParams = resumeEntry?.params as Record<string, unknown> | undefined;
  expect(resumeParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);
});
it("starts a fresh Codex thread before resume when the native rollout reaches the fallback fuse", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  // Session index next to the session file, reporting a small total token count.
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );
  // Native rollout for the bound thread, reporting 300k total tokens.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutRecord = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 300_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(rolloutRecord)}\n`,
  );
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.agentDir = agentDir;
  attemptParams.config = {
    agents: {
      defaults: {
        compaction: {
          truncateAfterCompaction: true,
          maxActiveTranscriptBytes: "1mb",
        },
      },
    },
  } as never;
  const attempt = runCodexAppServerAttempt(attemptParams, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  // A new thread is started rather than resuming the bound one, and the binding is updated.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.threadId).toBe("thread-1");
});
it("starts a fresh Codex thread before turn/start when the next prompt would exhaust native headroom", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );
  // Rollout reports 220k tokens used in the last turn against a 258.4k context window.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutRecord = {
    payload: {
      type: "token_count",
      info: {
        last_token_usage: {
          total_tokens: 220_000,
        },
        model_context_window: 258_400,
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(rolloutRecord)}\n`,
  );
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.agentDir = agentDir;
  // Deliberately oversized prompt for the next turn.
  attemptParams.prompt = "large prompt ".repeat(12_000);
  const attempt = runCodexAppServerAttempt(attemptParams, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.threadId).toBe("thread-1");
});
it("preserves bound auth when rotating a fallback-fuse native rollout", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  // The existing binding records the auth profile; the attempt params will not.
  await writeExistingBinding(sessionFile, workspaceDir, {
    authProfileId: "openai-codex:work",
    dynamicToolsFingerprint: "[]",
  });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutRecord = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 300_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(rolloutRecord)}\n`,
  );
  const startedAuthProfileIds: Array<string | undefined> = [];
  const harness = createStartedThreadHarness(undefined, {
    onStart: (authProfileId) => {
      startedAuthProfileIds.push(authProfileId);
    },
  });
  const attemptParams = createParams(sessionFile, workspaceDir);
  // Remove the explicit profile so it can only come from the stored binding.
  delete attemptParams.authProfileId;
  attemptParams.agentDir = agentDir;
  attemptParams.config = {
    agents: {
      defaults: {
        compaction: {
          truncateAfterCompaction: true,
          maxActiveTranscriptBytes: "1mb",
        },
      },
    },
  } as never;
  const attempt = runCodexAppServerAttempt(attemptParams, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await vi.waitFor(() => expect(startedAuthProfileIds).toEqual(["openai-codex:work"]), {
    interval: 1,
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  expect(startedAuthProfileIds).toEqual(["openai-codex:work"]);
  // The rotated binding keeps the profile and points at the new thread.
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.authProfileId).toBe("openai-codex:work");
  expect(savedBinding?.threadId).toBe("thread-1");
});
it("restarts the app-server once when a shared client closes during startup", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  // One method list per client the factory creates, in creation order.
  const methodsPerClient: string[][] = [];
  let clientsCreated = 0;
  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  setCodexAppServerClientFactoryForTest(async () => {
    const clientIndex = clientsCreated++;
    const seenMethods: string[] = [];
    methodsPerClient.push(seenMethods);
    return {
      request: vi.fn(async (method: string) => {
        seenMethods.push(method);
        if (method === "thread/resume") {
          // Only the first client reports itself closed on resume.
          if (clientIndex === 0) {
            throw new Error("codex app-server client is closed");
          }
          return threadStartResult("thread-existing");
        }
        if (method === "turn/start") {
          return turnStartResult();
        }
        return {};
      }),
      addNotificationHandler: (handler: typeof emitNotification) => {
        emitNotification = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    } as never;
  });
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await vi.waitFor(() => expect(methodsPerClient[1]).toContain("turn/start"), fastWait);
  await emitNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-existing",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  const outcome = await attempt;
  expect(outcome.aborted).toBe(false);
  expect(methodsPerClient).toEqual([
    ["thread/resume"],
    ["thread/resume", "turn/start", "thread/unsubscribe"],
  ]);
});
it("tolerates a second app-server close while retrying startup", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  // One method list per client the factory creates, in creation order.
  const methodsPerClient: string[][] = [];
  let clientsCreated = 0;
  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  setCodexAppServerClientFactoryForTest(async () => {
    const clientIndex = clientsCreated++;
    const seenMethods: string[] = [];
    methodsPerClient.push(seenMethods);
    return {
      request: vi.fn(async (method: string) => {
        seenMethods.push(method);
        if (method === "thread/resume") {
          // The first two clients both report themselves closed on resume.
          if (clientIndex < 2) {
            throw new Error("codex app-server client is closed");
          }
          return threadStartResult("thread-existing");
        }
        if (method === "turn/start") {
          return turnStartResult();
        }
        return {};
      }),
      addNotificationHandler: (handler: typeof emitNotification) => {
        emitNotification = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    } as never;
  });
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await vi.waitFor(() => expect(methodsPerClient[2]).toContain("turn/start"), fastWait);
  await emitNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-existing",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  const outcome = await attempt;
  expect(outcome.aborted).toBe(false);
  expect(methodsPerClient).toEqual([
    ["thread/resume"],
    ["thread/resume"],
    ["thread/resume", "turn/start", "thread/unsubscribe"],
  ]);
});
it("does not retire the shared Codex client when a spawned helper run fails with a logical thread/start error", async () => {
  const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
  // Restore the spy in `finally` so a failing assertion cannot leave it installed
  // for later tests.
  try {
    clearSpy.mockClear();
    let failedClient: unknown;
    setCodexAppServerClientFactoryForTest(async () => {
      const client = {
        request: vi.fn(async (method: string) => {
          if (method === "thread/start") {
            // RPC-level rejection: the server answered, so the transport is still usable.
            throw new CodexAppServerRpcError(
              { message: "401 authentication_error: Invalid bearer token" },
              "thread/start",
            );
          }
          return {};
        }),
        addNotificationHandler: vi.fn(() => () => undefined),
        addRequestHandler: vi.fn(() => () => undefined),
      };
      failedClient = client;
      return client as never;
    });
    const params = createParams(
      path.join(tempDir, "session.jsonl"),
      path.join(tempDir, "workspace"),
    );
    // Mark the run as a spawned helper of a parent session.
    params.spawnedBy = "agent:main:session-parent";
    await expect(runCodexAppServerAttempt(params)).rejects.toThrow("Invalid bearer token");
    // The clear helper must never have been called with the client that failed.
    const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
    expect(calledWithFailedClient).toBe(false);
  } finally {
    clearSpy.mockRestore();
  }
});
it("retires the shared Codex client when a spawned helper run times out during thread/start", async () => {
  const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
  // Restore the spy in `finally` so a failing assertion cannot leave it installed
  // for later tests.
  try {
    clearSpy.mockClear();
    let failedClient: unknown;
    setCodexAppServerClientFactoryForTest(async () => {
      const client = {
        request: vi.fn(async (method: string) => {
          if (method === "thread/start") {
            // Never settles, so only the startup timeout can end the attempt.
            return await new Promise<never>(() => undefined);
          }
          return {};
        }),
        addNotificationHandler: vi.fn(() => () => undefined),
        addRequestHandler: vi.fn(() => () => undefined),
      };
      failedClient = client;
      return client as never;
    });
    const params = createParams(
      path.join(tempDir, "session.jsonl"),
      path.join(tempDir, "workspace"),
    );
    // Mark the run as a spawned helper of a parent session.
    params.spawnedBy = "agent:main:session-parent";
    params.timeoutMs = 1;
    await expect(runCodexAppServerAttempt(params, { startupTimeoutFloorMs: 1 })).rejects.toThrow(
      "codex app-server startup timed out",
    );
    // The clear helper must have been called with the client that hung.
    const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
    expect(calledWithFailedClient).toBe(true);
  } finally {
    clearSpy.mockRestore();
  }
});
it("retires the shared Codex client when a spawned helper hits a thread/start write failure", async () => {
  // Observe every call to the shared-client retirement hook while this test runs.
  const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
  try {
    clearSpy.mockClear();
    // Captures the fake client handed out by the factory so the assertion can
    // check that this specific client was passed to the retirement hook.
    let failedClient: unknown;
    setCodexAppServerClientFactoryForTest(async () => {
      const c = {
        request: vi.fn(async (method: string) => {
          if (method === "thread/start") {
            // A plain Error rather than CodexAppServerRpcError: the write-failure case under test.
            throw new Error("write EPIPE");
          }
          return {};
        }),
        addNotificationHandler: vi.fn(() => () => undefined),
        addRequestHandler: vi.fn(() => () => undefined),
      };
      failedClient = c;
      return c as never;
    });
    const params = createParams(
      path.join(tempDir, "session.jsonl"),
      path.join(tempDir, "workspace"),
    );
    // Marks the attempt as spawned by a parent session (the "spawned helper" case).
    params.spawnedBy = "agent:main:session-parent";
    await expect(runCodexAppServerAttempt(params)).rejects.toThrow("write EPIPE");
    const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
    expect(calledWithFailedClient).toBe(true);
  } finally {
    // Restore even when an assertion above throws, so the spy cannot leak into later tests.
    clearSpy.mockRestore();
  }
});
it("retires the shared Codex client when a top-level run fails with a logical thread/start error", async () => {
  // Observe every call to the shared-client retirement hook while this test runs.
  const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
  try {
    clearSpy.mockClear();
    // Captures the fake client handed out by the factory so the assertion can
    // check that this specific client was passed to the retirement hook.
    let failedClient: unknown;
    setCodexAppServerClientFactoryForTest(async () => {
      const c = {
        request: vi.fn(async (method: string) => {
          if (method === "thread/start") {
            // An RPC-level rejection of thread/start (the "logical" error under test).
            throw new CodexAppServerRpcError(
              { message: "401 authentication_error: Invalid bearer token" },
              "thread/start",
            );
          }
          return {};
        }),
        addNotificationHandler: vi.fn(() => () => undefined),
        addRequestHandler: vi.fn(() => () => undefined),
      };
      failedClient = c;
      return c as never;
    });
    // Unlike the spawned-helper cases, spawnedBy is not set on these params.
    const params = createParams(
      path.join(tempDir, "session.jsonl"),
      path.join(tempDir, "workspace"),
    );
    await expect(runCodexAppServerAttempt(params)).rejects.toThrow("Invalid bearer token");
    const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
    expect(calledWithFailedClient).toBe(true);
  } finally {
    // Restore even when an assertion above throws, so the spy cannot leak into later tests.
    clearSpy.mockRestore();
  }
});
it("passes configured app-server policy, sandbox, service tier, and model on resume", async () => {
  // The stored binding records model "gpt-5.2"; the assertions below expect
  // "gpt-5.4-codex" on both the resume and the turn request.
  const boundSessionFile = path.join(tempDir, "session.jsonl");
  const boundWorkspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(boundSessionFile, boundWorkspaceDir, { model: "gpt-5.2" });

  const {
    requests: sentRequests,
    waitForMethod: untilMethodSent,
    completeTurn: finishTurn,
  } = createResumeHarness();

  // Looks up the params of the first recorded request for the given method.
  const paramsOf = (method: string) =>
    sentRequests.find((entry) => entry.method === method)?.params as
      | Record<string, unknown>
      | undefined;

  const attempt = runCodexAppServerAttempt(createParams(boundSessionFile, boundWorkspaceDir), {
    pluginConfig: {
      appServer: {
        approvalPolicy: "on-request",
        approvalsReviewer: "guardian_subagent",
        sandbox: "danger-full-access",
        serviceTier: "fast",
      },
    },
  });
  await untilMethodSent("turn/start");
  await finishTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  // Configured serviceTier "fast" is expected to reach the wire as "priority".
  expectResumeRequest(sentRequests, {
    threadId: "thread-existing",
    model: "gpt-5.4-codex",
    approvalPolicy: "on-request",
    approvalsReviewer: "guardian_subagent",
    sandbox: "danger-full-access",
    serviceTier: "priority",
    persistExtendedHistory: true,
  });

  const resumeParams = paramsOf("thread/resume");
  const resumeConfig = resumeParams?.config as Record<string, unknown> | undefined;
  const expectedFeatureFlags: Array<[string, boolean]> = [
    ["features.hooks", true],
    ["features.code_mode", true],
    ["features.code_mode_only", false],
  ];
  for (const [flag, enabled] of expectedFeatureFlags) {
    expect(resumeConfig?.[flag]).toBe(enabled);
  }
  expect(resumeParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);

  const turnParams = paramsOf("turn/start");
  expect(turnParams?.approvalPolicy).toBe("on-request");
  expect(turnParams?.approvalsReviewer).toBe("guardian_subagent");
  expect(turnParams?.sandboxPolicy).toEqual({ type: "dangerFullAccess" });
  expect(turnParams?.serviceTier).toBe("priority");
  expect(turnParams?.model).toBe("gpt-5.4-codex");
});
it("passes current Codex service tier request values through app-server resume and turn requests", async () => {
  // Here the configured tier is already "priority"; both outgoing requests
  // are expected to carry that same value.
  const boundSessionFile = path.join(tempDir, "session.jsonl");
  const boundWorkspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(boundSessionFile, boundWorkspaceDir, { model: "gpt-5.2" });

  const {
    requests: sentRequests,
    waitForMethod: untilMethodSent,
    completeTurn: finishTurn,
  } = createResumeHarness();

  const attempt = runCodexAppServerAttempt(createParams(boundSessionFile, boundWorkspaceDir), {
    pluginConfig: {
      appServer: {
        approvalPolicy: "on-request",
        sandbox: "danger-full-access",
        serviceTier: "priority",
      },
    },
  });
  await untilMethodSent("turn/start");
  await finishTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  // Check the resume request first, then the turn request, as recorded by the harness.
  for (const method of ["thread/resume", "turn/start"]) {
    const recorded = sentRequests.find((entry) => entry.method === method);
    const recordedParams = recorded?.params as Record<string, unknown> | undefined;
    expect(recordedParams?.serviceTier).toBe("priority");
  }
});
// Verifies that an attempt whose params carry no authProfileId still reports
// the profile id stored in the existing session binding ("openai-codex:bound")
// to the harness's onStart callback, together with the params' agentDir.
it("reuses the bound auth profile for app-server startup when params omit it", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
// Seed the binding with the auth profile the attempt is expected to pick up.
await writeExistingBinding(sessionFile, workspaceDir, {
authProfileId: "openai-codex:bound",
dynamicToolsFingerprint: "[]",
});
// Values the harness passes to onStart, recorded in call order.
const seenAuthProfileIds: Array<string | undefined> = [];
const seenAgentDirs: Array<string | undefined> = [];
const { requests, waitForMethod, completeTurn } = createAppServerHarness(
async (method: string) => {
if (method === "thread/resume") {
return threadStartResult("thread-existing");
}
if (method === "turn/start") {
return turnStartResult();
}
// Any other RPC method fails the test loudly instead of being silently accepted.
throw new Error(`unexpected method: ${method}`);
},
{
// NOTE(review): presumably invoked by the harness when the app-server client is started — confirm in createAppServerHarness.
onStart: (authProfileId, agentDir) => {
seenAuthProfileIds.push(authProfileId);
seenAgentDirs.push(agentDir);
},
},
);
const params = createParams(sessionFile, workspaceDir);
// Drop the explicit profile so the only source left is the stored binding.
delete params.authProfileId;
params.agentDir = path.join(tempDir, "agent");
const run = runCodexAppServerAttempt(params);
// Poll every 1 ms until onStart has been called exactly once with the bound profile.
await vi.waitFor(() => expect(seenAuthProfileIds).toEqual(["openai-codex:bound"]), {
interval: 1,
});
await waitForMethod("turn/start");
// Yield once to the event loop before completing the turn.
// NOTE(review): presumably lets work queued after turn/start settle first — confirm.
await new Promise<void>((resolve) => setImmediate(resolve));
await completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
await run;
// Re-check after the run finishes: onStart must not have fired a second time.
expect(seenAuthProfileIds).toEqual(["openai-codex:bound"]);
expect(seenAgentDirs).toEqual([path.join(tempDir, "agent")]);
expect(requests.map((entry) => entry.method)).toContain("turn/start");
});
});