// Source: openclaw/extensions/codex/src/app-server/run-attempt.test.ts
// Snapshot: 2026-05-20 14:20:56 +09:00 (9691 lines, 331 KiB, TypeScript)

import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { SessionManager } from "@earendil-works/pi-coding-agent";
import {
abortAgentHarnessRun,
embeddedAgentLog,
nativeHookRelayTesting,
onAgentEvent,
queueAgentHarnessMessage,
resetAgentEventsForTest,
wrapToolWithBeforeToolCallHook,
type AgentEventPayload,
type EmbeddedRunAttemptParams,
} from "openclaw/plugin-sdk/agent-harness-runtime";
import {
emitDiagnosticEvent,
emitTrustedDiagnosticEvent,
onInternalDiagnosticEvent,
resetDiagnosticEventsForTest,
waitForDiagnosticEventsDrained,
type DiagnosticEventPayload,
} from "openclaw/plugin-sdk/diagnostic-runtime";
import {
initializeGlobalHookRunner,
resetGlobalHookRunner,
} from "openclaw/plugin-sdk/hook-runtime";
import { clearPluginCommands, registerPluginCommand } from "openclaw/plugin-sdk/plugin-runtime";
import { createMockPluginRegistry } from "openclaw/plugin-sdk/plugin-test-runtime";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
/**
 * Test-local alias for `queueAgentHarnessMessage`.
 *
 * Forwards every argument unchanged and hands back the boolean the harness returns.
 */
function queueActiveRunMessageForTest(
  ...args: Parameters<typeof queueAgentHarnessMessage>
): boolean {
  const queued = queueAgentHarnessMessage(...args);
  return queued;
}
import { CODEX_GPT5_BEHAVIOR_CONTRACT } from "../../prompt-overlay.js";
import { defaultCodexAppInventoryCache } from "./app-inventory-cache.js";
import * as approvalBridge from "./approval-bridge.js";
import * as authBridge from "./auth-bridge.js";
import { resolveCodexAppServerEnvApiKeyCacheKey } from "./auth-bridge.js";
import type { CodexAppServerClientFactory } from "./client-factory.js";
import {
readCodexPluginConfig,
resolveCodexAppServerRuntimeOptions,
resolveCodexPluginsPolicy,
} from "./config.js";
import {
CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE,
createCodexDynamicToolBridge,
} from "./dynamic-tools.js";
import * as elicitationBridge from "./elicitation-bridge.js";
import {
buildCodexPluginAppCacheKey,
resolveCodexPluginAppCacheEndpoint,
} from "./plugin-app-cache-key.js";
import type { CodexServerNotification } from "./protocol.js";
import {
readRecentCodexRateLimits,
rememberCodexRateLimits,
resetCodexRateLimitCacheForTests,
} from "./rate-limit-cache.js";
import {
runCodexAppServerAttempt as runCodexAppServerAttemptImpl,
testing,
} from "./run-attempt.js";
import { readCodexAppServerBinding, writeCodexAppServerBinding } from "./session-binding.js";
import { createCodexTestModel } from "./test-support.js";
import {
buildTurnCollaborationMode,
buildThreadResumeParams,
buildTurnStartParams,
startOrResumeThread,
} from "./thread-lifecycle.js";
// Per-test scratch directory; assigned in beforeEach and removed in afterEach.
let tempDir: string;
// Client factory that runCodexAppServerAttempt (the test wrapper) falls back to when
// a call does not provide options.clientFactory; cleared in afterEach.
let codexAppServerClientFactoryForTest: CodexAppServerClientFactory | undefined;
// Polling options with a 1 ms interval and a 5 s timeout.
// NOTE(review): presumably passed to vi.waitFor-style helpers elsewhere in the file — confirm.
const fastWait = { interval: 1, timeout: 5_000 } as const;
// Non-nullable form of the second parameter accepted by the real runCodexAppServerAttempt.
type RunCodexAppServerAttemptOptions = NonNullable<
Parameters<typeof runCodexAppServerAttemptImpl>[1]
>;
/**
 * Returns whatever `waitForDiagnosticEventsDrained()` returns, so callers can await
 * the diagnostic event pipeline before asserting on it.
 */
function flushDiagnosticEvents() {
  const drained = waitForDiagnosticEventsDrained();
  return drained;
}
/**
 * Emits `count` synthetic "model.call.started" diagnostic events.
 *
 * Each event gets a distinct `backlog-run-<i>` / `backlog-call-<i>` id pair so the
 * entries are distinguishable from one another.
 *
 * @param count Number of events to emit; nothing is emitted when it is zero or negative.
 */
function emitAsyncDiagnosticBacklog(count: number): void {
  let index = 0;
  while (index < count) {
    emitDiagnosticEvent({
      type: "model.call.started",
      runId: `backlog-run-${index}`,
      callId: `backlog-call-${index}`,
      provider: "openai",
      model: "gpt-5.4",
    });
    index += 1;
  }
}
/**
 * Replays tool-execution diagnostic events and returns the keys of the tools that
 * were started but not yet completed, errored, or blocked.
 *
 * A key is `<owner>:<tool>`, where owner is the first defined value of runId,
 * sessionId, sessionKey (falling back to "unknown") and tool is toolCallId when
 * present, otherwise toolName. Events of any other type are ignored.
 *
 * @param events Diagnostic events in the order they were observed.
 * @returns The set of keys still considered active after the replay.
 */
function activeDiagnosticToolKeys(events: DiagnosticEventPayload[]): Set<string> {
  const active = new Set<string>();
  for (const event of events) {
    switch (event.type) {
      case "tool.execution.started":
      case "tool.execution.completed":
      case "tool.execution.error":
      case "tool.execution.blocked": {
        const owner = event.runId ?? event.sessionId ?? event.sessionKey ?? "unknown";
        const key = `${owner}:${event.toolCallId ?? event.toolName}`;
        if (event.type === "tool.execution.started") {
          active.add(key);
        } else {
          // Any terminal event retires the matching start.
          active.delete(key);
        }
        break;
      }
      default:
        break;
    }
  }
  return active;
}
/**
 * Installs the client factory that the test wrapper `runCodexAppServerAttempt`
 * uses when a call does not supply its own `clientFactory`.
 *
 * @param factory Factory to store in the module-level test slot.
 */
function setCodexAppServerClientFactoryForTest(factory: CodexAppServerClientFactory): void {
  codexAppServerClientFactoryForTest = factory;
}
/**
 * Clears the module-level test client factory so later runs see no injected factory.
 */
function resetCodexAppServerClientFactoryForTest(): void {
  codexAppServerClientFactoryForTest = undefined;
}
/**
 * Test wrapper around the real `runCodexAppServerAttempt`.
 *
 * Uses `options.clientFactory` when given, otherwise the factory installed through
 * `setCodexAppServerClientFactoryForTest`. When neither exists the options are
 * forwarded untouched.
 *
 * @param params Attempt parameters passed straight through.
 * @param options Optional run options; defaults to an empty object.
 * @returns Whatever the real implementation returns.
 */
function runCodexAppServerAttempt(
  params: EmbeddedRunAttemptParams,
  options: RunCodexAppServerAttemptOptions = {},
) {
  const clientFactory = options.clientFactory ?? codexAppServerClientFactoryForTest;
  if (!clientFactory) {
    return runCodexAppServerAttemptImpl(params, options);
  }
  return runCodexAppServerAttemptImpl(params, { ...options, clientFactory });
}
/**
 * Builds a baseline `EmbeddedRunAttemptParams` fixture for a Codex run.
 *
 * Tools are disabled and the timeout is 5 seconds; tests mutate the returned
 * object to opt into other behaviour.
 *
 * @param sessionFile Path stored as the session file.
 * @param workspaceDir Path stored as the workspace directory.
 */
function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAttemptParams {
  // The token budget and the context window size share one value.
  const contextTokens = 150_000;
  const attemptParams = {
    prompt: "hello",
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile,
    workspaceDir,
    runId: "run-1",
    provider: "codex",
    modelId: "gpt-5.4-codex",
    model: createCodexTestModel("codex"),
    contextTokenBudget: contextTokens,
    contextWindowInfo: {
      tokens: contextTokens,
      referenceTokens: 200_000,
      source: "agentContextTokens",
    },
    thinkLevel: "medium",
    disableTools: true,
    timeoutMs: 5_000,
    authStorage: {} as never,
    authProfileStore: { version: 1, profiles: {} },
    modelRegistry: {} as never,
  } as EmbeddedRunAttemptParams;
  return attemptParams;
}
/**
 * Builds a minimal runtime-plan stub for Codex runs.
 *
 * The stub has empty auth, fixed observability ids, a system-prompt resolver that
 * yields `undefined`, a tool normalizer that returns its input unchanged, and a
 * diagnostics logger that does nothing.
 */
function createCodexRuntimePlanFixture(): NonNullable<EmbeddedRunAttemptParams["runtimePlan"]> {
  const observability = {
    resolvedRef: "codex/gpt-5.4-codex",
    provider: "codex",
    modelId: "gpt-5.4-codex",
    harnessId: "codex",
  };
  const prompt = {
    resolveSystemPromptContribution: () => undefined,
  };
  const tools = {
    normalize: (tools: unknown[]) => tools,
    logDiagnostics: () => undefined,
  };
  const plan = { auth: {}, observability, prompt, tools };
  return plan as unknown as NonNullable<EmbeddedRunAttemptParams["runtimePlan"]>;
}
/**
 * Builds a canned `thread/start` (or `thread/resume`) response.
 *
 * The working directory is the per-test temp dir when one is set, otherwise a
 * fixed placeholder path.
 *
 * @param threadId Id reported for the thread; defaults to "thread-1".
 */
function threadStartResult(threadId = "thread-1") {
  const cwd = tempDir || "/tmp/openclaw-codex-test";
  const thread = {
    id: threadId,
    sessionId: "session-1",
    forkedFromId: null,
    preview: "",
    ephemeral: false,
    modelProvider: "openai",
    createdAt: 1,
    updatedAt: 1,
    status: { type: "idle" },
    path: null,
    cwd,
    cliVersion: "0.125.0",
    source: "unknown",
    agentNickname: null,
    agentRole: null,
    gitInfo: null,
    name: null,
    turns: [],
  };
  return {
    thread,
    model: "gpt-5.4-codex",
    modelProvider: "openai",
    serviceTier: null,
    cwd,
    instructionSources: [],
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: { type: "dangerFullAccess" },
    permissionProfile: null,
    reasoningEffort: null,
  };
}
/**
 * Builds a canned `turn/start` response with no items, no error, and no timing data.
 *
 * @param turnId Id reported for the turn; defaults to "turn-1".
 * @param status Status reported for the turn; defaults to "inProgress".
 */
function turnStartResult(turnId = "turn-1", status = "inProgress") {
  const turn = {
    id: turnId,
    status,
    items: [],
    error: null,
    startedAt: null,
    completedAt: null,
    durationMs: null,
  };
  return { turn };
}
/**
 * Builds an "account/rateLimits/updated" notification whose primary window is
 * fully used (100%) over a 300-minute window.
 *
 * @param resetsAt Reset value stored on the primary window.
 */
function rateLimitsUpdated(resetsAt: number): CodexServerNotification {
  const primary = { usedPercent: 100, windowDurationMins: 300, resetsAt };
  const notification: CodexServerNotification = {
    method: "account/rateLimits/updated",
    params: {
      rateLimits: {
        limitId: "codex",
        limitName: "Codex",
        primary,
        secondary: null,
        credits: null,
        planType: "plus",
        rateLimitReachedType: "rate_limit_reached",
      },
    },
  };
  return notification;
}
/**
 * Builds an assistant transcript message with a single text part, zeroed usage
 * and cost counters, and a "stop" stop reason.
 *
 * @param text Text placed in the only content part.
 * @param timestamp Timestamp stored on the message.
 */
function assistantMessage(text: string, timestamp: number) {
  const cost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
  const usage = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    totalTokens: 0,
    cost,
  };
  return {
    role: "assistant" as const,
    content: [{ type: "text" as const, text }],
    api: "openai-codex-responses",
    provider: "openai-codex",
    model: "gpt-5.4-codex",
    usage,
    stopReason: "stop" as const,
    timestamp,
  };
}
/**
 * Builds a user transcript message with a single text part.
 *
 * @param text Text placed in the only content part.
 * @param timestamp Timestamp stored on the message.
 */
function userMessage(text: string, timestamp: number) {
  const textPart = { type: "text" as const, text };
  return {
    role: "user" as const,
    content: [textPart],
    timestamp,
  };
}
/**
 * Returns the argument list of one recorded call on a mock-like object.
 *
 * @param mock Object expected to carry `mock.calls` (an array of argument arrays).
 * @param label Name used in the error message when the call is missing.
 * @param index Call position, resolved with `Array.prototype.at`; defaults to the first call.
 * @returns The recorded arguments for that call.
 * @throws Error when no call is recorded at `index`.
 */
function mockCall(mock: unknown, label: string, index = 0): unknown[] {
  const recordedCalls = (mock as { mock?: { calls?: unknown[][] } }).mock?.calls;
  const call = recordedCalls?.at(index);
  if (call) {
    return call;
  }
  throw new Error(`Expected ${label} call ${index + 1}`);
}
/**
 * Builds a fake Codex app-server client and installs it as the test client factory.
 *
 * Every `request` call is appended to `requests` and then delegated to `requestImpl`.
 * The fake client remembers the most recently registered notification handler and
 * server-request handler and collects close handlers, so a test can drive them
 * through the returned object.
 *
 * @param requestImpl Produces the response for each app-server method call.
 * @param options.onStart Called with the auth profile id and agent dir whenever the factory runs.
 * @returns The request mock, the recorded requests, and helpers to wait for a method,
 * deliver notifications, invoke the server-request handler, complete a turn, and fire
 * the close handlers.
 */
function createAppServerHarness(
requestImpl: (method: string, params: unknown) => Promise<unknown>,
options: {
onStart?: (authProfileId: string | undefined, agentDir: string | undefined) => void;
} = {},
) {
const requests: Array<{ method: string; params: unknown }> = [];
// Stays a no-op until the code under test registers a notification handler.
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
let handleServerRequest: AppServerRequestHandler | undefined;
const closeHandlers = new Set<() => void>();
const request = vi.fn(async (method: string, params?: unknown) => {
requests.push({ method, params });
return requestImpl(method, params);
});
setCodexAppServerClientFactoryForTest(async (_startOptions, authProfileId, agentDir) => {
options.onStart?.(authProfileId, agentDir);
return {
request,
// Only the latest handler is kept; the returned unsubscribe does nothing.
addNotificationHandler: (handler: typeof notify) => {
notify = handler;
return () => undefined;
},
// Only the latest handler is kept; the returned unsubscribe does nothing.
addRequestHandler: (handler: AppServerRequestHandler) => {
handleServerRequest = handler;
return () => undefined;
},
// Close handlers accumulate; the returned function removes the one just added.
addCloseHandler: (handler: () => void) => {
closeHandlers.add(handler);
return () => closeHandlers.delete(handler);
},
} as never;
});
// Polls (1 ms interval, 30 s cap) until a server-request handler has been registered.
const waitForServerRequestHandler = async () => {
await vi.waitFor(() => expect(handleServerRequest).toBeTypeOf("function"), {
interval: 1,
timeout: 30_000,
});
return handleServerRequest!;
};
return {
request,
requests,
// Polls until `method` appears in `requests`; the error thrown while waiting lists
// both the recorded requests and the methods seen by the mock.
async waitForMethod(method: string, timeoutMs = 30_000) {
await vi.waitFor(
() => {
if (!requests.some((entry) => entry.method === method)) {
const mockMethods = request.mock.calls.map((call) => call[0]);
throw new Error(
`expected app-server method ${method}; saw ${requests
.map((entry) => entry.method)
.join(", ")}; mock saw ${mockMethods.join(", ")}`,
);
}
},
{ interval: 1, timeout: timeoutMs },
);
},
// Delivers a notification to whichever handler is currently registered.
async notify(notification: CodexServerNotification) {
await notify(notification);
},
waitForServerRequestHandler,
// Waits for the server-request handler to exist, then invokes it with `request`.
async handleServerRequest(request: Parameters<AppServerRequestHandler>[0]) {
const handler = await waitForServerRequestHandler();
return handler(request);
},
// Sends a "turn/completed" notification with status "completed" for the given thread and turn.
async completeTurn(params: { threadId: string; turnId: string }) {
await notify({
method: "turn/completed",
params: {
threadId: params.threadId,
turnId: params.turnId,
turn: { id: params.turnId, status: "completed" },
},
});
},
// Invokes every registered close handler.
close() {
for (const handler of closeHandlers) {
handler();
}
},
};
}
/**
 * Creates an app-server harness that already answers `thread/start` and `turn/start`.
 *
 * `requestImpl` is consulted first; any result other than `undefined` wins.
 * Otherwise the canned thread or turn response is returned, and every other
 * method resolves to an empty object.
 *
 * @param requestImpl Optional per-test override; defaults to one that always yields `undefined`.
 * @param options Forwarded unchanged to `createAppServerHarness`.
 */
function createStartedThreadHarness(
  requestImpl: (method: string, params: unknown) => Promise<unknown> = async () => undefined,
  options: {
    onStart?: (authProfileId: string | undefined, agentDir: string | undefined) => void;
  } = {},
) {
  const respond = async (method: string, params: unknown): Promise<unknown> => {
    const override = await requestImpl(method, params);
    if (override !== undefined) {
      return override;
    }
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        return turnStartResult();
      default:
        return {};
    }
  };
  return createAppServerHarness(respond, options);
}
/**
 * Asserts that a `thread/resume` request was recorded and that each expected
 * key deep-equals the value sent in that request's params.
 *
 * @param requests Recorded app-server requests.
 * @param params Expected subset of the resume params.
 * @throws Error when no `thread/resume` request was recorded.
 */
function expectResumeRequest(
  requests: Array<{ method: string; params: unknown }>,
  params: Record<string, unknown>,
) {
  const resumeRequest = requests.find(({ method }) => method === "thread/resume");
  if (resumeRequest === undefined) {
    throw new Error("Expected thread/resume request");
  }
  const sentParams = resumeRequest.params as Record<string, unknown> | undefined;
  for (const key of Object.keys(params)) {
    expect(sentParams?.[key]).toEqual(params[key]);
  }
}
/**
 * Creates an app-server harness for resume flows: `thread/resume` answers with
 * the "thread-existing" thread, `turn/start` with the canned turn, and every
 * other method with an empty object.
 */
function createResumeHarness() {
  return createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/resume":
        return threadStartResult("thread-existing");
      case "turn/start":
        return turnStartResult();
      default:
        return {};
    }
  });
}
/**
 * Persists a session binding that points at the "thread-existing" thread.
 *
 * @param sessionFile Session file the binding is written for.
 * @param workspaceDir Stored as the binding's cwd.
 * @param overrides Fields that replace or extend the defaults.
 */
async function writeExistingBinding(
  sessionFile: string,
  workspaceDir: string,
  overrides: Partial<Parameters<typeof writeCodexAppServerBinding>[1]> = {},
) {
  const defaults = {
    threadId: "thread-existing",
    cwd: workspaceDir,
    model: "gpt-5.4-codex",
    modelProvider: "openai",
  };
  await writeCodexAppServerBinding(sessionFile, { ...defaults, ...overrides });
}
/**
 * Builds the `appServer` options passed to `startOrResumeThread` in lifecycle tests:
 * stdio transport running `codex app-server`, 60-second request and idle timeouts,
 * approval policy "never", a workspace-write sandbox, and code-mode-only disabled.
 */
function createThreadLifecycleAppServerOptions(): Parameters<
  typeof startOrResumeThread
>[0]["appServer"] {
  type AppServerOptions = Parameters<typeof startOrResumeThread>[0]["appServer"];
  const appServerOptions: AppServerOptions = {
    start: {
      transport: "stdio",
      command: "codex",
      args: ["app-server"],
      headers: {},
    },
    requestTimeoutMs: 60_000,
    turnCompletionIdleTimeoutMs: 60_000,
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: "workspace-write",
    codeModeOnly: false,
  };
  return appServerOptions;
}
/**
 * Builds a dynamic-tool definition named "message" whose only input is a
 * required string `action` restricted to the given values.
 *
 * @param description Tool description.
 * @param actions Allowed `action` values; defaults to `["send"]`.
 */
function createMessageDynamicTool(
  description: string,
  actions: string[] = ["send"],
): Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] {
  type DynamicTool = Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number];
  const tool: DynamicTool = {
    name: "message",
    description,
    inputSchema: {
      type: "object",
      properties: {
        action: {
          type: "string",
          enum: actions,
        },
      },
      required: ["action"],
      additionalProperties: false,
    },
  };
  return tool;
}
/**
 * Builds a dynamic-tool definition with the given name, a "<name> test tool"
 * description, and an object schema that accepts no properties.
 *
 * @param name Tool name.
 */
function createNamedDynamicTool(
  name: string,
): Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] {
  type DynamicTool = Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number];
  const tool: DynamicTool = {
    name,
    description: `${name} test tool`,
    inputSchema: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
  };
  return tool;
}
// Element type of the `tools` array accepted by createCodexDynamicToolBridge's first argument.
type RuntimeDynamicToolForTest = Parameters<
typeof createCodexDynamicToolBridge
>[0]["tools"][number];
/**
 * Builds a runtime tool stub whose label equals its name, whose schema accepts no
 * properties, and whose mocked `execute` resolves to a "<name> done" text result
 * with empty details.
 *
 * @param name Tool name, also used as the label.
 */
function createRuntimeDynamicTool(name: string): RuntimeDynamicToolForTest {
  const tool: RuntimeDynamicToolForTest = {
    name,
    label: name,
    description: `${name} test tool`,
    parameters: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
    execute: vi.fn(async () => ({
      content: [{ type: "text" as const, text: `${name} done` }],
      details: {},
    })),
  };
  return tool;
}
/**
 * Builds an `apps` config patch in which the `_default` entry disables everything
 * and "google-calendar-app" is fully enabled with approval mode "auto".
 */
function createPluginAppConfigPatch() {
  const defaultApp = {
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  };
  const calendarApp = {
    enabled: true,
    destructive_enabled: true,
    open_world_enabled: true,
    default_tools_approval_mode: "auto",
  };
  return {
    apps: {
      _default: defaultApp,
      "google-calendar-app": calendarApp,
    },
  };
}
/**
 * Builds a plugin app policy context with a single Google Calendar app that
 * disallows destructive actions, plus the plugin-to-app-id index for it.
 */
function createPluginAppPolicyContext() {
  const calendarPolicy = {
    configKey: "google-calendar",
    marketplaceName: "openai-curated" as const,
    pluginName: "google-calendar",
    allowDestructiveActions: false,
    mcpServerNames: ["google-calendar"],
  };
  return {
    fingerprint: "plugin-policy-1",
    apps: {
      "google-calendar-app": calendarPolicy,
    },
    pluginAppIds: {
      "google-calendar": ["google-calendar-app"],
    },
  };
}
/**
 * Extends the single-plugin config patch with a fully enabled "gmail-app" entry.
 */
function createTwoPluginAppConfigPatch() {
  const base = createPluginAppConfigPatch();
  const gmailApp = {
    enabled: true,
    destructive_enabled: true,
    open_world_enabled: true,
    default_tools_approval_mode: "auto",
  };
  return {
    apps: {
      ...base.apps,
      "gmail-app": gmailApp,
    },
  };
}
/**
 * Extends the single-plugin policy context with a Gmail app and its
 * plugin-to-app-id entry, under the fingerprint "plugin-policy-2".
 */
function createTwoPluginAppPolicyContext() {
  // The base fixture is built once and only read from; it is never returned.
  const base = createPluginAppPolicyContext();
  return {
    fingerprint: "plugin-policy-2",
    apps: {
      ...base.apps,
      "gmail-app": {
        configKey: "gmail",
        marketplaceName: "openai-curated" as const,
        pluginName: "gmail",
        allowDestructiveActions: false,
        mcpServerNames: ["gmail"],
      },
    },
    pluginAppIds: {
      ...base.pluginAppIds,
      gmail: ["gmail-app"],
    },
  };
}
/**
 * Extends the single-plugin config patch with a fully enabled
 * "google-calendar-secondary-app" entry.
 */
function createTwoCalendarAppConfigPatch() {
  const base = createPluginAppConfigPatch();
  const secondaryCalendarApp = {
    enabled: true,
    destructive_enabled: true,
    open_world_enabled: true,
    default_tools_approval_mode: "auto",
  };
  return {
    apps: {
      ...base.apps,
      "google-calendar-secondary-app": secondaryCalendarApp,
    },
  };
}
/**
 * Builds a policy context in which the "google-calendar" plugin owns two apps
 * (primary and secondary), under the fingerprint "plugin-policy-calendar-2".
 */
function createTwoCalendarAppPolicyContext() {
  const base = createPluginAppPolicyContext();
  const secondaryCalendarPolicy = {
    configKey: "google-calendar",
    marketplaceName: "openai-curated" as const,
    pluginName: "google-calendar",
    allowDestructiveActions: false,
    mcpServerNames: ["google-calendar"],
  };
  return {
    fingerprint: "plugin-policy-calendar-2",
    apps: {
      ...base.apps,
      "google-calendar-secondary-app": secondaryCalendarPolicy,
    },
    pluginAppIds: {
      "google-calendar": ["google-calendar-app", "google-calendar-secondary-app"],
    },
  };
}
// Signature of the server-to-client request handler captured by createAppServerHarness:
// it receives a request carrying an id, a method, and optional params, and resolves to the response.
type AppServerRequestHandler = (request: {
id: string | number;
method: string;
params?: unknown;
}) => Promise<unknown>;
/**
 * Extracts the native hook relay id from the hook commands in a thread request's config.
 *
 * Looks through the `hooks.PreToolUse`, `hooks.PostToolUse`, `hooks.PermissionRequest`
 * and `hooks.Stop` config entries in that order, takes the first non-empty hook
 * command found, and returns the value following `--relay-id `.
 *
 * Malformed input (missing params, null entries, non-array `hooks`) is treated the
 * same as "no command found", so callers always get the descriptive error below
 * rather than an unrelated TypeError.
 *
 * @param params Params object of a thread request.
 * @returns The relay id found in the hook command.
 * @throws Error when no hook command carries a `--relay-id` value.
 */
function extractRelayIdFromThreadRequest(params: unknown): string {
  const config = (params as { config?: Record<string, unknown> } | null | undefined)?.config;
  let command: string | undefined;
  for (const key of [
    "hooks.PreToolUse",
    "hooks.PostToolUse",
    "hooks.PermissionRequest",
    "hooks.Stop",
  ]) {
    const entries = config?.[key];
    if (!Array.isArray(entries)) {
      continue;
    }
    for (const entry of entries as Array<
      { hooks?: Array<{ command?: string } | null | undefined> } | null | undefined
    >) {
      const hooks = entry?.hooks;
      if (!Array.isArray(hooks)) {
        continue;
      }
      command = hooks.find((hook) => typeof hook?.command === "string")?.command;
      if (command) {
        break;
      }
    }
    if (command) {
      break;
    }
  }
  const match = command?.match(/--relay-id ([^ ]+)/);
  if (!match?.[1]) {
    throw new Error(`relay id missing from command: ${command}`);
  }
  return match[1];
}
describe("runCodexAppServerAttempt", () => {
// Per-test setup: resets agent and diagnostic event state, stubs OPENCLAW_TRAJECTORY to "0"
// and the Codex/OpenAI API key variables to empty strings, then creates a fresh temp directory.
beforeEach(async () => {
resetAgentEventsForTest();
resetDiagnosticEventsForTest();
vi.stubEnv("OPENCLAW_TRAJECTORY", "0");
vi.stubEnv("CODEX_API_KEY", "");
vi.stubEnv("OPENAI_API_KEY", "");
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-run-"));
});
// Per-test teardown: clears the injected client factory, tool factory, rate-limit cache,
// hook relays, plugin commands, event state, hook runner and app inventory cache, restores
// timers, mocks and env stubs, then deletes the temp directory.
afterEach(async () => {
resetCodexAppServerClientFactoryForTest();
testing.resetOpenClawCodingToolsFactoryForTests();
resetCodexRateLimitCacheForTests();
nativeHookRelayTesting.clearNativeHookRelaysForTests();
clearPluginCommands();
resetAgentEventsForTest();
resetDiagnosticEventsForTest();
resetGlobalHookRunner();
defaultCodexAppInventoryCache.clear();
vi.useRealTimers();
vi.restoreAllMocks();
vi.unstubAllEnvs();
// force: true keeps cleanup from failing when the directory is already gone.
await fs.rm(tempDir, { recursive: true, force: true });
});
it("filters Codex-native dynamic tools from app-server tool exposure", () => {
  // Names expected to be dropped by the filter, in the order they are offered.
  const filteredOutNames = [
    "read",
    "write",
    "edit",
    "apply_patch",
    "exec",
    "process",
    "update_plan",
    "tool_call",
    "tool_describe",
    "tool_search",
    "tool_search_code",
  ];
  // Names expected to survive, in the order they are offered.
  const exposedNames = ["web_search", "message", "heartbeat_respond", "sessions_spawn"];
  const tools = [...filteredOutNames, ...exposedNames].map((name) => ({ name }));
  const remaining = testing.filterCodexDynamicTools(tools, {});
  expect(remaining.map((tool) => tool.name)).toEqual([
    "web_search",
    "message",
    "heartbeat_respond",
    "sessions_spawn",
  ]);
});
it("applies additional Codex dynamic tool excludes without exposing Codex-native tools", () => {
  const tools = ["read", "exec", "message", "custom_tool"].map((name) => ({ name }));
  // Excluding "custom_tool" must not bring "read" or "exec" back.
  const remaining = testing.filterCodexDynamicTools(tools, {
    codexDynamicToolsExclude: ["custom_tool"],
  });
  const remainingNames = remaining.map((tool) => tool.name);
  expect(remainingNames).toEqual(["message"]);
});
it("exposes app-server-owned tools directly for forced private QA Codex runtime", () => {
  // Env flags that select the private QA build with the Codex runtime forced.
  const privateQaCodexEnv = {
    OPENCLAW_BUILD_PRIVATE_QA: "1",
    OPENCLAW_QA_FORCE_RUNTIME: "codex",
  };
  const tools = ["read", "write", "image_generate", "message"].map((name) => ({ name }));
  const remaining = testing.filterCodexDynamicTools(tools, {}, privateQaCodexEnv);
  expect(remaining.map((tool) => tool.name)).toEqual([
    "read",
    "write",
    "image_generate",
    "message",
  ]);
  const loading = testing.resolveCodexDynamicToolsLoading({}, privateQaCodexEnv);
  expect(loading).toBe("direct");
});
// With an enabled sandbox whose backendId is "ssh", the exec/process tools from the factory
// are expected to surface as sandbox_exec/sandbox_process next to message, while
// read/write/edit/apply_patch are not listed at all.
it("exposes OpenClaw sandbox shell tools under distinct names for non-Docker sandbox backends", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("read"),
createRuntimeDynamicTool("write"),
createRuntimeDynamicTool("edit"),
createRuntimeDynamicTool("apply_patch"),
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("process"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
// createParams disables tools by default; this test needs them built.
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const sandboxSessionKey = params.sessionKey;
if (!sandboxSessionKey) {
throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
}
const tools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey,
sandbox: { enabled: true, backendId: "ssh" } as never,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(tools.map((tool) => tool.name)).toEqual(["message", "sandbox_exec", "sandbox_process"]);
expect(tools.find((tool) => tool.name === "sandbox_exec")?.description).toContain(
"configured sandbox backend",
);
expect(tools.find((tool) => tool.name === "sandbox_process")?.description).toContain(
"sandbox_exec sessions",
);
});
// With a Docker sandbox and nativeToolSurfaceEnabled set to true, only message is expected;
// no sandbox_exec/sandbox_process aliases are added.
it("keeps Docker sandbox shell tools hidden when native Code Mode can honor sandbox paths", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("process"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const sandboxSessionKey = params.sessionKey;
if (!sandboxSessionKey) {
throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
}
const dockerTools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey,
sandbox: { enabled: true, backendId: "docker" } as never,
nativeToolSurfaceEnabled: true,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(dockerTools.map((tool) => tool.name)).toEqual(["message"]);
});
// With a Docker sandbox that declares a bind mount and nativeToolSurfaceEnabled set to false,
// the sandbox_exec/sandbox_process aliases are expected, and the sandbox_exec description
// should mention the Docker container-path bind layout.
it("exposes Docker sandbox shell tools when native Code Mode cannot honor sandbox paths", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("process"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const sandboxSessionKey = params.sessionKey;
if (!sandboxSessionKey) {
throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
}
const tools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey,
sandbox: {
enabled: true,
backendId: "docker",
docker: { binds: ["/tmp/openclaw-data:/data:rw"] },
} as never,
nativeToolSurfaceEnabled: false,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(tools.map((tool) => tool.name)).toEqual(["message", "sandbox_exec", "sandbox_process"]);
expect(tools.find((tool) => tool.name === "sandbox_exec")?.description).toContain(
"Docker container-path bind layout",
);
});
// A sandbox object with enabled: false must not produce sandbox aliases, even though a
// backendId ("ssh") is present; only message is expected.
it("does not expose sandbox shell tools when sandbox routing is disabled", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("process"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const sandboxSessionKey = params.sessionKey;
if (!sandboxSessionKey) {
throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
}
const disabledSandboxTools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey,
sandbox: { enabled: false, backendId: "ssh" } as never,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(disabledSandboxTools.map((tool) => tool.name)).toEqual(["message"]);
});
// The factory supplies exec but no process tool here; sandbox_exec is then expected to be
// withheld as well, leaving only message.
it("does not expose sandbox_exec without a matching process follow-up tool", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const sandboxSessionKey = params.sessionKey;
if (!sandboxSessionKey) {
throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
}
const tools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey,
sandbox: { enabled: true, backendId: "ssh" } as never,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(tools.map((tool) => tool.name)).toEqual(["message"]);
});
// Runs the same build twice, excluding "sandbox_exec" and then "process"; in both cases
// only message is expected, so excluding either name removes both sandbox aliases.
it("honors Codex dynamic tool excludes for sandbox shell exposure", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("exec"),
createRuntimeDynamicTool("process"),
createRuntimeDynamicTool("message"),
]);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
const sandboxSessionKey = params.sessionKey;
if (!sandboxSessionKey) {
throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
}
// One entry uses the sandbox alias name and the other the underlying tool name.
for (const excludedToolName of ["sandbox_exec", "process"]) {
const tools = await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey,
sandbox: { enabled: true, backendId: "ssh" } as never,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: { codexDynamicToolsExclude: [excludedToolName] },
onYieldDetected: () => undefined,
});
expect(tools.map((tool) => tool.name)).toEqual(["message"]);
}
});
// The wrapped exec tool returns "still running" guidance that names `process`; the
// sandbox_exec alias is expected to rewrite that guidance to name `sandbox_process` instead.
it("points yielded sandbox_exec follow-up guidance at sandbox_process", async () => {
const execTool = createRuntimeDynamicTool("exec");
vi.mocked(execTool.execute).mockResolvedValueOnce({
content: [
{
type: "text",
text: "Command still running (session exec-1, pid 123). Use process (list/poll/log/write/send-keys/submit/paste/kill/clear/remove) for follow-up.",
},
],
details: { status: "running" },
});
const processTool = createRuntimeDynamicTool("process");
const tools = testing.addSandboxShellDynamicToolsIfAvailable([], [execTool, processTool], {
sandbox: { enabled: true, backendId: "ssh" },
pluginConfig: {},
} as never);
const sandboxExec = tools.find((tool) => tool.name === "sandbox_exec");
const result = await sandboxExec?.execute("call-1", {}, undefined);
expect(result?.content).toEqual([
{
type: "text",
text: "Command still running (session exec-1, pid 123). Use sandbox_process (list/poll/log/write/send-keys/submit/paste/kill/clear/remove) for follow-up.",
},
]);
});
// Filters a mixed tool list, starts a thread with the result, and checks the dynamicTools
// sent with thread/start: message and web_search must be present and none of the other
// offered names may appear.
it("starts Codex threads without duplicate OpenClaw workspace tools by default", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const appServer = createThreadLifecycleAppServerOptions();
// Any method other than thread/start fails the test immediately.
const request = vi.fn(async (method: string, _params: unknown) => {
if (method === "thread/start") {
return threadStartResult();
}
throw new Error(`unexpected method: ${method}`);
});
const dynamicTools = testing.filterCodexDynamicTools(
[
"read",
"write",
"edit",
"apply_patch",
"exec",
"process",
"update_plan",
"tool_call",
"tool_describe",
"tool_search",
"tool_search_code",
"web_search",
"message",
].map(createNamedDynamicTool),
{},
);
await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools,
appServer,
});
const startRequest = request.mock.calls.find(([method]) => method === "thread/start");
const dynamicToolNames = (
(startRequest?.[1] as { dynamicTools?: Array<{ name: string }> } | undefined)?.dynamicTools ??
[]
).map((tool) => tool.name);
expect(dynamicToolNames).toContain("message");
expect(dynamicToolNames).toContain("web_search");
for (const toolName of [
"read",
"write",
"edit",
"apply_patch",
"exec",
"process",
"update_plan",
"tool_call",
"tool_describe",
"tool_search",
"tool_search_code",
]) {
expect(dynamicToolNames).not.toContain(toolName);
}
});
// The mcp_servers config handed to startOrResumeThread must reach the thread/start request
// (alongside the code-mode feature flags), and the MCP fingerprint must be saved in the binding.
it("passes MCP server config through to Codex thread/start", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const request = vi.fn(async (method: string, _params: unknown) => {
if (method === "thread/start") {
return threadStartResult();
}
throw new Error(`unexpected method: ${method}`);
});
await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools: [],
appServer: createThreadLifecycleAppServerOptions(),
config: {
mcp_servers: {
search: {
url: "https://mcp.example.com/mcp",
},
},
},
mcpServersFingerprint: "mcp-v1",
mcpServersFingerprintEvaluated: true,
});
const startRequest = request.mock.calls.find(([method]) => method === "thread/start");
expect((startRequest?.[1] as { config?: unknown } | undefined)?.config).toMatchObject({
mcp_servers: {
search: {
url: "https://mcp.example.com/mcp",
},
},
"features.code_mode": true,
"features.code_mode_only": false,
});
// Reads the binding back from disk to confirm the fingerprint was persisted.
const binding = await readCodexAppServerBinding(sessionFile);
expect(binding?.mcpServersFingerprint).toBe("mcp-v1");
});
// An existing binding carries fingerprint "mcp-v1"; calling with "mcp-v2" must issue only
// thread/start (no resume) and return a binding for the new thread with the new fingerprint.
it("starts a new Codex thread when the MCP server fingerprint changes", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
await writeCodexAppServerBinding(sessionFile, {
threadId: "old-thread",
cwd: workspaceDir,
dynamicToolsFingerprint: JSON.stringify([]),
mcpServersFingerprint: "mcp-v1",
});
// thread/resume is not handled, so a resume attempt would throw and fail the test.
const request = vi.fn(async (method: string, _params: unknown) => {
if (method === "thread/start") {
return threadStartResult("new-thread");
}
throw new Error(`unexpected method: ${method}`);
});
const binding = await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools: [],
appServer: createThreadLifecycleAppServerOptions(),
mcpServersFingerprint: "mcp-v2",
mcpServersFingerprintEvaluated: true,
});
expect(request.mock.calls.map(([method]) => method)).toEqual(["thread/start"]);
expect(binding.threadId).toBe("new-thread");
expect(binding.mcpServersFingerprint).toBe("mcp-v2");
});
// An existing binding carries fingerprint "mcp-v1"; calling with the fingerprint marked as
// evaluated but not provided must start a new thread and leave no fingerprint on the
// returned or persisted binding.
it("starts a no-MCP Codex thread when MCP config is evaluated empty", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
await writeCodexAppServerBinding(sessionFile, {
threadId: "old-thread",
cwd: workspaceDir,
dynamicToolsFingerprint: JSON.stringify([]),
mcpServersFingerprint: "mcp-v1",
});
const request = vi.fn(async (method: string, _params: unknown) => {
if (method === "thread/start") {
return threadStartResult("new-thread");
}
throw new Error(`unexpected method: ${method}`);
});
const binding = await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools: [],
appServer: createThreadLifecycleAppServerOptions(),
mcpServersFingerprintEvaluated: true,
});
expect(request.mock.calls.map(([method]) => method)).toEqual(["thread/start"]);
expect(binding.threadId).toBe("new-thread");
expect(binding.mcpServersFingerprint).toBeUndefined();
expect((await readCodexAppServerBinding(sessionFile))?.mcpServersFingerprint).toBeUndefined();
});
// Captures the options given to the coding-tools factory and checks that the factory is
// called once with the very same authProfileStore object that was set on params.
it("passes auth profiles into Codex dynamic tool construction", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
const authProfileStore = {
version: 1,
profiles: {
"openai:api-key-backup": {
provider: "openai",
type: "api_key",
key: "not-a-real-key",
},
},
} satisfies EmbeddedRunAttemptParams["authProfileStore"];
params.disableTools = false;
params.authProfileStore = authProfileStore;
params.runtimePlan = createCodexRuntimePlanFixture();
const factoryOptions: unknown[] = [];
// The factory returns no tools; only the options it receives matter here.
testing.setOpenClawCodingToolsFactoryForTests((options) => {
factoryOptions.push(options);
return [];
});
await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey: params.sessionKey!,
sandbox: null as never,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(factoryOptions).toHaveLength(1);
// toBe checks identity, not structural equality.
expect((factoryOptions[0] as { authProfileStore?: unknown }).authProfileStore).toBe(
authProfileStore,
);
});
// When params carries both authProfileStore and toolAuthProfileStore, the coding-tools
// factory is expected to receive the tool store (by identity) as its authProfileStore.
it("uses the tool auth profile store for Codex dynamic tool construction", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const params = createParams(sessionFile, workspaceDir);
const transportAuthProfileStore = {
version: 1,
profiles: {
"openai-codex:work": {
provider: "openai-codex",
type: "oauth",
access: "transport-token",
refresh: "transport-refresh",
expires: Date.now() + 60_000,
},
},
} satisfies EmbeddedRunAttemptParams["authProfileStore"];
// Same Codex profile as the transport store, plus an extra xai profile.
const toolAuthProfileStore = {
version: 1,
profiles: {
"openai-codex:work": {
provider: "openai-codex",
type: "oauth",
access: "transport-token",
refresh: "transport-refresh",
expires: Date.now() + 60_000,
},
"xai:work": {
provider: "xai",
type: "oauth",
access: "xai-token",
refresh: "xai-refresh",
expires: Date.now() + 60_000,
},
},
} satisfies EmbeddedRunAttemptParams["authProfileStore"];
params.disableTools = false;
params.authProfileStore = transportAuthProfileStore;
params.toolAuthProfileStore = toolAuthProfileStore;
params.runtimePlan = createCodexRuntimePlanFixture();
const factoryOptions: unknown[] = [];
testing.setOpenClawCodingToolsFactoryForTests((options) => {
factoryOptions.push(options);
return [];
});
await testing.buildDynamicTools({
params,
resolvedWorkspace: workspaceDir,
effectiveWorkspace: workspaceDir,
sandboxSessionKey: params.sessionKey!,
sandbox: null as never,
runAbortController: new AbortController(),
sessionAgentId: "main",
pluginConfig: {},
onYieldDetected: () => undefined,
});
expect(factoryOptions).toHaveLength(1);
expect((factoryOptions[0] as { authProfileStore?: unknown }).authProfileStore).toBe(
toolAuthProfileStore,
);
});
// A run configured with provider "openai" and the "openai-responses" API must surface those
// values as modelProvider/modelApi in the factory options.
it("keeps canonical OpenAI Codex runs on OpenAI dynamic tool policy", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  const openAiModel = {
    ...createCodexTestModel("openai"),
    id: "gpt-5.5",
    name: "gpt-5.5",
    api: "openai-responses",
  } as EmbeddedRunAttemptParams["model"];
  const baseRuntimePlan = createCodexRuntimePlanFixture();

  params.disableTools = false;
  params.provider = "openai";
  params.modelId = "gpt-5.5";
  params.model = openAiModel;
  params.runtimePlan = {
    ...baseRuntimePlan,
    observability: {
      resolvedRef: "openai/gpt-5.5",
      provider: "openai",
      modelId: "gpt-5.5",
      harnessId: "codex",
    },
  };

  const capturedOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    capturedOptions.push(options);
    return [];
  });

  await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: params.sessionKey!,
    sandbox: null as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  expect(capturedOptions).toHaveLength(1);
  const firstOptions = capturedOptions[0] as { modelProvider?: unknown; modelApi?: unknown };
  expect(firstOptions.modelProvider).toBe("openai");
  expect(firstOptions.modelApi).toBe("openai-responses");
});
// With the private-QA env flags stubbed, the factory options must carry
// allowGatewaySubagentBinding=true and the factory's tool must be returned.
it("enables gateway subagent binding for forced private QA Codex runs", async () => {
  vi.stubEnv("OPENCLAW_BUILD_PRIVATE_QA", "1");
  vi.stubEnv("OPENCLAW_QA_FORCE_RUNTIME", "codex");

  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();

  const capturedOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    capturedOptions.push(options);
    return [createRuntimeDynamicTool("sessions_spawn")];
  });

  const builtTools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: params.sessionKey!,
    sandbox: null as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  expect(capturedOptions).toHaveLength(1);
  const { allowGatewaySubagentBinding } = capturedOptions[0] as {
    allowGatewaySubagentBinding?: unknown;
  };
  expect(allowGatewaySubagentBinding).toBe(true);
  const builtToolNames = builtTools.map((tool) => tool.name);
  expect(builtToolNames).toEqual(["sessions_spawn"]);
});
// Allowlist entries with odd casing, padding, or a dash must still select the expected tools;
// "message" is not allowlisted and must be dropped.
it("normalizes Codex dynamic toolsAllow entries before filtering", () => {
  const candidateNames = ["exec", "sandbox_exec", "sandbox_process", "apply_patch", "read", "message"];
  const tools = candidateNames.map((name) => ({ name }));
  const kept = testing.filterCodexDynamicToolsForAllowlist(tools, [
    " BASH ",
    "apply-patch",
    "READ",
  ]);
  const keptNames = kept.map((tool) => tool.name);
  expect(keptNames).toEqual(["exec", "sandbox_exec", "sandbox_process", "apply_patch", "read"]);
});
// An explicit empty allowlist filters out every tool.
it("treats an explicit empty Codex dynamic toolsAllow as no tools", () => {
  const tools = [{ name: "message" }, { name: "web_search" }];
  const kept = testing.filterCodexDynamicToolsForAllowlist(tools, []);
  expect(kept).toEqual([]);
});
// A (whitespace-padded) "*" allowlist entry keeps every tool.
it("treats wildcard Codex dynamic toolsAllow as unrestricted", () => {
  const tools = [{ name: "message" }, { name: "web_search" }];
  const kept = testing.filterCodexDynamicToolsForAllowlist(tools, [" * "]);
  expect(kept).toEqual(tools);
});
// The native tool surface stays enabled with no allowlist or a wildcard, and is disabled
// for an empty or specific allowlist.
it("disables Codex native tool surfaces for restricted runtime allowlists", () => {
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;

  // Baseline: toolsAllow left exactly as createParams produced it.
  expect(testing.shouldEnableCodexAppServerNativeToolSurface(params)).toBe(true);

  const allowlistCases: Array<[string[], boolean]> = [
    [["*"], true],
    [[], false],
    [["message"], false],
  ];
  for (const [toolsAllow, expectedEnabled] of allowlistCases) {
    params.toolsAllow = toolsAllow;
    expect(testing.shouldEnableCodexAppServerNativeToolSurface(params)).toBe(expectedEnabled);
  }
});
// Docker sandbox binds: a bind whose container path differs from the host path disables the
// native surface, an identical host/container bind keeps it on, and adding a read-only
// nested bind disables it again.
it("disables Codex native tool surfaces when Docker bind targets need container paths", () => {
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;

  // Evaluates the predicate for an enabled docker sandbox with the given bind specs.
  const nativeSurfaceEnabledFor = (binds: string[]) =>
    testing.shouldEnableCodexAppServerNativeToolSurface(params, {
      enabled: true,
      backendId: "docker",
      docker: { binds },
    } as never);

  expect(nativeSurfaceEnabledFor(["/tmp/openclaw-data:/data:rw"])).toBe(false);
  expect(nativeSurfaceEnabledFor(["/tmp/openclaw-data:/tmp/openclaw-data:rw"])).toBe(true);
  expect(
    nativeSurfaceEnabledFor([
      "/tmp/openclaw-data:/tmp/openclaw-data:rw",
      "/tmp/openclaw-data/secrets:/tmp/openclaw-data/secrets:ro",
    ]),
  ).toBe(false);
});
// shouldForceMessageTool is true only for message_tool_only delivery with the message tool
// not disabled.
it("forces the message dynamic tool for message-tool-only source replies", () => {
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const isForced = () => testing.shouldForceMessageTool(params);

  params.sourceReplyDeliveryMode = "message_tool_only";
  expect(isForced()).toBe(true);

  // Disabling the message tool overrides the delivery mode.
  params.disableMessageTool = true;
  expect(isForced()).toBe(false);

  // Automatic delivery never forces the tool.
  params.disableMessageTool = false;
  params.sourceReplyDeliveryMode = "automatic";
  expect(isForced()).toBe(false);
});
// The "use the `message` tool" guidance must appear only when delivery is message_tool_only
// AND a message dynamic tool is present.
it("scopes Codex developer reply instructions to message-tool-only delivery", () => {
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.sourceReplyDeliveryMode = "message_tool_only";

  const withMessageTool = testing.buildDeveloperInstructions(params, {
    dynamicTools: [createMessageDynamicTool("Message test tool")],
  });
  expect(withMessageTool).toContain("To send a visible message, use the `message` tool.");

  const withoutMessageToolInstructions = testing.buildDeveloperInstructions(params, {
    dynamicTools: [],
  });
  expect(withoutMessageToolInstructions).toContain("active Codex delivery path");
  expect(withoutMessageToolInstructions).not.toContain("use the `message` tool");

  params.sourceReplyDeliveryMode = "automatic";
  const automaticInstructions = testing.buildDeveloperInstructions(params);
  expect(automaticInstructions).toContain("active Codex delivery path");
  expect(automaticInstructions).not.toContain("use the `message` tool");
});
// Of the four guidance entries registered below, only the one scoped to the
// "codex_app_server" surface may appear in the developer instructions.
it("includes Codex app-server scoped plugin command guidance in developer instructions", () => {
  registerPluginCommand("demo-plugin", {
    name: "codex_demo",
    description: "Codex demo command",
    agentPromptGuidance: [
      "Legacy global command guidance.",
      { text: "Codex app-server command guidance.", surfaces: ["codex_app_server"] },
      { text: "Unscoped structured command guidance." },
      { text: "PI main command guidance.", surfaces: ["pi_main"] },
    ],
    handler: async () => ({ text: "ok" }),
  });

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const instructions = testing.buildDeveloperInstructions(params);

  expect(instructions).toContain("Codex app-server command guidance.");
  const excludedGuidance = [
    "Legacy global command guidance.",
    "Unscoped structured command guidance.",
    "PI main command guidance.",
  ];
  for (const guidance of excludedGuidance) {
    expect(instructions).not.toContain(guidance);
  }
});
// Asserts that the skills snapshot prompt is absent from the thread/start developer
// instructions and instead appears in the turn/start input text, and that the llm_input hook,
// the trajectory file, and the system prompt report all reflect that same input.
it("keeps OpenClaw skills out of Codex developer instructions", async () => {
const llmInput = vi.fn();
initializeGlobalHookRunner(
createMockPluginRegistry([{ hookName: "llm_input", handler: llmInput }]),
);
// Turn on trajectory recording and point it at a directory under tempDir.
vi.stubEnv("OPENCLAW_TRAJECTORY", "1");
vi.stubEnv("OPENCLAW_TRAJECTORY_DIR", path.join(tempDir, "trajectory"));
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const harness = createStartedThreadHarness();
const params = createParams(sessionFile, workspaceDir);
params.skillsSnapshot = {
prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
skills: [],
};
const run = runCodexAppServerAttempt(params);
await harness.waitForMethod("turn/start");
// Yield one macrotask before completing the turn (presumably to let post-turn/start work
// settle — confirm against the harness).
await new Promise<void>((resolve) => setImmediate(resolve));
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
const threadStart = harness.requests.find((request) => request.method === "thread/start");
const threadStartParams = threadStart?.params as { developerInstructions?: string };
expect(threadStartParams.developerInstructions).not.toContain("<available_skills>");
const turnStart = harness.requests.find((request) => request.method === "turn/start");
const turnStartParams = turnStart?.params as {
input?: Array<{ text?: string }>;
};
// The first input item's text is the prompt under test; a missing item falls back to "".
const inputText = turnStartParams.input?.[0]?.text ?? "";
expect(inputText).toContain("## OpenClaw Skills");
expect(inputText).toContain("<available_skills>");
expect(inputText).toContain("Current user request:\nhello");
const [llmInputPayload] = mockCall(llmInput, "llm_input") as [{ prompt?: string }, unknown];
expect(llmInputPayload.prompt).toBe(inputText);
// The trajectory file is JSON Lines: parse each line into an event record.
const trajectoryEvents = (
await fs.readFile(path.join(tempDir, "trajectory", "session-1.jsonl"), "utf8")
)
.trim()
.split("\n")
.map((line) => JSON.parse(line) as { data?: { prompt?: string }; type?: string });
expect(trajectoryEvents.find((event) => event.type === "context.compiled")?.data?.prompt).toBe(
inputText,
);
expect(trajectoryEvents.find((event) => event.type === "prompt.submitted")?.data?.prompt).toBe(
inputText,
);
// The report counts the skills prompt characters and one entry for the "demo" skill block.
expect(result.systemPromptReport?.skills.promptChars).toBe(params.skillsSnapshot.prompt.length);
expect(result.systemPromptReport?.skills.entries).toEqual([
{ name: "demo", blockChars: "<skill><name>demo</name></skill>".length },
]);
});
// With message_tool_only delivery, "message" must be advertised on thread/start even though
// the allowlist names only "music_generate".
it("keeps forced message dynamic tool when toolsAllow omits it", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("music_generate"),
  ]);
  // Answer turn/start after a short real-timer delay; leave every other method to the harness.
  const harness = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    await new Promise((resolve) => setTimeout(resolve, 5));
    return turnStartResult();
  });

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  params.toolsAllow = ["music_generate"];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { dynamicTools?: Array<{ name?: string }> }
    | undefined;
  const dynamicToolNames = startParams?.dynamicTools?.map((tool) => tool.name) ?? [];
  expect(dynamicToolNames).toContain("message");
  expect(dynamicToolNames).toContain("music_generate");
});
// With message_tool_only delivery and an empty allowlist, "message" must be the only
// dynamic tool advertised on thread/start.
it("keeps forced message dynamic tool when toolsAllow is empty", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("music_generate"),
  ]);
  const harness = createStartedThreadHarness();

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  params.toolsAllow = [];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { dynamicTools?: Array<{ name?: string }> }
    | undefined;
  const dynamicToolNames = startParams?.dynamicTools?.map((tool) => tool.name) ?? [];
  expect(dynamicToolNames).toEqual(["message"]);
});
// With codeModeOnly enabled, "message" and "sessions_yield" carry no namespace/deferLoading,
// the remaining tools are namespaced and deferred, and both code-mode feature flags are true.
it("keeps searchable OpenClaw dynamic tools when code-mode-only is enabled", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("web_search"),
    createRuntimeDynamicTool("heartbeat_respond"),
    createRuntimeDynamicTool("sessions_spawn"),
    createRuntimeDynamicTool("sessions_yield"),
  ]);
  const harness = createStartedThreadHarness();

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo", codeModeOnly: true } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { dynamicTools?: Array<Record<string, unknown>>; config?: Record<string, unknown> }
    | undefined;
  const dynamicTools = startParams?.dynamicTools ?? [];
  const startConfig = startParams?.config;
  const toolNamed = (name: string) => dynamicTools.find((tool) => tool.name === name);

  // Tools that must be sent without namespace/deferLoading.
  for (const name of ["message", "sessions_yield"]) {
    const tool = toolNamed(name);
    expect(tool).not.toHaveProperty("namespace");
    expect(tool).not.toHaveProperty("deferLoading");
  }
  // Tools that must be namespaced and marked for deferred loading.
  for (const name of ["web_search", "heartbeat_respond", "sessions_spawn"]) {
    const tool = toolNamed(name);
    expect(tool?.namespace).toBe(CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE);
    expect(tool?.deferLoading).toBe(true);
  }

  expect(startConfig?.["features.code_mode"]).toBe(true);
  expect(startConfig?.["features.code_mode_only"]).toBe(true);
});
// With an empty runtime toolsAllow and a configured Codex plugin, thread/start must carry no
// dynamic tools or environments, both code-mode features off, a fully disabled `_default`
// app entry, and app/list must never be requested.
it("disables Codex native tool surfaces when runtime toolsAllow is empty", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [
createRuntimeDynamicTool("message"),
createRuntimeDynamicTool("web_search"),
]);
// Make any app/list request throw so an unexpected inventory lookup fails the run.
const harness = createStartedThreadHarness(async (method) => {
if (method === "app/list") {
throw new Error("app/list should not run when runtime toolsAllow is empty.");
}
return undefined;
});
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
params.toolsAllow = [];
params.extraSystemPrompt = "Tool and file actions are disabled for this sender by chat policy.";
const run = runCodexAppServerAttempt(params, {
pluginConfig: {
appServer: { mode: "yolo" },
codexPlugins: {
enabled: true,
plugins: {
"google-calendar": {
marketplaceName: "openai-curated",
pluginName: "google-calendar",
},
},
},
},
});
await harness.waitForMethod("turn/start", 120_000);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
await run;
const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
// Narrow the recorded thread/start params to just the fields asserted below.
const startParams = startRequest?.params as
| {
dynamicTools?: Array<{ name?: string }>;
environments?: unknown[];
developerInstructions?: string;
config?: {
"features.code_mode"?: boolean;
"features.code_mode_only"?: boolean;
apps?: Record<
string,
{ enabled?: boolean; destructive_enabled?: boolean; open_world_enabled?: boolean }
>;
};
}
| undefined;
expect(startParams?.dynamicTools).toEqual([]);
expect(startParams?.environments).toEqual([]);
// The extra system prompt must still reach the developer instructions.
expect(startParams?.developerInstructions).toContain(
"Tool and file actions are disabled for this sender by chat policy.",
);
expect(startParams?.config?.["features.code_mode"]).toBe(false);
expect(startParams?.config?.["features.code_mode_only"]).toBe(false);
expect(startParams?.config?.apps?.["_default"]).toEqual({
enabled: false,
destructive_enabled: false,
open_world_enabled: false,
});
// No per-app enablement is written for the configured plugin.
expect(startParams?.config?.apps?.["google-calendar-app"]?.enabled).toBeUndefined();
expect(harness.requests.map((entry) => entry.method)).not.toContain("app/list");
});
// Even without any codexPlugins config, an empty toolsAllow must produce a fully disabled
// `_default` app entry and must not trigger an app/list request.
it("fails closed for Codex app defaults when restricted native tools have no plugin config", async () => {
  type AppToggles = {
    enabled?: boolean;
    destructive_enabled?: boolean;
    open_world_enabled?: boolean;
  };

  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("message")]);
  // Make any app/list request throw so an unexpected inventory lookup fails the run.
  const harness = createStartedThreadHarness(async (method) => {
    if (method === "app/list") {
      throw new Error("app/list should not run when runtime toolsAllow is empty.");
    }
    return undefined;
  });

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.toolsAllow = [];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { config?: { apps?: Record<string, AppToggles> } }
    | undefined;
  const defaultApp = startParams?.config?.apps?.["_default"];
  expect(defaultApp).toEqual({
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  });

  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).not.toContain("app/list");
});
// The system prompt report must count schema characters for "message" but zero for
// "web_search", so the total equals the "message" schema size.
it("returns a run context report without deferred Codex dynamic tool schemas", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("web_search"),
  ]);
  const harness = createStartedThreadHarness();

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  params.toolsAllow = ["message", "web_search"];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const { systemPromptReport: report } = await run;

  expect(report?.source).toBe("run");
  expect(report?.provider).toBe("codex");
  expect(report?.model).toBe("gpt-5.4-codex");
  expect(report?.systemPrompt.chars).toBeGreaterThan(0);

  const messageEntry = report?.tools.entries.find((tool) => tool.name === "message");
  const webSearchEntry = report?.tools.entries.find((tool) => tool.name === "web_search");
  expect(messageEntry?.schemaChars).toBeGreaterThan(0);
  expect(webSearchEntry?.schemaChars).toBe(0);
  expect(report?.tools.schemaChars).toBe(messageEntry?.schemaChars);
});
// Drives a namespaced dynamic tool call through the harness in "searchable" loading mode and
// asserts that the mirrored transcript snapshot records it as a plain toolCall/toolResult
// pair under the tool's own name, with no "tool_search" or "function_call_output" text.
it("keeps searchable Codex dynamic tools canonical in mirrored transcript snapshots", async () => {
testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("wiki_status")]);
const harness = createStartedThreadHarness();
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.disableTools = false;
params.runtimePlan = createCodexRuntimePlanFixture();
params.toolsAllow = ["wiki_status"];
const run = runCodexAppServerAttempt(params, {
pluginConfig: {
codexDynamicToolsLoading: "searchable",
appServer: { mode: "yolo" },
},
});
await harness.waitForMethod("turn/start", 120_000);
// Simulate the app server invoking the tool under the OpenClaw dynamic tool namespace.
const toolResult = (await harness.handleServerRequest({
id: "request-tool-wiki-status",
method: "item/tool/call",
params: {
threadId: "thread-1",
turnId: "turn-1",
callId: "call-wiki-status-1",
namespace: CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE,
tool: "wiki_status",
arguments: { topic: "README.md" },
},
})) as {
contentItems?: Array<{ text?: string; type?: string }>;
success?: boolean;
};
expect(toolResult).toEqual({
success: true,
contentItems: [{ type: "inputText", text: "wiki_status done" }],
});
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
// The snapshot must contain exactly the user prompt, the assistant tool call, and its result.
expect(result.messagesSnapshot.map((message) => message.role)).toEqual([
"user",
"assistant",
"toolResult",
]);
const assistantMessage = result.messagesSnapshot[1];
// Explicit role check so the content assertions below are type-narrowed.
if (assistantMessage?.role !== "assistant") {
throw new Error("expected mirrored assistant tool-call message");
}
expect(assistantMessage.content).toStrictEqual([
{
type: "toolCall",
id: "call-wiki-status-1",
name: "wiki_status",
arguments: { topic: "README.md" },
input: { topic: "README.md" },
},
]);
const toolResultMessage = result.messagesSnapshot[2];
if (toolResultMessage?.role !== "toolResult") {
throw new Error("expected mirrored tool-result message");
}
expect(toolResultMessage.toolCallId).toBe("call-wiki-status-1");
expect(toolResultMessage.toolName).toBe("wiki_status");
expect(toolResultMessage.isError).toBe(false);
// The mirrored result block repeats the call id and tool name under several key spellings.
expect(toolResultMessage.content).toStrictEqual([
{
type: "toolResult",
id: "call-wiki-status-1",
name: "wiki_status",
toolName: "wiki_status",
toolCallId: "call-wiki-status-1",
toolUseId: "call-wiki-status-1",
tool_use_id: "call-wiki-status-1",
content: "wiki_status done",
text: "wiki_status done",
},
]);
expect(JSON.stringify(result.messagesSnapshot)).not.toContain("tool_search");
expect(JSON.stringify(result.messagesSnapshot)).not.toContain("function_call_output");
});
// When the sandbox session key differs from the run's session key, the sandbox key becomes
// sessionKey and the run key is reported as runSessionKey; when they match, runSessionKey
// is undefined.
it("passes the live run session key to Codex dynamic tools when sandbox policy uses another key", () => {
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.sessionKey = "agent:main:main";

  const withDifferentSandboxKey = testing.resolveOpenClawCodingToolsSessionKeys(
    params,
    "agent:main:telegram:default:direct:1234",
  );
  expect(withDifferentSandboxKey).toEqual({
    sessionKey: "agent:main:telegram:default:direct:1234",
    runSessionKey: "agent:main:main",
  });

  const withSameSandboxKey = testing.resolveOpenClawCodingToolsSessionKeys(
    params,
    "agent:main:main",
  );
  expect(withSameSandboxKey).toEqual({
    sessionKey: "agent:main:main",
    runSessionKey: undefined,
  });
});
// A timeoutMs argument larger than the default bridge timeout is returned unchanged.
it("keeps explicit dynamic tool timeouts above the default bridge deadline", () => {
  const requestedTimeoutMs = testing.CODEX_DYNAMIC_TOOL_TIMEOUT_MS + 1_000;
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-long",
      namespace: null,
      tool: "image_generate",
      arguments: { prompt: "cat", timeoutMs: requestedTimeoutMs },
    },
    config: undefined,
  });
  expect(resolvedTimeoutMs).toBe(requestedTimeoutMs);
});
// Without a per-call timeout, image_generate uses the configured
// agents.defaults.imageGenerationModel.timeoutMs.
it("uses configured image generation timeouts for Codex dynamic tool calls", () => {
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-generate-default",
      namespace: null,
      tool: "image_generate",
      arguments: { prompt: "cat" },
    },
    config: {
      agents: {
        defaults: {
          imageGenerationModel: { primary: "openai/gpt-image-1", timeoutMs: 180_000 },
        },
      },
    },
  });
  expect(resolvedTimeoutMs).toBe(180_000);
});
// With neither a per-call timeout nor config, image_generate resolves to 120_000 ms.
it("uses a 120 second default for Codex image generation dynamic tool calls", () => {
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-generate-default",
      namespace: null,
      tool: "image_generate",
      arguments: { prompt: "cat" },
    },
    config: undefined,
  });
  expect(resolvedTimeoutMs).toBe(120_000);
});
// The "image" tool takes its timeout from tools.media.image.timeoutSeconds (180 s -> 180_000 ms).
it("uses the media image timeout for Codex image dynamic tool calls", () => {
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-default",
      namespace: null,
      tool: "image",
      arguments: { prompt: "describe", images: ["/tmp/one.jpg"] },
    },
    config: {
      tools: {
        media: {
          image: { timeoutSeconds: 180 },
        },
      },
    },
  });
  expect(resolvedTimeoutMs).toBe(180_000);
});
// Without config, the "image" tool resolves to CODEX_DYNAMIC_IMAGE_TOOL_TIMEOUT_MS.
it("keeps Codex image dynamic tool calls above the default bridge deadline", () => {
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-default",
      namespace: null,
      tool: "image",
      arguments: { prompt: "describe", images: ["/tmp/one.jpg"] },
    },
    config: undefined,
  });
  expect(resolvedTimeoutMs).toBe(testing.CODEX_DYNAMIC_IMAGE_TOOL_TIMEOUT_MS);
});
// A per-call timeoutMs above CODEX_DYNAMIC_TOOL_MAX_TIMEOUT_MS is clamped to that maximum.
it("caps dynamic tool timeouts at the bridge maximum", () => {
  const maxTimeoutMs = testing.CODEX_DYNAMIC_TOOL_MAX_TIMEOUT_MS;
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-too-long",
      namespace: null,
      tool: "image_generate",
      arguments: {
        prompt: "cat",
        timeoutMs: maxTimeoutMs + 1_000,
      },
    },
    config: undefined,
  });
  expect(resolvedTimeoutMs).toBe(maxTimeoutMs);
});
// A tool bridge call that never settles must resolve to a failed response once the timeout
// elapses, abort the signal handed to the bridge, and invoke onTimeout exactly once.
it("returns a failed dynamic tool response when an app-server tool call exceeds the deadline", async () => {
  vi.useFakeTimers();
  // Restore real timers in `finally` so a failing assertion cannot leak fake timers into
  // later tests.
  try {
    let capturedSignal: AbortSignal | undefined;
    const onTimeout = vi.fn();
    const response = testing.handleDynamicToolCallWithTimeout({
      call: {
        threadId: "thread-1",
        turnId: "turn-1",
        callId: "call-timeout",
        namespace: null,
        tool: "message",
        arguments: { action: "send", text: "hello" },
      },
      toolBridge: {
        // Capture the signal passed to the bridge and never settle, forcing the timeout path.
        handleToolCall: vi.fn((_call, options) => {
          capturedSignal = options?.signal;
          return new Promise<never>(() => undefined);
        }),
      },
      signal: new AbortController().signal,
      timeoutMs: 1,
      onTimeout,
    });
    await vi.advanceTimersByTimeAsync(1);
    await expect(response).resolves.toEqual({
      success: false,
      contentItems: [
        {
          type: "inputText",
          text: "OpenClaw dynamic tool call timed out after 1ms while running tool message.",
        },
      ],
    });
    expect(capturedSignal?.aborted).toBe(true);
    expect(onTimeout).toHaveBeenCalledTimes(1);
  } finally {
    vi.useRealTimers();
  }
});
// A timed-out `process` poll call must return a failure message naming the process action and
// session, and log a warning with structured process context.
it("logs process poll timeout context separately from session idle", async () => {
  vi.useFakeTimers();
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  // Undo the spy and the fake timers in `finally` so a failing assertion cannot leak either
  // into later tests.
  try {
    const response = testing.handleDynamicToolCallWithTimeout({
      call: {
        threadId: "thread-1",
        turnId: "turn-1",
        callId: "call-timeout",
        namespace: null,
        tool: "process",
        arguments: { action: "poll", sessionId: "rapid-crustacean", timeout: 30_000 },
      },
      toolBridge: {
        // Never settles, forcing the timeout path.
        handleToolCall: vi.fn(() => new Promise<never>(() => undefined)),
      },
      signal: new AbortController().signal,
      timeoutMs: 1,
    });
    await vi.advanceTimersByTimeAsync(1);
    await expect(response).resolves.toEqual({
      success: false,
      contentItems: [
        {
          type: "inputText",
          text: "OpenClaw dynamic tool call timed out after 1ms while waiting for process action=poll sessionId=rapid-crustacean. This is a tool RPC timeout, not a session idle timeout.",
        },
      ],
    });
    expect(warn).toHaveBeenCalledWith("codex dynamic tool call timed out", {
      tool: "process",
      toolCallId: "call-timeout",
      threadId: "thread-1",
      turnId: "turn-1",
      timeoutMs: 1,
      timeoutKind: "codex_dynamic_tool_rpc",
      processAction: "poll",
      processSessionId: "rapid-crustacean",
      processRequestedTimeoutMs: 30_000,
      consoleMessage:
        "codex process tool timeout: action=poll sessionId=rapid-crustacean toolTimeoutMs=1 requestedWaitMs=30000; per-tool-call watchdog, not session idle; repeated lines usually mean process-poll retry churn, not model progress",
    });
  } finally {
    warn.mockRestore();
    vi.useRealTimers();
  }
});
// Sends an item/tool/call for an unregistered tool ("lookup") and asserts the failure is
// reported consistently through per-run agent events, global agent events, execution-phase
// callbacks, and diagnostic events, with the secret-looking argument redacted.
it("emits normalized tool progress around app-server dynamic tool requests", async () => {
const harness = createStartedThreadHarness();
const onRunAgentEvent = vi.fn();
const onExecutionPhase = vi.fn();
const globalAgentEvents: AgentEventPayload[] = [];
const diagnosticEvents: DiagnosticEventPayload[] = [];
// NOTE(review): the return value of onAgentEvent is discarded here, unlike the diagnostic
// subscription below; presumably suite-level cleanup removes the listener — confirm.
onAgentEvent((event) => globalAgentEvents.push(event));
const unsubscribeDiagnostics = onInternalDiagnosticEvent((event) =>
diagnosticEvents.push(event),
);
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.onAgentEvent = onRunAgentEvent;
params.onExecutionPhase = onExecutionPhase;
const run = runCodexAppServerAttempt(params);
await harness.waitForMethod("thread/start");
const toolResult = (await harness.handleServerRequest({
id: "request-tool-1",
method: "item/tool/call",
params: {
threadId: "thread-1",
turnId: "turn-1",
callId: "call-1",
namespace: null,
tool: "lookup",
arguments: {
action: "search",
token: "plain-secret-value-12345",
text: "hello",
},
},
})) as {
contentItems?: Array<{ text?: string; type?: string }>;
success?: boolean;
};
// "lookup" is not a registered tool, so the bridge answers with a failure result.
expect(toolResult.success).toBe(false);
expect(toolResult.contentItems?.[0]?.type).toBe("inputText");
expect(toolResult.contentItems?.[0]?.text).toMatch(/^Unknown OpenClaw tool: lookup$/u);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
await run;
await flushDiagnosticEvents();
unsubscribeDiagnostics();
// Per-run agent events delivered through params.onAgentEvent.
const agentEvents = onRunAgentEvent.mock.calls.map(([event]) => event) as Array<{
data?: {
args?: Record<string, unknown>;
isError?: boolean;
name?: string;
phase?: string;
result?: { success?: boolean };
toolCallId?: string;
};
stream?: string;
}>;
const startEvent = agentEvents.find(
(event) => event.stream === "tool" && event.data?.phase === "start",
);
expect(startEvent?.data?.name).toBe("lookup");
expect(startEvent?.data?.toolCallId).toBe("call-1");
expect(startEvent?.data?.args?.action).toBe("search");
// The token argument is expected in redacted form; the other arguments pass through.
expect(startEvent?.data?.args?.token).toBe("plain-…2345");
expect(startEvent?.data?.args?.text).toBe("hello");
const resultEvent = agentEvents.find(
(event) =>
event.stream === "tool" &&
event.data?.phase === "result" &&
event.data.result !== undefined,
);
expect(resultEvent?.data?.name).toBe("lookup");
expect(resultEvent?.data?.toolCallId).toBe("call-1");
expect(resultEvent?.data?.isError).toBe(true);
expect(resultEvent?.data?.result?.success).toBe(false);
// The raw secret must not appear anywhere in the emitted agent events.
expect(JSON.stringify(agentEvents)).not.toContain("plain-secret-value-12345");
// Global agent events carry the run and session identifiers.
const globalStartEvent = globalAgentEvents.find(
(event) => event.stream === "tool" && event.data.phase === "start",
);
expect(globalStartEvent?.runId).toBe("run-1");
expect(globalStartEvent?.sessionKey).toBe("agent:main:session-1");
expect(globalStartEvent?.data.name).toBe("lookup");
expect(onExecutionPhase).toHaveBeenCalledWith({
phase: "turn_accepted",
provider: "codex",
model: "gpt-5.4-codex",
backend: "codex-app-server",
});
expect(onExecutionPhase).toHaveBeenCalledWith({
phase: "tool_execution_started",
provider: "codex",
model: "gpt-5.4-codex",
backend: "codex-app-server",
tool: "lookup",
toolCallId: "call-1",
});
// Keep only tool.execution.* diagnostics, narrowing the union type for the mapping below.
const toolDiagnosticEvents = diagnosticEvents.filter(
(
event,
): event is Extract<
DiagnosticEventPayload,
{ type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
> => event.type.startsWith("tool.execution."),
);
// Exactly one started event followed by one error event for the failed call.
expect(
toolDiagnosticEvents.map((event) => ({
type: event.type,
toolName: event.toolName,
toolCallId: event.toolCallId,
})),
).toEqual([
{
type: "tool.execution.started",
toolName: "lookup",
toolCallId: "call-1",
},
{
type: "tool.execution.error",
toolName: "lookup",
toolCallId: "call-1",
},
]);
// No tool execution may remain marked active once the run has finished.
expect(activeDiagnosticToolKeys(diagnosticEvents)).toEqual(new Set());
});
it("clears dynamic tool diagnostics after successful app-server tool responses", async () => {
  // A successful dynamic "echo" call must produce exactly one "started" diagnostic,
  // finish on "completed", and leave `activeDiagnosticToolKeys` reporting an empty set
  // before the later item/completed notification is delivered.
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  const echoCallId = "call-echo-1";

  const threadHarness = createStartedThreadHarness();
  const captured: DiagnosticEventPayload[] = [];
  const stopCapturing = onInternalDiagnosticEvent((payload) => captured.push(payload));
  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("echo")]);
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  // Drive the tool call through the app-server request handler.
  const response = (await threadHarness.handleServerRequest({
    id: "request-echo-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: echoCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  expect(response.success).toBe(true);
  expect(response.contentItems?.[0]).toEqual({
    type: "inputText",
    text: "echo done",
  });

  // Stop listening before the turn finishes so only request-time diagnostics are checked.
  await flushDiagnosticEvents();
  stopCapturing();
  const summaries = captured
    .filter((payload): payload is ToolExecutionDiagnostic =>
      payload.type.startsWith("tool.execution."),
    )
    .map(({ type, toolName, toolCallId }) => ({ type, toolName, toolCallId }));
  expect(summaries).toContainEqual({
    type: "tool.execution.started",
    toolName: "echo",
    toolCallId: echoCallId,
  });
  expect(summaries.at(-1)).toEqual({
    type: "tool.execution.completed",
    toolName: "echo",
    toolCallId: echoCallId,
  });
  expect(summaries.filter((summary) => summary.type === "tool.execution.started")).toHaveLength(1);
  expect(activeDiagnosticToolKeys(captured)).toEqual(new Set());

  // Deliver the item completion and finish the turn so the attempt can settle.
  await threadHarness.notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      completedAtMs: Date.now(),
      item: {
        type: "dynamicToolCall",
        id: echoCallId,
        namespace: null,
        tool: "echo",
        arguments: {},
        status: "completed",
        contentItems: [{ type: "inputText", text: "echo done" }],
        success: true,
        durationMs: 1,
      },
    },
  });
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
});
it("emits request-boundary terminal diagnostics when a wrapped dynamic tool does not", async () => {
  // The tool's first execution emits a "completed" diagnostic under run id "other-run".
  // The expected sequence still ends with a "completed" event attributed to "run-1".
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  const callId = "call-echo-unobserved-terminal";

  const threadHarness = createStartedThreadHarness();
  const captured: DiagnosticEventPayload[] = [];
  const stopCapturing = onInternalDiagnosticEvent((payload) => captured.push(payload));

  const echoTool = {
    name: "echo",
    description: "echo test tool",
    parameters: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
    execute: vi.fn(async () => ({
      content: [{ type: "text" as const, text: "echo done" }],
      details: {},
    })),
  };
  // Only the first invocation emits the foreign-run terminal diagnostic.
  echoTool.execute.mockImplementationOnce(async () => {
    emitTrustedDiagnosticEvent({
      type: "tool.execution.completed",
      runId: "other-run",
      sessionId: "session-1",
      sessionKey: "agent:main:session-1",
      toolName: "echo",
      toolCallId: callId,
      durationMs: 1,
    });
    return {
      content: [{ type: "text" as const, text: "echo done" }],
      details: {},
    };
  });
  // Spread the hook-wrapped tool but restore the raw `execute` on top of it.
  const wrappedEchoTool = {
    ...wrapToolWithBeforeToolCallHook(echoTool as never),
    execute: echoTool.execute,
  };
  testing.setOpenClawCodingToolsFactoryForTests(() => [wrappedEchoTool as never]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  const response = (await threadHarness.handleServerRequest({
    id: "request-echo-unobserved-terminal-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  expect(response.success).toBe(true);

  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  await flushDiagnosticEvents();
  stopCapturing();

  const observed = captured
    .filter((payload): payload is ToolExecutionDiagnostic =>
      payload.type.startsWith("tool.execution."),
    )
    .map(({ runId, type, toolName, toolCallId }) => ({ runId, type, toolName, toolCallId }));
  expect(observed).toEqual([
    {
      runId: "run-1",
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: callId,
    },
    {
      runId: "other-run",
      type: "tool.execution.completed",
      toolName: "echo",
      toolCallId: callId,
    },
    {
      runId: "run-1",
      type: "tool.execution.completed",
      toolName: "echo",
      toolCallId: callId,
    },
  ]);
});
it("does not duplicate terminal diagnostics for wrapped dynamic tool blocks", async () => {
  // A before_tool_call hook blocks the call: the tool body must never run and the
  // diagnostics must be exactly "started" followed by one "blocked" event.
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    {
      type:
        | "tool.execution.blocked"
        | "tool.execution.started"
        | "tool.execution.completed"
        | "tool.execution.error";
    }
  >;
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  const blockedCallId = "call-echo-blocked";

  const threadHarness = createStartedThreadHarness();
  const captured: DiagnosticEventPayload[] = [];
  const stopCapturing = onInternalDiagnosticEvent((payload) => captured.push(payload));

  const blockingHook = vi.fn(async () => ({
    block: true,
    blockReason: "blocked by policy",
  }));
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "before_tool_call", handler: blockingHook }]),
  );
  const toolBody = vi.fn(async () => ({
    content: [{ type: "text" as const, text: "echo done" }],
    details: {},
  }));
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    {
      name: "echo",
      description: "echo test tool",
      parameters: {
        type: "object",
        properties: {},
        additionalProperties: false,
      },
      execute: toolBody,
    } as never,
  ]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  const response = (await threadHarness.handleServerRequest({
    id: "request-echo-blocked-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: blockedCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  expect(response.success).toBe(false);

  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  await flushDiagnosticEvents();
  stopCapturing();

  expect(blockingHook).toHaveBeenCalledTimes(1);
  expect(toolBody).not.toHaveBeenCalled();
  const observed = captured
    .filter((payload): payload is ToolExecutionDiagnostic =>
      payload.type.startsWith("tool.execution."),
    )
    .map(({ type, toolName, toolCallId }) => ({ type, toolName, toolCallId }));
  expect(observed).toEqual([
    {
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: blockedCallId,
    },
    {
      type: "tool.execution.blocked",
      toolName: "echo",
      toolCallId: blockedCallId,
    },
  ]);
});
it("does not duplicate terminal diagnostics for wrapped dynamic tool errors", async () => {
  // The tool body throws: the response carries the error text and the diagnostics
  // must be exactly "started" followed by a single "error" event.
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  const failingCallId = "call-echo-error";

  const threadHarness = createStartedThreadHarness();
  const captured: DiagnosticEventPayload[] = [];
  const stopCapturing = onInternalDiagnosticEvent((payload) => captured.push(payload));
  const toolBody = vi.fn(async () => {
    throw new Error("wrapped tool failed");
  });
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    {
      name: "echo",
      description: "echo test tool",
      parameters: {
        type: "object",
        properties: {},
        additionalProperties: false,
      },
      execute: toolBody,
    } as never,
  ]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  // Queue a diagnostic backlog (helper argument 150) before issuing the tool call.
  emitAsyncDiagnosticBacklog(150);
  const response = (await threadHarness.handleServerRequest({
    id: "request-echo-error-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: failingCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  expect(response).toEqual({
    success: false,
    contentItems: [{ type: "inputText", text: "wrapped tool failed" }],
  });

  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  await flushDiagnosticEvents();
  stopCapturing();

  expect(toolBody).toHaveBeenCalledTimes(1);
  const observed = captured
    .filter((payload): payload is ToolExecutionDiagnostic =>
      payload.type.startsWith("tool.execution."),
    )
    .map(({ type, toolName, toolCallId }) => ({ type, toolName, toolCallId }));
  expect(observed).toEqual([
    {
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: failingCallId,
    },
    {
      type: "tool.execution.error",
      toolName: "echo",
      toolCallId: failingCallId,
    },
  ]);
});
it("does not duplicate terminal diagnostics for wrapped dynamic tool timeout fallbacks", async () => {
  // The tool body never settles and the call passes `timeoutMs: 1`, so the response is the
  // timeout fallback text. Diagnostics must be "started" followed by a single "error".
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  const timeoutCallId = "call-echo-timeout";

  const threadHarness = createStartedThreadHarness();
  const captured: DiagnosticEventPayload[] = [];
  const stopCapturing = onInternalDiagnosticEvent((payload) => captured.push(payload));
  const hangingToolBody = vi.fn(async () => new Promise<never>(() => {}));
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    {
      name: "echo",
      description: "echo test tool",
      parameters: {
        type: "object",
        properties: {},
        additionalProperties: true,
      },
      execute: hangingToolBody,
    } as never,
  ]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  const response = (await threadHarness.handleServerRequest({
    id: "request-echo-timeout-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: timeoutCallId,
      namespace: null,
      tool: "echo",
      arguments: { timeoutMs: 1 },
    },
  })) as DynamicToolResponse;
  expect(response).toEqual({
    success: false,
    contentItems: [
      {
        type: "inputText",
        text: "OpenClaw dynamic tool call timed out after 1ms while running tool echo.",
      },
    ],
  });

  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  await flushDiagnosticEvents();
  stopCapturing();

  expect(hangingToolBody).toHaveBeenCalledTimes(1);
  const observed = captured
    .filter((payload): payload is ToolExecutionDiagnostic =>
      payload.type.startsWith("tool.execution."),
    )
    .map(({ type, toolName, toolCallId }) => ({ type, toolName, toolCallId }));
  expect(observed).toEqual([
    {
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: timeoutCallId,
    },
    {
      type: "tool.execution.error",
      toolName: "echo",
      toolCallId: timeoutCallId,
    },
  ]);
});
it("passes normalized channel context to app-server dynamic tool result hooks", async () => {
  // The run is configured with currentChannelId "telegram:-100123"; the after_tool_call
  // hook context is expected to carry channelId "-100123" alongside the run identifiers.
  const threadHarness = createStartedThreadHarness();
  const afterToolCallHook = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "after_tool_call", handler: afterToolCallHook }]),
  );
  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("echo")]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  attemptParams.messageChannel = "telegram";
  attemptParams.messageProvider = "telegram";
  attemptParams.currentChannelId = "telegram:-100123";
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  await threadHarness.handleServerRequest({
    id: "request-echo-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-echo-1",
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  });

  // Poll until the hook has been invoked; the test does not assume it ran synchronously.
  await vi.waitFor(() => {
    expect(afterToolCallHook).toHaveBeenCalledTimes(1);
  });
  // The second argument of the first hook invocation is the context under test.
  const hookContext = afterToolCallHook.mock.calls[0]?.[1];
  expect(hookContext).toEqual(
    expect.objectContaining({
      agentId: "main",
      sessionId: "session-1",
      sessionKey: "agent:main:session-1",
      runId: "run-1",
      channelId: "-100123",
      toolName: "echo",
      toolCallId: "call-echo-1",
    }),
  );

  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
});
it("suppresses normalized tool progress for activity-log dynamic bash requests", async () => {
  // A dynamic "bash" call whose command invokes log_activity.sh must not surface any
  // agent event on the "tool" stream.
  const threadHarness = createStartedThreadHarness();
  const agentEventSpy = vi.fn();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.onAgentEvent = agentEventSpy;
  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("thread/start");

  await threadHarness.handleServerRequest({
    id: "request-tool-activity-log",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-log-activity-1",
      namespace: null,
      tool: "bash",
      arguments: {
        command:
          '/bin/bash -lc \'/home/openclaw/.openclaw/workspace/bin/log_activity.sh "web_search" "Grilled salmon research"\'',
      },
    },
  });
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const toolStreamCalls = agentEventSpy.mock.calls.filter(
    ([event]) => (event as { stream?: string }).stream === "tool",
  );
  expect(toolStreamCalls).toHaveLength(0);
});
it("releases the session when Codex never completes after a dynamic tool response", async () => {
  // The fake client never delivers turn/completed. With a 5ms completion idle timeout the
  // attempt must time out, request turn/interrupt, and stop accepting queued messages.
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  let handleRequest:
    | ((request: { id: string; method: string; params?: unknown }) => Promise<unknown>)
    | undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  // Minimal client stub: it only records the request handler registered by the attempt.
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: () => () => undefined,
        addRequestHandler: (
          handler: (request: {
            id: string;
            method: string;
            params?: unknown;
          }) => Promise<unknown>,
        ) => {
          handleRequest = handler;
          return () => undefined;
        },
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(attemptParams, {
    pluginConfig: { appServer: { turnCompletionIdleTimeoutMs: 5 } },
  });
  await vi.waitFor(() => expect(handleRequest).toBeTypeOf("function"), fastWait);

  const response = (await handleRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as DynamicToolResponse;
  expect(response.success).toBe(false);
  const firstContentItem = response.contentItems?.[0];
  expect(firstContentItem?.type).toBe("inputText");
  // Either failure text is accepted by this test.
  expect(firstContentItem?.text).toMatch(
    /^(Unknown OpenClaw tool: message|Action send requires a target\.)$/u,
  );

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith(
        "turn/interrupt",
        {
          threadId: "thread-1",
          turnId: "turn-1",
        },
        { timeoutMs: 5_000 },
      ),
    { interval: 1 },
  );
  expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
it("closes the app-server client when the active turn goes idle past the attempt timeout", async () => {
  // The stubbed turn/interrupt request never settles. The attempt must still finish as
  // timed out and close the client exactly once.
  const close = vi.fn();
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      case "turn/interrupt":
        return new Promise<never>(() => undefined);
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        close,
        addNotificationHandler: () => () => undefined,
        addRequestHandler: () => () => undefined,
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 250;
  const outcome = await runCodexAppServerAttempt(attemptParams);

  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  const interruptedTurn = {
    threadId: "thread-1",
    turnId: "turn-1",
  };
  expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, { timeoutMs: 5_000 });
  expect(close).toHaveBeenCalledTimes(1);
  expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
// Scenario: the attempt timeout is 100ms, but the turn reports activity twice with 60ms
// pauses in between. The cumulative wait exceeds the timeout while each individual gap
// stays below it; the test expects no turn/interrupt and a clean (non-aborted) result.
it("keeps a progressing active turn alive beyond the original attempt timeout", async () => {
const harness = createStartedThreadHarness();
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 100;
const onRunProgress = vi.fn();
params.onRunProgress = onRunProgress;
// The idle watchdogs are set to 300ms, longer than the 100ms attempt timeout used here.
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 300,
turnAssistantCompletionIdleTimeoutMs: 300,
turnTerminalIdleTimeoutMs: 300,
});
await harness.waitForMethod("turn/start");
// Wait until the run has reported "turn:start" progress before starting the timed gaps.
await vi.waitFor(
() =>
expect(onRunProgress).toHaveBeenCalledWith(
expect.objectContaining({ reason: "turn:start" }),
),
fastWait,
);
// First gap (60ms), then a raw assistant message notification.
await new Promise((resolve) => setTimeout(resolve, 60));
await harness.notify({
method: "rawResponseItem/completed",
params: {
threadId: "thread-1",
turnId: "turn-1",
item: {
type: "message",
id: "raw-progress-1",
role: "assistant",
content: [{ type: "output_text", text: "Still working." }],
},
},
});
// Second gap (60ms), then another raw assistant message notification.
await new Promise((resolve) => setTimeout(resolve, 60));
await harness.notify({
method: "rawResponseItem/completed",
params: {
threadId: "thread-1",
turnId: "turn-1",
item: {
type: "message",
id: "raw-progress-2",
role: "assistant",
content: [{ type: "output_text", text: "Almost done." }],
},
},
});
// No interrupt may have been requested at this point.
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
expect(result.promptError).toBeNull();
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
// Both raw-item notifications must have been reported as progress, in addition to turn:start.
const progressReasons = onRunProgress.mock.calls.map(([info]) => info.reason);
expect(progressReasons).toContain("turn:start");
expect(
progressReasons.filter((reason) => reason === "notification:rawResponseItem/completed"),
).toHaveLength(2);
});
// Scenario: 60ms into a 100ms attempt timeout, the server sends an
// "account/nonTurnRefresh" request. The test expects the attempt to time out anyway,
// with "turn:start" still recorded as the last activity and as the only progress report.
it("does not count non-turn app-server requests as turn attempt progress", async () => {
const harness = createStartedThreadHarness();
// Silence and capture warnings so the timeout warning payload can be inspected.
const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 100;
const onRunProgress = vi.fn();
params.onRunProgress = onRunProgress;
// The idle watchdogs are set to 500ms, longer than the 100ms attempt timeout used here.
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 500,
turnAssistantCompletionIdleTimeoutMs: 500,
turnTerminalIdleTimeoutMs: 500,
});
await harness.waitForMethod("turn/start");
await vi.waitFor(
() =>
expect(onRunProgress).toHaveBeenCalledWith(
expect.objectContaining({ reason: "turn:start" }),
),
fastWait,
);
await new Promise((resolve) => setTimeout(resolve, 60));
// This request carries no threadId/turnId in its params.
await harness.handleServerRequest({
id: "request-account-refresh",
method: "account/nonTurnRefresh",
params: {},
});
const result = await run;
expect(result.aborted).toBe(true);
expect(result.timedOut).toBe(true);
expect(result.promptError).toBe(
"codex app-server turn idle timed out waiting for turn/completed",
);
// Locate the progress-timeout warning and check its structured data argument.
const warnCall = warn.mock.calls.find(
([message]) => message === "codex app-server turn idle timed out waiting for progress",
);
const warnData = warnCall?.[1] as
| { lastActivityReason?: string; timeoutMs?: number }
| undefined;
expect(warnData?.timeoutMs).toBe(100);
expect(warnData?.lastActivityReason).toBe("turn:start");
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(true);
expect(onRunProgress.mock.calls.map(([info]) => info.reason)).toEqual(["turn:start"]);
});
// Scenario: an auth-token refresh request is started but never finishes (the refresh
// function is mocked with a promise that never settles). The test expects the 100ms
// attempt timeout to fire regardless, with "turn:start" as the last recorded activity.
it("keeps the turn attempt timeout armed while non-turn requests are pending", async () => {
const harness = createStartedThreadHarness();
const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
// Replace the token refresh with a promise that never resolves or rejects.
vi.spyOn(authBridge, "refreshCodexAppServerAuthTokens").mockImplementation(
async () => await new Promise<never>(() => undefined),
);
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 100;
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 500,
turnAssistantCompletionIdleTimeoutMs: 500,
turnTerminalIdleTimeoutMs: 500,
});
await harness.waitForMethod("turn/start");
await new Promise((resolve) => setTimeout(resolve, 60));
// Deliberately not awaited: with the mock above the refresh never settles.
void harness.handleServerRequest({
id: "request-auth-refresh",
method: "account/chatgptAuthTokens/refresh",
params: {},
});
// Confirm the request actually reached the (mocked) refresh function.
await vi.waitFor(() =>
expect(authBridge.refreshCodexAppServerAuthTokens).toHaveBeenCalledTimes(1),
);
const result = await run;
expect(result.aborted).toBe(true);
expect(result.timedOut).toBe(true);
expect(result.promptError).toBe(
"codex app-server turn idle timed out waiting for turn/completed",
);
const warnCall = warn.mock.calls.find(
([message]) => message === "codex app-server turn idle timed out waiting for progress",
);
const warnData = warnCall?.[1] as
| { lastActivityReason?: string; timeoutMs?: number }
| undefined;
expect(warnData?.timeoutMs).toBe(100);
expect(warnData?.lastActivityReason).toBe("turn:start");
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(true);
});
// Scenario: an elicitation request for the active thread arrives with `turnId: null`
// 60ms into a 100ms attempt timeout. After a further 60ms the test expects that no
// turn/interrupt was sent, and that the turn then completes without abort or timeout.
it("counts handled nullable-turn elicitations as turn attempt progress", async () => {
const harness = createStartedThreadHarness();
// Stub the elicitation bridge so the request is answered with an "accept" action.
vi.spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest").mockResolvedValue({
action: "accept",
content: null,
_meta: null,
});
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 100;
const onRunProgress = vi.fn();
params.onRunProgress = onRunProgress;
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 300,
turnAssistantCompletionIdleTimeoutMs: 300,
turnTerminalIdleTimeoutMs: 300,
});
await harness.waitForMethod("turn/start");
await vi.waitFor(
() =>
expect(onRunProgress).toHaveBeenCalledWith(
expect.objectContaining({ reason: "turn:start" }),
),
fastWait,
);
await new Promise((resolve) => setTimeout(resolve, 60));
await harness.handleServerRequest({
id: "request-null-turn-elicitation",
method: "mcpServer/elicitation/request",
params: {
threadId: "thread-1",
turnId: null,
mode: "form",
message: "Approve?",
requestedSchema: { type: "object", properties: {} },
serverName: "server-1",
_meta: null,
},
});
// Together with the earlier 60ms pause this exceeds the 100ms attempt timeout.
await new Promise((resolve) => setTimeout(resolve, 60));
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
expect(result.promptError).toBeNull();
});
// Scenario: a requestUserInput server request is left pending while the test waits
// (75ms + 125ms against a 250ms attempt timeout). The test expects no turn/interrupt,
// then answers the question through the message queue and completes the turn normally.
it("counts pending user input requests as turn attempt progress", async () => {
const harness = createStartedThreadHarness();
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 250;
params.onBlockReply = vi.fn();
const onRunProgress = vi.fn();
params.onRunProgress = onRunProgress;
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 600,
turnAssistantCompletionIdleTimeoutMs: 600,
turnTerminalIdleTimeoutMs: 600,
});
await harness.waitForMethod("turn/start");
await vi.waitFor(
() =>
expect(onRunProgress).toHaveBeenCalledWith(
expect.objectContaining({ reason: "turn:start" }),
),
fastWait,
);
await new Promise((resolve) => setTimeout(resolve, 75));
// Not awaited here: the response promise is asserted later, after the queued answer.
const response = harness.handleServerRequest({
id: "request-user-input",
method: "item/tool/requestUserInput",
params: {
threadId: "thread-1",
turnId: "turn-1",
itemId: "input-1",
questions: [
{
id: "mode",
header: "Mode",
question: "Pick a mode",
isOther: false,
isSecret: false,
options: [
{ label: "Fast", description: "Use less reasoning" },
{ label: "Deep", description: "Use more reasoning" },
],
},
],
},
});
// The block-reply callback being invoked once signals the request has been surfaced.
await vi.waitFor(() => expect(params.onBlockReply).toHaveBeenCalledTimes(1), fastWait);
await new Promise((resolve) => setTimeout(resolve, 125));
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
// Queue the reply "2"; the asserted answer below is "Deep", the second listed option.
expect(queueActiveRunMessageForTest("session-1", "2")).toBe(true);
await expect(response).resolves.toEqual({
answers: { mode: { answers: ["Deep"] } },
});
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
expect(result.promptError).toBeNull();
});
// Scenario: three turn-scoped server requests (elicitation, user input, command approval)
// arrive for "turn-other" rather than the active "turn-1". The test expects the 100ms
// attempt timeout to fire with "turn:start" still recorded as the last activity.
it("does not count mismatched turn-scoped requests as turn attempt progress", async () => {
const harness = createStartedThreadHarness();
const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 100;
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 500,
turnAssistantCompletionIdleTimeoutMs: 500,
turnTerminalIdleTimeoutMs: 500,
});
await harness.waitForMethod("turn/start");
await new Promise((resolve) => setTimeout(resolve, 60));
// Each request below uses the same thread but a different turn id ("turn-other").
await harness.handleServerRequest({
id: "request-foreign-elicitation",
method: "mcpServer/elicitation/request",
params: {
threadId: "thread-1",
turnId: "turn-other",
mode: "form",
message: "Approve?",
requestedSchema: { type: "object", properties: {} },
serverName: "server-1",
_meta: null,
},
});
await harness.handleServerRequest({
id: "request-foreign-user-input",
method: "item/tool/requestUserInput",
params: {
threadId: "thread-1",
turnId: "turn-other",
itemId: "input-1",
questions: [],
},
});
await harness.handleServerRequest({
id: "request-foreign-approval",
method: "item/commandExecution/requestApproval",
params: {
threadId: "thread-1",
turnId: "turn-other",
itemId: "command-1",
},
});
const result = await run;
expect(result.aborted).toBe(true);
expect(result.timedOut).toBe(true);
expect(result.promptError).toBe(
"codex app-server turn idle timed out waiting for turn/completed",
);
// Locate the progress-timeout warning and check its structured data argument.
const warnCall = warn.mock.calls.find(
([message]) => message === "codex app-server turn idle timed out waiting for progress",
);
const warnData = warnCall?.[1] as
| { lastActivityReason?: string; timeoutMs?: number }
| undefined;
expect(warnData?.timeoutMs).toBe(100);
expect(warnData?.lastActivityReason).toBe("turn:start");
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(true);
});
it("does not count account rate-limit updates as turn completion activity", async () => {
  // After a dynamic tool response, a rate-limit notification is delivered. The 5ms
  // completion idle timeout must still fire, with the tool response recorded as the last
  // activity in the warning payload.
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  let handleRequest:
    | ((request: { id: string; method: string; params?: unknown }) => Promise<unknown>)
    | undefined;
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  // Client stub that captures both handlers so the test can drive them directly.
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: (
          handler: (request: {
            id: string;
            method: string;
            params?: unknown;
          }) => Promise<unknown>,
        ) => {
          handleRequest = handler;
          return () => undefined;
        },
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 60_000,
  });
  await vi.waitFor(() => expect(handleRequest).toBeTypeOf("function"), fastWait);

  const toolResponse = (await handleRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResponse.success).toBe(false);

  // The helper receives a Unix-seconds timestamp 120 seconds in the future.
  await notify(rateLimitsUpdated(Math.ceil(Date.now() / 1000) + 120));

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  const completionWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for completion",
  );
  const warningData = completionWarning?.[1] as
    | { lastActivityReason?: string; timeoutMs?: number }
    | undefined;
  expect(warningData?.timeoutMs).toBe(5);
  expect(warningData?.lastActivityReason).toBe("request:item/tool/call:response");
});
it("keeps the post-tool completion watchdog armed across dynamic tool completion bookkeeping", async () => {
  // After the tool response, an item/completed notification for the same dynamic tool
  // call arrives. The timeout warning must be the "completion" one (5ms watchdog), not
  // the "terminal event" one (200ms watchdog).
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  let handleRequest:
    | ((request: { id: string; method: string; params?: unknown }) => Promise<unknown>)
    | undefined;
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  // Client stub that captures both handlers so the test can drive them directly.
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: (
          handler: (request: {
            id: string;
            method: string;
            params?: unknown;
          }) => Promise<unknown>,
        ) => {
          handleRequest = handler;
          return () => undefined;
        },
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 200,
  });
  await vi.waitFor(() => expect(handleRequest).toBeTypeOf("function"), fastWait);

  const toolResponse = (await handleRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResponse.success).toBe(false);

  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "call-1",
        tool: "message",
      },
    },
  });

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  // True when a warning with exactly this message text was logged.
  const warnedWith = (text: string): boolean =>
    warn.mock.calls.some(([message]) => message === text);
  expect(warnedWith("codex app-server turn idle timed out waiting for completion")).toBe(true);
  expect(warnedWith("codex app-server turn idle timed out waiting for terminal event")).toBe(
    false,
  );
});
// Scenario: after the dynamic tool call is answered, the only follow-up is a raw
// `custom_tool_call_output` item. The test expects the "waiting for completion" warning with
// timeoutMs 5 and the raw notification recorded as the last activity, and no
// "waiting for terminal event" warning.
it("keeps the post-tool completion watchdog armed across raw tool-output completion", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  let answerServerRequest: ((request: ServerRequest) => Promise<unknown>) | undefined;
  const warnSpy = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  // Fake client: records the notification and server-request handlers the attempt registers.
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: (handler: (request: ServerRequest) => Promise<unknown>) => {
        answerServerRequest = handler;
        return () => undefined;
      },
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(params, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 200,
  });
  await vi.waitFor(() => {
    expect(answerServerRequest).toBeTypeOf("function");
  }, fastWait);
  const toolResult = (await answerServerRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  // Raw echo of the tool output for the call that was just answered.
  await deliver({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "custom_tool_call_output",
        id: "call-1",
        call_id: "call-1",
        output: "already sent",
      },
    },
  });
  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  const completionWarning = warnSpy.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for completion",
  );
  const completionDetails = completionWarning?.[1] as
    | { lastActivityReason?: string; lastNotificationItemType?: string; timeoutMs?: number }
    | undefined;
  expect(completionDetails?.timeoutMs).toBe(5);
  expect(completionDetails?.lastActivityReason).toBe("notification:rawResponseItem/completed");
  expect(completionDetails?.lastNotificationItemType).toBe("custom_tool_call_output");
  const warnedMessages = warnSpy.mock.calls.map(([message]) => message);
  expect(
    warnedMessages.includes("codex app-server turn idle timed out waiting for terminal event"),
  ).toBe(false);
});
// Scenario: a raw assistant message arrives after the dynamic tool response. With a 5ms
// completion idle timeout but 200ms assistant/terminal idle timeouts, no `turn/interrupt`
// may be requested within 20ms, and the later `turn/completed` must finish the run cleanly.
it("keeps waiting when Codex emits a raw assistant item after a dynamic tool response", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  let answerServerRequest: ((request: ServerRequest) => Promise<unknown>) | undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  // True once any request with the "turn/interrupt" method has been recorded.
  const interruptRequested = () => rpc.mock.calls.some(([method]) => method === "turn/interrupt");
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: (handler: (request: ServerRequest) => Promise<unknown>) => {
        answerServerRequest = handler;
        return () => undefined;
      },
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(params, {
    turnCompletionIdleTimeoutMs: 5,
    turnAssistantCompletionIdleTimeoutMs: 200,
    turnTerminalIdleTimeoutMs: 200,
  });
  await vi.waitFor(() => {
    expect(answerServerRequest).toBeTypeOf("function");
  }, fastWait);
  const toolResult = (await answerServerRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await deliver({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm writing the report now." }],
      },
    },
  });
  // Wait well past the 5ms completion idle timeout before checking for an interrupt.
  await new Promise((resolve) => setTimeout(resolve, 20));
  expect(interruptRequested()).toBe(false);
  await deliver({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  const outcome = await attempt;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
  expect(outcome.promptError).toBeNull();
  expect(interruptRequested()).toBe(false);
});
// Scenario: same dynamic-tool-then-raw-assistant sequence, but with a 5ms assistant idle
// timeout (completion 50ms, terminal 500ms) and no `turn/completed`. The run must report an
// idle timeout and request `turn/interrupt` for the active turn.
it("times out post-tool raw assistant progress after the assistant idle timeout", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  let answerServerRequest: ((request: ServerRequest) => Promise<unknown>) | undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: (handler: (request: ServerRequest) => Promise<unknown>) => {
        answerServerRequest = handler;
        return () => undefined;
      },
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(params, {
    turnCompletionIdleTimeoutMs: 50,
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 500,
  });
  await vi.waitFor(() => {
    expect(answerServerRequest).toBeTypeOf("function");
  }, fastWait);
  const toolResult = (await answerServerRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await deliver({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm writing the report now." }],
      },
    },
  });
  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  // Poll until the interrupt request for the timed-out turn has been recorded.
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Scenario: a `commandExecution` item starts and completes, then a raw assistant message
// arrives and nothing else follows. With a 5ms assistant idle timeout (completion 100ms,
// terminal 500ms) the run must report an idle timeout and request `turn/interrupt`.
it("times out post-native-tool raw assistant progress after the assistant idle timeout", async () => {
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(params, {
    turnCompletionIdleTimeoutMs: 100,
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 500,
  });
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  // Delivered strictly in order: command start, command completion, raw assistant message.
  const notifications: CodexServerNotification[] = [
    {
      method: "item/started",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: { type: "commandExecution", id: "cmd-1", status: "inProgress" },
      },
    },
    {
      method: "item/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: { type: "commandExecution", id: "cmd-1", status: "completed" },
      },
    },
    {
      method: "rawResponseItem/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "message",
          id: "raw-status-1",
          role: "assistant",
          content: [{ type: "output_text", text: "I'm summarizing command output." }],
        },
      },
    },
  ];
  for (const notification of notifications) {
    await deliver(notification);
  }
  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Scenario: dynamic tool response followed by a raw assistant message, with a 5ms terminal
// idle timeout and a 500ms assistant idle timeout. The test inspects the payload logged with
// the "waiting for terminal event" warning and requires that no "waiting for completion"
// warning was logged.
it("logs raw assistant item context when the terminal watchdog fires", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  let answerServerRequest: ((request: ServerRequest) => Promise<unknown>) | undefined;
  const warnSpy = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: (handler: (request: ServerRequest) => Promise<unknown>) => {
        answerServerRequest = handler;
        return () => undefined;
      },
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(params, {
    turnCompletionIdleTimeoutMs: 5,
    turnAssistantCompletionIdleTimeoutMs: 500,
    turnTerminalIdleTimeoutMs: 5,
  });
  await vi.waitFor(() => {
    expect(answerServerRequest).toBeTypeOf("function");
  }, fastWait);
  const toolResult = (await answerServerRequest?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await deliver({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm writing the report now." }],
      },
    },
  });
  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  const terminalWarning = warnSpy.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for terminal event",
  );
  const terminalDetails = terminalWarning?.[1] as
    | {
        lastActivityReason?: string;
        lastAssistantTextPreview?: string;
        lastNotificationItemId?: string;
        lastNotificationItemRole?: string;
        lastNotificationItemType?: string;
        lastNotificationMethod?: string;
        threadId?: string;
        timeoutMs?: number;
        turnId?: string;
      }
    | undefined;
  // Every listed field must be present with exactly this value; extra fields are allowed.
  expect(terminalDetails).toMatchObject({
    threadId: "thread-1",
    turnId: "turn-1",
    timeoutMs: 5,
    lastActivityReason: "notification:rawResponseItem/completed",
    lastNotificationMethod: "rawResponseItem/completed",
    lastNotificationItemId: "raw-status-1",
    lastNotificationItemType: "message",
    lastNotificationItemRole: "assistant",
    lastAssistantTextPreview: "I'm writing the report now.",
  });
  const warnedMessages = warnSpy.mock.calls.map(([message]) => message);
  expect(
    warnedMessages.includes("codex app-server turn idle timed out waiting for completion"),
  ).toBe(false);
});
// Scenario: the turn starts but no notification ever arrives. With a 5ms completion idle
// timeout the run must report an idle timeout, request `turn/interrupt`, and afterwards
// queueing a message for "session-1" must return false.
it("releases the session when Codex accepts a turn but never sends progress", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 60_000;
  const attempt = runCodexAppServerAttempt(params, { turnCompletionIdleTimeoutMs: 5 });
  await harness.waitForMethod("turn/start");
  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(harness.request).toHaveBeenCalledWith("turn/interrupt", activeTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
  const queued = queueActiveRunMessageForTest("session-1", "after silent turn");
  expect(queued).toBe(false);
});
// Scenario: the only notification after `turn/start` is a rate-limits update. With a 15ms
// completion idle timeout the run must still report an idle timeout and request
// `turn/interrupt`.
it("does not treat global rate-limit notifications as turn progress", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(params, { turnCompletionIdleTimeoutMs: 15 });
  await harness.waitForMethod("turn/start");
  await harness.notify(rateLimitsUpdated(Date.now() + 60_000));
  // Sleep past the 15ms idle window before collecting the result.
  await new Promise((resolve) => setTimeout(resolve, 20));
  const { aborted, timedOut, promptError } = await attempt;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(harness.request).toHaveBeenCalledWith("turn/interrupt", activeTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
// Checks that a delivered notification is not applied synchronously or within a single
// microtask tick: after one `await Promise.resolve()` the recent rate-limit snapshot must
// still be undefined, and it must equal the notification params once the promise returned
// by `harness.notify` has settled. The turn is then completed so the run resolves normally.
it("yields a macrotask before processing queued app-server notifications", async () => {
const harness = createStartedThreadHarness();
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 1_000;
const run = runCodexAppServerAttempt(params);
await harness.waitForMethod("turn/start");
const notification = rateLimitsUpdated(Date.now() + 60_000);
// Deliberately not awaited yet: the assertion below must run while processing is pending.
const processing = harness.notify(notification);
// Let exactly one microtask tick pass; the notification must not have been handled yet.
await Promise.resolve();
expect(readRecentCodexRateLimits()).toBeUndefined();
await processing;
expect(readRecentCodexRateLimits()).toEqual(notification.params);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
await expect(run).resolves.toMatchObject({ aborted: false, timedOut: false });
});
// Scenario: a completed `agentMessage` item with text "Done." arrives and nothing follows.
// With a 5ms assistant idle timeout the run must resolve as a normal (non-aborted,
// non-timed-out) result carrying that text, and `turn/interrupt` must be requested.
it("releases the session when a completed agent message item goes quiet", async () => {
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-final-1",
        text: "Done.",
      },
    },
  });
  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Scenario: the text arrives only through an `item/agentMessage/delta`; the following
// `item/completed` for the same item id carries no `text`. The result must still contain
// "Done." as assistant text, be neither aborted nor timed out, and trigger `turn/interrupt`.
it("releases the session when a real completed agent message omits text", async () => {
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  // Delivered in order: streamed delta first, then the text-less completion.
  const notifications: CodexServerNotification[] = [
    {
      method: "item/agentMessage/delta",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        itemId: "msg-final-1",
        delta: "Done.",
      },
    },
    {
      method: "item/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "agentMessage",
          id: "msg-final-1",
        },
      },
    },
  ];
  for (const notification of notifications) {
    await deliver(notification);
  }
  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Scenario: delta, text-less `item/completed`, then a `turn/plan/updated` with an empty plan.
// The trailing plan update must not prevent the run from resolving normally with "Done."
// and requesting `turn/interrupt`.
it("keeps the completed assistant release armed across bookkeeping notifications", async () => {
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  // Delivered in order; the last entry is the bookkeeping notification under test.
  const notifications: CodexServerNotification[] = [
    {
      method: "item/agentMessage/delta",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        itemId: "msg-final-1",
        delta: "Done.",
      },
    },
    {
      method: "item/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "agentMessage",
          id: "msg-final-1",
        },
      },
    },
    {
      method: "turn/plan/updated",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        plan: [],
      },
    },
  ];
  for (const notification of notifications) {
    await deliver(notification);
  }
  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Scenario: a completed `agentMessage` with phase "commentary" must not release the run.
// No `turn/interrupt` may be requested within 20ms of it (assistant idle timeout is 5ms);
// only the later "final_answer" message ends the run, and only its text is returned.
it("does not release commentary agent message items", async () => {
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start") {
      return threadStartResult("thread-1");
    }
    if (method === "turn/start") {
      return turnStartResult("turn-1", "inProgress");
    }
    return {};
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 200;
  const run = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything()),
    { interval: 1 },
  );
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-commentary-1",
        phase: "commentary",
        text: "I am checking the workspace.",
      },
    },
  });
  // Wait well past the 5ms assistant idle timeout.
  await new Promise((resolve) => setTimeout(resolve, 20));
  // Match on the method name only. `turn/interrupt` is issued with three arguments
  // (method, params, options) and `toHaveBeenCalledWith` compares the full argument list,
  // so a two-argument negative matcher could never fail.
  expect(request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-final-1",
        phase: "final_answer",
        text: "Done.",
      },
    },
  });
  const result = await run;
  expect({
    aborted: result.aborted,
    timedOut: result.timedOut,
    promptError: result.promptError,
    assistantTexts: result.assistantTexts,
  }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
});
// Scenario: a raw assistant message with phase "commentary" must neither release the run
// nor appear in the returned assistant texts. No `turn/interrupt` may be requested within
// 20ms of it (assistant idle timeout is 5ms); the run ends on the explicit `turn/completed`
// with an empty `assistantTexts`.
it("does not release or return commentary raw assistant response items", async () => {
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start") {
      return threadStartResult("thread-1");
    }
    if (method === "turn/start") {
      return turnStartResult("turn-1", "inProgress");
    }
    return {};
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 200;
  const run = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything()),
    { interval: 1 },
  );
  await notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-commentary-1",
        role: "assistant",
        phase: "commentary",
        content: [{ type: "output_text", text: "I am checking the workspace." }],
      },
    },
  });
  // Wait well past the 5ms assistant idle timeout.
  await new Promise((resolve) => setTimeout(resolve, 20));
  // Match on the method name only. `turn/interrupt` is issued with three arguments
  // (method, params, options) and `toHaveBeenCalledWith` compares the full argument list,
  // so a two-argument negative matcher could never fail.
  expect(request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
  await notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  const result = await run;
  expect({
    aborted: result.aborted,
    timedOut: result.timedOut,
    promptError: result.promptError,
    assistantTexts: result.assistantTexts,
  }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: [],
  });
});
// Scenario: a raw assistant message without a `phase` arrives and no `turn/completed`
// follows. With a 5ms assistant idle timeout and a 15ms terminal idle timeout the run must
// resolve normally with "Done." and request `turn/interrupt`.
it("releases the session after a raw assistant response item without turn completion", async () => {
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 15,
  });
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-final-1",
        role: "assistant",
        content: [{ type: "output_text", text: "Done." }],
      },
    },
  });
  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Scenario: a `commandExecution` item is started and still in progress when a completed
// `agentMessage` arrives. No `turn/interrupt` may be requested within 20ms (assistant idle
// timeout is 5ms); once the command item completes the run resolves normally with "Done.".
it("keeps waiting when a current-turn item is still active", async () => {
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start") {
      return threadStartResult("thread-1");
    }
    if (method === "turn/start") {
      return turnStartResult("turn-1", "inProgress");
    }
    return {};
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 200;
  const run = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 50,
  });
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything()),
    { interval: 1 },
  );
  await notify({
    method: "item/started",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "commandExecution", id: "cmd-1", status: "inProgress" },
    },
  });
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-final-1",
        text: "Done.",
      },
    },
  });
  // Wait well past the 5ms assistant idle timeout while cmd-1 is still in progress.
  await new Promise((resolve) => setTimeout(resolve, 20));
  // Match on the method name only. `turn/interrupt` is issued with three arguments
  // (method, params, options) and `toHaveBeenCalledWith` compares the full argument list,
  // so a two-argument negative matcher could never fail.
  expect(request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "commandExecution", id: "cmd-1", status: "completed" },
    },
  });
  const result = await run;
  expect({
    aborted: result.aborted,
    timedOut: result.timedOut,
    promptError: result.promptError,
    assistantTexts: result.assistantTexts,
  }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
});
// Scenario: a `dynamicToolCall` item is started and completed via notifications only, then
// nothing follows. With a 5ms completion idle timeout and a 60s terminal idle timeout the run
// must report an idle timeout and request `turn/interrupt`.
it("times out promptly when the last completed non-assistant current-turn item is not followed by turn completion", async () => {
  let deliver: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const rpc = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request: rpc,
      addNotificationHandler: (handler: typeof deliver) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.timeoutMs = 200;
  const attempt = runCodexAppServerAttempt(params, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 60_000,
  });
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  // Delivered in order: tool item start, then its successful completion.
  const notifications: CodexServerNotification[] = [
    {
      method: "item/started",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "dynamicToolCall",
          id: "tool-1",
          tool: "sessions_list",
          arguments: {},
          status: "inProgress",
        },
      },
    },
    {
      method: "item/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "dynamicToolCall",
          id: "tool-1",
          tool: "sessions_list",
          arguments: {},
          status: "completed",
          success: true,
          contentItems: [],
        },
      },
    },
  ];
  for (const notification of notifications) {
    await deliver(notification);
  }
  const outcome = await attempt;
  expect(outcome).toMatchObject({
    aborted: true,
    timedOut: true,
    promptError: "codex app-server turn idle timed out waiting for turn/completed",
  });
  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(rpc).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
// Registers a `before_prompt_build` hook and verifies three things: the hook receives the
// prompt, the prior session messages and the run/session ids; `thread/start` developer
// instructions contain the prepended system context followed by the custom system prompt;
// and the `turn/start` input is the prepended context followed by the prompt.
it("applies before_prompt_build to Codex developer instructions and turn input", async () => {
  const promptHook = vi.fn(async () => ({
    systemPrompt: "custom codex system",
    prependSystemContext: "pre system",
    appendSystemContext: "post system",
    prependContext: "queued context",
  }));
  const registry = createMockPluginRegistry([
    { hookName: "before_prompt_build", handler: promptHook },
  ]);
  initializeGlobalHookRunner(registry);
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed one prior assistant message so the hook has history to observe.
  const history = SessionManager.open(sessionFile);
  history.appendMessage(assistantMessage("previous turn", Date.now()));
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(promptHook).toHaveBeenCalledOnce();
  const [hookInput, hookContext] = mockCall(promptHook, "before_prompt_build") as [
    { messages?: Array<{ role?: string }>; prompt?: string },
    { runId?: string; sessionId?: string },
  ];
  expect(hookInput.prompt).toBe("hello");
  expect(hookInput.messages?.[0]?.role).toBe("assistant");
  expect(hookContext.runId).toBe("run-1");
  expect(hookContext.sessionId).toBe("session-1");
  // Params of the first recorded request with the given method, if any.
  const paramsOf = (method: string) =>
    harness.requests.find((entry) => entry.method === method)?.params;
  const threadStartParams = paramsOf("thread/start") as
    | { developerInstructions?: string }
    | undefined;
  expect(threadStartParams?.developerInstructions).toContain("pre system\n\ncustom codex system");
  const turnStartParams = paramsOf("turn/start") as
    | { input?: Array<{ text?: string; text_elements?: unknown[]; type?: string }> }
    | undefined;
  expect(turnStartParams?.input).toEqual([
    { type: "text", text: "queued context\n\nhello", text_elements: [] },
  ]);
});
it("projects mirrored history when starting Codex without a native thread binding", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  // Seed the session transcript with one user/assistant exchange.
  const transcript = SessionManager.open(sessionFile);
  transcript.appendMessage(userMessage("we are fixing the Opik default project", Date.now()));
  transcript.appendMessage(assistantMessage("Opik default project context", Date.now() + 1));

  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.prompt = "make the default webpage openclaw";
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((tick) => setImmediate(tick));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const turnStartParams = harness.requests.find((entry) => entry.method === "turn/start")
    ?.params as { input?: Array<{ text?: string }> } | undefined;
  const inputText = turnStartParams?.input?.[0]?.text ?? "";

  // The turn input must carry both the seeded history and the new request.
  const expectedFragments = [
    "OpenClaw assembled context for this turn:",
    "we are fixing the Opik default project",
    "Opik default project context",
    "Current user request:",
    "make the default webpage openclaw",
  ];
  for (const fragment of expectedFragments) {
    expect(inputText).toContain(fragment);
  }
});
it("passes stable workspace files as Codex developer instructions and keeps MEMORY.md as turn context", async () => {
  type WorkspaceFileName = keyof typeof workspaceFiles;

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  // Fixture contents keyed by file name; insertion order is the on-disk write order.
  const workspaceFiles = {
    "AGENTS.md": "Follow AGENTS guidance.",
    "SOUL.md": "Soul voice goes here.",
    "IDENTITY.md": "Identity guidance goes here.",
    "TOOLS.md": "Tool guidance goes here.",
    "USER.md": "User profile goes here.",
    "HEARTBEAT.md": "Heartbeat checklist goes here.",
    "MEMORY.md": "Memory summary goes here.",
  };
  await fs.mkdir(workspaceDir, { recursive: true });
  for (const [fileName, content] of Object.entries(workspaceFiles)) {
    await fs.writeFile(path.join(workspaceDir, fileName), content);
  }

  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await new Promise<void>((tick) => setImmediate(tick));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await attempt;

  // --- thread/start: stable persona files become developer instructions ---
  const threadStartParams = harness.requests.find((entry) => entry.method === "thread/start")
    ?.params as {
    config?: { instructions?: string };
    developerInstructions?: string;
  };
  const { config, developerInstructions } = threadStartParams;
  const developerMustContain = [
    "OpenClaw Agent Soul",
    "They define who you are, how you work",
    workspaceFiles["SOUL.md"],
    workspaceFiles["IDENTITY.md"],
    workspaceFiles["TOOLS.md"],
    workspaceFiles["USER.md"],
  ];
  for (const fragment of developerMustContain) {
    expect(developerInstructions).toContain(fragment);
  }
  const developerMustOmit = [
    workspaceFiles["HEARTBEAT.md"],
    workspaceFiles["MEMORY.md"],
    "Codex loads AGENTS.md natively",
    workspaceFiles["AGENTS.md"],
  ];
  for (const fragment of developerMustOmit) {
    expect(developerInstructions).not.toContain(fragment);
  }
  expect(config?.instructions).toBeUndefined();

  // --- turn/start: only MEMORY.md and the AGENTS.md note ride along as turn context ---
  const turnStartParams = harness.requests.find((entry) => entry.method === "turn/start")
    ?.params as {
    input?: Array<{ text?: string }>;
  };
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  expect(inputText).toContain("OpenClaw runtime context for this turn:");
  const turnMustOmit = [
    "does not override Codex system/developer instructions",
    "not developer policy",
    workspaceFiles["SOUL.md"],
    workspaceFiles["IDENTITY.md"],
    workspaceFiles["TOOLS.md"],
    workspaceFiles["USER.md"],
    workspaceFiles["HEARTBEAT.md"],
  ];
  for (const fragment of turnMustOmit) {
    expect(inputText).not.toContain(fragment);
  }
  expect(inputText).toContain(workspaceFiles["MEMORY.md"]);
  expect(inputText).toContain("Codex loads AGENTS.md natively");
  expect(inputText).not.toContain(workspaceFiles["AGENTS.md"]);
  expect(inputText).toContain("Current user request:\nhello");

  // --- system prompt report: per-file character accounting ---
  const fileStats = new Map(
    result.systemPromptReport?.injectedWorkspaceFiles.map((file) => [file.name, file]) ?? [],
  );
  // [file name, expected injectedChars]; HEARTBEAT.md is the only file reported with 0.
  const expectedInjection: Array<[WorkspaceFileName, number]> = [
    ["SOUL.md", workspaceFiles["SOUL.md"].length],
    ["IDENTITY.md", workspaceFiles["IDENTITY.md"].length],
    ["TOOLS.md", workspaceFiles["TOOLS.md"].length],
    ["USER.md", workspaceFiles["USER.md"].length],
    ["MEMORY.md", workspaceFiles["MEMORY.md"].length],
    ["HEARTBEAT.md", 0],
    ["AGENTS.md", workspaceFiles["AGENTS.md"].length],
  ];
  for (const [fileName, injectedChars] of expectedInjection) {
    expect(fileStats.get(fileName)).toMatchObject({
      rawChars: workspaceFiles[fileName].length,
      injectedChars,
      truncated: false,
    });
  }
});
it("points heartbeat Codex turns at HEARTBEAT.md without injecting its contents", async () => {
  type CollaborationParams = {
    input?: Array<{ text?: string }>;
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const heartbeatPath = path.join(workspaceDir, "HEARTBEAT.md");
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(heartbeatPath, "Heartbeat checklist goes here.");

  // Configure the attempt as a lightweight heartbeat run.
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "heartbeat";
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "heartbeat";
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((tick) => setImmediate(tick));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // The checklist body must not be inlined into the thread-level instructions.
  const threadStartParams = harness.requests.find((entry) => entry.method === "thread/start")
    ?.params as {
    developerInstructions?: string;
  };
  expect(threadStartParams.developerInstructions).not.toContain("Heartbeat checklist goes here.");

  const turnStartParams = harness.requests.find((entry) => entry.method === "turn/start")
    ?.params as CollaborationParams;
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";

  // The turn references the file by path but never carries its contents.
  expect(inputText).not.toContain("Heartbeat checklist goes here.");
  expect(collaborationInstructions).toContain("HEARTBEAT.md exists");
  expect(collaborationInstructions).toContain("Read it before proceeding with this heartbeat");
  expect(collaborationInstructions).toContain(heartbeatPath);
  expect(collaborationInstructions).not.toContain("Heartbeat checklist goes here.");
});
it("omits heartbeat Codex workspace pointers for empty HEARTBEAT.md files", async () => {
  type CollaborationParams = {
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await fs.mkdir(workspaceDir, { recursive: true });
  // A HEARTBEAT.md that holds nothing but newlines.
  await fs.writeFile(path.join(workspaceDir, "HEARTBEAT.md"), "\n\n");

  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "heartbeat";
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "heartbeat";
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((tick) => setImmediate(tick));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const turnStartParams = harness.requests.find((entry) => entry.method === "turn/start")
    ?.params as CollaborationParams;
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";

  // The heartbeat preamble is still present, but the file pointer is left out.
  expect(collaborationInstructions).toContain("This is an OpenClaw heartbeat turn");
  expect(collaborationInstructions).not.toContain("HEARTBEAT.md exists");
});
it("remaps Codex bootstrap files under dot-prefixed workspace directories", () => {
  // A file below the source workspace, inside a directory whose name begins with "..",
  // is rewritten onto the target workspace with the relative part kept intact.
  const remappedInside = testing.remapCodexContextFilePath({
    file: {
      path: "/real/workspace/..context/SOUL.md",
      content: "Soul voice goes here.",
    },
    sourceWorkspaceDir: "/real/workspace",
    targetWorkspaceDir: "/sandbox/workspace",
  });
  expect(remappedInside).toEqual({
    path: "/sandbox/workspace/..context/SOUL.md",
    content: "Soul voice goes here.",
  });

  // A file outside the source workspace is returned with its path unchanged.
  const remappedOutside = testing.remapCodexContextFilePath({
    file: {
      path: "/outside/SOUL.md",
      content: "outside",
    },
    sourceWorkspaceDir: "/real/workspace",
    targetWorkspaceDir: "/sandbox/workspace",
  });
  expect(remappedOutside).toEqual({
    path: "/outside/SOUL.md",
    content: "outside",
  });
});
it("keeps lightweight cron Codex turns out of OpenClaw bootstrap context", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const exactCommand =
    "cd /Users/phaedrus/Projects/openclaw && /Users/phaedrus/clawd/scripts/clawsweeper-related-scan.py";

  // Workspace files that a lightweight cron run is expected to leave out.
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "AGENTS.md"), "Follow AGENTS guidance.");
  await fs.writeFile(path.join(workspaceDir, "SOUL.md"), "Soul voice goes here.");

  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "cron";
  attemptParams.prompt = exactCommand;
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "cron";
  attemptParams.skillsSnapshot = {
    prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
    skills: [],
  };
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await new Promise<void>((tick) => setImmediate(tick));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await attempt;

  // thread/start: project docs disabled, no persona/AGENTS/skills text injected.
  const threadStartParams = harness.requests.find((entry) => entry.method === "thread/start")
    ?.params as {
    developerInstructions?: string;
    config?: Record<string, unknown>;
  };
  expect(threadStartParams.config?.project_doc_max_bytes).toBe(0);
  const omittedFromDeveloperInstructions = [
    "Soul voice goes here.",
    "Follow AGENTS guidance.",
    "<available_skills>",
  ];
  for (const fragment of omittedFromDeveloperInstructions) {
    expect(threadStartParams.developerInstructions).not.toContain(fragment);
  }

  // turn/start: the prompt is forwarded verbatim, with no wrapper text.
  const turnStartParams = harness.requests.find((entry) => entry.method === "turn/start")
    ?.params as {
    input?: Array<{ text?: string }>;
  };
  expect(turnStartParams.input?.[0]?.text).toBe(exactCommand);
  expect(result.systemPromptReport?.skills).toEqual({ promptChars: 0, entries: [] });
});
// End-to-end hook/event check for one Codex turn: verifies the llm_input hook payload
// once the turn has started, then streams one assistant delta, completes the turn, and
// verifies llm_output, agent_end, the per-run agent events and the global agent events.
it("fires llm_input, llm_output, and agent_end hooks for codex turns", async () => {
const llmInput = vi.fn();
const llmOutput = vi.fn();
const agentEnd = vi.fn();
// Per-run event sink, passed to the attempt via params.onAgentEvent below.
const onRunAgentEvent = vi.fn();
// Global event sink: records every event delivered to the process-wide listener.
// NOTE(review): the listener is never unsubscribed inside this test — presumably the
// suite resets agent events between tests; confirm against the afterEach hooks.
const globalAgentEvents: AgentEventPayload[] = [];
onAgentEvent((event) => globalAgentEvents.push(event));
initializeGlobalHookRunner(
createMockPluginRegistry([
{ hookName: "llm_input", handler: llmInput },
{ hookName: "llm_output", handler: llmOutput },
{ hookName: "agent_end", handler: agentEnd },
]),
);
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
// Seed one assistant message so llm_input reports a non-empty history.
const sessionManager = SessionManager.open(sessionFile);
sessionManager.appendMessage(assistantMessage("existing context", Date.now()));
const harness = createStartedThreadHarness();
const params = createParams(sessionFile, workspaceDir);
params.runtimePlan = createCodexRuntimePlanFixture();
params.onAgentEvent = onRunAgentEvent;
const run = runCodexAppServerAttempt(params);
await harness.waitForMethod("turn/start");
// llm_input must already have fired by the time turn/start has been requested.
expect(llmInput).toHaveBeenCalled();
// Yield one macrotask before inspecting the recorded hook call.
await new Promise<void>((resolve) => setImmediate(resolve));
const [llmInputPayload, llmInputContext] = mockCall(llmInput, "llm_input") as [
{
historyMessages?: Array<{ role?: string }>;
imagesCount?: number;
model?: string;
prompt?: string;
provider?: string;
runId?: string;
sessionId?: string;
systemPrompt?: string;
},
{ runId?: string; sessionId?: string; sessionKey?: string },
];
expect(llmInputPayload.runId).toBe("run-1");
expect(llmInputPayload.sessionId).toBe("session-1");
expect(llmInputPayload.provider).toBe("codex");
expect(llmInputPayload.model).toBe("gpt-5.4-codex");
expect(llmInputPayload.prompt).toBe("hello");
expect(llmInputPayload.imagesCount).toBe(0);
expect(llmInputPayload.historyMessages?.[0]?.role).toBe("assistant");
expect(llmInputPayload.systemPrompt).toContain(
"You are a personal agent running inside OpenClaw.",
);
// The reported system prompt must not include the GPT-5 behavior contract overlay.
expect(llmInputPayload.systemPrompt).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);
expect(llmInputContext.runId).toBe("run-1");
expect(llmInputContext.sessionId).toBe("session-1");
expect(llmInputContext.sessionKey).toBe("agent:main:session-1");
// Stream a single assistant text delta, then finish the turn.
await harness.notify({
method: "item/agentMessage/delta",
params: {
threadId: "thread-1",
turnId: "turn-1",
itemId: "msg-1",
delta: "hello back",
},
});
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
expect(result.assistantTexts).toEqual(["hello back"]);
expect(llmOutput).toHaveBeenCalledTimes(1);
expect(agentEnd).toHaveBeenCalledTimes(1);
// Per-run events: take the first argument of each onAgentEvent call.
const agentEvents = onRunAgentEvent.mock.calls.map(([event]) => event) as Array<{
data: {
endedAt?: number;
phase?: string;
startedAt?: number;
text?: string;
};
stream: string;
}>;
const lifecycleStart = agentEvents.find(
(event) => event.stream === "lifecycle" && event.data.phase === "start",
);
expect(typeof lifecycleStart?.data.startedAt).toBe("number");
const assistantEvent = agentEvents.find((event) => event.stream === "assistant");
expect(assistantEvent?.data).toEqual({ text: "hello back" });
const lifecycleEnd = agentEvents.find(
(event) => event.stream === "lifecycle" && event.data.phase === "end",
);
expect(typeof lifecycleEnd?.data.startedAt).toBe("number");
expect(typeof lifecycleEnd?.data.endedAt).toBe("number");
// Ordering check: lifecycle start, then assistant text, then lifecycle end.
const startIndex = agentEvents.findIndex(
(event) => event.stream === "lifecycle" && event.data.phase === "start",
);
const assistantIndex = agentEvents.findIndex((event) => event.stream === "assistant");
const endIndex = agentEvents.findIndex(
(event) => event.stream === "lifecycle" && event.data.phase === "end",
);
expect(startIndex).toBeGreaterThanOrEqual(0);
expect(assistantIndex).toBeGreaterThan(startIndex);
expect(endIndex).toBeGreaterThan(assistantIndex);
// Global events must carry the run id and session key alongside the same payloads.
const globalAssistantEvent = globalAgentEvents.find((event) => event.stream === "assistant");
expect(globalAssistantEvent?.runId).toBe("run-1");
expect(globalAssistantEvent?.sessionKey).toBe("agent:main:session-1");
expect(globalAssistantEvent?.data).toEqual({ text: "hello back" });
const globalEndEvent = globalAgentEvents.find(
(event) => event.stream === "lifecycle" && event.data.phase === "end",
);
expect(globalEndEvent?.runId).toBe("run-1");
expect(globalEndEvent?.sessionKey).toBe("agent:main:session-1");
// llm_output: model identity plus context-window fields, checked on both the payload
// and the hook context. The expected numbers presumably come from
// createCodexRuntimePlanFixture() — confirm against that fixture.
const [llmOutputPayload, llmOutputContext] = mockCall(llmOutput, "llm_output") as [
{
assistantTexts?: string[];
harnessId?: string;
lastAssistant?: { role?: string };
model?: string;
provider?: string;
resolvedRef?: string;
runId?: string;
sessionId?: string;
contextTokenBudget?: number;
contextWindowSource?: string;
contextWindowReferenceTokens?: number;
},
{
runId?: string;
sessionId?: string;
contextTokenBudget?: number;
contextWindowSource?: string;
contextWindowReferenceTokens?: number;
},
];
expect(llmOutputPayload.runId).toBe("run-1");
expect(llmOutputPayload.sessionId).toBe("session-1");
expect(llmOutputPayload.provider).toBe("codex");
expect(llmOutputPayload.model).toBe("gpt-5.4-codex");
expect(llmOutputPayload.contextTokenBudget).toBe(150_000);
expect(llmOutputPayload.contextWindowSource).toBe("agentContextTokens");
expect(llmOutputPayload.contextWindowReferenceTokens).toBe(200_000);
expect(llmOutputPayload.resolvedRef).toBe("codex/gpt-5.4-codex");
expect(llmOutputPayload.harnessId).toBe("codex");
expect(llmOutputPayload.assistantTexts).toEqual(["hello back"]);
expect(llmOutputPayload.lastAssistant?.role).toBe("assistant");
expect(llmOutputContext.runId).toBe("run-1");
expect(llmOutputContext.sessionId).toBe("session-1");
expect(llmOutputContext.contextTokenBudget).toBe(150_000);
expect(llmOutputContext.contextWindowSource).toBe("agentContextTokens");
expect(llmOutputContext.contextWindowReferenceTokens).toBe(200_000);
// agent_end: reports success and a message list with both user and assistant roles.
const [agentEndPayload, agentEndContext] = mockCall(agentEnd, "agent_end") as [
{ messages?: Array<{ role?: string }>; success?: boolean },
{ runId?: string; sessionId?: string },
];
expect(agentEndPayload.success).toBe(true);
expect(agentEndPayload.messages?.some((message) => message.role === "user")).toBe(true);
expect(agentEndPayload.messages?.some((message) => message.role === "assistant")).toBe(true);
expect(agentEndContext.runId).toBe("run-1");
expect(agentEndContext.sessionId).toBe("session-1");
});
it("forwards Codex app-server verbose tool summaries and completed output", async () => {
  const toolResultSink = vi.fn();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.verboseLevel = "full";
  attemptParams.onToolResult = toolResultSink;
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");

  // Phase 1: the dynamic "read" tool call starts (no output yet).
  await harness.notify({
    method: "item/started",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        namespace: null,
        tool: "read",
        arguments: { path: "README.md" },
        status: "inProgress",
        contentItems: null,
        success: null,
        durationMs: null,
      },
    },
  });

  // Phase 2: the same tool call completes with text output.
  await harness.notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        namespace: null,
        tool: "read",
        arguments: { path: "README.md" },
        status: "completed",
        contentItems: [{ type: "inputText", text: "file contents" }],
        success: true,
        durationMs: 12,
      },
    },
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // One summary on start, then the summary plus a fenced output block on completion.
  expect(toolResultSink).toHaveBeenCalledTimes(2);
  expect(toolResultSink).toHaveBeenNthCalledWith(1, {
    text: "📖 Read: `from README.md`",
  });
  expect(toolResultSink).toHaveBeenNthCalledWith(2, {
    text: "📖 Read: `from README.md`\n```txt\nfile contents\n```",
  });
});
it("registers native hook relay config for an enabled Codex turn and cleans it up", async () => {
  type HookCommand = { command?: string; timeout?: number; type?: string };
  type HookStateEntry = { enabled?: unknown; trusted_hash?: unknown };

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
      gatewayTimeoutMs: 4321,
      hookTimeoutSec: 9,
    },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as { config?: Record<string, unknown> } | undefined;
  const startConfig = startParams?.config;
  expect(startConfig?.["features.hooks"]).toBe(true);

  // The PreToolUse hook is a command carrying both configured timeouts.
  const preToolUseHooks = startConfig?.["hooks.PreToolUse"] as
    | Array<{ hooks?: HookCommand[] }>
    | undefined;
  const preToolUseCommand = preToolUseHooks?.[0]?.hooks?.[0];
  expect(preToolUseCommand?.type).toBe("command");
  expect(preToolUseCommand?.timeout).toBe(9);
  expect(preToolUseCommand?.command).toContain("--event pre_tool_use --timeout 4321");

  // The hook state entry is enabled and carries a sha256 trusted hash.
  const hookState = startConfig?.["hooks.state"] as Record<string, HookStateEntry>;
  const preToolUseState = hookState?.["/<session-flags>/config.toml:pre_tool_use:0:0"];
  expect(preToolUseState?.enabled).toBe(true);
  expect(preToolUseState?.trusted_hash).toMatch(/^sha256:[a-f0-9]{64}$/);

  // After the run has finished, the relay registration is gone.
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
it("forwards command approval requests through the active native hook relay", async () => {
  // Stub the approval bridge so the test controls the decision.
  const approvalSpy = vi
    .spyOn(approvalBridge, "handleCodexAppServerApprovalRequest")
    .mockResolvedValue({ decision: "decline" });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const approvalMethod = "item/commandExecution/requestApproval";
  // Builds a fresh copy each time so the request and the expectation never share an object.
  const buildApprovalParams = () => ({
    threadId: "thread-1",
    turnId: "turn-1",
    itemId: "cmd-1",
    command: "/bin/bash -lc 'node -v'",
    cwd: workspaceDir,
  });
  const lookupRelay = (id: Parameters<
    typeof nativeHookRelayTesting.getNativeHookRelayRegistrationForTests
  >[0]) => nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(id);

  const harness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.messageChannel = "discord";
  attemptParams.currentChannelId = "channel:target";
  const attempt = runCodexAppServerAttempt(attemptParams, {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await harness.waitForMethod("turn/start");

  // The relay is registered while the turn is in flight.
  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  expect(lookupRelay(relayId)).toBeDefined();

  const response = await harness.handleServerRequest({
    id: "request-command-approval",
    method: approvalMethod,
    params: buildApprovalParams(),
  });
  expect(response).toEqual({ decision: "decline" });

  // The bridge receives the original request plus the active relay handle.
  expect(approvalSpy).toHaveBeenCalledTimes(1);
  const approvalArgs = approvalSpy.mock.calls[0]?.[0];
  expect(approvalArgs).toMatchObject({
    method: approvalMethod,
    requestParams: buildApprovalParams(),
    threadId: "thread-1",
    turnId: "turn-1",
  });
  expect(approvalArgs?.nativeHookRelay).toMatchObject({
    relayId,
    allowedEvents: expect.arrayContaining(["pre_tool_use"]),
  });
  // "channel:target" is registered as channel id "target".
  expect(lookupRelay(relayId)).toMatchObject({
    channelId: "target",
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(lookupRelay(relayId)).toBeUndefined();
});
it("promotes implicit Codex yolo approval policy when OpenClaw tool policy exists", async () => {
  // A registered before_tool_call hook is the "OpenClaw tool policy" in this scenario.
  const toolPolicyRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(toolPolicyRegistry);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // With no explicit mode configured, approvals become "untrusted"; the sandbox stays full-access.
  const startParams = harness.requests.find((entry) => entry.method === "thread/start")
    ?.params as Record<string, unknown> | undefined;
  expect(startParams?.approvalPolicy).toBe("untrusted");
  expect(startParams?.sandbox).toBe("danger-full-access");
});
it("keeps implicit Codex yolo approval policy when untrusted approvals are disallowed", () => {
  // Default runtime options and plugin config: nothing set via env, requirements or config.
  const appServer = resolveCodexAppServerRuntimeOptions({ env: {}, requirementsToml: null });
  const pluginConfig = readCodexPluginConfig({});

  // Promotion is requested, but the "untrusted" policy is not available.
  const { approvalPolicy } = testing.resolveCodexAppServerForOpenClawToolPolicy({
    appServer,
    pluginConfig,
    env: {},
    shouldPromote: true,
    canUseUntrustedApprovalPolicy: false,
  });

  expect(approvalPolicy).toBe("never");
});
it("keeps explicit Codex yolo mode unpromoted when OpenClaw tool policy exists", async () => {
  // A before_tool_call hook is registered, as in the implicit-yolo promotion test.
  const toolPolicyRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(toolPolicyRegistry);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  // This time the plugin config asks for yolo mode explicitly.
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // The explicit choice wins: approvals stay "never".
  const startParams = harness.requests.find((entry) => entry.method === "thread/start")
    ?.params as Record<string, unknown> | undefined;
  expect(startParams?.approvalPolicy).toBe("never");
  expect(startParams?.sandbox).toBe("danger-full-access");
});
it("ignores invalid Codex app-server env overrides when promoting tool policy approval", async () => {
  const toolPolicyRegistry = createMockPluginRegistry([
    { hookName: "before_tool_call", handler: vi.fn() },
  ]);
  initializeGlobalHookRunner(toolPolicyRegistry);

  // A whitespace-only mode and an unrecognised approval policy value.
  vi.stubEnv("OPENCLAW_CODEX_APP_SERVER_MODE", " ");
  vi.stubEnv("OPENCLAW_CODEX_APP_SERVER_APPROVAL_POLICY", "always");

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // Promotion to "untrusted" still happens despite the bad env values.
  const startParams = harness.requests.find((entry) => entry.method === "thread/start")
    ?.params as Record<string, unknown> | undefined;
  expect(startParams?.approvalPolicy).toBe("untrusted");
});
it("keeps the native hook relay default floor for short Codex turns", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const relayFloorMs = 30 * 60_000;
  // Captured before the attempt starts, so it is a lower bound on the registration time.
  const startedAtMs = Date.now();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await harness.waitForMethod("turn/start");

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  if (!registration) {
    throw new Error("Expected native hook relay registration");
  }

  // With no explicit ttl, the lifetime is the floor plus at most 10s of scheduling slack.
  const relayLifetimeMs = registration.expiresAtMs - startedAtMs;
  expect(relayLifetimeMs).toBeGreaterThanOrEqual(relayFloorMs);
  expect(relayLifetimeMs).toBeLessThan(relayFloorMs + 10_000);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
it("throttles default native hook relay renewal on current-turn progress", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await harness.waitForMethod("turn/start");

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  if (!registration) {
    throw new Error("Expected native hook relay registration");
  }
  const firstExpiresAtMs = registration.expiresAtMs;

  // Re-reads the registration each time so a renewed expiry would be observed.
  const currentExpiresAtMs = () =>
    nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)?.expiresAtMs;
  // Emits a completed assistant message item on turn-1 of the given thread.
  const notifyAssistantMessage = (threadId: string, id: string, text: string) =>
    harness.notify({
      method: "rawResponseItem/completed",
      params: {
        threadId,
        turnId: "turn-1",
        item: {
          type: "message",
          id,
          role: "assistant",
          content: [{ type: "output_text", text }],
        },
      },
    });

  // Back-to-back progress on the active turn leaves the expiry untouched.
  for (const progressId of ["raw-progress-1", "raw-progress-2"]) {
    await notifyAssistantMessage("thread-1", progressId, "Still working.");
    expect(currentExpiresAtMs()).toBe(firstExpiresAtMs);
  }

  // Progress reported for a different thread leaves it untouched as well.
  await notifyAssistantMessage("foreign-thread", "foreign-progress", "Wrong thread.");
  expect(currentExpiresAtMs()).toBe(firstExpiresAtMs);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
it("preserves an explicit native hook relay ttl", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const explicitTtlMs = 123_456;
  // Captured before the attempt starts, so it is a lower bound on the registration time.
  const startedAtMs = Date.now();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
      ttlMs: explicitTtlMs,
    },
  });
  await harness.waitForMethod("turn/start");

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  if (!registration) {
    throw new Error("Expected native hook relay registration");
  }

  // The configured ttl is honoured as given, allowing up to 10s of scheduling slack.
  const relayLifetimeMs = registration.expiresAtMs - startedAtMs;
  expect(relayLifetimeMs).toBeGreaterThanOrEqual(explicitTtlMs);
  expect(relayLifetimeMs).toBeLessThan(explicitTtlMs + 10_000);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
it("lets Codex app-server approval modes own native permission requests by default", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  // Guardian mode, with no nativeHookRelay options supplied.
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: {
      appServer: {
        mode: "guardian",
      },
    },
  });
  await harness.waitForMethod("turn/start");

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as { config?: Record<string, unknown> } | undefined;
  const startConfig = startParams?.config;

  // Hooks are on, but no PermissionRequest hook key is written into the config.
  expect(startConfig?.["features.hooks"]).toBe(true);
  expect(Array.isArray(startConfig?.["hooks.PreToolUse"])).toBe(true);
  expect(startConfig?.["hooks.PostToolUse"]).toEqual([]);
  expect(startConfig?.["hooks.Stop"]).toEqual([]);
  expect(startConfig).not.toHaveProperty("hooks.PermissionRequest");

  // The relay's default event set likewise has no permission_request entry.
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  const activeRegistration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  expect(activeRegistration?.allowedEvents).toEqual([
    "pre_tool_use",
    "post_tool_use",
    "before_agent_finalize",
  ]);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
it("preserves explicit native permission request relay events in app-server approval modes", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  // Guardian mode again, but the caller asks for permission_request explicitly.
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: {
      appServer: {
        mode: "guardian",
      },
    },
    nativeHookRelay: {
      enabled: true,
      events: ["permission_request"],
    },
  });
  await harness.waitForMethod("turn/start");

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as { config?: Record<string, unknown> } | undefined;
  const startConfig = startParams?.config;

  // The explicit request is honoured in the emitted hook config.
  expect(startConfig?.["features.hooks"]).toBe(true);
  expect(Array.isArray(startConfig?.["hooks.PermissionRequest"])).toBe(true);

  // And it is the only event allowed on the relay registration.
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  const activeRegistration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  expect(activeRegistration?.allowedEvents).toEqual(["permission_request"]);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
// Checks that the relay registration created for a 45-minute attempt expires no earlier than
// attempt timeout + startup timeout + turn-start timeout + cleanup grace, and that it is removed
// once the run settles.
it("keeps native hook relays alive across startup and long Codex turn timeouts", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const harness = createStartedThreadHarness();
const params = createParams(sessionFile, workspaceDir);
const abortController = new AbortController();
const attemptTimeoutMs = 45 * 60_000;
// NOTE(review): startup and turn-start budgets are modelled here as equal to the attempt
// timeout, with a 5-minute cleanup grace; presumably this mirrors the implementation — confirm.
const startupTimeoutMs = attemptTimeoutMs;
const turnStartTimeoutMs = attemptTimeoutMs;
const cleanupGraceMs = 5 * 60_000;
const expectedRelayTtlMs =
attemptTimeoutMs + startupTimeoutMs + turnStartTimeoutMs + cleanupGraceMs;
params.timeoutMs = attemptTimeoutMs;
params.abortSignal = abortController.signal;
// Captured before the run starts, so (expiresAtMs - startedAtMs) is a lower bound on the TTL.
const startedAtMs = Date.now();
const run = runCodexAppServerAttempt(params, {
nativeHookRelay: {
enabled: true,
events: ["pre_tool_use"],
},
});
// Tracks whether the happy path already completed the turn, so `finally` knows if it must
// finish the run itself.
let completed = false;
let relayId: string | undefined;
try {
await harness.waitForMethod("turn/start");
const startRequest = harness.requests.find((request) => request.method === "thread/start");
relayId = extractRelayIdFromThreadRequest(startRequest?.params);
const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
if (!registration) {
throw new Error("Expected native hook relay registration");
}
expect(registration.expiresAtMs - startedAtMs).toBeGreaterThanOrEqual(expectedRelayTtlMs);
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
completed = true;
await run;
expect(
nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId),
).toBeUndefined();
} finally {
// On an early failure, complete the turn and abort best-effort so the 45-minute attempt does
// not outlive this test; errors are swallowed so they cannot mask the original failure.
if (!completed) {
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" }).catch(() => {});
abortController.abort(new Error("test cleanup"));
await run.catch(() => {});
}
}
});
it("reuses the Codex native hook relay id across runs for the same session", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const lookupRegistration = (id: Parameters<
    typeof nativeHookRelayTesting.getNativeHookRelayRegistrationForTests
  >[0]) => nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(id);

  // First run: start a fresh thread and let it finish.
  const initialHarness = createStartedThreadHarness();
  const initialAttempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: { enabled: true, events: ["pre_tool_use"] },
  });
  await initialHarness.waitForMethod("turn/start");
  await initialHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await initialAttempt;

  const initialThreadStart = initialHarness.requests.find(
    ({ method }) => method === "thread/start",
  );
  const firstRelayId = extractRelayIdFromThreadRequest(initialThreadStart?.params);
  // The first run's registration is released once that run has completed.
  expect(lookupRegistration(firstRelayId)).toBeUndefined();

  // Second run: same session file, different run id, resumed thread.
  const resumeHarness = createResumeHarness();
  const resumeParams = createParams(sessionFile, workspaceDir);
  resumeParams.runId = "run-2";
  const resumeAttempt = runCodexAppServerAttempt(resumeParams, {
    nativeHookRelay: { enabled: true, events: ["pre_tool_use"] },
  });
  await resumeHarness.waitForMethod("turn/start");

  const threadResume = resumeHarness.requests.find(({ method }) => method === "thread/resume");
  const secondRelayId = extractRelayIdFromThreadRequest(threadResume?.params);
  expect(secondRelayId).toBe(firstRelayId);

  // The shared id is now registered on behalf of the second run.
  const activeRegistration = lookupRegistration(firstRelayId);
  expect(activeRegistration?.runId).toBe("run-2");
  expect(activeRegistration?.allowedEvents).toEqual(["pre_tool_use"]);

  await resumeHarness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await resumeAttempt;
  expect(lookupRegistration(firstRelayId)).toBeUndefined();
});
it("builds deterministic opaque Codex native hook relay ids", () => {
  const agentId = "dev-codex";
  const sessionId = "cu-pr-relay-smoke";
  const relayId = testing.buildCodexNativeHookRelayId({
    agentId,
    sessionId,
    sessionKey: "agent:dev-codex:cu-pr-relay-smoke",
  });
  // Pinned value: the same inputs must always produce this id.
  expect(relayId).toBe("codex-8810b5252975550c887ff0def512b25e944bac39");
  // Opaque: neither the agent id nor the session id may appear in the relay id.
  expect(relayId).not.toContain(agentId);
  expect(relayId).not.toContain(sessionId);
});
it("sends clearing Codex native hook config when the relay is disabled", async () => {
  const { requests, waitForMethod, completeTurn } = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { nativeHookRelay: { enabled: false } },
  );
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStart = requests.find(({ method }) => method === "thread/start");
  const threadStartParams = threadStart?.params as
    | { config?: Record<string, unknown> }
    | undefined;
  const hookConfig = threadStartParams?.config;
  // With the relay disabled, hooks are switched off and every hook list is sent empty.
  expect(hookConfig?.["features.hooks"]).toBe(false);
  const clearedHookKeys = [
    "hooks.PreToolUse",
    "hooks.PostToolUse",
    "hooks.PermissionRequest",
    "hooks.Stop",
  ];
  for (const hookKey of clearedHookKeys) {
    expect(hookConfig?.[hookKey]).toEqual([]);
  }
});
it("cleans up native hook relay state when turn/start fails", async () => {
  // Every method other than turn/start falls through to the harness defaults.
  const { requests } = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    throw new Error("turn start exploded");
  });
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { nativeHookRelay: { enabled: true } },
  );
  await expect(attempt).rejects.toThrow("turn start exploded");

  const threadStart = requests.find(({ method }) => method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  const leftoverRegistration =
    nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  expect(leftoverRegistration).toBeUndefined();
});
it("preserves a healthy binding when invalid image cleanup hits a transient thread", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Existing binding whose fingerprint is written as a single "message" tool.
  const staleFingerprint = JSON.stringify([{ name: "message" }]);
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: staleFingerprint,
  });
  const { requests } = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-transient");
      case "turn/start":
        throw new Error("invalid image_url base64 payload");
      default:
        return undefined;
    }
  });

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await expect(attempt).rejects.toThrow("invalid image_url base64 payload");

  const observedMethods = requests.map(({ method }) => method);
  expect(observedMethods).toEqual(["thread/start", "turn/start"]);
  // The stored binding still points at the pre-existing thread, not the transient one.
  const storedBinding = await readCodexAppServerBinding(sessionFile);
  expect(storedBinding?.threadId).toBe("thread-existing");
});
it("preserves a healthy binding when the server rejects unsupported image input", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/resume":
        return threadStartResult("thread-existing");
      case "turn/start":
        throw new Error("unsupported image input");
      default:
        return {};
    }
  });

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await expect(attempt).rejects.toThrow("unsupported image input");

  // Only a resume and the failing turn/start were sent; the binding is left untouched.
  const observedMethods = harness.requests.map(({ method }) => method);
  expect(observedMethods).toEqual(["thread/resume", "turn/start"]);
  const storedBinding = await readCodexAppServerBinding(sessionFile);
  expect(storedBinding?.threadId).toBe("thread-existing");
});
it("recognizes invalid image payload errors without matching unsupported image input", () => {
  // Each pair is [error message, expected classification].
  const cases: Array<[string, boolean]> = [
    ["invalid_image_url", true],
    ["malformed-base64 image payload", true],
    ["unsupported image input", false],
  ];
  for (const [message, expected] of cases) {
    expect(testing.isInvalidCodexImagePayloadError(message)).toBe(expected);
  }
});
it("preserves Codex usage-limit reset details when turn/start fails", async () => {
  const authProfileId = "openai-codex:work";
  // Reset timestamp in whole seconds, two minutes from now.
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;
  // The handler needs the harness it is installed on, so it reaches it through a mutable holder.
  const harnessHolder: { current?: ReturnType<typeof createStartedThreadHarness> } = {};
  harnessHolder.current = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    const activeHarness = harnessHolder.current;
    if (!activeHarness) {
      throw new Error("Expected Codex app-server harness to be initialized");
    }
    // Emit the rate-limit update (not awaited) right before turn/start rejects.
    void activeHarness.notify(rateLimitsUpdated(resetsAt));
    throw Object.assign(new Error("You've reached your usage limit."), {
      data: { codexErrorInfo: "usageLimitExceeded" },
    });
  });

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.authProfileId = authProfileId;
  params.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access",
        refresh: "refresh",
        expires: Date.now() + 60_000,
      },
    },
  };

  const outcome = await runCodexAppServerAttempt(params);
  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
});
it("uses a recent Codex rate-limit snapshot when turn/start omits reset details", async () => {
  const authProfileId = "openai-codex:work";
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;
  // Record a snapshot up front; the failing turn/start below carries no reset time itself.
  rememberCodexRateLimits({
    rateLimits: {
      limitId: "codex",
      limitName: "Codex",
      primary: { usedPercent: 100, windowDurationMins: 300, resetsAt },
      secondary: null,
      credits: null,
      planType: "plus",
      rateLimitReachedType: "rate_limit_reached",
    },
    rateLimitsByLimitId: null,
  });
  const { waitForMethod } = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    throw Object.assign(new Error("You've reached your usage limit."), {
      data: { codexErrorInfo: "usageLimitExceeded" },
    });
  });

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.authProfileId = authProfileId;
  params.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access",
        refresh: "refresh",
        expires: Date.now() + 60_000,
      },
    },
  };

  const attempt = runCodexAppServerAttempt(params);
  await waitForMethod("turn/start");
  const outcome = await attempt;
  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
  // No blockedUntil entry is written for this profile.
  expect(params.authProfileStore.usageStats?.[authProfileId]?.blockedUntil).toBeUndefined();
});
it("refreshes Codex account rate limits when turn/start omits reset details", async () => {
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;
  const { waitForMethod } = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "turn/start":
        throw Object.assign(new Error("You've reached your usage limit."), {
          data: { codexErrorInfo: "usageLimitExceeded" },
        });
      case "account/rateLimits/read":
        // The explicit read is answered with the same payload a notification would carry.
        return rateLimitsUpdated(resetsAt).params;
      default:
        return undefined;
    }
  });

  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await waitForMethod("account/rateLimits/read");
  const outcome = await attempt;
  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
  expect(outcome.promptError).not.toContain("Codex did not return a reset time");
});
it("cleans up native hook relay state when the Codex turn aborts", async () => {
  const { requests, waitForMethod } = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { nativeHookRelay: { enabled: true } },
  );
  await waitForMethod("turn/start");
  const threadStart = requests.find(({ method }) => method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);

  // Abort the in-flight run through the harness runtime rather than completing the turn.
  const abortAccepted = abortAgentHarnessRun("session-1");
  expect(abortAccepted).toBe(true);

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  const leftoverRegistration =
    nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  expect(leftoverRegistration).toBeUndefined();
});
it("refreshes Codex account rate limits when a failed turn omits reset details", async () => {
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;
  const { requests, waitForMethod, notify } = createStartedThreadHarness(async (method) =>
    method === "account/rateLimits/read" ? rateLimitsUpdated(resetsAt).params : undefined,
  );
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await waitForMethod("turn/start");

  // The turn itself starts fine; the usage-limit failure arrives via turn/completed.
  const usageLimitFailure = {
    message: "You've reached your usage limit.",
    codexErrorInfo: "usageLimitExceeded",
  };
  await notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "failed", error: usageLimitFailure },
    },
  });

  const outcome = await attempt;
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
  expect(outcome.promptError).not.toContain("Codex did not return a reset time");
  const rateLimitsWereRead = requests.some(({ method }) => method === "account/rateLimits/read");
  expect(rateLimitsWereRead).toBe(true);
});
it("fires agent_end with failure metadata when the codex turn fails", async () => {
  const agentEndHook = vi.fn();
  const runEventSink = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "agent_end", handler: agentEndHook }]),
  );
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.onAgentEvent = runEventSink;

  const attempt = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "failed", error: { message: "codex exploded" } },
    },
  });
  const outcome = await attempt;
  expect(outcome.promptError).toBe("codex exploded");
  expect(agentEndHook).toHaveBeenCalledTimes(1);

  // Look at the lifecycle events that were delivered to the per-run callback.
  const emittedEvents = runEventSink.mock.calls.map(([event]) => event) as Array<{
    data: { endedAt?: number; error?: string; phase?: string; startedAt?: number };
    stream: string;
  }>;
  const findLifecycleEvent = (phase: string) =>
    emittedEvents.find((event) => event.stream === "lifecycle" && event.data.phase === phase);

  const lifecycleStart = findLifecycleEvent("start");
  expect(typeof lifecycleStart?.data.startedAt).toBe("number");

  const lifecycleError = findLifecycleEvent("error");
  expect(typeof lifecycleError?.data.startedAt).toBe("number");
  expect(typeof lifecycleError?.data.endedAt).toBe("number");
  expect(lifecycleError?.data.error).toBe("codex exploded");
  // A failed turn must not have produced any assistant stream events.
  expect(emittedEvents.some((event) => event.stream === "assistant")).toBe(false);

  const [endPayload, endContext] = mockCall(agentEndHook, "agent_end") as [
    { error?: string; success?: boolean },
    { runId?: string; sessionId?: string },
  ];
  expect(endPayload.success).toBe(false);
  expect(endPayload.error).toBe("codex exploded");
  expect(endContext.runId).toBe("run-1");
  expect(endContext.sessionId).toBe("session-1");
});
it("fires llm_output and agent_end when turn/start fails", async () => {
  type LlmOutputPayload = {
    assistantTexts?: string[];
    harnessId?: string;
    model?: string;
    provider?: string;
    resolvedRef?: string;
    runId?: string;
    sessionId?: string;
  };
  type AgentEndPayload = {
    error?: string;
    messages?: Array<{ role?: string }>;
    success?: boolean;
  };
  type RecordedUserMessage = {
    content?: unknown;
    provenance?: unknown;
    role?: string;
    senderId?: unknown;
    senderLabel?: unknown;
    senderName?: unknown;
    senderUsername?: unknown;
    sourceChannel?: unknown;
  };

  const llmInputHook = vi.fn();
  const llmOutputHook = vi.fn();
  const agentEndHook = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([
      { hookName: "llm_input", handler: llmInputHook },
      { hookName: "llm_output", handler: llmOutputHook },
      { hookName: "agent_end", handler: agentEndHook },
    ]),
  );

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed the session with one assistant message before the attempt starts.
  SessionManager.open(sessionFile).appendMessage(
    assistantMessage("existing context", Date.now()),
  );
  createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    throw new Error("turn start exploded");
  });

  const params = createParams(sessionFile, workspaceDir);
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.messageChannel = "discord";
  params.messageProvider = "discord-voice";
  params.senderId = "user-123";
  params.senderName = "Test User";
  params.senderUsername = "testuser";
  params.inputProvenance = {
    kind: "external_user",
    sourceChannel: "discord",
  };

  await expect(runCodexAppServerAttempt(params)).rejects.toThrow("turn start exploded");
  // Each hook fires exactly once even though the turn never started.
  expect(llmInputHook).toHaveBeenCalledTimes(1);
  expect(llmOutputHook).toHaveBeenCalledTimes(1);
  expect(agentEndHook).toHaveBeenCalledTimes(1);

  const [outputPayload] = mockCall(llmOutputHook, "llm_output") as [LlmOutputPayload, unknown];
  expect(outputPayload.assistantTexts).toEqual([]);
  expect(outputPayload.model).toBe("gpt-5.4-codex");
  expect(outputPayload.provider).toBe("codex");
  expect(outputPayload.resolvedRef).toBe("codex/gpt-5.4-codex");
  expect(outputPayload.harnessId).toBe("codex");
  expect(outputPayload.runId).toBe("run-1");
  expect(outputPayload.sessionId).toBe("session-1");

  const [endPayload] = mockCall(agentEndHook, "agent_end") as [AgentEndPayload, unknown];
  expect(endPayload.success).toBe(false);
  expect(endPayload.error).toBe("turn start exploded");
  expect(endPayload.messages?.some((message) => message.role === "assistant")).toBe(true);

  // The user message in the agent_end payload carries the sender and provenance fields set above.
  const recordedUserMessage = endPayload.messages?.find((message) => message.role === "user") as
    | RecordedUserMessage
    | undefined;
  expect(recordedUserMessage).toMatchObject({
    role: "user",
    content: "hello",
    sourceChannel: "discord",
    senderId: "user-123",
    senderName: "Test User",
    senderUsername: "testuser",
    senderLabel: "Test User (user-123)",
    provenance: {
      kind: "external_user",
      sourceChannel: "discord",
    },
  });
});
it("fires agent_end with success false when the codex turn is aborted", async () => {
  const agentEndHook = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "agent_end", handler: agentEndHook }]),
  );
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");

  // Abort while the turn is still in flight.
  const abortAccepted = abortAgentHarnessRun("session-1");
  expect(abortAccepted).toBe(true);

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(agentEndHook).toHaveBeenCalledTimes(1);
  const [endPayload] = mockCall(agentEndHook, "agent_end") as [{ success?: boolean }, unknown];
  expect(endPayload.success).toBe(false);
});
it("forwards queued user input and aborts the active app-server turn", async () => {
  const harness = createStartedThreadHarness();
  const sentMethods = () => harness.requests.map(({ method }) => method);
  const findRequest = (wanted: string) =>
    harness.requests.find(({ method }) => method === wanted);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");

  // Queue a follow-up message and wait until it has been forwarded as turn/steer.
  const queued = queueActiveRunMessageForTest("session-1", "more context", { debounceMs: 1 });
  expect(queued).toBe(true);
  await vi.waitFor(() => expect(sentMethods()).toContain("turn/steer"), {
    interval: 1,
  });

  // Then abort and wait for the interrupt request.
  const abortAccepted = abortAgentHarnessRun("session-1");
  expect(abortAccepted).toBe(true);
  await vi.waitFor(() => expect(sentMethods()).toContain("turn/interrupt"), { interval: 1 });

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);

  const startParams = findRequest("thread/start")?.params as
    | {
        approvalPolicy?: string;
        approvalsReviewer?: string;
        developerInstructions?: string;
        model?: string;
        sandbox?: string;
      }
    | undefined;
  expect(startParams?.model).toBe("gpt-5.4-codex");
  expect(startParams?.approvalPolicy).toBe("never");
  expect(startParams?.sandbox).toBe("danger-full-access");
  expect(startParams?.approvalsReviewer).toBe("user");
  expect(startParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);

  const steerRequest = findRequest("turn/steer");
  expect(steerRequest?.params).toEqual({
    threadId: "thread-1",
    expectedTurnId: "turn-1",
    input: [{ type: "text", text: "more context", text_elements: [] }],
  });

  const interruptRequest = findRequest("turn/interrupt");
  expect(interruptRequest?.params).toEqual({ threadId: "thread-1", turnId: "turn-1" });
});
it("batches default queued steering before sending turn/steer", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");

  // Two messages queued back to back with the same debounce window.
  for (const text of ["first", "second"]) {
    expect(queueActiveRunMessageForTest("session-1", text, { debounceMs: 5 })).toBe(true);
  }

  // Exactly one turn/steer is expected, carrying both messages in order.
  const batchedSteer = {
    method: "turn/steer",
    params: {
      threadId: "thread-1",
      expectedTurnId: "turn-1",
      input: [
        { type: "text", text: "first", text_elements: [] },
        { type: "text", text: "second", text_elements: [] },
      ],
    },
  };
  await vi.waitFor(
    () => {
      const steerRequests = harness.requests.filter(({ method }) => method === "turn/steer");
      expect(steerRequests).toEqual([batchedSteer]);
    },
    { interval: 1 },
  );

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
});
it("resolves queued steering only after turn/steer is accepted", async () => {
  // Stub client whose request() always succeeds.
  const sendRequest = vi.fn(async () => ({ turnId: "turn-1" }));
  const { signal } = new AbortController();
  const steeringQueue = testing.createCodexSteeringQueue({
    client: { request: sendRequest } as never,
    threadId: "thread-1",
    turnId: "turn-1",
    answerPendingUserInput: () => false,
    signal,
  });

  const pending = steeringQueue.queue("accepted", { debounceMs: 0 });
  await expect(pending).resolves.toBeUndefined();

  const expectedSteerParams = {
    threadId: "thread-1",
    expectedTurnId: "turn-1",
    input: [{ type: "text", text: "accepted", text_elements: [] }],
  };
  expect(sendRequest).toHaveBeenCalledWith("turn/steer", expectedSteerParams);
});
it("rejects queued steering when turn/steer is rejected", async () => {
  // Stub client whose request() always rejects.
  const sendRequest = vi.fn(async () => {
    throw new Error("cannot steer a compact turn");
  });
  const { signal } = new AbortController();
  const steeringQueue = testing.createCodexSteeringQueue({
    client: { request: sendRequest } as never,
    threadId: "thread-1",
    turnId: "turn-1",
    answerPendingUserInput: () => false,
    signal,
  });

  const pending = steeringQueue.queue("rejected", { debounceMs: 0 });
  await expect(pending).rejects.toThrow("cannot steer a compact turn");

  // The request was still attempted with the queued text.
  const expectedSteerParams = {
    threadId: "thread-1",
    expectedTurnId: "turn-1",
    input: [{ type: "text", text: "rejected", text_elements: [] }],
  };
  expect(sendRequest).toHaveBeenCalledWith("turn/steer", expectedSteerParams);
});
it("rejects queued steering when the run aborts before debounce flush", async () => {
  const runAbort = new AbortController();
  const sendRequest = vi.fn(async () => ({ turnId: "turn-1" }));
  const steeringQueue = testing.createCodexSteeringQueue({
    client: { request: sendRequest } as never,
    threadId: "thread-1",
    turnId: "turn-1",
    answerPendingUserInput: () => false,
    signal: runAbort.signal,
  });

  const pending = steeringQueue.queue("aborted", { debounceMs: 0 });
  // Attach the rejection expectation before aborting so the rejection is never unhandled.
  const rejection = expect(pending).rejects.toThrow("codex app-server steering queue aborted");
  runAbort.abort();
  await rejection;

  // Aborting in the same tick as queueing means no turn/steer request is sent.
  expect(sendRequest).not.toHaveBeenCalled();
});
it("flushes pending default queued steering during normal turn cleanup", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");

  // A 30-second debounce cannot elapse within the test, so a steer request here must come
  // from the flush at turn completion.
  const queued = queueActiveRunMessageForTest("session-1", "late steer", { debounceMs: 30_000 });
  expect(queued).toBe(true);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const steerRequests = harness.requests.filter(({ method }) => method === "turn/steer");
  const flushedSteer = {
    method: "turn/steer",
    params: {
      threadId: "thread-1",
      expectedTurnId: "turn-1",
      input: [{ type: "text", text: "late steer", text_elements: [] }],
    },
  };
  expect(steerRequests).toEqual([flushedSteer]);
});
it("batches explicit all-mode steering before sending turn/steer", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");

  // Both messages are queued with steeringMode "all" and no explicit debounce.
  for (const text of ["first", "second"]) {
    expect(queueActiveRunMessageForTest("session-1", text, { steeringMode: "all" })).toBe(true);
  }

  // Exactly one turn/steer is expected, carrying both messages in order.
  const batchedSteer = {
    method: "turn/steer",
    params: {
      threadId: "thread-1",
      expectedTurnId: "turn-1",
      input: [
        { type: "text", text: "first", text_elements: [] },
        { type: "text", text: "second", text_elements: [] },
      ],
    },
  };
  await vi.waitFor(
    () => {
      const steerRequests = harness.requests.filter(({ method }) => method === "turn/steer");
      expect(steerRequests).toEqual([batchedSteer]);
    },
    { interval: 1 },
  );

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
});
// Installs a hand-rolled app-server client, issues an item/tool/requestUserInput server request
// while a turn is active, and checks that a queued follow-up message ("2") answers that request
// instead of being forwarded as turn/steer.
it("routes request_user_input prompts through the active run follow-up queue", async () => {
// Captured from the fake client below so the test can push notifications and server requests.
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
let handleRequest:
| ((request: { id: string; method: string; params?: unknown }) => Promise<unknown>)
| undefined;
// Minimal request stub: only thread/start and turn/start return real results.
const request = vi.fn(async (method: string) => {
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
return turnStartResult();
}
return {};
});
setCodexAppServerClientFactoryForTest(
async () =>
({
request,
addNotificationHandler: (handler: typeof notify) => {
notify = handler;
return () => undefined;
},
addRequestHandler: (
handler: (request: {
id: string;
method: string;
params?: unknown;
}) => Promise<unknown>,
) => {
handleRequest = handler;
return () => undefined;
},
}) as never,
);
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.onBlockReply = vi.fn();
const run = runCodexAppServerAttempt(params);
await vi.waitFor(
() => expect(request.mock.calls.map(([method]) => method)).toContain("turn/start"),
{ interval: 1 },
);
await vi.waitFor(() => expect(handleRequest).toBeTypeOf("function"), fastWait);
// Deliberately not awaited: the response stays pending until the follow-up message arrives.
const response = handleRequest?.({
id: "request-input-1",
method: "item/tool/requestUserInput",
params: {
threadId: "thread-1",
turnId: "turn-1",
itemId: "ask-1",
questions: [
{
id: "mode",
header: "Mode",
question: "Pick a mode",
isOther: false,
isSecret: false,
options: [
{ label: "Fast", description: "Use less reasoning" },
{ label: "Deep", description: "Use more reasoning" },
],
},
],
},
});
// Wait until onBlockReply has been called once before queueing the answer.
await vi.waitFor(() => expect(params.onBlockReply).toHaveBeenCalledTimes(1), fastWait);
// "2" is expected to resolve to the second option ("Deep"), per the assertion below.
expect(queueActiveRunMessageForTest("session-1", "2")).toBe(true);
await expect(response).resolves.toEqual({
answers: { mode: { answers: ["Deep"] } },
});
// The queued answer must not also have been sent to the turn as a steer request.
const requestCalls = request.mock.calls as unknown as Array<[string, unknown]>;
expect(
requestCalls.some(
([method, callParams]) =>
method === "turn/steer" &&
(callParams as { expectedTurnId?: string } | undefined)?.expectedTurnId === "turn-1",
),
).toBe(false);
// Complete the turn so the attempt can settle.
await notify({
method: "turn/completed",
params: {
threadId: "thread-1",
turnId: "turn-1",
turn: { id: "turn-1", status: "completed" },
},
});
await run;
});
// Aborts a run whose turn/interrupt request rejects with a "client is closed" error and checks
// that the rejection never reaches the process as an unhandledRejection.
it("does not leak unhandled rejections when shutdown closes before interrupt", async () => {
const unhandledRejections: unknown[] = [];
const onUnhandledRejection = (reason: unknown) => {
unhandledRejections.push(reason);
};
// Process-wide listener; it is removed in `finally` so it cannot leak into other tests.
process.on("unhandledRejection", onUnhandledRejection);
try {
const { waitForMethod } = createStartedThreadHarness(async (method) => {
if (method === "turn/interrupt") {
throw new Error("codex app-server client is closed");
}
});
const abortController = new AbortController();
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.abortSignal = abortController.signal;
const run = runCodexAppServerAttempt(params);
await waitForMethod("turn/start");
abortController.abort("shutdown");
const result = await run;
expect(result.aborted).toBe(true);
// Yield one setImmediate turn so a pending unhandledRejection event can fire before asserting.
await new Promise((resolve) => setImmediate(resolve));
expect(unhandledRejections).toStrictEqual([]);
} finally {
process.off("unhandledRejection", onUnhandledRejection);
}
});
it("forwards image attachments to the app-server turn input", async () => {
  // Base64 payload of a small PNG, used as the attachment body.
  const pngBase64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=";
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  // The model is created with both text and image inputs.
  params.model = createCodexTestModel("codex", ["text", "image"]);
  params.images = [{ type: "image", mimeType: "image/png", data: pngBase64 }];

  const attempt = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const turnStartRequest = harness.requests.find(({ method }) => method === "turn/start");
  const turnInput = (
    turnStartRequest?.params as
      | { input?: Array<{ text?: string; text_elements?: unknown[]; type?: string; url?: string }> }
      | undefined
  )?.input;
  // The prompt text comes first, followed by the image encoded as a data URL.
  expect(turnInput).toEqual([
    { type: "text", text: "hello", text_elements: [] },
    { type: "image", url: `data:image/png;base64,${pngBase64}` },
  ]);
});
it("does not drop turn completion notifications emitted while turn/start is in flight", async () => {
  // Declared before assignment because the handler refers back to the harness it belongs to.
  let harness: ReturnType<typeof createAppServerHarness>;
  harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        // Deliver the completion notification before the turn/start response resolves.
        await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
        return turnStartResult("turn-1", "completed");
      default:
        return {};
    }
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const outcome = await runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
// Emits item/started and turn/completed while turn/start is still in flight, then closes the
// client, and checks that the attempt still resolves without an error, abort or timeout.
it("does not fail when a buffered terminal notification is followed by client close", async () => {
let harness: ReturnType<typeof createAppServerHarness>;
// Deferred that resolves once the handler has sent both notifications.
let resolveBufferedTerminal!: () => void;
const bufferedTerminal = new Promise<void>((resolve) => {
resolveBufferedTerminal = resolve;
});
harness = createAppServerHarness(async (method) => {
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
// Both notifications are sent before the turn/start response is returned.
await harness.notify({
method: "item/started",
params: {
threadId: "thread-1",
turnId: "turn-1",
item: { id: "tool-1", type: "commandExecution" },
},
});
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
resolveBufferedTerminal();
// The response still reports "inProgress" although completion was already notified.
return turnStartResult("turn-1", "inProgress");
}
return {};
});
const run = runCodexAppServerAttempt(
createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
{ turnTerminalIdleTimeoutMs: 60_000 },
);
await bufferedTerminal;
// Yield one setImmediate turn before closing the client.
await new Promise<void>((resolve) => setImmediate(resolve));
harness.close();
const result = await run;
expect(result.promptError ?? undefined).toBeUndefined();
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
});
it("does not time out when turn progress arrives before turn/start returns", async () => {
// Scenario: a turn/started notification is emitted from inside the turn/start
// handler, i.e. before the turn/start response itself is returned.
let harness: ReturnType<typeof createAppServerHarness>;
harness = createAppServerHarness(async (method) => {
if (method === "thread/start") {
return threadStartResult();
}
if (method === "turn/start") {
await harness.notify({
method: "turn/started",
params: {
threadId: "thread-1",
turnId: "turn-1",
turn: { id: "turn-1", status: "inProgress" },
},
});
return turnStartResult("turn-1", "inProgress");
}
return {};
});
const params = createParams(
path.join(tempDir, "session.jsonl"),
path.join(tempDir, "workspace"),
);
params.timeoutMs = 60_000;
const run = runCodexAppServerAttempt(params, {
turnCompletionIdleTimeoutMs: 5,
turnTerminalIdleTimeoutMs: 60_000,
});
await harness.waitForMethod("turn/start");
// Wait 20 ms, longer than the 5 ms turnCompletionIdleTimeoutMs configured
// above, then assert that no turn/interrupt request was issued meanwhile.
await new Promise((resolve) => setTimeout(resolve, 20));
expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
// Only now complete the turn; the attempt must finish neither aborted nor timed out.
await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
const result = await run;
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
});
it("completes when turn/start returns a terminal turn without a follow-up notification", async () => {
  // turn/start answers directly with a completed turn carrying the final agent
  // message; this test never emits a turn/completed notification.
  const harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        return {
          turn: {
            id: "turn-1",
            status: "completed",
            items: [{ type: "agentMessage", id: "msg-1", text: "done from response" }],
          },
        };
      default:
        return {};
    }
  });
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const outcome = await runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));

  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("turn/start");
  expect(outcome.assistantTexts).toEqual(["done from response"]);
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
it("surfaces Codex-native image generation saved paths as reply media", async () => {
  // Path reported by the imageGeneration item; it must come back as tool media.
  const savedImagePath = "/tmp/codex-home/generated_images/session-1/ig_123.png";
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const attempt = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");

  // Complete the turn with a single finished image-generation item and no agent message.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [
          {
            type: "imageGeneration",
            id: "ig_123",
            status: "completed",
            revisedPrompt: "A tiny blue square",
            result: "Zm9v",
            savedPath: savedImagePath,
          },
        ],
      },
    },
  });

  const outcome = await attempt;
  expect(outcome.assistantTexts).toEqual([]);
  expect(outcome.toolMediaUrls).toEqual([savedImagePath]);
});
it("does not complete on unscoped turn/completed notifications", async () => {
  const harness = createStartedThreadHarness();
  const run = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  // Observe fulfilment without awaiting. The no-op rejection handler keeps the
  // derived promise from raising an unhandled rejection if the run fails; the
  // failure itself is still reported by `await run` below.
  let resolved = false;
  void run.then(
    () => {
      resolved = true;
    },
    () => undefined,
  );
  await harness.waitForMethod("turn/start");
  // This completion has no threadId in its params, so it must not release the run.
  await harness.notify({
    method: "turn/completed",
    params: {
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-wrong", text: "wrong completion" }],
      },
    },
  });
  // Give pending work one setImmediate tick before checking the run is still open.
  await new Promise<void>((resolve) => setImmediate(resolve));
  expect(resolved).toBe(false);
  // The completion that names the thread is the one that finishes the run,
  // and its text is the only one reported.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-right", text: "final completion" }],
      },
    },
  });
  const result = await run;
  expect(result.assistantTexts).toEqual(["final completion"]);
  expect(result.aborted).toBe(false);
  expect(result.timedOut).toBe(false);
});
it("releases completion when Codex raw-events an interrupted turn marker", async () => {
  const harness = createStartedThreadHarness();
  const run = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { turnTerminalIdleTimeoutMs: 60_000 },
  );
  // Observe fulfilment without awaiting. The no-op rejection handler keeps the
  // derived promise from raising an unhandled rejection if the run fails; the
  // failure itself is still reported by `await run` below.
  let resolved = false;
  void run.then(
    () => {
      resolved = true;
    },
    () => undefined,
  );
  await harness.waitForMethod("turn/start");
  // A raw user-role message whose text is the <turn_aborted> marker. No
  // turn/completed notification is sent in this test, so the run can only
  // finish by treating this item as terminal.
  await harness.notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        id: "abort-marker-1",
        type: "message",
        role: "user",
        content: [
          {
            type: "input_text",
            text: "<turn_aborted>\nThe user interrupted the previous turn on purpose. Any running unified exec processes may still be running in the background. If any tools/commands were aborted, they may have partially executed.\n</turn_aborted>",
          },
        ],
      },
    },
  });
  const result = await run;
  expect(resolved).toBe(true);
  expect(result.aborted).toBe(true);
  expect(result.timedOut).toBe(false);
  expect(result.promptError).toBeNull();
  // The run must be released without the attempt itself requesting turn/interrupt.
  expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(false);
});
it("releases completion when the app-server client closes during an active turn", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    turnTerminalIdleTimeoutMs: 60_000,
  });

  await harness.waitForMethod("turn/start");
  // Yield one setImmediate tick, then close the client while the turn is still open.
  await new Promise<void>((resolve) => setImmediate(resolve));
  harness.close();

  // The close is reported as a prompt error, not as an abort or a timeout.
  const outcome = await attempt;
  expect(outcome.promptError).toBe("codex app-server client closed before turn completed");
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
it("does not fail a turn when the client closes after terminal completion is queued", async () => {
// Statement order is the point of this test: the completion is started, the
// client is closed, and only then is the completion promise awaited.
const harness = createStartedThreadHarness();
const run = runCodexAppServerAttempt(
createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
{ turnTerminalIdleTimeoutMs: 60_000 },
);
await harness.waitForMethod("turn/start");
// Deliberately not awaited yet, so close() is called before the returned
// promise has been awaited.
const completed = harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
harness.close();
await completed;
const result = await run;
// promptError may be null or undefined; either counts as "no error".
expect(result.promptError ?? undefined).toBeUndefined();
expect(result.aborted).toBe(false);
expect(result.timedOut).toBe(false);
});
it("does not treat a user prompt containing the interrupted marker as terminal", async () => {
  const harness = createStartedThreadHarness();
  // The prompt itself is the <turn_aborted> marker text, so the echoed user
  // message below has the same text as params.prompt.
  const markerPrompt =
    "<turn_aborted>\nThe user interrupted the previous turn on purpose. Any running unified exec processes may still be running in the background. If any tools/commands were aborted, they may have partially executed.\n</turn_aborted>";
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.prompt = markerPrompt;
  const run = runCodexAppServerAttempt(params, { turnTerminalIdleTimeoutMs: 60_000 });
  // Observe fulfilment without awaiting. The no-op rejection handler keeps the
  // derived promise from raising an unhandled rejection if the run fails; the
  // failure itself is still reported by `await run` below.
  let resolved = false;
  void run.then(
    () => {
      resolved = true;
    },
    () => undefined,
  );
  await harness.waitForMethod("turn/start");
  // Raw user-role message carrying the prompt text: it must not end the run.
  await harness.notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        id: "user-prompt-1",
        type: "message",
        role: "user",
        content: [
          {
            type: "input_text",
            text: markerPrompt,
          },
        ],
      },
    },
  });
  // Give pending work one setImmediate tick before checking the run is still open.
  await new Promise<void>((resolve) => setImmediate(resolve));
  expect(resolved).toBe(false);
  // The real turn/completed notification is what finishes the run.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-1", text: "It marks an interrupted turn." }],
      },
    },
  });
  const result = await run;
  expect(resolved).toBe(true);
  expect(result.aborted).toBe(false);
  expect(result.timedOut).toBe(false);
  expect(result.assistantTexts).toEqual(["It marks an interrupted turn."]);
});
it("releases completion when a projector callback throws during turn/completed", async () => {
  // Regression for openclaw/openclaw#67996: a throw inside the projector's
  // turn/completed handler must not strand resolveCompletion, otherwise the
  // gateway session lane stays locked and every follow-up message queues
  // behind a run that will never resolve.
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  // Replaced with the handler the attempt registers on the fake client.
  let deliverNotification: NotificationHandler = async () => undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  // Downstream consumer that throws on every agent event.
  params.onAgentEvent = () => {
    throw new Error("downstream consumer exploded");
  };

  const attempt = runCodexAppServerAttempt(params);
  await vi.waitFor(() => {
    const calledMethods = request.mock.calls.map(([method]) => method);
    expect(calledMethods).toContain("turn/start");
  });
  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ id: "plan-1", type: "plan", text: "step one\nstep two" }],
      },
    },
  });

  const outcome = await attempt;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
it("routes MCP approval elicitations through the native bridge", async () => {
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;

  // Both handlers are captured from the fake client once the attempt registers them.
  let deliverNotification: NotificationHandler = async () => undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;

  // Stub the bridge so the test only checks routing and the pass-through of its answer.
  const bridgeSpy = vi
    .spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest")
    .mockResolvedValue({
      action: "accept",
      content: { approve: true },
      _meta: null,
    });
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  });

  const elicitationResult = await serverRequestHandler?.({
    id: "request-elicitation-1",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      serverName: "codex_apps__github",
      mode: "form",
    },
  });
  expect(elicitationResult).toEqual({
    action: "accept",
    content: { approve: true },
    _meta: null,
  });

  // The bridge must have been called with the thread and turn of the request.
  const [bridgeCall] = mockCall(bridgeSpy, "elicitation bridge") as [
    { threadId?: string; turnId?: string },
  ];
  expect(bridgeCall.threadId).toBe("thread-1");
  expect(bridgeCall.turnId).toBe("turn-1");

  // Finish the turn so the attempt promise settles.
  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await attempt;
});
it("passes session plugin app policy context to elicitation handling", async () => {
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;
  type GranularApprovalParams =
    | { approvalPolicy?: { granular?: { mcp_elicitations?: boolean } } }
    | undefined;

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });

  // Seed the app inventory cache with one accessible, enabled calendar app.
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });

  // Both handlers are captured from the fake client once the attempt registers them.
  let deliverNotification: NotificationHandler = async () => undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const bridgeSpy = vi
    .spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest")
    .mockResolvedValue({
      action: "decline",
      content: null,
      _meta: null,
    });

  // Plugin summary shared by the plugin/list and plugin/read responses; a
  // fresh object is built for every response.
  const calendarPluginSummary = () => ({
    id: "google-calendar",
    name: "google-calendar",
    source: { type: "remote" },
    installed: true,
    enabled: true,
    installPolicy: "AVAILABLE",
    authPolicy: "ON_USE",
    availability: "AVAILABLE",
    interface: null,
  });
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [calendarPluginSummary()],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: calendarPluginSummary(),
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );

  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  const attempt = runCodexAppServerAttempt(params, { pluginConfig });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  });

  const elicitationResult = await serverRequestHandler?.({
    id: "request-elicitation-1",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      serverName: "google-calendar",
      mode: "form",
    },
  });
  expect(elicitationResult).toEqual({
    action: "decline",
    content: null,
    _meta: null,
  });

  // The bridge call must carry the plugin app policy for the calendar app.
  const [bridgeCall] = mockCall(bridgeSpy, "elicitation bridge") as [
    {
      pluginAppPolicyContext?: {
        apps?: Record<string, { mcpServerNames?: string[]; pluginName?: string }>;
      };
      threadId?: string;
      turnId?: string;
    },
  ];
  expect(bridgeCall.threadId).toBe("thread-1");
  expect(bridgeCall.turnId).toBe("turn-1");
  const calendarPolicy = bridgeCall.pluginAppPolicyContext?.apps?.["google-calendar-app"];
  expect(calendarPolicy?.pluginName).toBe("google-calendar");
  expect(calendarPolicy?.mcpServerNames).toEqual(["google-calendar"]);

  // thread/start and turn/start must both enable granular MCP elicitation approvals.
  const requestCalls = request.mock.calls as unknown as Array<[string, unknown, unknown?]>;
  const paramsOf = (wanted: string) =>
    requestCalls.find(([method]) => method === wanted)?.[1] as GranularApprovalParams;
  const threadStartParams = paramsOf("thread/start");
  expect(threadStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);
  const turnStartParams = paramsOf("turn/start");
  expect(turnStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);

  // Finish the turn so the attempt promise settles.
  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await attempt;
});
it("keys plugin app inventory by the resolved Codex account", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const authProfileId = "openai-codex:work";
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });

  // Seed the cache under a key that includes the auth profile and account id.
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
      authProfileId,
      accountId: "account-work",
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });

  // Plugin summary shared by the plugin/list and plugin/read responses; a
  // fresh object is built for every response.
  const calendarPluginSummary = () => ({
    id: "google-calendar",
    name: "google-calendar",
    source: { type: "remote" },
    installed: true,
    enabled: true,
    installPolicy: "AVAILABLE",
    authPolicy: "ON_USE",
    availability: "AVAILABLE",
    interface: null,
  });
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [calendarPluginSummary()],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: calendarPluginSummary(),
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      case "app/list":
        // A request here means the seeded cache entry was not found.
        throw new Error("app/list should use the account-keyed cache entry");
      default:
        return undefined;
    }
  });

  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  params.authProfileId = authProfileId;
  // OAuth profile whose accountId matches the one used for the seeded cache key.
  params.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access-token",
        refresh: "refresh-token",
        expires: Date.now() + 60_000,
        accountId: "account-work",
        email: "work@example.test",
      },
    },
  };

  const attempt = runCodexAppServerAttempt(params, { pluginConfig });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStartRequest = harness.requests.find(({ method }) => method === "thread/start");
  const threadStartParams = threadStartRequest?.params as
    | { config?: { apps?: Record<string, { enabled?: boolean }> } }
    | undefined;
  expect(threadStartParams?.config?.apps?.["google-calendar-app"]?.enabled).toBe(true);
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).not.toContain("app/list");
});
it("keys plugin app inventory by inherited API key fallback credentials", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });

  // Seed the cache under a key fingerprinted from the *old* CODEX_API_KEY value.
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
      envApiKeyFingerprint: resolveCodexAppServerEnvApiKeyCacheKey({
        startOptions: appServer.start,
        baseEnv: { CODEX_API_KEY: "old-codex-env-key" },
      }),
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });

  // The attempt runs with a different key in the environment.
  vi.stubEnv("CODEX_API_KEY", "new-codex-env-key");
  vi.stubEnv("OPENAI_API_KEY", "");

  // Plugin summary shared by the plugin/list and plugin/read responses; a
  // fresh object is built for every response.
  const calendarPluginSummary = () => ({
    id: "google-calendar",
    name: "google-calendar",
    source: { type: "remote" },
    installed: true,
    enabled: true,
    installPolicy: "AVAILABLE",
    authPolicy: "ON_USE",
    availability: "AVAILABLE",
    interface: null,
  });
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "app/list":
        return {
          data: [
            {
              id: "google-calendar-app",
              name: "Google Calendar",
              description: null,
              logoUrl: null,
              logoUrlDark: null,
              distributionChannel: null,
              branding: null,
              appMetadata: null,
              labels: null,
              installUrl: null,
              isAccessible: true,
              isEnabled: true,
              pluginDisplayNames: [],
            },
          ],
          nextCursor: null,
        };
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [calendarPluginSummary()],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: calendarPluginSummary(),
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      default:
        return undefined;
    }
  });

  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  const attempt = runCodexAppServerAttempt(params, { pluginConfig });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // The entry seeded under the old key must not be reused: app/list is requested again.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("app/list");
  const threadStartRequest = harness.requests.find(({ method }) => method === "thread/start");
  const threadStartParams = threadStartRequest?.params as
    | { config?: { apps?: Record<string, { enabled?: boolean }> } }
    | undefined;
  expect(threadStartParams?.config?.apps?.["google-calendar-app"]?.enabled).toBe(true);
});
it("times out app-server startup before thread setup can hang forever", async () => {
  // The client factory returns a promise that never settles, so only the
  // startup timeout can end the attempt.
  const neverSettles = () => new Promise<never>(() => undefined);
  setCodexAppServerClientFactoryForTest(neverSettles);

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 1;

  const attempt = runCodexAppServerAttempt(params, { startupTimeoutFloorMs: 1 });
  await expect(attempt).rejects.toThrow("codex app-server startup timed out");
  // After the failure, queueing a message for the session must report false.
  const queued = queueActiveRunMessageForTest("session-1", "after timeout");
  expect(queued).toBe(false);
});
it("passes the selected auth profile into app-server startup", async () => {
  const selectedProfileId = "openai-codex:work";
  const selectedAgentDir = path.join(tempDir, "agent");
  // Arguments the harness reports through its onStart callback, one entry per call.
  const startProfileIds: Array<string | undefined> = [];
  const startAgentDirs: Array<string | undefined> = [];
  const harness = createStartedThreadHarness(undefined, {
    onStart: (profileId, dir) => {
      startProfileIds.push(profileId);
      startAgentDirs.push(dir);
    },
  });

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.authProfileId = selectedProfileId;
  params.agentDir = selectedAgentDir;

  const attempt = runCodexAppServerAttempt(params);
  await vi.waitFor(
    () => {
      expect(startProfileIds).toEqual(["openai-codex:work"]);
    },
    { interval: 1 },
  );
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // onStart was called exactly once, with the selected profile and agent dir.
  expect(startProfileIds).toEqual(["openai-codex:work"]);
  expect(startAgentDirs).toEqual([selectedAgentDir]);
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("turn/start");
});
it("times out turn start before the active run handle is installed", async () => {
  // Fake client whose turn/start rejects after the timeout the caller passes
  // in the request options (0 ms when none is given).
  const request = vi.fn(
    async (method: string, _params?: unknown, options?: { timeoutMs?: number }) => {
      switch (method) {
        case "thread/start":
          return threadStartResult("thread-1");
        case "turn/start": {
          const delayMs = options?.timeoutMs ?? 0;
          return await new Promise<never>((_, reject) => {
            setTimeout(() => reject(new Error("turn/start timed out")), delayMs);
          });
        }
        default:
          return {};
      }
    },
  );
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: () => () => undefined,
        addRequestHandler: () => () => undefined,
      }) as never,
  );

  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 1;

  const attempt = runCodexAppServerAttempt(params);
  await expect(attempt).rejects.toThrow("turn/start timed out");
  // After the failure, queueing a message for the session must report false.
  const queued = queueActiveRunMessageForTest("session-1", "after timeout");
  expect(queued).toBe(false);
});
it("keeps extended history enabled when resuming a bound Codex thread", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // An existing binding is written first so the attempt resumes instead of starting fresh.
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const harness = createResumeHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  expectResumeRequest(harness.requests, {
    threadId: "thread-existing",
    model: "gpt-5.4-codex",
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: "danger-full-access",
    persistExtendedHistory: true,
  });
  // The resume request must not carry the GPT-5 behavior contract in its developer instructions.
  const resumeParams = harness.requests.find(({ method }) => method === "thread/resume")
    ?.params as Record<string, unknown> | undefined;
  expect(resumeParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);
});
it("starts a fresh Codex thread before resume when the native rollout is over budget", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // Session index entry written next to the session file.
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // Native rollout for the bound thread: one token_count record reporting 70,000 total tokens.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountRecord = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 70_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountRecord)}\n`,
  );

  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  params.config = {
    agents: {
      defaults: {
        compaction: {
          truncateAfterCompaction: true,
          maxActiveTranscriptBytes: "1mb",
        },
      },
    },
  } as never;

  const attempt = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // A new thread is started, no resume is requested, and the binding points at the new thread.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.threadId).toBe("thread-1");
});
it("preserves bound auth when rotating an over-budget native rollout", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  // The existing binding carries the auth profile; the run params below do not.
  await writeExistingBinding(sessionFile, workspaceDir, {
    authProfileId: "openai-codex:work",
    dynamicToolsFingerprint: "[]",
  });

  // Session index entry written next to the session file.
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // Native rollout for the bound thread: one token_count record reporting 70,000 total tokens.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountRecord = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 70_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountRecord)}\n`,
  );

  // Auth profile ids the harness reports through its onStart callback.
  const startProfileIds: Array<string | undefined> = [];
  const harness = createStartedThreadHarness(undefined, {
    onStart: (profileId) => {
      startProfileIds.push(profileId);
    },
  });
  const params = createParams(sessionFile, workspaceDir);
  delete params.authProfileId;
  params.agentDir = agentDir;
  params.config = {
    agents: {
      defaults: {
        compaction: {
          truncateAfterCompaction: true,
          maxActiveTranscriptBytes: "1mb",
        },
      },
    },
  } as never;

  const attempt = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await vi.waitFor(
    () => {
      expect(startProfileIds).toEqual(["openai-codex:work"]);
    },
    { interval: 1 },
  );
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // The thread is rotated to a fresh one while the saved binding keeps the auth profile.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  expect(startProfileIds).toEqual(["openai-codex:work"]);
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.authProfileId).toBe("openai-codex:work");
  expect(savedBinding?.threadId).toBe("thread-1");
});
it("does not use a default byte limit when maxActiveTranscriptBytes is unset", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // Session index entry written next to the session file.
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // A 2,000,000-character rollout file with no token_count records.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000_000));

  // The compaction config sets no maxActiveTranscriptBytes.
  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: {
      agents: {
        defaults: {
          compaction: {
            truncateAfterCompaction: true,
          },
        },
      },
    } as never,
  });

  // The binding is returned unchanged and is still persisted.
  expect(rotated?.threadId).toBe("thread-existing");
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.threadId).toBe("thread-existing");
});
it("honors shorthand byte units for native rollout limits", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // Session index entry written next to the session file.
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // A 2,000-character rollout file, checked against a limit written as "1k".
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000));

  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: {
      agents: {
        defaults: {
          compaction: {
            truncateAfterCompaction: true,
            maxActiveTranscriptBytes: "1k",
          },
        },
      },
    } as never,
  });

  // No binding is returned and none remains persisted.
  expect(rotated).toBeUndefined();
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding).toBeUndefined();
});
// When an explicit codexHome is supplied, the rollout under
// <codexHome>/sessions is the one measured against the byte limit.
it("honors custom Codex home rollout files for native rollout limits", async () => {
  const agentDir = path.join(tempDir, "agent");
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const codexHome = path.join(tempDir, "custom-codex-home");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const sessionStore = {
    "agent:main:session-1": { sessionFile, totalTokens: 12_000 },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionStore),
  );

  // The rollout lives under the custom home, not under agentDir.
  const rolloutDir = path.join(codexHome, "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutPath = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutPath, "x".repeat(2_000));

  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: 1_000,
  };
  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    codexHome,
    config: { agents: { defaults: { compaction } } } as never,
  });

  expect(rotated).toBeUndefined();
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted).toBeUndefined();
});
// The rollout reports a cumulative total of 70,000 tokens but a last-turn usage
// of 12,000; the binding is expected to survive, i.e. the cumulative figure
// alone must not trigger rotation.
it("uses current rollout token usage before cumulative usage", async () => {
  const agentDir = path.join(tempDir, "agent");
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const sessionStore = {
    "agent:main:session-1": { sessionFile, totalTokens: 12_000 },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionStore),
  );

  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });

  // Single JSONL record carrying both cumulative and last-turn usage.
  const tokenCountRecord = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: { total_tokens: 70_000 },
        last_token_usage: { total_tokens: 12_000 },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountRecord)}\n`,
  );

  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: "1mb",
  };
  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  expect(rotated?.threadId).toBe("thread-existing");
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-existing");
});
// The session store claims 70,000 tokens but flags the total as not fresh,
// while the rollout itself reports 12,000 for the last turn. The binding is
// expected to be kept.
it("ignores stale session token totals for native rollout rotation", async () => {
  const agentDir = path.join(tempDir, "agent");
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const staleSessionStore = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 70_000,
      totalTokensFresh: false,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(staleSessionStore),
  );

  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountRecord = {
    payload: {
      type: "token_count",
      info: {
        last_token_usage: { total_tokens: 12_000 },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountRecord)}\n`,
  );

  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: "1mb",
  };
  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  expect(rotated?.threadId).toBe("thread-existing");
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-existing");
});
// Asserts two things about a small rollout whose last-turn usage is 70,000
// tokens: the binding is cleared, and fs.readFile is never called with the
// rollout path while the rotation check runs.
it("streams rollout token scans without reading the whole file", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify({
      "agent:main:session-1": {
        sessionFile,
        totalTokens: 12_000,
      },
    }),
  );
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(
    rolloutFile,
    `${JSON.stringify({
      payload: {
        type: "token_count",
        info: {
          last_token_usage: {
            total_tokens: 70_000,
          },
        },
      },
    })}\n`,
  );
  // Observe (without stubbing) every fs.readFile call made from here on.
  const readFileSpy = vi.spyOn(fs, "readFile");
  try {
    const binding = await testing.rotateOversizedCodexAppServerStartupBinding({
      binding: await readCodexAppServerBinding(sessionFile),
      sessionFile,
      agentDir,
      config: {
        agents: {
          defaults: {
            compaction: {
              truncateAfterCompaction: true,
              maxActiveTranscriptBytes: "1mb",
            },
          },
        },
      } as never,
    });
    expect(binding).toBeUndefined();
    expect(readFileSpy.mock.calls.some(([file]) => file === rolloutFile)).toBe(false);
  } finally {
    // Restore even when an assertion fails so the spy on the shared fs object
    // cannot leak into later tests.
    readFileSpy.mockRestore();
  }
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding).toBeUndefined();
});
// A 2,000-byte rollout against a 1,000-byte limit must clear the binding, and
// fs.readFile must never be called with the rollout path while doing so.
it("clears byte-oversized rollouts before reading their contents", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify({
      "agent:main:session-1": {
        sessionFile,
        totalTokens: 12_000,
      },
    }),
  );
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000));
  // Observe (without stubbing) every fs.readFile call made from here on.
  const readFileSpy = vi.spyOn(fs, "readFile");
  try {
    const binding = await testing.rotateOversizedCodexAppServerStartupBinding({
      binding: await readCodexAppServerBinding(sessionFile),
      sessionFile,
      agentDir,
      config: {
        agents: {
          defaults: {
            compaction: {
              truncateAfterCompaction: true,
              maxActiveTranscriptBytes: 1_000,
            },
          },
        },
      } as never,
    });
    expect(binding).toBeUndefined();
    expect(readFileSpy.mock.calls.some(([file]) => file === rolloutFile)).toBe(false);
  } finally {
    // Restore even when an assertion fails so the spy on the shared fs object
    // cannot leak into later tests.
    readFileSpy.mockRestore();
  }
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding).toBeUndefined();
});
// Boundary case: a rollout of exactly 1,000 bytes against a 1,000-byte limit
// is expected to clear the binding.
it("clears native rollouts at the configured byte limit", async () => {
  const agentDir = path.join(tempDir, "agent");
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const sessionStore = {
    "agent:main:session-1": { sessionFile, totalTokens: 12_000 },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionStore),
  );

  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutPath = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutPath, "x".repeat(1_000));

  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: 1_000,
  };
  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  expect(rotated).toBeUndefined();
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted).toBeUndefined();
});
// Two consecutive startOrResumeThread calls whose only difference is the
// description text of the message tool: the second call must resume.
it("resumes a bound Codex thread when only dynamic tool descriptions change", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  // Start and resume both answer with the same thread id; anything else fails.
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start" || method === "thread/resume") {
      return threadStartResult("thread-existing");
    }
    throw new Error(`unexpected method: ${method}`);
  });

  // First turn: Slack-flavoured description.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [
      createMessageDynamicTool("Send and manage messages for the current Slack thread."),
    ],
    appServer,
  });

  // Second turn: same tool, Discord-flavoured description.
  const resumed = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [
      createMessageDynamicTool("Send and manage messages for the current Discord channel."),
    ],
    appServer,
  });

  expect(resumed.threadId).toBe("thread-existing");
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
});
// The same two named tools are supplied in opposite order on the second call;
// the thread is expected to be resumed rather than restarted.
it("resumes a bound Codex thread when dynamic tools are reordered", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  const request = vi.fn(async (method: string) => {
    if (method === "thread/start" || method === "thread/resume") {
      return threadStartResult("thread-existing");
    }
    throw new Error(`unexpected method: ${method}`);
  });

  // Initial order: wiki_status, diffs.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [createNamedDynamicTool("wiki_status"), createNamedDynamicTool("diffs")],
    appServer,
  });

  // Swapped order: diffs, wiki_status.
  const resumed = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [createNamedDynamicTool("diffs"), createNamedDynamicTool("wiki_status")],
    appServer,
  });

  expect(resumed.threadId).toBe("thread-existing");
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
});
// The existing binding carries no contextEngine metadata while the run now
// uses a context engine: a fresh thread is expected, flagged as a
// context-engine rotation, with the new metadata persisted.
it("starts a fresh Codex thread for legacy context-engine sidecars without metadata", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const params = createParams(sessionFile, workspaceDir);
  const activeEngine = {
    info: { id: "lossless-claw", name: "Lossless Claw", ownsCompaction: true },
    assemble: vi.fn(),
    compact: vi.fn(),
  } as never;
  params.contextEngine = activeEngine;
  params.contextTokenBudget = 400_000;

  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is tolerated; a resume attempt fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-fresh");
  });

  const started = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  expect(started.threadId).toBe("thread-fresh");
  expect(started.lifecycle).toEqual({
    action: "started",
    rotatedContextEngineBinding: true,
  });
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start"]);

  const persistedEngine = (await readCodexAppServerBinding(sessionFile))?.contextEngine;
  expect(persistedEngine?.engineId).toBe("lossless-claw");
  expect(persistedEngine?.policyFingerprint).toContain('"contextTokenBudget":400000');
});
// The stored contextEngine metadata is expected to be treated as compatible
// with the active engine and token budget, so the thread is resumed.
it("resumes a Codex thread when context-engine sidecar metadata is compatible", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  const storedEngineMetadata = {
    schemaVersion: 1 as const,
    engineId: "lossless-claw",
    policyFingerprint:
      '{"schemaVersion":1,"engineId":"lossless-claw","ownsCompaction":true,"contextTokenBudget":400000,"projectionMaxChars":1000000}',
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    contextEngine: storedEngineMetadata,
  });

  const params = createParams(sessionFile, workspaceDir);
  const activeEngine = {
    info: { id: "lossless-claw", name: "Lossless Claw", ownsCompaction: true },
    assemble: vi.fn(),
    compact: vi.fn(),
  } as never;
  params.contextEngine = activeEngine;
  params.contextTokenBudget = 400_000;

  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/resume is tolerated; a fresh start fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  });

  const resumed = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  expect(resumed.threadId).toBe("thread-existing");
  expect(resumed.lifecycle).toEqual({ action: "resumed" });
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/resume"]);
});
// The stored binding has contextEngine metadata but the run params set no
// context engine: a fresh thread is expected and the persisted binding should
// no longer carry contextEngine metadata.
it("starts a fresh Codex thread when context-engine sidecar metadata is no longer active", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  const storedEngineMetadata = {
    schemaVersion: 1,
    engineId: "lossless-claw",
    policyFingerprint:
      '{"schemaVersion":1,"engineId":"lossless-claw","ownsCompaction":true,"contextTokenBudget":400000,"projectionMaxChars":1000000}',
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    contextEngine: storedEngineMetadata,
  });

  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is tolerated; a resume attempt fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-fresh");
  });

  const started = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  expect(started.threadId).toBe("thread-fresh");
  expect(started.lifecycle).toEqual({
    action: "started",
    rotatedContextEngineBinding: true,
  });
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start"]);

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.contextEngine).toBeUndefined();
});
// The stored fingerprint records engineVersion 1.0.0 while the active engine
// reports 1.0.1: a fresh thread is expected and the persisted fingerprint
// should reflect the active engine's version, maintenance mode and citations.
it("starts a fresh Codex thread when context-engine policy metadata changes", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");

  const storedEngineMetadata = {
    schemaVersion: 1,
    engineId: "lossless-claw",
    policyFingerprint:
      '{"schemaVersion":1,"engineId":"lossless-claw","engineVersion":"1.0.0","ownsCompaction":true,"turnMaintenanceMode":"foreground","citationsMode":"inline","contextTokenBudget":400000,"projectionMaxChars":1000000}',
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    contextEngine: storedEngineMetadata,
  });

  const params = createParams(sessionFile, workspaceDir);
  const engineInfo = {
    id: "lossless-claw",
    name: "Lossless Claw",
    version: "1.0.1",
    ownsCompaction: true,
    turnMaintenanceMode: "foreground",
  };
  params.contextEngine = {
    info: engineInfo,
    assemble: vi.fn(),
    compact: vi.fn(),
  } as never;
  params.config = { memory: { citations: "inline" } } as never;
  params.contextTokenBudget = 400_000;

  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is tolerated; a resume attempt fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-fresh");
  });

  const started = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  expect(started.threadId).toBe("thread-fresh");
  expect(started.lifecycle).toEqual({
    action: "started",
    rotatedContextEngineBinding: true,
  });
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start"]);

  const persisted = await readCodexAppServerBinding(sessionFile);
  const persistedFingerprint = persisted?.contextEngine?.policyFingerprint;
  const expectedFragments = [
    '"engineVersion":"1.0.1"',
    '"turnMaintenanceMode":"foreground"',
    '"citationsMode":"inline"',
  ];
  for (const fragment of expectedFragments) {
    expect(persistedFingerprint).toContain(fragment);
  }
});
// Three turns: with the message tool, with no tools, then with the message
// tool again. The persisted fingerprint and thread id from the first turn are
// expected to survive, and the third turn is expected to resume.
it("keeps the previous dynamic tool fingerprint for transient no-tool maintenance turns", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  // Each thread/start hands out the next sequential id; resume always answers
  // with thread-1.
  let threadCounter = 1;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start": {
        const threadId = `thread-${threadCounter}`;
        threadCounter += 1;
        return threadStartResult(threadId);
      }
      case "thread/resume":
        return threadStartResult("thread-1");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });

  // Turn 1: message tool present.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [createMessageDynamicTool("Send and manage messages.")],
    appServer,
  });
  const bindingAfterFirstTurn = await readCodexAppServerBinding(sessionFile);
  const originalFingerprint = bindingAfterFirstTurn?.dynamicToolsFingerprint;

  // Turn 2: no dynamic tools at all.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  // Turn 3: message tool is back.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [createMessageDynamicTool("Send and manage messages.")],
    appServer,
  });

  const finalBinding = await readCodexAppServerBinding(sessionFile);
  expect(finalBinding?.dynamicToolsFingerprint).toBe(originalFingerprint);
  expect(finalBinding?.threadId).toBe("thread-1");
  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start", "thread/start", "thread/resume"]);
});
// Runs two turns against a pre-existing plugin-app binding: first a turn with
// nativeCodeModeEnabled set to false and a deny-all plugin config, then a turn
// with the original plugin config. The persisted binding is expected to keep
// its thread id and plugin-app fingerprints throughout.
it("keeps plugin app bindings across transient native-tool-disabled turns", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const pluginAppPolicyContext = createPluginAppPolicyContext();
await writeExistingBinding(sessionFile, workspaceDir, {
pluginAppsFingerprint: "plugin-apps-config-1",
pluginAppsInputFingerprint: "plugin-apps-input-1",
pluginAppPolicyContext,
});
const params = createParams(sessionFile, workspaceDir);
const appServer = createThreadLifecycleAppServerOptions();
// thread/start and thread/resume answer with different thread ids so the test
// can tell which one ended up in the persisted binding.
const request = vi.fn(async (method: string) => {
if (method === "thread/start") {
return threadStartResult("thread-transient");
}
if (method === "thread/resume") {
return threadStartResult("thread-existing");
}
throw new Error(`unexpected method: ${method}`);
});
// Plugin config builder for the transient turn: disables every app by default.
const buildDenyAllPluginThreadConfig = vi.fn(async () => ({
enabled: true,
configPatch: {
apps: {
_default: {
enabled: false,
destructive_enabled: false,
open_world_enabled: false,
},
},
},
fingerprint: "plugin-apps-deny-all",
inputFingerprint: "plugin-apps-input-deny-all",
policyContext: { fingerprint: "plugin-policy-deny-all", apps: {}, pluginAppIds: {} },
diagnostics: [],
}));
// Plugin config builder for the follow-up turn: same fingerprints as the
// binding written at the top of the test.
const buildEnabledPluginThreadConfig = vi.fn(async () => ({
enabled: true,
configPatch: createPluginAppConfigPatch(),
fingerprint: "plugin-apps-config-1",
inputFingerprint: "plugin-apps-input-1",
policyContext: pluginAppPolicyContext,
diagnostics: [],
}));
// Turn 1: native code mode disabled, deny-all plugin config.
await startOrResumeThread({
client: { request } as never,
params,
cwd: workspaceDir,
dynamicTools: [],
appServer,
nativeCodeModeEnabled: false,
pluginThreadConfig: {
enabled: true,
inputFingerprint: "plugin-apps-input-deny-all",
enabledPluginConfigKeys: [],
build: buildDenyAllPluginThreadConfig,
},
});
// The persisted binding must still describe the original thread and plugin
// fingerprints, not the transient deny-all turn.
const savedAfterDeny = await readCodexAppServerBinding(sessionFile);
expect(savedAfterDeny?.threadId).toBe("thread-existing");
expect(savedAfterDeny?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
expect(savedAfterDeny?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
// Turn 2: plugin config matching the original binding.
await startOrResumeThread({
client: { request } as never,
params,
cwd: workspaceDir,
dynamicTools: [],
appServer,
pluginThreadConfig: {
enabled: true,
inputFingerprint: "plugin-apps-input-1",
enabledPluginConfigKeys: ["google-calendar"],
build: buildEnabledPluginThreadConfig,
},
});
expect(buildDenyAllPluginThreadConfig).toHaveBeenCalledTimes(1);
expect(buildEnabledPluginThreadConfig).toHaveBeenCalledTimes(1);
// Expected wire traffic: turn 1 issued thread/start (carrying the deny-all app
// config), turn 2 issued thread/resume.
const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
expect(requestCalls.map(([method]) => method)).toEqual(["thread/start", "thread/resume"]);
expect(requestCalls[0]?.[1].config).toMatchObject({
apps: {
_default: {
enabled: false,
destructive_enabled: false,
open_world_enabled: false,
},
},
});
const savedAfterAllowed = await readCodexAppServerBinding(sessionFile);
expect(savedAfterAllowed?.threadId).toBe("thread-existing");
expect(savedAfterAllowed?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
expect(savedAfterAllowed?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
expect(savedAfterAllowed?.pluginAppPolicyContext).toEqual(pluginAppPolicyContext);
});
// When thread/resume rejects with the closed-client error, the call is
// expected to reject with that same message and the persisted binding must
// still point at the existing thread.
it("preserves the binding when the app-server closes during thread resume", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const appServer = createThreadLifecycleAppServerOptions();

  // Every request fails: resume with the closed-client error, anything else as
  // an unexpected method.
  const request = vi.fn(async (method: string) => {
    const message =
      method === "thread/resume"
        ? "codex app-server client is closed"
        : `unexpected method: ${method}`;
    throw new Error(message);
  });

  const attempt = startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });
  await expect(attempt).rejects.toThrow("codex app-server client is closed");

  const methodsCalled = request.mock.calls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/resume"]);
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-existing");
});
// Installs a client factory whose first client rejects thread/resume with the
// closed-client error. The run is expected to obtain a second client, resume
// and start a turn on it, and finish without being aborted.
it("restarts the app-server once when a shared client closes during startup", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
// One inner array of request methods per client the factory creates.
const requests: string[][] = [];
let starts = 0;
// Replaced by whichever client most recently registered a notification handler.
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
setCodexAppServerClientFactoryForTest(async () => {
const startIndex = starts++;
const methods: string[] = [];
requests.push(methods);
return {
request: vi.fn(async (method: string) => {
methods.push(method);
// Only the first client (startIndex 0) fails the resume.
if (method === "thread/resume" && startIndex === 0) {
throw new Error("codex app-server client is closed");
}
if (method === "thread/resume") {
return threadStartResult("thread-existing");
}
if (method === "turn/start") {
return turnStartResult();
}
return {};
}),
addNotificationHandler: (handler: typeof notify) => {
notify = handler;
return () => undefined;
},
addRequestHandler: () => () => undefined,
} as never;
});
const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
// Wait until the second client has started a turn before completing it.
await vi.waitFor(() => expect(requests[1]).toContain("turn/start"), fastWait);
await notify({
method: "turn/completed",
params: {
threadId: "thread-existing",
turnId: "turn-1",
turn: { id: "turn-1", status: "completed" },
},
});
const result = await run;
expect(result.aborted).toBe(false);
// First client saw only the failed resume; second saw resume then turn/start.
expect(requests).toEqual([["thread/resume"], ["thread/resume", "turn/start"]]);
});
// Installs a client factory whose first two clients reject thread/resume with
// the closed-client error. The run is expected to succeed on the third client
// and finish without being aborted.
it("tolerates a second app-server close while retrying startup", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
// One inner array of request methods per client the factory creates.
const requests: string[][] = [];
let starts = 0;
// Replaced by whichever client most recently registered a notification handler.
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
setCodexAppServerClientFactoryForTest(async () => {
const startIndex = starts++;
const methods: string[] = [];
requests.push(methods);
return {
request: vi.fn(async (method: string) => {
methods.push(method);
// The first two clients (startIndex 0 and 1) fail the resume.
if (method === "thread/resume" && startIndex < 2) {
throw new Error("codex app-server client is closed");
}
if (method === "thread/resume") {
return threadStartResult("thread-existing");
}
if (method === "turn/start") {
return turnStartResult();
}
return {};
}),
addNotificationHandler: (handler: typeof notify) => {
notify = handler;
return () => undefined;
},
addRequestHandler: () => () => undefined,
} as never;
});
const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
// Wait until the third client has started a turn before completing it.
await vi.waitFor(() => expect(requests[2]).toContain("turn/start"), fastWait);
await notify({
method: "turn/completed",
params: {
threadId: "thread-existing",
turnId: "turn-1",
turn: { id: "turn-1", status: "completed" },
},
});
const result = await run;
expect(result.aborted).toBe(false);
// Clients one and two saw only the failed resume; the third saw resume then
// turn/start.
expect(requests).toEqual([
["thread/resume"],
["thread/resume"],
["thread/resume", "turn/start"],
]);
});
// The same hook config object is supplied on two consecutive calls; both the
// thread/start and the thread/resume request are expected to carry it, merged
// with the two code-mode feature flags.
it("passes native hook relay config on thread start and resume", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  const request = vi.fn(async (method: string) => {
    if (method === "thread/start" || method === "thread/resume") {
      return threadStartResult("thread-existing");
    }
    throw new Error(`unexpected method: ${method}`);
  });

  const config = {
    "features.hooks": true,
    "hooks.PreToolUse": [],
  };
  const expectedConfig = {
    ...config,
    "features.code_mode": true,
    "features.code_mode_only": false,
  };

  // Identical arguments twice: the first call starts, the second resumes.
  for (let turn = 0; turn < 2; turn += 1) {
    await startOrResumeThread({
      client: { request } as never,
      params,
      cwd: workspaceDir,
      dynamicTools: [],
      appServer,
      config,
    });
  }

  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
  const [startCall, resumeCall] = requestCalls;
  expect(startCall?.[1].config).toEqual(expectedConfig);
  expect(resumeCall?.[1].config).toEqual(expectedConfig);
});
// On a fresh start, the thread/start config is expected to contain the
// caller's hook config, the code-mode flags and the plugin app config patch,
// and the plugin fingerprints/policy context should be persisted.
it("merges native hook relay config with plugin app config when starting a thread", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  // Only thread/start is tolerated.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-plugins");
  });

  const pluginAppPolicyContext = createPluginAppPolicyContext();
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: pluginAppPolicyContext,
    diagnostics: [],
  }));

  const pluginThreadConfig = {
    enabled: true,
    inputFingerprint: "plugin-apps-input-1",
    enabledPluginConfigKeys: ["google-calendar"],
    build: buildPluginThreadConfig,
  };
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": true, hooks: { PreToolUse: [] } },
    pluginThreadConfig,
  });

  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start"]);
  const [startCall] = requestCalls;
  expect(startCall?.[1].config).toEqual({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    hooks: { PreToolUse: [] },
    ...createPluginAppConfigPatch(),
  });

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-plugins");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(persisted?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
  expect(persisted?.pluginAppPolicyContext).toEqual(pluginAppPolicyContext);
});
// Both the caller config and the plugin config patch try to turn hooks off;
// finalConfigPatch turns them on. The values from finalConfigPatch are
// expected to win on both the start and the resume request.
it("keeps native hook relay config as the final thread config patch", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  const request = vi.fn(async (method: string) => {
    const isThreadLifecycle = method === "thread/start" || method === "thread/resume";
    if (!isThreadLifecycle) {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-hooks");
  });

  const pluginAppPolicyContext = createPluginAppPolicyContext();
  const relayHook = { type: "command", command: "openclaw-native-hook-relay", timeout: 5 };
  const finalConfigPatch = {
    "features.hooks": true,
    "hooks.PreToolUse": [{ hooks: [relayHook] }],
  };

  // The plugin patch carries conflicting hook settings on purpose.
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: {
      "features.hooks": false,
      "hooks.PreToolUse": [],
      ...createPluginAppConfigPatch(),
    },
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: pluginAppPolicyContext,
    diagnostics: [],
  }));
  const pluginThreadConfig = {
    enabled: true,
    inputFingerprint: "plugin-apps-input-1",
    build: buildPluginThreadConfig,
  };

  // Turn 1: expected to start the thread.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": false },
    finalConfigPatch,
    pluginThreadConfig,
  });

  // Turn 2: same inputs plus enabledPluginConfigKeys, expected to resume.
  const pluginThreadConfigWithKeys = {
    ...pluginThreadConfig,
    enabledPluginConfigKeys: ["google-calendar"],
  };
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": false },
    finalConfigPatch,
    pluginThreadConfig: pluginThreadConfigWithKeys,
  });

  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
  const [startCall, resumeCall] = requestCalls;
  expect(startCall?.[1].config).toMatchObject({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    "hooks.PreToolUse": finalConfigPatch["hooks.PreToolUse"],
    ...createPluginAppConfigPatch(),
  });
  expect(resumeCall?.[1].config).toMatchObject({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    "hooks.PreToolUse": finalConfigPatch["hooks.PreToolUse"],
  });
});
// Start a plugin-app thread, then call again with the same input fingerprint.
// The plugin config builder is expected to run on both calls, but only the
// thread/start request should carry the plugin app config patch.
it("revalidates compatible plugin app bindings without resending app config", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();

  const request = vi.fn(async (method: string) => {
    const isThreadLifecycle = method === "thread/start" || method === "thread/resume";
    if (!isThreadLifecycle) {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-plugins");
  });

  const pluginAppPolicyContext = createPluginAppPolicyContext();
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: pluginAppPolicyContext,
    diagnostics: [],
  }));

  // Turn 1: no enabledPluginConfigKeys supplied.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": true },
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: buildPluginThreadConfig,
    },
  });

  // Turn 2: same input fingerprint, now with enabledPluginConfigKeys.
  const resumed = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": true },
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildPluginThreadConfig,
    },
  });

  expect(resumed.pluginAppPolicyContext).toEqual(pluginAppPolicyContext);
  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(2);

  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map(([method]) => method);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
  const [startCall, resumeCall] = requestCalls;
  expect(startCall?.[1].config).toEqual({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    ...createPluginAppConfigPatch(),
  });
  expect(resumeCall?.[1].config).toEqual({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
});
it("starts a new plugin app thread when full binding revalidation removes an app", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // Seed a binding recorded under the "plugin-apps-config-1" fingerprint.
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-config-1",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: createPluginAppPolicyContext(),
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is tolerated; a resume attempt fails the test.
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-revalidated");
  });
  const emptyPolicyContext = { fingerprint: "plugin-policy-empty", apps: {}, pluginAppIds: {} };
  // Fresh copy of the "all apps disabled" patch for each use.
  const disabledAppsPatch = () => ({
    apps: {
      _default: {
        enabled: false,
        destructive_enabled: false,
        open_world_enabled: false,
      },
    },
  });
  const buildConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: disabledAppsPatch(),
    fingerprint: "plugin-apps-empty",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: emptyPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildConfig,
    },
  });

  expect(buildConfig).toHaveBeenCalledTimes(1);
  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/start"]);
  const [startCall] = calls;
  expect(startCall?.[1].config).toEqual({
    "features.code_mode": true,
    "features.code_mode_only": false,
    ...disabledAppsPatch(),
  });

  // The persisted binding must reflect the rebuilt (empty) plugin app state.
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-revalidated");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-empty");
  expect(persisted?.pluginAppPolicyContext).toEqual(emptyPolicyContext);
});
it("keeps the existing plugin app binding when revalidation fails", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const storedPolicyContext = createPluginAppPolicyContext();
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-config-1",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: storedPolicyContext,
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/resume is tolerated; starting a new thread fails the test.
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  });

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      // Simulate a failing plugin config build.
      build: async () => {
        throw new Error("plugin inventory unavailable");
      },
    },
  });

  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/resume"]);
  const [resumeCall] = calls;
  expect(resumeCall?.[1].config).toEqual({
    "features.code_mode": true,
    "features.code_mode_only": false,
  });

  // Every plugin app field of the stored binding must be unchanged.
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-existing");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(persisted?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
  expect(persisted?.pluginAppPolicyContext).toEqual(storedPolicyContext);
});
it("rebuilds an empty plugin app binding after app inventory recovers", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // The stored binding carries the "plugin-apps-empty" fingerprint and an empty policy context.
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-empty",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: { fingerprint: "plugin-policy-empty", apps: {}, pluginAppIds: {} },
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-recovered");
  });
  const recoveredPolicyContext = createPluginAppPolicyContext();
  // The build now yields a different fingerprint than the stored one.
  const buildConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: recoveredPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: buildConfig,
    },
  });

  expect(buildConfig).toHaveBeenCalledTimes(1);
  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/start"]);
  const [startCall] = calls;
  expect(startCall?.[1].config).toEqual({
    ...createPluginAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-recovered");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(persisted?.pluginAppPolicyContext).toEqual(recoveredPolicyContext);
});
it("keeps an empty plugin app binding when recovery still produces the same config", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const emptyPolicyContext = { fingerprint: "plugin-policy-empty", apps: {}, pluginAppIds: {} };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-empty",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: emptyPolicyContext,
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/resume is tolerated; starting a new thread fails the test.
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  });
  // The build reproduces the stored fingerprint ("plugin-apps-empty").
  const buildConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: {
      apps: {
        _default: {
          enabled: false,
          destructive_enabled: false,
          open_world_enabled: false,
        },
      },
    },
    fingerprint: "plugin-apps-empty",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: emptyPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: buildConfig,
    },
  });

  expect(buildConfig).toHaveBeenCalledTimes(1);
  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/resume"]);
  const [resumeCall] = calls;
  expect(resumeCall?.[1].config).toEqual({
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
});
it("rebuilds a partial plugin app binding after another plugin recovers", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // Stored binding was recorded under the "plugin-apps-partial" fingerprint.
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-partial",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: createPluginAppPolicyContext(),
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-recovered");
  });
  const twoPluginPolicyContext = createTwoPluginAppPolicyContext();
  const buildConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createTwoPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-2",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: twoPluginPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar", "gmail"],
      build: buildConfig,
    },
  });

  expect(buildConfig).toHaveBeenCalledTimes(1);
  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/start"]);
  const [startCall] = calls;
  expect(startCall?.[1].config).toEqual({
    ...createTwoPluginAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-recovered");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-config-2");
  expect(persisted?.pluginAppPolicyContext).toEqual(twoPluginPolicyContext);
});
it("rebuilds a partial plugin app binding after another app from the same plugin recovers", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // The stored policy context lists two app ids for "google-calendar" on top of
  // whatever createPluginAppPolicyContext() provides.
  const storedPolicyContext = {
    ...createPluginAppPolicyContext(),
    pluginAppIds: {
      "google-calendar": ["google-calendar-app", "google-calendar-secondary-app"],
    },
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-partial",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: storedPolicyContext,
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-recovered");
  });
  const twoCalendarPolicyContext = createTwoCalendarAppPolicyContext();
  const buildConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createTwoCalendarAppConfigPatch(),
    fingerprint: "plugin-apps-config-calendar-2",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: twoCalendarPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildConfig,
    },
  });

  expect(buildConfig).toHaveBeenCalledTimes(1);
  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/start"]);
  const [startCall] = calls;
  expect(startCall?.[1].config).toEqual({
    ...createTwoCalendarAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });

  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-recovered");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-config-calendar-2");
  expect(persisted?.pluginAppPolicyContext).toEqual(twoCalendarPolicyContext);
});
it("starts a new configured thread for legacy bindings missing plugin app metadata", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // The seeded binding has a dynamic tools fingerprint but no plugin app fields.
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-plugins");
  });
  const expectedPolicyContext = createPluginAppPolicyContext();

  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: async () => ({
        enabled: true,
        configPatch: createPluginAppConfigPatch(),
        fingerprint: "plugin-apps-config-1",
        inputFingerprint: "plugin-apps-input-1",
        policyContext: expectedPolicyContext,
        diagnostics: [],
      }),
    },
  });

  const calls = rpc.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methods = calls.map(([method]) => method);
  expect(methods).toEqual(["thread/start"]);
  const [startCall] = calls;
  expect(startCall?.[1].config).toEqual({
    ...createPluginAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });

  // The rewritten binding now carries the plugin app metadata.
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-plugins");
  expect(persisted?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(persisted?.pluginAppPolicyContext).toEqual(expectedPolicyContext);
});
it("starts a new Codex thread when dynamic tool schemas change", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Each thread/start yields a sequentially numbered thread id: thread-1, thread-2, ...
  let startedThreads = 0;
  const rpc = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    startedThreads += 1;
    return threadStartResult(`thread-${startedThreads}`);
  });

  // First run registers the message tool with a single action.
  await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [createMessageDynamicTool("Send and manage messages.", ["send"])],
    appServer,
  });
  // Second run passes a different action list for the same tool.
  const secondBinding = await startOrResumeThread({
    client: { request: rpc } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [createMessageDynamicTool("Send and manage messages.", ["send", "read"])],
    appServer,
  });

  expect(secondBinding.threadId).toBe("thread-2");
  const methods = rpc.mock.calls.map(([method]) => method);
  expect(methods).toEqual(["thread/start", "thread/start"]);
});
// Runs a full attempt against a pre-existing binding and checks that the
// configured approval policy, reviewer, sandbox, service tier and the model
// from the run params are sent on both thread/resume and turn/start.
it("passes configured app-server policy, sandbox, service tier, and model on resume", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
// The stored binding records "gpt-5.2"; the requests below are expected to
// carry "gpt-5.4-codex" instead.
await writeExistingBinding(sessionFile, workspaceDir, { model: "gpt-5.2" });
const { requests, waitForMethod, completeTurn } = createResumeHarness();
// Not awaited yet: the run is driven to completion through the harness below.
const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
pluginConfig: {
appServer: {
approvalPolicy: "on-request",
approvalsReviewer: "guardian_subagent",
sandbox: "danger-full-access",
// Configured as "fast"; the assertions expect "priority" in the requests.
serviceTier: "fast",
},
},
});
// Wait for the turn to be started before signalling its completion.
await waitForMethod("turn/start");
await completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
await run;
expectResumeRequest(requests, {
threadId: "thread-existing",
model: "gpt-5.4-codex",
approvalPolicy: "on-request",
approvalsReviewer: "guardian_subagent",
sandbox: "danger-full-access",
serviceTier: "priority",
persistExtendedHistory: true,
});
// Inspect the raw resume request for the config flags and developer instructions.
const resumeRequest = requests.find((request) => request.method === "thread/resume");
const resumeRequestParams = resumeRequest?.params as Record<string, unknown> | undefined;
const resumeConfig = resumeRequestParams?.config as Record<string, unknown> | undefined;
expect(resumeConfig?.["features.hooks"]).toBe(true);
expect(resumeConfig?.["features.code_mode"]).toBe(true);
expect(resumeConfig?.["features.code_mode_only"]).toBe(false);
expect(resumeRequestParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);
// The turn request must carry the same policy values; the sandbox appears
// there as a sandboxPolicy object rather than the configured string.
const turnRequest = requests.find((request) => request.method === "turn/start");
const turnRequestParams = turnRequest?.params as Record<string, unknown> | undefined;
expect(turnRequestParams?.approvalPolicy).toBe("on-request");
expect(turnRequestParams?.approvalsReviewer).toBe("guardian_subagent");
expect(turnRequestParams?.sandboxPolicy).toEqual({ type: "dangerFullAccess" });
expect(turnRequestParams?.serviceTier).toBe("priority");
expect(turnRequestParams?.model).toBe("gpt-5.4-codex");
});
it("maps active OpenClaw sandbox egress into Codex workspace-write turns", () => {
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: {
      appServer: {
        approvalPolicy: "never",
        sandbox: "danger-full-access",
      },
    },
  });
  // Expected policy shape shared by every workspace-write case below.
  const workspaceWrite = (networkAccess: boolean, extraRoots: string[] = []) => ({
    type: "workspaceWrite",
    writableRoots: ["/tmp/workspace", ...extraRoots],
    networkAccess,
    excludeTmpdirEnvVar: false,
    excludeSlashTmp: false,
  });

  // Docker backend with network "none": network access is off.
  const isolatedDocker = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    {
      enabled: true,
      backendId: "docker",
      docker: { network: "none" },
    } as never,
    "/tmp/workspace",
  );
  expect(isolatedDocker).toEqual(workspaceWrite(false));

  // Docker backend with network "bridge" and a workspace-write app-server sandbox.
  const bridgedDocker = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    { ...appServer, sandbox: "workspace-write" },
    {
      enabled: true,
      backendId: "docker",
      docker: { network: "bridge" },
    } as never,
    "/tmp/workspace",
  );
  expect(bridgedDocker).toEqual(workspaceWrite(true));

  // Only the ":rw" bind source is expected among the writable roots.
  const dockerWithBinds = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    {
      enabled: true,
      backendId: "docker",
      docker: {
        network: "bridge",
        binds: [
          "/tmp/openclaw-writable-data:/data:rw",
          "/tmp/openclaw-readonly-data:/readonly:ro",
        ],
      },
    } as never,
    "/tmp/workspace",
  );
  expect(dockerWithBinds).toEqual(
    workspaceWrite(true, [path.resolve("/tmp/openclaw-writable-data")]),
  );

  // Non-docker backend ("ssh"): network access is on.
  const sshBackend = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    {
      enabled: true,
      backendId: "ssh",
    } as never,
    "/tmp/workspace",
  );
  expect(sshBackend).toEqual(workspaceWrite(true));

  // No OpenClaw sandbox at all: nothing is produced.
  const withoutSandbox = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    null,
    "/tmp/workspace",
  );
  expect(withoutSandbox).toBeUndefined();

  // A read-only app-server sandbox also produces nothing.
  const readOnlyAppServer = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    { ...appServer, sandbox: "read-only" },
    { enabled: true } as never,
    "/tmp/workspace",
  );
  expect(readOnlyAppServer).toBeUndefined();
});
it("passes current Codex service tier request values through app-server resume and turn requests", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, { model: "gpt-5.2" });
  const harness = createResumeHarness();
  // Not awaited yet: the run is driven to completion through the harness below.
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: {
      appServer: {
        approvalPolicy: "on-request",
        sandbox: "danger-full-access",
        serviceTier: "priority",
      },
    },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  // "priority" must reach the resume request unchanged...
  const resumeEntry = harness.requests.find((entry) => entry.method === "thread/resume");
  const resumeParams = resumeEntry?.params as Record<string, unknown> | undefined;
  expect(resumeParams?.serviceTier).toBe("priority");

  // ...and the turn request as well.
  const turnEntry = harness.requests.find((entry) => entry.method === "turn/start");
  const turnParams = turnEntry?.params as Record<string, unknown> | undefined;
  expect(turnParams?.serviceTier).toBe("priority");
});
it("keys plugin app inventory by websocket credentials without exposing them", () => {
  // Same endpoint URL, differing only in the auth token and Authorization header.
  const endpointKeyFor = (authToken: string, authorization: string) =>
    resolveCodexPluginAppCacheEndpoint({
      start: {
        transport: "websocket",
        command: "codex",
        args: [],
        url: "ws://127.0.0.1:39175",
        authToken,
        headers: { Authorization: authorization },
      },
    });
  const firstKey = endpointKeyFor("token-first", "Bearer first");
  const secondKey = endpointKeyFor("token-second", "Bearer second");

  // Different credentials must yield different keys...
  expect(firstKey).not.toEqual(secondKey);
  // ...while neither key contains the raw credential text.
  expect(firstKey).not.toContain("token-first");
  expect(firstKey).not.toContain("Bearer first");
  expect(secondKey).not.toContain("token-second");
  expect(secondKey).not.toContain("Bearer second");
});
it("redacts plugin thread config eligibility log data", () => {
  // App-server options seeded with secret-looking values in the token, headers and env.
  const appServer = {
    start: {
      transport: "websocket" as const,
      command: "codex",
      commandSource: "config" as const,
      args: [],
      url: "ws://127.0.0.1:39175",
      authToken: "token-secret",
      headers: {
        Authorization: "Bearer secret",
        "X-Test-Token": "header-secret",
      },
      env: {
        CODEX_HOME: "/tmp/codex-home",
        OPENAI_API_KEY: "env-secret",
      },
    },
    codeModeOnly: false,
    requestTimeoutMs: 60_000,
    turnCompletionIdleTimeoutMs: 60_000,
    approvalPolicy: "never" as const,
    approvalsReviewer: "user" as const,
    sandbox: "danger-full-access" as const,
    serviceTier: "priority" as const,
  };
  const resolvedPluginPolicy = resolveCodexPluginsPolicy({
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  });
  const pluginAppCacheKey = buildCodexPluginAppCacheKey({
    appServer,
    agentDir: "/tmp/agent",
    authProfileId: "openai-codex:work",
    accountId: "account-work",
    envApiKeyFingerprint: "env-key",
  });

  const logData = testing.buildCodexPluginThreadConfigEligibilityLogData({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    pluginThreadConfigRequired: true,
    resolvedPluginPolicy,
    enabledPluginConfigKeys: ["google-calendar"],
    pluginAppCacheKey,
    startupAuthProfileId: "openai-codex:work",
    appServer,
  });

  // The non-sensitive fields are reported; the cache key only as a sha256 fingerprint.
  const expectedFields = expect.objectContaining({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    enabled: true,
    policyConfigured: true,
    policyEnabled: true,
    pluginConfigKeys: ["google-calendar"],
    enabledPluginConfigKeys: ["google-calendar"],
    appCacheKeyFingerprint: expect.stringMatching(/^sha256:/),
    authProfileId: "openai-codex:work",
    appServerTransport: "websocket",
    appServerCommandSource: "config",
  });
  expect(logData).toEqual(expectedFields);
  expect(logData).not.toHaveProperty("appCacheKeyInput");

  // None of the seeded secret values may appear anywhere in the serialized output.
  const serialized = JSON.stringify(logData);
  const forbiddenFragments = [
    "token-secret",
    "Bearer secret",
    "header-secret",
    "env-secret",
    "/tmp/codex-home",
  ];
  for (const fragment of forbiddenFragments) {
    expect(serialized).not.toContain(fragment);
  }
});
it("builds resume and turn params from the currently selected OpenClaw model", () => {
  const runParams = createParams("/tmp/session.jsonl", "/tmp/workspace");
  const appServer = {
    start: {
      transport: "stdio" as const,
      command: "codex",
      args: ["app-server", "--listen", "stdio://"],
      headers: {},
    },
    codeModeOnly: false,
    requestTimeoutMs: 60_000,
    turnCompletionIdleTimeoutMs: 60_000,
    approvalPolicy: "on-request" as const,
    approvalsReviewer: "guardian_subagent" as const,
    sandbox: "danger-full-access" as const,
    serviceTier: "flex" as const,
  };

  // Resume params: everything is pinned except developerInstructions, which is
  // compared against itself and checked separately below.
  const resume = buildThreadResumeParams(runParams, { threadId: "thread-1", appServer });
  expect(resume).toEqual({
    threadId: "thread-1",
    model: "gpt-5.4-codex",
    approvalPolicy: "on-request",
    approvalsReviewer: "guardian_subagent",
    config: {
      "features.code_mode": true,
      "features.code_mode_only": false,
    },
    sandbox: "danger-full-access",
    serviceTier: "flex",
    developerInstructions: resume.developerInstructions,
    persistExtendedHistory: true,
  });
  expect(resume.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);

  // Turn params: the same model and policy values, plus the collaboration mode.
  const turn = buildTurnStartParams(runParams, {
    threadId: "thread-1",
    cwd: "/tmp/workspace",
    appServer,
  });
  expect(turn.threadId).toBe("thread-1");
  expect(turn.cwd).toBe("/tmp/workspace");
  expect(turn.model).toBe("gpt-5.4-codex");
  expect(turn.approvalPolicy).toBe("on-request");
  expect(turn.approvalsReviewer).toBe("guardian_subagent");
  expect(turn.sandboxPolicy).toEqual({ type: "dangerFullAccess" });
  expect(turn.serviceTier).toBe("flex");
  expect(turn.collaborationMode).toEqual({
    mode: "default",
    settings: {
      model: "gpt-5.4-codex",
      reasoning_effort: "medium",
      developer_instructions: null,
    },
  });
});
it("uses turn-scoped collaboration instructions for heartbeat Codex turns", () => {
  const runParams = createParams("/tmp/session.jsonl", "/tmp/workspace");
  const heartbeatNote =
    "HEARTBEAT.md exists at /tmp/workspace/HEARTBEAT.md. Read it before proceeding.";

  // Heartbeat trigger: developer instructions are populated.
  runParams.trigger = "heartbeat";
  const heartbeatMode = buildTurnCollaborationMode(runParams, {
    heartbeatCollaborationInstructions: heartbeatNote,
  });
  expect(heartbeatMode.mode).toBe("default");
  expect(heartbeatMode.settings.model).toBe("gpt-5.4-codex");
  expect(heartbeatMode.settings.reasoning_effort).toBe("medium");
  const expectedFragments = [
    "This is an OpenClaw heartbeat turn. Apply these instructions only to this heartbeat wake",
    "Use heartbeats to create useful proactive progress",
    "If `heartbeat_respond` is not already available and `tool_search` is available",
    "HEARTBEAT.md exists at /tmp/workspace/HEARTBEAT.md.",
  ];
  for (const fragment of expectedFragments) {
    expect(heartbeatMode.settings.developer_instructions).toContain(fragment);
  }

  // User trigger: the same heartbeat instructions must not be applied.
  runParams.trigger = "user";
  const userMode = buildTurnCollaborationMode(runParams, {
    heartbeatCollaborationInstructions: heartbeatNote,
  });
  expect(userMode.settings.developer_instructions).toBeNull();
});
it("uses turn-scoped collaboration instructions for cron Codex turns", () => {
  const runParams = createParams("/tmp/session.jsonl", "/tmp/workspace");
  runParams.trigger = "cron";

  const cronMode = buildTurnCollaborationMode(runParams);
  expect(cronMode.mode).toBe("default");
  expect(cronMode.settings.model).toBe("gpt-5.4-codex");
  expect(cronMode.settings.reasoning_effort).toBe("medium");

  // The cron developer instructions must contain each of these fragments.
  const expectedFragments = [
    "This is an OpenClaw cron automation turn",
    "If it asks you to run an exact command, run that command before doing any investigation",
    "Use context already provided by the runtime",
  ];
  for (const fragment of expectedFragments) {
    expect(cronMode.settings.developer_instructions).toContain(fragment);
  }
});
it("preserves the bound auth profile when resume params omit authProfileId", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, {
    authProfileId: "openai-codex:bound",
  });
  // The run params carry no auth profile of their own.
  const runParams = createParams(sessionFile, workspaceDir);
  delete runParams.authProfileId;
  runParams.agentDir = path.join(tempDir, "agent");

  // Minimal client that only answers thread/resume.
  const resumeOnlyClient = {
    request: async (method: string) => {
      if (method !== "thread/resume") {
        throw new Error(`unexpected method: ${method}`);
      }
      return threadStartResult("thread-existing");
    },
  };

  const resumedBinding = await startOrResumeThread({
    client: resumeOnlyClient as never,
    params: runParams,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: {
      start: {
        transport: "stdio",
        command: "codex",
        args: ["app-server"],
        headers: {},
      },
      codeModeOnly: false,
      requestTimeoutMs: 60_000,
      turnCompletionIdleTimeoutMs: 60_000,
      approvalPolicy: "never",
      approvalsReviewer: "user",
      sandbox: "workspace-write",
    },
  });

  expect(resumedBinding.authProfileId).toBe("openai-codex:bound");
});
// Runs a full attempt whose params have no authProfileId and checks that the
// harness's onStart callback receives the auth profile stored in the existing
// binding, together with the agentDir set on the params.
it("reuses the bound auth profile for app-server startup when params omit it", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
await writeExistingBinding(sessionFile, workspaceDir, {
authProfileId: "openai-codex:bound",
dynamicToolsFingerprint: "[]",
});
// Values recorded by the harness's onStart callback, one entry per invocation.
const seenAuthProfileIds: Array<string | undefined> = [];
const seenAgentDirs: Array<string | undefined> = [];
const { requests, waitForMethod, completeTurn } = createAppServerHarness(
// Request handler: only thread/resume and turn/start are answered.
async (method: string) => {
if (method === "thread/resume") {
return threadStartResult("thread-existing");
}
if (method === "turn/start") {
return turnStartResult();
}
throw new Error(`unexpected method: ${method}`);
},
{
onStart: (authProfileId, agentDir) => {
seenAuthProfileIds.push(authProfileId);
seenAgentDirs.push(agentDir);
},
},
);
const params = createParams(sessionFile, workspaceDir);
// Drop the auth profile from the params so only the binding can supply it.
delete params.authProfileId;
params.agentDir = path.join(tempDir, "agent");
// Not awaited yet: the run is driven to completion through the harness below.
const run = runCodexAppServerAttempt(params);
// Poll until onStart has been invoked exactly once with the bound profile.
await vi.waitFor(() => expect(seenAuthProfileIds).toEqual(["openai-codex:bound"]), {
interval: 1,
});
await waitForMethod("turn/start");
// Yield one setImmediate tick before completing the turn.
// NOTE(review): presumably lets work queued after turn/start settle first — confirm.
await new Promise<void>((resolve) => setImmediate(resolve));
await completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
await run;
// After the run, onStart must still have been invoked only once.
expect(seenAuthProfileIds).toEqual(["openai-codex:bound"]);
expect(seenAgentDirs).toEqual([path.join(tempDir, "agent")]);
expect(requests.map((entry) => entry.method)).toContain("turn/start");
});
});