// Mirror of https://github.com/openclaw/openclaw.git
// Synced 2026-06-10 05:22:55 +00:00
// 9691 lines, 331 KiB, TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { SessionManager } from "@earendil-works/pi-coding-agent";
|
|
import {
|
|
abortAgentHarnessRun,
|
|
embeddedAgentLog,
|
|
nativeHookRelayTesting,
|
|
onAgentEvent,
|
|
queueAgentHarnessMessage,
|
|
resetAgentEventsForTest,
|
|
wrapToolWithBeforeToolCallHook,
|
|
type AgentEventPayload,
|
|
type EmbeddedRunAttemptParams,
|
|
} from "openclaw/plugin-sdk/agent-harness-runtime";
|
|
import {
|
|
emitDiagnosticEvent,
|
|
emitTrustedDiagnosticEvent,
|
|
onInternalDiagnosticEvent,
|
|
resetDiagnosticEventsForTest,
|
|
waitForDiagnosticEventsDrained,
|
|
type DiagnosticEventPayload,
|
|
} from "openclaw/plugin-sdk/diagnostic-runtime";
|
|
import {
|
|
initializeGlobalHookRunner,
|
|
resetGlobalHookRunner,
|
|
} from "openclaw/plugin-sdk/hook-runtime";
|
|
import { clearPluginCommands, registerPluginCommand } from "openclaw/plugin-sdk/plugin-runtime";
|
|
import { createMockPluginRegistry } from "openclaw/plugin-sdk/plugin-test-runtime";
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
|
|
/**
 * Test-only pass-through to `queueAgentHarnessMessage`.
 *
 * Takes exactly the argument list of `queueAgentHarnessMessage` and hands back
 * its boolean result unchanged.
 */
function queueActiveRunMessageForTest(
  ...args: Parameters<typeof queueAgentHarnessMessage>
): boolean {
  const queued = queueAgentHarnessMessage(...args);
  return queued;
}
|
|
import { CODEX_GPT5_BEHAVIOR_CONTRACT } from "../../prompt-overlay.js";
|
|
import { defaultCodexAppInventoryCache } from "./app-inventory-cache.js";
|
|
import * as approvalBridge from "./approval-bridge.js";
|
|
import * as authBridge from "./auth-bridge.js";
|
|
import { resolveCodexAppServerEnvApiKeyCacheKey } from "./auth-bridge.js";
|
|
import type { CodexAppServerClientFactory } from "./client-factory.js";
|
|
import {
|
|
readCodexPluginConfig,
|
|
resolveCodexAppServerRuntimeOptions,
|
|
resolveCodexPluginsPolicy,
|
|
} from "./config.js";
|
|
import {
|
|
CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE,
|
|
createCodexDynamicToolBridge,
|
|
} from "./dynamic-tools.js";
|
|
import * as elicitationBridge from "./elicitation-bridge.js";
|
|
import {
|
|
buildCodexPluginAppCacheKey,
|
|
resolveCodexPluginAppCacheEndpoint,
|
|
} from "./plugin-app-cache-key.js";
|
|
import type { CodexServerNotification } from "./protocol.js";
|
|
import {
|
|
readRecentCodexRateLimits,
|
|
rememberCodexRateLimits,
|
|
resetCodexRateLimitCacheForTests,
|
|
} from "./rate-limit-cache.js";
|
|
import {
|
|
runCodexAppServerAttempt as runCodexAppServerAttemptImpl,
|
|
testing,
|
|
} from "./run-attempt.js";
|
|
import { readCodexAppServerBinding, writeCodexAppServerBinding } from "./session-binding.js";
|
|
import { createCodexTestModel } from "./test-support.js";
|
|
import {
|
|
buildTurnCollaborationMode,
|
|
buildThreadResumeParams,
|
|
buildTurnStartParams,
|
|
startOrResumeThread,
|
|
} from "./thread-lifecycle.js";
|
|
|
|
// Per-test scratch directory: assigned in `beforeEach` via `fs.mkdtemp` and
// removed again in `afterEach`.
let tempDir: string;

// Client factory injected by the test harness helpers; `undefined` means the
// wrapper below passes the caller's options through untouched.
let codexAppServerClientFactoryForTest: CodexAppServerClientFactory | undefined;

// Options object with a 1 ms interval and a 5 s timeout.
// NOTE(review): presumably passed to `vi.waitFor`; its usages are outside this view.
const fastWait = { interval: 1, timeout: 5_000 } as const;

/** Second-argument options type of `runCodexAppServerAttemptImpl`, with `null`/`undefined` removed. */
type RunCodexAppServerAttemptOptions = NonNullable<
  Parameters<typeof runCodexAppServerAttemptImpl>[1]
>;
|
|
|
|
/**
 * Returns whatever `waitForDiagnosticEventsDrained()` returns, so tests can
 * await the diagnostic event queue through a shorter local name.
 */
function flushDiagnosticEvents() {
  const drained = waitForDiagnosticEventsDrained();
  return drained;
}
|
|
|
|
/**
 * Emits `count` synthetic `model.call.started` diagnostic events.
 *
 * Each event gets a distinct `backlog-run-<i>` / `backlog-call-<i>` id pair,
 * numbered from 0.
 *
 * @param count Number of events to emit; nothing is emitted when it is not positive.
 */
function emitAsyncDiagnosticBacklog(count: number): void {
  let emitted = 0;
  while (emitted < count) {
    emitDiagnosticEvent({
      type: "model.call.started",
      runId: `backlog-run-${emitted}`,
      callId: `backlog-call-${emitted}`,
      provider: "openai",
      model: "gpt-5.4",
    });
    emitted += 1;
  }
}
|
|
|
|
/**
 * Replays diagnostic events in order and returns the keys of tool executions
 * that were started but never finished.
 *
 * A key is `<runId | sessionId | sessionKey | "unknown">:<toolCallId | toolName>`.
 * `tool.execution.started` adds the key; `tool.execution.completed`,
 * `tool.execution.error` and `tool.execution.blocked` remove it. All other
 * event types are ignored.
 *
 * @param events Diagnostic events in emission order.
 * @returns The set of keys still active after the last event.
 */
function activeDiagnosticToolKeys(events: DiagnosticEventPayload[]): Set<string> {
  const stillRunning = new Set<string>();
  for (const event of events) {
    // Skip everything that is not part of the tool execution lifecycle.
    if (
      event.type !== "tool.execution.started" &&
      event.type !== "tool.execution.completed" &&
      event.type !== "tool.execution.error" &&
      event.type !== "tool.execution.blocked"
    ) {
      continue;
    }
    const key = `${event.runId ?? event.sessionId ?? event.sessionKey ?? "unknown"}:${event.toolCallId ?? event.toolName}`;
    if (event.type === "tool.execution.started") {
      stillRunning.add(key);
    } else {
      stillRunning.delete(key);
    }
  }
  return stillRunning;
}
|
|
|
|
/**
 * Installs `factory` as the module-level client factory that
 * `runCodexAppServerAttempt` falls back to when the caller passes none.
 */
function setCodexAppServerClientFactoryForTest(factory: CodexAppServerClientFactory): void {
  codexAppServerClientFactoryForTest = factory;
}
|
|
|
|
/** Clears the module-level client factory override (called from `afterEach`). */
function resetCodexAppServerClientFactoryForTest(): void {
  codexAppServerClientFactoryForTest = undefined;
}
|
|
|
|
/**
 * Test wrapper around `runCodexAppServerAttemptImpl`.
 *
 * Uses `options.clientFactory` when given, otherwise the factory installed via
 * `setCodexAppServerClientFactoryForTest`. When neither exists the options are
 * forwarded unchanged.
 *
 * @param params Attempt parameters forwarded as-is.
 * @param options Optional attempt options; defaults to `{}`.
 * @returns The result of `runCodexAppServerAttemptImpl`.
 */
function runCodexAppServerAttempt(
  params: EmbeddedRunAttemptParams,
  options: RunCodexAppServerAttemptOptions = {},
) {
  const clientFactory = options.clientFactory ?? codexAppServerClientFactoryForTest;
  if (!clientFactory) {
    return runCodexAppServerAttemptImpl(params, options);
  }
  return runCodexAppServerAttemptImpl(params, { ...options, clientFactory });
}
|
|
|
|
/**
 * Builds baseline attempt parameters for a Codex run.
 *
 * Tools are disabled (`disableTools: true`) and the timeout is 5 s; tests
 * override individual fields on the returned object as needed.
 *
 * @param sessionFile Path stored as `sessionFile`.
 * @param workspaceDir Path stored as `workspaceDir`.
 * @returns A fresh parameters object, cast to `EmbeddedRunAttemptParams`.
 */
function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAttemptParams {
  const attemptParams = {
    prompt: "hello",
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile,
    workspaceDir,
    runId: "run-1",
    provider: "codex",
    modelId: "gpt-5.4-codex",
    model: createCodexTestModel("codex"),
    contextTokenBudget: 150_000,
    contextWindowInfo: {
      tokens: 150_000,
      referenceTokens: 200_000,
      source: "agentContextTokens",
    },
    thinkLevel: "medium",
    disableTools: true,
    timeoutMs: 5_000,
    // Empty stand-ins: cast to `never` so they satisfy whatever type the field expects.
    authStorage: {} as never,
    authProfileStore: { version: 1, profiles: {} },
    modelRegistry: {} as never,
  } as EmbeddedRunAttemptParams;
  return attemptParams;
}
|
|
|
|
/**
 * Builds a minimal `runtimePlan` fixture for the Codex harness.
 *
 * The prompt contribution resolver and `logDiagnostics` return `undefined`,
 * and `tools.normalize` returns its input array unchanged.
 */
function createCodexRuntimePlanFixture(): NonNullable<EmbeddedRunAttemptParams["runtimePlan"]> {
  const observability = {
    resolvedRef: "codex/gpt-5.4-codex",
    provider: "codex",
    modelId: "gpt-5.4-codex",
    harnessId: "codex",
  };
  const prompt = {
    resolveSystemPromptContribution: () => undefined,
  };
  const tools = {
    normalize: (tools: unknown[]) => tools,
    logDiagnostics: () => undefined,
  };
  return {
    auth: {},
    observability,
    prompt,
    tools,
  } as unknown as NonNullable<EmbeddedRunAttemptParams["runtimePlan"]>;
}
|
|
|
|
/**
 * Builds the canned response the harnesses return for `thread/start` and
 * `thread/resume` requests.
 *
 * Both `cwd` fields use the current `tempDir`, falling back to
 * `/tmp/openclaw-codex-test` when `tempDir` is unset or empty.
 *
 * @param threadId Id stored on the returned thread; defaults to `"thread-1"`.
 */
function threadStartResult(threadId = "thread-1") {
  const cwd = tempDir || "/tmp/openclaw-codex-test";
  const thread = {
    id: threadId,
    sessionId: "session-1",
    forkedFromId: null,
    preview: "",
    ephemeral: false,
    modelProvider: "openai",
    createdAt: 1,
    updatedAt: 1,
    status: { type: "idle" },
    path: null,
    cwd,
    cliVersion: "0.125.0",
    source: "unknown",
    agentNickname: null,
    agentRole: null,
    gitInfo: null,
    name: null,
    turns: [],
  };
  return {
    thread,
    model: "gpt-5.4-codex",
    modelProvider: "openai",
    serviceTier: null,
    cwd,
    instructionSources: [],
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: { type: "dangerFullAccess" },
    permissionProfile: null,
    reasoningEffort: null,
  };
}
|
|
|
|
/**
 * Builds the canned response the harnesses return for `turn/start` requests.
 *
 * @param turnId Id of the turn; defaults to `"turn-1"`.
 * @param status Turn status string; defaults to `"inProgress"`.
 * @returns `{ turn }` with no items, no error and null timing fields.
 */
function turnStartResult(turnId = "turn-1", status = "inProgress") {
  const turn = {
    id: turnId,
    status,
    items: [],
    error: null,
    startedAt: null,
    completedAt: null,
    durationMs: null,
  };
  return { turn };
}
|
|
|
|
/**
 * Builds an `account/rateLimits/updated` notification whose primary window is
 * fully used (100 % of a 300-minute window).
 *
 * @param resetsAt Value stored as `primary.resetsAt`.
 */
function rateLimitsUpdated(resetsAt: number): CodexServerNotification {
  const primary = { usedPercent: 100, windowDurationMins: 300, resetsAt };
  return {
    method: "account/rateLimits/updated",
    params: {
      rateLimits: {
        limitId: "codex",
        limitName: "Codex",
        primary,
        secondary: null,
        credits: null,
        planType: "plus",
        rateLimitReachedType: "rate_limit_reached",
      },
    },
  };
}
|
|
|
|
/**
 * Builds an assistant transcript message with a single text part, zeroed
 * usage/cost counters and `stopReason: "stop"`.
 *
 * @param text Text of the single content part.
 * @param timestamp Value stored as `timestamp`.
 */
function assistantMessage(text: string, timestamp: number) {
  const usage = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    totalTokens: 0,
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
  };
  return {
    role: "assistant" as const,
    content: [{ type: "text" as const, text }],
    api: "openai-codex-responses",
    provider: "openai-codex",
    model: "gpt-5.4-codex",
    usage,
    stopReason: "stop" as const,
    timestamp,
  };
}
|
|
|
|
/**
 * Builds a user transcript message with a single text part.
 *
 * @param text Text of the single content part.
 * @param timestamp Value stored as `timestamp`.
 */
function userMessage(text: string, timestamp: number) {
  const textPart = { type: "text" as const, text };
  return {
    role: "user" as const,
    content: [textPart],
    timestamp,
  };
}
|
|
|
|
/**
 * Returns the argument list of one recorded call on a mock-like object.
 *
 * Reads `mock.mock.calls` and selects the entry with `Array.prototype.at`, so
 * negative indexes count from the end.
 *
 * @param mock Object exposing `mock.calls` (for example a `vi.fn()`).
 * @param label Name used in the error message.
 * @param index Index of the call; defaults to the first call.
 * @returns The recorded arguments of that call.
 * @throws Error when no call is recorded at `index`.
 */
function mockCall(mock: unknown, label: string, index = 0): unknown[] {
  const recordedCalls = (mock as { mock?: { calls?: unknown[][] } }).mock?.calls;
  const selected = recordedCalls?.at(index);
  if (selected) {
    return selected;
  }
  throw new Error(`Expected ${label} call ${index + 1}`);
}
|
|
|
|
/**
 * Installs a fake app-server client factory and returns handles for driving it.
 *
 * The fake client records every request, delegates the response to
 * `requestImpl`, and captures the notification, server-request and close
 * handlers that the code under test registers.
 *
 * @param requestImpl Produces the response for each `(method, params)` request.
 * @param options.onStart Called with `(authProfileId, agentDir)` each time the factory runs.
 * @returns
 *   - `request` / `requests`: the request mock and the ordered request log.
 *   - `waitForMethod`: resolves once a request with that method was logged.
 *   - `notify`: delivers a notification to the most recently registered handler.
 *   - `waitForServerRequestHandler` / `handleServerRequest`: wait for and call
 *     the registered server-request handler.
 *   - `completeTurn`: sends a `turn/completed` notification.
 *   - `close`: calls every registered close handler.
 */
function createAppServerHarness(
  requestImpl: (method: string, params: unknown) => Promise<unknown>,
  options: {
    onStart?: (authProfileId: string | undefined, agentDir: string | undefined) => void;
  } = {},
) {
  const requests: Array<{ method: string; params: unknown }> = [];
  // No-op until the code under test registers its own notification handler.
  let notificationHandler: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: AppServerRequestHandler | undefined;
  const closeHandlers = new Set<() => void>();

  const request = vi.fn(async (method: string, params?: unknown) => {
    requests.push({ method, params });
    return requestImpl(method, params);
  });

  setCodexAppServerClientFactoryForTest(async (_startOptions, authProfileId, agentDir) => {
    options.onStart?.(authProfileId, agentDir);
    const fakeClient = {
      request,
      // A later registration replaces the earlier handler; unsubscribing is a no-op.
      addNotificationHandler: (handler: typeof notificationHandler) => {
        notificationHandler = handler;
        return () => undefined;
      },
      addRequestHandler: (handler: AppServerRequestHandler) => {
        serverRequestHandler = handler;
        return () => undefined;
      },
      addCloseHandler: (handler: () => void) => {
        closeHandlers.add(handler);
        return () => closeHandlers.delete(handler);
      },
    };
    return fakeClient as never;
  });

  const waitForServerRequestHandler = async () => {
    await vi.waitFor(() => expect(serverRequestHandler).toBeTypeOf("function"), {
      interval: 1,
      timeout: 30_000,
    });
    return serverRequestHandler!;
  };

  return {
    request,
    requests,
    async waitForMethod(method: string, timeoutMs = 30_000) {
      const assertMethodSeen = () => {
        if (requests.some((entry) => entry.method === method)) {
          return;
        }
        const mockMethods = request.mock.calls.map((call) => call[0]);
        throw new Error(
          `expected app-server method ${method}; saw ${requests
            .map((entry) => entry.method)
            .join(", ")}; mock saw ${mockMethods.join(", ")}`,
        );
      };
      await vi.waitFor(assertMethodSeen, { interval: 1, timeout: timeoutMs });
    },
    async notify(notification: CodexServerNotification) {
      await notificationHandler(notification);
    },
    waitForServerRequestHandler,
    async handleServerRequest(serverRequest: Parameters<AppServerRequestHandler>[0]) {
      const handler = await waitForServerRequestHandler();
      return handler(serverRequest);
    },
    async completeTurn(params: { threadId: string; turnId: string }) {
      const { threadId, turnId } = params;
      await notificationHandler({
        method: "turn/completed",
        params: {
          threadId,
          turnId,
          turn: { id: turnId, status: "completed" },
        },
      });
    },
    close() {
      closeHandlers.forEach((handler) => {
        handler();
      });
    },
  };
}
|
|
|
|
/**
 * Creates an app-server harness with default responses for a fresh thread.
 *
 * `requestImpl` runs first; any result other than `undefined` wins. Otherwise
 * `thread/start` and `turn/start` get the canned results and every other
 * method gets `{}`.
 *
 * @param requestImpl Optional per-test override; defaults to always `undefined`.
 * @param options Forwarded to `createAppServerHarness`.
 */
function createStartedThreadHarness(
  requestImpl: (method: string, params: unknown) => Promise<unknown> = async () => undefined,
  options: {
    onStart?: (authProfileId: string | undefined, agentDir: string | undefined) => void;
  } = {},
) {
  const respond = async (method: string, params: unknown): Promise<unknown> => {
    const override = await requestImpl(method, params);
    if (override !== undefined) {
      return override;
    }
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        return turnStartResult();
      default:
        return {};
    }
  };
  return createAppServerHarness(respond, options);
}
|
|
|
|
/**
 * Asserts that the first logged `thread/resume` request carries the expected
 * parameters.
 *
 * Only the keys listed in `params` are compared (with `toEqual`); extra keys on
 * the request are ignored.
 *
 * @param requests Request log from a harness.
 * @param params Expected subset of the request parameters.
 * @throws Error when no `thread/resume` request was logged.
 */
function expectResumeRequest(
  requests: Array<{ method: string; params: unknown }>,
  params: Record<string, unknown>,
) {
  const resumeRequest = requests.find((entry) => entry.method === "thread/resume");
  if (!resumeRequest) {
    throw new Error("Expected thread/resume request");
  }
  const actual = resumeRequest.params as Record<string, unknown> | undefined;
  Object.entries(params).forEach(([key, expected]) => {
    expect(actual?.[key]).toEqual(expected);
  });
}
|
|
|
|
/**
 * Creates an app-server harness for resuming an existing thread:
 * `thread/resume` answers with thread id `thread-existing`, `turn/start` with
 * the canned turn, and any other method with `{}`.
 */
function createResumeHarness() {
  return createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/resume":
        return threadStartResult("thread-existing");
      case "turn/start":
        return turnStartResult();
      default:
        return {};
    }
  });
}
|
|
|
|
/**
 * Writes an app-server binding for thread `thread-existing` to `sessionFile`.
 *
 * @param sessionFile Session file the binding is written for.
 * @param workspaceDir Stored as the binding's `cwd`.
 * @param overrides Fields spread last, so they replace the defaults.
 */
async function writeExistingBinding(
  sessionFile: string,
  workspaceDir: string,
  overrides: Partial<Parameters<typeof writeCodexAppServerBinding>[1]> = {},
) {
  const binding = {
    threadId: "thread-existing",
    cwd: workspaceDir,
    model: "gpt-5.4-codex",
    modelProvider: "openai",
    ...overrides,
  };
  await writeCodexAppServerBinding(sessionFile, binding);
}
|
|
|
|
/**
 * Builds the `appServer` options passed to `startOrResumeThread` in the thread
 * lifecycle tests: a stdio `codex app-server` start command, 60 s request and
 * idle timeouts, approval policy `never` and a `workspace-write` sandbox.
 */
function createThreadLifecycleAppServerOptions(): Parameters<
  typeof startOrResumeThread
>[0]["appServer"] {
  const appServer: Parameters<typeof startOrResumeThread>[0]["appServer"] = {
    start: {
      transport: "stdio",
      command: "codex",
      args: ["app-server"],
      headers: {},
    },
    requestTimeoutMs: 60_000,
    turnCompletionIdleTimeoutMs: 60_000,
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: "workspace-write",
    codeModeOnly: false,
  };
  return appServer;
}
|
|
|
|
/**
 * Builds a dynamic tool definition named `message` whose only (required)
 * input property is an `action` string restricted to `actions`.
 *
 * @param description Tool description.
 * @param actions Allowed `action` values; defaults to `["send"]`.
 */
function createMessageDynamicTool(
  description: string,
  actions: string[] = ["send"],
): Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] {
  const messageTool: Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] = {
    name: "message",
    description,
    inputSchema: {
      type: "object",
      properties: {
        action: {
          type: "string",
          enum: actions,
        },
      },
      required: ["action"],
      additionalProperties: false,
    },
  };
  return messageTool;
}
|
|
|
|
/**
 * Builds a dynamic tool definition with the given name, the description
 * `"<name> test tool"` and an empty, closed object input schema.
 *
 * @param name Tool name.
 */
function createNamedDynamicTool(
  name: string,
): Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] {
  const namedTool: Parameters<typeof startOrResumeThread>[0]["dynamicTools"][number] = {
    name,
    description: `${name} test tool`,
    inputSchema: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
  };
  return namedTool;
}
|
|
|
|
/** Element type of the `tools` array accepted by `createCodexDynamicToolBridge`. */
type RuntimeDynamicToolForTest = Parameters<
  typeof createCodexDynamicToolBridge
>[0]["tools"][number];
|
|
|
|
/**
 * Builds a runtime tool whose `execute` is a `vi.fn` mock resolving to a
 * single text part `"<name> done"` with empty `details`.
 *
 * @param name Used as the tool name and label, and inside the description.
 */
function createRuntimeDynamicTool(name: string): RuntimeDynamicToolForTest {
  const runtimeTool: RuntimeDynamicToolForTest = {
    name,
    label: name,
    description: `${name} test tool`,
    parameters: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
    execute: vi.fn(async () => ({
      content: [{ type: "text" as const, text: `${name} done` }],
      details: {},
    })),
  };
  return runtimeTool;
}
|
|
|
|
/**
 * Builds an `apps` config patch in which the `_default` entry disables
 * everything and `google-calendar-app` is fully enabled with the `auto`
 * tools approval mode.
 */
function createPluginAppConfigPatch() {
  const disabledByDefault = {
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  };
  const googleCalendarApp = {
    enabled: true,
    destructive_enabled: true,
    open_world_enabled: true,
    default_tools_approval_mode: "auto",
  };
  return {
    apps: {
      _default: disabledByDefault,
      "google-calendar-app": googleCalendarApp,
    },
  };
}
|
|
|
|
/**
 * Builds a plugin app policy context (fingerprint `plugin-policy-1`) with one
 * app, `google-calendar-app`, owned by the `google-calendar` plugin and with
 * destructive actions disallowed.
 */
function createPluginAppPolicyContext() {
  const googleCalendarApp = {
    configKey: "google-calendar",
    marketplaceName: "openai-curated" as const,
    pluginName: "google-calendar",
    allowDestructiveActions: false,
    mcpServerNames: ["google-calendar"],
  };
  return {
    fingerprint: "plugin-policy-1",
    apps: {
      "google-calendar-app": googleCalendarApp,
    },
    pluginAppIds: {
      "google-calendar": ["google-calendar-app"],
    },
  };
}
|
|
|
|
/**
 * Extends the single-app config patch with a fully enabled `gmail-app` entry
 * (tools approval mode `auto`).
 */
function createTwoPluginAppConfigPatch() {
  const base = createPluginAppConfigPatch();
  const gmailApp = {
    enabled: true,
    destructive_enabled: true,
    open_world_enabled: true,
    default_tools_approval_mode: "auto",
  };
  return {
    apps: {
      ...base.apps,
      "gmail-app": gmailApp,
    },
  };
}
|
|
|
|
/**
 * Extends the single-app policy context with a `gmail-app` owned by the
 * `gmail` plugin; the fingerprint becomes `plugin-policy-2`.
 */
function createTwoPluginAppPolicyContext() {
  const base = createPluginAppPolicyContext();
  const gmailApp = {
    configKey: "gmail",
    marketplaceName: "openai-curated" as const,
    pluginName: "gmail",
    allowDestructiveActions: false,
    mcpServerNames: ["gmail"],
  };
  return {
    fingerprint: "plugin-policy-2",
    apps: {
      ...base.apps,
      "gmail-app": gmailApp,
    },
    pluginAppIds: {
      ...base.pluginAppIds,
      gmail: ["gmail-app"],
    },
  };
}
|
|
|
|
/**
 * Extends the single-app config patch with a fully enabled
 * `google-calendar-secondary-app` entry (tools approval mode `auto`).
 */
function createTwoCalendarAppConfigPatch() {
  const base = createPluginAppConfigPatch();
  const secondaryCalendarApp = {
    enabled: true,
    destructive_enabled: true,
    open_world_enabled: true,
    default_tools_approval_mode: "auto",
  };
  return {
    apps: {
      ...base.apps,
      "google-calendar-secondary-app": secondaryCalendarApp,
    },
  };
}
|
|
|
|
/**
 * Builds a policy context (fingerprint `plugin-policy-calendar-2`) in which the
 * `google-calendar` plugin owns two apps: `google-calendar-app` and
 * `google-calendar-secondary-app`.
 */
function createTwoCalendarAppPolicyContext() {
  const base = createPluginAppPolicyContext();
  const secondaryCalendarApp = {
    configKey: "google-calendar",
    marketplaceName: "openai-curated" as const,
    pluginName: "google-calendar",
    allowDestructiveActions: false,
    mcpServerNames: ["google-calendar"],
  };
  return {
    fingerprint: "plugin-policy-calendar-2",
    apps: {
      ...base.apps,
      "google-calendar-secondary-app": secondaryCalendarApp,
    },
    pluginAppIds: {
      "google-calendar": ["google-calendar-app", "google-calendar-secondary-app"],
    },
  };
}
|
|
|
|
/**
 * Handler the code under test registers through the fake client's
 * `addRequestHandler`; it receives a request (`id`, `method`, optional
 * `params`) and resolves with the response.
 */
type AppServerRequestHandler = (request: {
  id: string | number;
  method: string;
  params?: unknown;
}) => Promise<unknown>;
|
|
|
|
/**
 * Extracts the `--relay-id <id>` value from the hook commands of a thread
 * request's `config`.
 *
 * The hook lists are scanned in the order `hooks.PreToolUse`,
 * `hooks.PostToolUse`, `hooks.PermissionRequest`, `hooks.Stop`. Within each
 * list, every entry contributes its first hook with a string `command`, and
 * the scan stops at the first non-empty command found.
 *
 * Missing pieces (no params, no config, a null or non-object entry or hook)
 * are tolerated, so such inputs end in the descriptive "relay id missing"
 * error below rather than an opaque `TypeError` from a property access.
 *
 * @param params Recorded request parameters; may be `undefined` when the
 *   request was sent without params.
 * @returns The relay id (the run of non-space characters after `--relay-id `).
 * @throws Error when no command is found or the command has no relay id.
 */
function extractRelayIdFromThreadRequest(params: unknown): string {
  const config = (params as { config?: Record<string, unknown> } | null | undefined)?.config;
  const hookKeys = [
    "hooks.PreToolUse",
    "hooks.PostToolUse",
    "hooks.PermissionRequest",
    "hooks.Stop",
  ];
  let command: string | undefined;
  search: for (const key of hookKeys) {
    const entries = config?.[key];
    if (!Array.isArray(entries)) {
      continue;
    }
    for (const entry of entries as Array<
      { hooks?: Array<{ command?: string } | null | undefined> } | null | undefined
    >) {
      command = entry?.hooks?.find((hook) => typeof hook?.command === "string")?.command;
      if (command) {
        break search;
      }
    }
  }
  const match = command?.match(/--relay-id ([^ ]+)/);
  if (!match?.[1]) {
    throw new Error(`relay id missing from command: ${command}`);
  }
  return match[1];
}
|
|
|
|
describe("runCodexAppServerAttempt", () => {
|
|
// Per-test setup: clean event state, neutral env, fresh temp directory.
beforeEach(async () => {
  resetAgentEventsForTest();
  resetDiagnosticEventsForTest();
  // Stub the trajectory flag to "0" and blank both API key variables.
  const stubbedEnv: Array<[string, string]> = [
    ["OPENCLAW_TRAJECTORY", "0"],
    ["CODEX_API_KEY", ""],
    ["OPENAI_API_KEY", ""],
  ];
  for (const [name, value] of stubbedEnv) {
    vi.stubEnv(name, value);
  }
  const tempDirPrefix = path.join(os.tmpdir(), "openclaw-codex-run-");
  tempDir = await fs.mkdtemp(tempDirPrefix);
});
|
|
|
|
// Per-test teardown. The original statement order is kept on purpose.
afterEach(async () => {
  // Drop harness overrides and module-level caches touched by the tests.
  resetCodexAppServerClientFactoryForTest();
  testing.resetOpenClawCodingToolsFactoryForTests();
  resetCodexRateLimitCacheForTests();
  nativeHookRelayTesting.clearNativeHookRelaysForTests();
  clearPluginCommands();

  // Reset event buses and the global hook runner.
  resetAgentEventsForTest();
  resetDiagnosticEventsForTest();
  resetGlobalHookRunner();
  defaultCodexAppInventoryCache.clear();

  // Undo vitest timers, mocks and env stubs.
  vi.useRealTimers();
  vi.restoreAllMocks();
  vi.unstubAllEnvs();

  // Remove the scratch directory created in `beforeEach`.
  const removeOptions = { recursive: true, force: true };
  await fs.rm(tempDir, removeOptions);
});
|
|
|
|
// Only the tools that survive the default filter should remain, in input order.
it("filters Codex-native dynamic tools from app-server tool exposure", () => {
  const candidateNames = [
    "read",
    "write",
    "edit",
    "apply_patch",
    "exec",
    "process",
    "update_plan",
    "tool_call",
    "tool_describe",
    "tool_search",
    "tool_search_code",
    "web_search",
    "message",
    "heartbeat_respond",
    "sessions_spawn",
  ];
  const tools = candidateNames.map((name) => ({ name }));

  const exposedNames = testing.filterCodexDynamicTools(tools, {}).map((tool) => tool.name);

  expect(exposedNames).toEqual(["web_search", "message", "heartbeat_respond", "sessions_spawn"]);
});
|
|
|
|
// A configured exclude removes `custom_tool` on top of the default filtering.
it("applies additional Codex dynamic tool excludes without exposing Codex-native tools", () => {
  const tools = ["read", "exec", "message", "custom_tool"].map((name) => ({ name }));
  const pluginConfig = { codexDynamicToolsExclude: ["custom_tool"] };

  const exposedNames = testing
    .filterCodexDynamicTools(tools, pluginConfig)
    .map((tool) => tool.name);

  expect(exposedNames).toEqual(["message"]);
});
|
|
|
|
// With the private QA env forcing the codex runtime, nothing is filtered and
// dynamic tool loading resolves to "direct".
it("exposes app-server-owned tools directly for forced private QA Codex runtime", () => {
  const toolNames = ["read", "write", "image_generate", "message"];
  const tools = toolNames.map((name) => ({ name }));
  const privateQaCodexEnv = {
    OPENCLAW_BUILD_PRIVATE_QA: "1",
    OPENCLAW_QA_FORCE_RUNTIME: "codex",
  };

  const exposedNames = testing
    .filterCodexDynamicTools(tools, {}, privateQaCodexEnv)
    .map((tool) => tool.name);
  expect(exposedNames).toEqual(["read", "write", "image_generate", "message"]);

  const loadingMode = testing.resolveCodexDynamicToolsLoading({}, privateQaCodexEnv);
  expect(loadingMode).toBe("direct");
});
|
|
|
|
// With an enabled "ssh" sandbox backend, exec/process are exposed as
// sandbox_exec/sandbox_process and the other coding tools are dropped.
it("exposes OpenClaw sandbox shell tools under distinct names for non-Docker sandbox backends", async () => {
  const factoryToolNames = [
    "read",
    "write",
    "edit",
    "apply_patch",
    "exec",
    "process",
    "message",
  ];
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    factoryToolNames.map((name) => createRuntimeDynamicTool(name)),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const tools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox: { enabled: true, backendId: "ssh" } as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  const descriptionOf = (toolName: string) =>
    tools.find((tool) => tool.name === toolName)?.description;
  expect(tools.map((tool) => tool.name)).toEqual(["message", "sandbox_exec", "sandbox_process"]);
  expect(descriptionOf("sandbox_exec")).toContain("configured sandbox backend");
  expect(descriptionOf("sandbox_process")).toContain("sandbox_exec sessions");
});
|
|
|
|
// Docker backend with the native tool surface enabled: no sandbox shell tools.
it("keeps Docker sandbox shell tools hidden when native Code Mode can honor sandbox paths", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    ["exec", "process", "message"].map((name) => createRuntimeDynamicTool(name)),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const dockerTools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox: { enabled: true, backendId: "docker" } as never,
    nativeToolSurfaceEnabled: true,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  const exposedNames = dockerTools.map((tool) => tool.name);
  expect(exposedNames).toEqual(["message"]);
});
|
|
|
|
// Docker backend with binds and the native tool surface disabled: the sandbox
// shell tools are exposed and sandbox_exec describes the bind layout.
it("exposes Docker sandbox shell tools when native Code Mode cannot honor sandbox paths", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    ["exec", "process", "message"].map((name) => createRuntimeDynamicTool(name)),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const tools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox: {
      enabled: true,
      backendId: "docker",
      docker: { binds: ["/tmp/openclaw-data:/data:rw"] },
    } as never,
    nativeToolSurfaceEnabled: false,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  const exposedNames = tools.map((tool) => tool.name);
  expect(exposedNames).toEqual(["message", "sandbox_exec", "sandbox_process"]);
  const sandboxExec = tools.find((tool) => tool.name === "sandbox_exec");
  expect(sandboxExec?.description).toContain("Docker container-path bind layout");
});
|
|
|
|
// A disabled sandbox exposes no sandbox shell tools even for the "ssh" backend.
it("does not expose sandbox shell tools when sandbox routing is disabled", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    ["exec", "process", "message"].map((name) => createRuntimeDynamicTool(name)),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const disabledSandboxTools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox: { enabled: false, backendId: "ssh" } as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  const exposedNames = disabledSandboxTools.map((tool) => tool.name);
  expect(exposedNames).toEqual(["message"]);
});
|
|
|
|
// The factory provides `exec` but no `process`, so neither sandbox tool appears.
it("does not expose sandbox_exec without a matching process follow-up tool", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    ["exec", "message"].map((name) => createRuntimeDynamicTool(name)),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const tools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox: { enabled: true, backendId: "ssh" } as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  const exposedNames = tools.map((tool) => tool.name);
  expect(exposedNames).toEqual(["message"]);
});
|
|
|
|
// Excluding either `sandbox_exec` or `process` hides both sandbox shell tools.
it("honors Codex dynamic tool excludes for sandbox shell exposure", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    ["exec", "process", "message"].map((name) => createRuntimeDynamicTool(name)),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  const sandboxSessionKey = params.sessionKey;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const excludedToolNames = ["sandbox_exec", "process"];
  // Runs sequentially: each build is awaited before the next exclude is tried.
  for (const excludedToolName of excludedToolNames) {
    const tools = await testing.buildDynamicTools({
      params,
      resolvedWorkspace: workspaceDir,
      effectiveWorkspace: workspaceDir,
      sandboxSessionKey,
      sandbox: { enabled: true, backendId: "ssh" } as never,
      runAbortController: new AbortController(),
      sessionAgentId: "main",
      pluginConfig: { codexDynamicToolsExclude: [excludedToolName] },
      onYieldDetected: () => undefined,
    });

    const exposedNames = tools.map((tool) => tool.name);
    expect(exposedNames).toEqual(["message"]);
  }
});
|
|
|
|
// The wrapped exec result must mention `sandbox_process` instead of `process`.
it("points yielded sandbox_exec follow-up guidance at sandbox_process", async () => {
  const execTool = createRuntimeDynamicTool("exec");
  const stillRunningResult = {
    content: [
      {
        type: "text" as const,
        text: "Command still running (session exec-1, pid 123). Use process (list/poll/log/write/send-keys/submit/paste/kill/clear/remove) for follow-up.",
      },
    ],
    details: { status: "running" },
  };
  vi.mocked(execTool.execute).mockResolvedValueOnce(stillRunningResult);
  const processTool = createRuntimeDynamicTool("process");

  const tools = testing.addSandboxShellDynamicToolsIfAvailable([], [execTool, processTool], {
    sandbox: { enabled: true, backendId: "ssh" },
    pluginConfig: {},
  } as never);
  const sandboxExec = tools.find((tool) => tool.name === "sandbox_exec");
  const result = await sandboxExec?.execute("call-1", {}, undefined);

  expect(result?.content).toEqual([
    {
      type: "text",
      text: "Command still running (session exec-1, pid 123). Use sandbox_process (list/poll/log/write/send-keys/submit/paste/kill/clear/remove) for follow-up.",
    },
  ]);
});
|
|
|
|
// The `thread/start` request must carry `message` and `web_search` but none of
// the tools removed by the default dynamic tool filter.
it("starts Codex threads without duplicate OpenClaw workspace tools by default", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const appServer = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult();
  });
  const filteredToolNames = [
    "read",
    "write",
    "edit",
    "apply_patch",
    "exec",
    "process",
    "update_plan",
    "tool_call",
    "tool_describe",
    "tool_search",
    "tool_search_code",
  ];
  const candidateTools = [...filteredToolNames, "web_search", "message"].map(
    createNamedDynamicTool,
  );
  const dynamicTools = testing.filterCodexDynamicTools(candidateTools, {});

  await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools,
    appServer,
  });

  const startRequest = request.mock.calls.find(([method]) => method === "thread/start");
  const startParams = startRequest?.[1] as { dynamicTools?: Array<{ name: string }> } | undefined;
  const dynamicToolNames = (startParams?.dynamicTools ?? []).map((tool) => tool.name);

  expect(dynamicToolNames).toContain("message");
  expect(dynamicToolNames).toContain("web_search");
  for (const toolName of filteredToolNames) {
    expect(dynamicToolNames).not.toContain(toolName);
  }
});
|
|
|
|
// The MCP server config must reach `thread/start`, and the fingerprint must be
// persisted in the session binding.
it("passes MCP server config through to Codex thread/start", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult();
  });

  await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: createThreadLifecycleAppServerOptions(),
    config: {
      mcp_servers: {
        search: {
          url: "https://mcp.example.com/mcp",
        },
      },
    },
    mcpServersFingerprint: "mcp-v1",
    mcpServersFingerprintEvaluated: true,
  });

  const startRequest = request.mock.calls.find(([method]) => method === "thread/start");
  const startParams = startRequest?.[1] as { config?: unknown } | undefined;
  expect(startParams?.config).toMatchObject({
    mcp_servers: {
      search: {
        url: "https://mcp.example.com/mcp",
      },
    },
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.mcpServersFingerprint).toBe("mcp-v1");
});
|
|
|
|
it("starts a new Codex thread when the MCP server fingerprint changes", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed a binding whose MCP fingerprint differs from the one requested below.
  await writeCodexAppServerBinding(sessionFile, {
    threadId: "old-thread",
    cwd: workspaceDir,
    dynamicToolsFingerprint: JSON.stringify([]),
    mcpServersFingerprint: "mcp-v1",
  });
  // Any method other than thread/start (e.g. a resume) fails the test.
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("new-thread");
  });

  const freshBinding = await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: createThreadLifecycleAppServerOptions(),
    mcpServersFingerprint: "mcp-v2",
    mcpServersFingerprintEvaluated: true,
  });

  const requestedMethods = request.mock.calls.map(([method]) => method);
  expect(requestedMethods).toEqual(["thread/start"]);
  expect(freshBinding.threadId).toBe("new-thread");
  expect(freshBinding.mcpServersFingerprint).toBe("mcp-v2");
});
|
|
|
|
it("starts a no-MCP Codex thread when MCP config is evaluated empty", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed a binding that still carries an MCP fingerprint from an earlier run.
  await writeCodexAppServerBinding(sessionFile, {
    threadId: "old-thread",
    cwd: workspaceDir,
    dynamicToolsFingerprint: JSON.stringify([]),
    mcpServersFingerprint: "mcp-v1",
  });
  // Any method other than thread/start (e.g. a resume) fails the test.
  const request = vi.fn(async (method: string, _params: unknown) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("new-thread");
  });

  // MCP was evaluated, but no fingerprint is supplied this time.
  const freshBinding = await startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer: createThreadLifecycleAppServerOptions(),
    mcpServersFingerprintEvaluated: true,
  });

  const requestedMethods = request.mock.calls.map(([method]) => method);
  expect(requestedMethods).toEqual(["thread/start"]);
  expect(freshBinding.threadId).toBe("new-thread");
  expect(freshBinding.mcpServersFingerprint).toBeUndefined();

  const persistedBinding = await readCodexAppServerBinding(sessionFile);
  expect(persistedBinding?.mcpServersFingerprint).toBeUndefined();
});
|
|
|
|
it("passes auth profiles into Codex dynamic tool construction", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const authProfileStore = {
    version: 1,
    profiles: {
      "openai:api-key-backup": {
        provider: "openai",
        type: "api_key",
        key: "not-a-real-key",
      },
    },
  } satisfies EmbeddedRunAttemptParams["authProfileStore"];
  params.disableTools = false;
  params.authProfileStore = authProfileStore;
  params.runtimePlan = createCodexRuntimePlanFixture();

  // Record every options object handed to the coding-tools factory.
  const capturedOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    capturedOptions.push(options);
    return [];
  });

  await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: params.sessionKey!,
    sandbox: null as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  expect(capturedOptions).toHaveLength(1);
  // Identity check: the exact store instance must be forwarded to the factory.
  const forwarded = capturedOptions[0] as { authProfileStore?: unknown };
  expect(forwarded.authProfileStore).toBe(authProfileStore);
});
|
|
|
|
it("uses the tool auth profile store for Codex dynamic tool construction", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  // Store assigned to params.authProfileStore: holds only the Codex profile.
  const transportAuthProfileStore = {
    version: 1,
    profiles: {
      "openai-codex:work": {
        provider: "openai-codex",
        type: "oauth",
        access: "transport-token",
        refresh: "transport-refresh",
        expires: Date.now() + 60_000,
      },
    },
  } satisfies EmbeddedRunAttemptParams["authProfileStore"];
  // Store assigned to params.toolAuthProfileStore: same Codex profile plus an xai one.
  const toolAuthProfileStore = {
    version: 1,
    profiles: {
      "openai-codex:work": {
        provider: "openai-codex",
        type: "oauth",
        access: "transport-token",
        refresh: "transport-refresh",
        expires: Date.now() + 60_000,
      },
      "xai:work": {
        provider: "xai",
        type: "oauth",
        access: "xai-token",
        refresh: "xai-refresh",
        expires: Date.now() + 60_000,
      },
    },
  } satisfies EmbeddedRunAttemptParams["authProfileStore"];
  params.disableTools = false;
  params.authProfileStore = transportAuthProfileStore;
  params.toolAuthProfileStore = toolAuthProfileStore;
  params.runtimePlan = createCodexRuntimePlanFixture();

  // Record every options object handed to the coding-tools factory.
  const capturedOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    capturedOptions.push(options);
    return [];
  });

  await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: params.sessionKey!,
    sandbox: null as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  expect(capturedOptions).toHaveLength(1);
  // The factory must receive the tool store, not the transport store.
  const forwarded = capturedOptions[0] as { authProfileStore?: unknown };
  expect(forwarded.authProfileStore).toBe(toolAuthProfileStore);
});
|
|
|
|
it("keeps canonical OpenAI Codex runs on OpenAI dynamic tool policy", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  // Describe the run as an "openai" provider model on the responses API.
  params.provider = "openai";
  params.modelId = "gpt-5.5";
  params.model = {
    ...createCodexTestModel("openai"),
    id: "gpt-5.5",
    name: "gpt-5.5",
    api: "openai-responses",
  } as EmbeddedRunAttemptParams["model"];
  params.runtimePlan = {
    ...createCodexRuntimePlanFixture(),
    observability: {
      resolvedRef: "openai/gpt-5.5",
      provider: "openai",
      modelId: "gpt-5.5",
      harnessId: "codex",
    },
  };

  // Record every options object handed to the coding-tools factory.
  const capturedOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    capturedOptions.push(options);
    return [];
  });

  await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: params.sessionKey!,
    sandbox: null as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  expect(capturedOptions).toHaveLength(1);
  const forwarded = capturedOptions[0] as { modelProvider?: unknown; modelApi?: unknown };
  expect(forwarded.modelProvider).toBe("openai");
  expect(forwarded.modelApi).toBe("openai-responses");
});
|
|
|
|
it("enables gateway subagent binding for forced private QA Codex runs", async () => {
  // Environment flags stubbed for this test: private QA build, runtime forced to codex.
  vi.stubEnv("OPENCLAW_BUILD_PRIVATE_QA", "1");
  vi.stubEnv("OPENCLAW_QA_FORCE_RUNTIME", "codex");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();

  // Record factory options and hand back a single sessions_spawn tool.
  const capturedOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    capturedOptions.push(options);
    return [createRuntimeDynamicTool("sessions_spawn")];
  });

  const builtTools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey: params.sessionKey!,
    sandbox: null as never,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });

  expect(capturedOptions).toHaveLength(1);
  const forwarded = capturedOptions[0] as { allowGatewaySubagentBinding?: unknown };
  expect(forwarded.allowGatewaySubagentBinding).toBe(true);
  const builtToolNames = builtTools.map((tool) => tool.name);
  expect(builtToolNames).toEqual(["sessions_spawn"]);
});
|
|
|
|
it("normalizes Codex dynamic toolsAllow entries before filtering", () => {
  const candidateNames = [
    "exec",
    "sandbox_exec",
    "sandbox_process",
    "apply_patch",
    "read",
    "message",
  ];
  const tools = candidateNames.map((name) => ({ name }));

  // Allowlist entries are deliberately padded, upper-cased, hyphenated, and aliased.
  const allowed = testing.filterCodexDynamicToolsForAllowlist(tools, [
    " BASH ",
    "apply-patch",
    "READ",
  ]);

  expect(allowed.map((tool) => tool.name)).toEqual([
    "exec",
    "sandbox_exec",
    "sandbox_process",
    "apply_patch",
    "read",
  ]);
});
|
|
|
|
it("treats an explicit empty Codex dynamic toolsAllow as no tools", () => {
  const tools = ["message", "web_search"].map((name) => ({ name }));

  // An empty allowlist is a real restriction, so nothing survives filtering.
  const allowed = testing.filterCodexDynamicToolsForAllowlist(tools, []);

  expect(allowed).toEqual([]);
});
|
|
|
|
it("treats wildcard Codex dynamic toolsAllow as unrestricted", () => {
  const tools = ["message", "web_search"].map((name) => ({ name }));

  // The wildcard entry is padded with whitespace on purpose.
  const allowed = testing.filterCodexDynamicToolsForAllowlist(tools, [" * "]);

  expect(allowed).toEqual(tools);
});
|
|
|
|
it("disables Codex native tool surfaces for restricted runtime allowlists", () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  params.disableTools = false;

  // Before toolsAllow is assigned, the native surface is enabled.
  expect(testing.shouldEnableCodexAppServerNativeToolSurface(params)).toBe(true);

  // Each allowlist is applied in turn to the same params object.
  const allowlistCases: Array<{ toolsAllow: string[]; expected: boolean }> = [
    { toolsAllow: ["*"], expected: true },
    { toolsAllow: [], expected: false },
    { toolsAllow: ["message"], expected: false },
  ];
  for (const { toolsAllow, expected } of allowlistCases) {
    params.toolsAllow = toolsAllow;
    expect(testing.shouldEnableCodexAppServerNativeToolSurface(params)).toBe(expected);
  }
});
|
|
|
|
it("disables Codex native tool surfaces when Docker bind targets need container paths", () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  params.disableTools = false;

  // Builds an enabled docker sandbox fixture with the given bind specs.
  const dockerSandboxWithBinds = (binds: string[]) =>
    ({
      enabled: true,
      backendId: "docker",
      docker: { binds },
    }) as never;

  // Host path and container path differ.
  const remappedBind = dockerSandboxWithBinds(["/tmp/openclaw-data:/data:rw"]);
  expect(testing.shouldEnableCodexAppServerNativeToolSurface(params, remappedBind)).toBe(false);

  // Host path and container path are identical.
  const identityBind = dockerSandboxWithBinds(["/tmp/openclaw-data:/tmp/openclaw-data:rw"]);
  expect(testing.shouldEnableCodexAppServerNativeToolSurface(params, identityBind)).toBe(true);

  // Identity binds, but a nested read-only bind sits inside the read-write one.
  const nestedReadOnlyBind = dockerSandboxWithBinds([
    "/tmp/openclaw-data:/tmp/openclaw-data:rw",
    "/tmp/openclaw-data/secrets:/tmp/openclaw-data/secrets:ro",
  ]);
  expect(testing.shouldEnableCodexAppServerNativeToolSurface(params, nestedReadOnlyBind)).toBe(
    false,
  );
});
|
|
|
|
it("forces the message dynamic tool for message-tool-only source replies", () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);

  // message_tool_only delivery forces the tool...
  params.sourceReplyDeliveryMode = "message_tool_only";
  const forcedForMessageToolOnly = testing.shouldForceMessageTool(params);
  expect(forcedForMessageToolOnly).toBe(true);

  // ...unless the message tool is explicitly disabled...
  params.disableMessageTool = true;
  const forcedWhenDisabled = testing.shouldForceMessageTool(params);
  expect(forcedWhenDisabled).toBe(false);

  // ...and automatic delivery never forces it.
  params.disableMessageTool = false;
  params.sourceReplyDeliveryMode = "automatic";
  const forcedForAutomatic = testing.shouldForceMessageTool(params);
  expect(forcedForAutomatic).toBe(false);
});
|
|
|
|
it("scopes Codex developer reply instructions to message-tool-only delivery", () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  params.sourceReplyDeliveryMode = "message_tool_only";

  // With a message tool present, the instructions point at it.
  const withMessageToolInstructions = testing.buildDeveloperInstructions(params, {
    dynamicTools: [createMessageDynamicTool("Message test tool")],
  });
  expect(withMessageToolInstructions).toContain(
    "To send a visible message, use the `message` tool.",
  );

  // Without a message tool, the instructions fall back to the delivery-path wording.
  const withoutMessageToolInstructions = testing.buildDeveloperInstructions(params, {
    dynamicTools: [],
  });
  expect(withoutMessageToolInstructions).toContain("active Codex delivery path");
  expect(withoutMessageToolInstructions).not.toContain("use the `message` tool");

  // Automatic delivery uses the same fallback wording.
  params.sourceReplyDeliveryMode = "automatic";
  const automaticInstructions = testing.buildDeveloperInstructions(params);
  expect(automaticInstructions).toContain("active Codex delivery path");
  expect(automaticInstructions).not.toContain("use the `message` tool");
});
|
|
|
|
it("includes Codex app-server scoped plugin command guidance in developer instructions", () => {
  // One guidance entry per flavour: plain string, codex-scoped, unscoped object, pi-scoped.
  registerPluginCommand("demo-plugin", {
    name: "codex_demo",
    description: "Codex demo command",
    agentPromptGuidance: [
      "Legacy global command guidance.",
      { text: "Codex app-server command guidance.", surfaces: ["codex_app_server"] },
      { text: "Unscoped structured command guidance." },
      { text: "PI main command guidance.", surfaces: ["pi_main"] },
    ],
    handler: async () => ({ text: "ok" }),
  });
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);

  const instructions = testing.buildDeveloperInstructions(params);

  // Only the entry scoped to codex_app_server may appear.
  expect(instructions).toContain("Codex app-server command guidance.");
  const excludedGuidance = [
    "Legacy global command guidance.",
    "Unscoped structured command guidance.",
    "PI main command guidance.",
  ];
  for (const guidance of excludedGuidance) {
    expect(instructions).not.toContain(guidance);
  }
});
|
|
|
|
it("keeps OpenClaw skills out of Codex developer instructions", async () => {
  // Capture llm_input hook payloads and turn on trajectory recording under tempDir.
  const llmInput = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "llm_input", handler: llmInput }]),
  );
  vi.stubEnv("OPENCLAW_TRAJECTORY", "1");
  vi.stubEnv("OPENCLAW_TRAJECTORY_DIR", path.join(tempDir, "trajectory"));
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.skillsSnapshot = {
    prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
    skills: [],
  };

  const run = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  // Yield one macrotask tick before completing the turn.
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await run;

  // The skills block must not be part of the thread-level developer instructions.
  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const threadStartParams = threadStart?.params as { developerInstructions?: string };
  expect(threadStartParams.developerInstructions).not.toContain("<available_skills>");

  // Instead it is delivered with the turn input, ahead of the user request.
  const turnStart = harness.requests.find((request) => request.method === "turn/start");
  const turnStartParams = turnStart?.params as {
    input?: Array<{ text?: string }>;
  };
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  expect(inputText).toContain("## OpenClaw Skills");
  expect(inputText).toContain("<available_skills>");
  expect(inputText).toContain("Current user request:\nhello");

  // The hook payload and the trajectory events must carry that same prompt text.
  const [llmInputPayload] = mockCall(llmInput, "llm_input") as [{ prompt?: string }, unknown];
  expect(llmInputPayload.prompt).toBe(inputText);
  const trajectoryFile = path.join(tempDir, "trajectory", "session-1.jsonl");
  const trajectoryText = await fs.readFile(trajectoryFile, "utf8");
  const trajectoryEvents = trajectoryText
    .trim()
    .split("\n")
    .map((line) => JSON.parse(line) as { data?: { prompt?: string }; type?: string });
  const compiledEvent = trajectoryEvents.find((event) => event.type === "context.compiled");
  expect(compiledEvent?.data?.prompt).toBe(inputText);
  const submittedEvent = trajectoryEvents.find((event) => event.type === "prompt.submitted");
  expect(submittedEvent?.data?.prompt).toBe(inputText);

  // The prompt report accounts for the skills prompt and its single skill entry.
  const skillsReport = result.systemPromptReport?.skills;
  expect(skillsReport?.promptChars).toBe(params.skillsSnapshot.prompt.length);
  expect(skillsReport?.entries).toEqual([
    { name: "demo", blockChars: "<skill><name>demo</name></skill>".length },
  ]);
});
|
|
|
|
it("keeps forced message dynamic tool when toolsAllow omits it", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("music_generate"),
  ]);
  // Answer turn/start after a short delay; every other method gets no custom reply.
  const harness = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    await new Promise((resolve) => setTimeout(resolve, 5));
    return turnStartResult();
  });
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  // The allowlist names only music_generate; "message" is left out.
  params.toolsAllow = ["music_generate"];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { dynamicTools?: Array<{ name?: string }> }
    | undefined;
  const dynamicToolNames = startParams?.dynamicTools?.map((tool) => tool.name) ?? [];

  expect(dynamicToolNames).toContain("message");
  expect(dynamicToolNames).toContain("music_generate");
});
|
|
|
|
it("keeps forced message dynamic tool when toolsAllow is empty", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("music_generate"),
  ]);
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  // Empty allowlist combined with message_tool_only delivery.
  params.toolsAllow = [];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { dynamicTools?: Array<{ name?: string }> }
    | undefined;
  const dynamicToolNames = startParams?.dynamicTools?.map((tool) => tool.name) ?? [];

  // Only the forced message tool is sent.
  expect(dynamicToolNames).toEqual(["message"]);
});
|
|
|
|
it("keeps searchable OpenClaw dynamic tools when code-mode-only is enabled", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() =>
    ["message", "web_search", "heartbeat_respond", "sessions_spawn", "sessions_yield"].map(
      (toolName) => createRuntimeDynamicTool(toolName),
    ),
  );
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo", codeModeOnly: true } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as
    | { dynamicTools?: Array<Record<string, unknown>>; config?: Record<string, unknown> }
    | undefined;
  const dynamicTools = startParams?.dynamicTools ?? [];
  const startConfig = startParams?.config;
  const findTool = (toolName: string) => dynamicTools.find((tool) => tool.name === toolName);

  // Direct tools carry neither a namespace nor a deferLoading marker.
  const expectDirectTool = (toolName: string) => {
    const tool = findTool(toolName);
    expect(tool).not.toHaveProperty("namespace");
    expect(tool).not.toHaveProperty("deferLoading");
  };
  // Searchable tools sit in the OpenClaw namespace and are loaded lazily.
  const expectSearchableTool = (toolName: string) => {
    const tool = findTool(toolName);
    expect(tool?.namespace).toBe(CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE);
    expect(tool?.deferLoading).toBe(true);
  };

  expectDirectTool("message");
  expectSearchableTool("web_search");
  expectSearchableTool("heartbeat_respond");
  expectSearchableTool("sessions_spawn");
  expectDirectTool("sessions_yield");
  expect(startConfig?.["features.code_mode"]).toBe(true);
  expect(startConfig?.["features.code_mode_only"]).toBe(true);
});
|
|
|
|
it("disables Codex native tool surfaces when runtime toolsAllow is empty", async () => {
  // Shape of the thread/start params this test inspects.
  type AppPolicy = {
    enabled?: boolean;
    destructive_enabled?: boolean;
    open_world_enabled?: boolean;
  };
  type ThreadStartParams = {
    dynamicTools?: Array<{ name?: string }>;
    environments?: unknown[];
    developerInstructions?: string;
    config?: {
      "features.code_mode"?: boolean;
      "features.code_mode_only"?: boolean;
      apps?: Record<string, AppPolicy>;
    };
  };

  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("web_search"),
  ]);
  // app/list must never be requested in this scenario.
  const harness = createStartedThreadHarness(async (method) => {
    if (method === "app/list") {
      throw new Error("app/list should not run when runtime toolsAllow is empty.");
    }
    return undefined;
  });
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.toolsAllow = [];
  params.extraSystemPrompt = "Tool and file actions are disabled for this sender by chat policy.";

  // A Codex plugin is configured, yet the empty allowlist must still win.
  const run = runCodexAppServerAttempt(params, {
    pluginConfig: {
      appServer: { mode: "yolo" },
      codexPlugins: {
        enabled: true,
        plugins: {
          "google-calendar": {
            marketplaceName: "openai-curated",
            pluginName: "google-calendar",
          },
        },
      },
    },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as ThreadStartParams | undefined;
  const startConfig = startParams?.config;

  expect(startParams?.dynamicTools).toEqual([]);
  expect(startParams?.environments).toEqual([]);
  expect(startParams?.developerInstructions).toContain(
    "Tool and file actions are disabled for this sender by chat policy.",
  );
  expect(startConfig?.["features.code_mode"]).toBe(false);
  expect(startConfig?.["features.code_mode_only"]).toBe(false);
  expect(startConfig?.apps?.["_default"]).toEqual({
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  });
  expect(startConfig?.apps?.["google-calendar-app"]?.enabled).toBeUndefined();
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).not.toContain("app/list");
});
|
|
|
|
it("fails closed for Codex app defaults when restricted native tools have no plugin config", async () => {
  // Shape of the thread/start params this test inspects.
  type AppPolicy = {
    enabled?: boolean;
    destructive_enabled?: boolean;
    open_world_enabled?: boolean;
  };
  type ThreadStartParams = { config?: { apps?: Record<string, AppPolicy> } };

  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("message")]);
  // app/list must never be requested in this scenario.
  const harness = createStartedThreadHarness(async (method) => {
    if (method === "app/list") {
      throw new Error("app/list should not run when runtime toolsAllow is empty.");
    }
    return undefined;
  });
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.toolsAllow = [];

  // No codexPlugins section is supplied here.
  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const startRequest = harness.requests.find((entry) => entry.method === "thread/start");
  const startParams = startRequest?.params as ThreadStartParams | undefined;
  const defaultAppPolicy = startParams?.config?.apps?.["_default"];

  expect(defaultAppPolicy).toEqual({
    enabled: false,
    destructive_enabled: false,
    open_world_enabled: false,
  });
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).not.toContain("app/list");
});
|
|
|
|
it("returns a run context report without deferred Codex dynamic tool schemas", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    createRuntimeDynamicTool("message"),
    createRuntimeDynamicTool("web_search"),
  ]);
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.sourceReplyDeliveryMode = "message_tool_only";
  params.toolsAllow = ["message", "web_search"];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start", 120_000);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const { systemPromptReport: report } = await run;

  expect(report?.source).toBe("run");
  expect(report?.provider).toBe("codex");
  expect(report?.model).toBe("gpt-5.4-codex");
  expect(report?.systemPrompt.chars).toBeGreaterThan(0);

  // Only message contributes schema characters; web_search is reported as zero.
  const toolEntries = report?.tools.entries;
  const messageEntry = toolEntries?.find((tool) => tool.name === "message");
  const webSearchEntry = toolEntries?.find((tool) => tool.name === "web_search");
  expect(messageEntry?.schemaChars).toBeGreaterThan(0);
  expect(webSearchEntry?.schemaChars).toBe(0);
  expect(report?.tools.schemaChars).toBe(messageEntry?.schemaChars);
});
|
|
|
|
it("keeps searchable Codex dynamic tools canonical in mirrored transcript snapshots", async () => {
  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("wiki_status")]);
  const harness = createStartedThreadHarness();
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.toolsAllow = ["wiki_status"];

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: {
      codexDynamicToolsLoading: "searchable",
      appServer: { mode: "yolo" },
    },
  });
  await harness.waitForMethod("turn/start", 120_000);

  // Simulate the app server invoking the namespaced dynamic tool mid-turn.
  const toolCallResponse = (await harness.handleServerRequest({
    id: "request-tool-wiki-status",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-wiki-status-1",
      namespace: CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE,
      tool: "wiki_status",
      arguments: { topic: "README.md" },
    },
  })) as {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  expect(toolCallResponse).toEqual({
    success: true,
    contentItems: [{ type: "inputText", text: "wiki_status done" }],
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const { messagesSnapshot } = await run;

  const snapshotRoles = messagesSnapshot.map((message) => message.role);
  expect(snapshotRoles).toEqual(["user", "assistant", "toolResult"]);

  // The mirrored assistant message records the call under the plain tool name.
  const assistantMessage = messagesSnapshot[1];
  if (assistantMessage?.role !== "assistant") {
    throw new Error("expected mirrored assistant tool-call message");
  }
  expect(assistantMessage.content).toStrictEqual([
    {
      type: "toolCall",
      id: "call-wiki-status-1",
      name: "wiki_status",
      arguments: { topic: "README.md" },
      input: { topic: "README.md" },
    },
  ]);

  // The mirrored tool result repeats the call id and tool name under several aliases.
  const toolResultMessage = messagesSnapshot[2];
  if (toolResultMessage?.role !== "toolResult") {
    throw new Error("expected mirrored tool-result message");
  }
  expect(toolResultMessage.toolCallId).toBe("call-wiki-status-1");
  expect(toolResultMessage.toolName).toBe("wiki_status");
  expect(toolResultMessage.isError).toBe(false);
  expect(toolResultMessage.content).toStrictEqual([
    {
      type: "toolResult",
      id: "call-wiki-status-1",
      name: "wiki_status",
      toolName: "wiki_status",
      toolCallId: "call-wiki-status-1",
      toolUseId: "call-wiki-status-1",
      tool_use_id: "call-wiki-status-1",
      content: "wiki_status done",
      text: "wiki_status done",
    },
  ]);

  // Neither of these strings may appear anywhere in the serialized snapshot.
  const serializedSnapshot = JSON.stringify(messagesSnapshot);
  expect(serializedSnapshot).not.toContain("tool_search");
  expect(serializedSnapshot).not.toContain("function_call_output");
});
|
|
|
|
it("passes the live run session key to Codex dynamic tools when sandbox policy uses another key", () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(path.join(tempDir, "session.jsonl"), workspaceDir);
  params.sessionKey = "agent:main:main";

  // Differing keys: the second argument becomes sessionKey, the run key is surfaced separately.
  const divergentKeys = testing.resolveOpenClawCodingToolsSessionKeys(
    params,
    "agent:main:telegram:default:direct:1234",
  );
  expect(divergentKeys).toEqual({
    sessionKey: "agent:main:telegram:default:direct:1234",
    runSessionKey: "agent:main:main",
  });

  // Matching keys: no separate run session key is reported.
  const matchingKeys = testing.resolveOpenClawCodingToolsSessionKeys(params, "agent:main:main");
  expect(matchingKeys).toEqual({
    sessionKey: "agent:main:main",
    runSessionKey: undefined,
  });
});
|
|
|
|
it("keeps explicit dynamic tool timeouts above the default bridge deadline", () => {
  // Request one second more than the default dynamic tool timeout.
  const timeoutMs = testing.CODEX_DYNAMIC_TOOL_TIMEOUT_MS + 1_000;

  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-long",
      namespace: null,
      tool: "image_generate",
      arguments: { prompt: "cat", timeoutMs },
    },
    config: undefined,
  });

  expect(resolvedTimeoutMs).toBe(timeoutMs);
});
|
|
|
|
it("uses configured image generation timeouts for Codex dynamic tool calls", () => {
  // Timeout supplied via agents.defaults.imageGenerationModel; the call itself
  // carries no timeoutMs argument.
  const configuredTimeoutMs = 180_000;

  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-generate-default",
      namespace: null,
      tool: "image_generate",
      arguments: { prompt: "cat" },
    },
    config: {
      agents: {
        defaults: {
          imageGenerationModel: {
            primary: "openai/gpt-image-1",
            timeoutMs: configuredTimeoutMs,
          },
        },
      },
    },
  });

  expect(resolvedTimeoutMs).toBe(configuredTimeoutMs);
});
|
|
|
|
it("uses a 120 second default for Codex image generation dynamic tool calls", () => {
  // Neither the call arguments nor the config provide a timeout here.
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-generate-default",
      namespace: null,
      tool: "image_generate",
      arguments: { prompt: "cat" },
    },
    config: undefined,
  });

  expect(resolvedTimeoutMs).toBe(120_000);
});
|
|
|
|
it("uses the media image timeout for Codex image dynamic tool calls", () => {
  // tools.media.image.timeoutSeconds is configured in seconds; the resolved
  // value is expected in milliseconds.
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-default",
      namespace: null,
      tool: "image",
      arguments: { prompt: "describe", images: ["/tmp/one.jpg"] },
    },
    config: {
      tools: {
        media: {
          image: {
            timeoutSeconds: 180,
          },
        },
      },
    },
  });

  expect(resolvedTimeoutMs).toBe(180_000);
});
|
|
|
|
it("keeps Codex image dynamic tool calls above the default bridge deadline", () => {
  // Without any config, the `image` tool should resolve to the dedicated
  // image-tool timeout constant.
  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-image-default",
      namespace: null,
      tool: "image",
      arguments: { prompt: "describe", images: ["/tmp/one.jpg"] },
    },
    config: undefined,
  });

  expect(resolvedTimeoutMs).toBe(testing.CODEX_DYNAMIC_IMAGE_TOOL_TIMEOUT_MS);
});
|
|
|
|
it("caps dynamic tool timeouts at the bridge maximum", () => {
  // Ask for one second more than the allowed maximum.
  const maxTimeoutMs = testing.CODEX_DYNAMIC_TOOL_MAX_TIMEOUT_MS;

  const resolvedTimeoutMs = testing.resolveDynamicToolCallTimeoutMs({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-too-long",
      namespace: null,
      tool: "image_generate",
      arguments: {
        prompt: "cat",
        timeoutMs: maxTimeoutMs + 1_000,
      },
    },
    config: undefined,
  });

  // The oversized request is clamped to the maximum.
  expect(resolvedTimeoutMs).toBe(maxTimeoutMs);
});
|
|
|
|
it("returns a failed dynamic tool response when an app-server tool call exceeds the deadline", async () => {
  vi.useFakeTimers();

  // Signal handed to the tool bridge, captured so its abort state can be checked.
  let observedSignal: AbortSignal | undefined;
  const timeoutSpy = vi.fn();
  const outerAbort = new AbortController();

  const pendingResponse = testing.handleDynamicToolCallWithTimeout({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-timeout",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "hello" },
    },
    toolBridge: {
      // Never settles, so only the timeout path can produce a response.
      handleToolCall: vi.fn((_call, options) => {
        observedSignal = options?.signal;
        return new Promise<never>(() => undefined);
      }),
    },
    signal: outerAbort.signal,
    timeoutMs: 1,
    onTimeout: timeoutSpy,
  });

  // Advance the fake clock past the 1ms deadline.
  await vi.advanceTimersByTimeAsync(1);

  const expectedFailure = {
    success: false,
    contentItems: [
      {
        type: "inputText",
        text: "OpenClaw dynamic tool call timed out after 1ms while running tool message.",
      },
    ],
  };
  await expect(pendingResponse).resolves.toEqual(expectedFailure);
  expect(observedSignal?.aborted).toBe(true);
  expect(timeoutSpy).toHaveBeenCalledTimes(1);
});
|
|
|
|
it("logs process poll timeout context separately from session idle", async () => {
  vi.useFakeTimers();
  // Silence and capture warnings emitted through the embedded agent logger.
  const warnSpy = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const outerAbort = new AbortController();

  const pendingResponse = testing.handleDynamicToolCallWithTimeout({
    call: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-timeout",
      namespace: null,
      tool: "process",
      arguments: { action: "poll", sessionId: "rapid-crustacean", timeout: 30_000 },
    },
    toolBridge: {
      // Never settles, forcing the timeout path.
      handleToolCall: vi.fn(() => new Promise<never>(() => undefined)),
    },
    signal: outerAbort.signal,
    timeoutMs: 1,
  });

  await vi.advanceTimersByTimeAsync(1);

  // The response text names the process action/session and states that this
  // is a tool RPC timeout rather than a session idle timeout.
  const expectedFailure = {
    success: false,
    contentItems: [
      {
        type: "inputText",
        text: "OpenClaw dynamic tool call timed out after 1ms while waiting for process action=poll sessionId=rapid-crustacean. This is a tool RPC timeout, not a session idle timeout.",
      },
    ],
  };
  await expect(pendingResponse).resolves.toEqual(expectedFailure);

  // Structured log context expected alongside the warning message.
  const expectedWarnContext = {
    tool: "process",
    toolCallId: "call-timeout",
    threadId: "thread-1",
    turnId: "turn-1",
    timeoutMs: 1,
    timeoutKind: "codex_dynamic_tool_rpc",
    processAction: "poll",
    processSessionId: "rapid-crustacean",
    processRequestedTimeoutMs: 30_000,
    consoleMessage:
      "codex process tool timeout: action=poll sessionId=rapid-crustacean toolTimeoutMs=1 requestedWaitMs=30000; per-tool-call watchdog, not session idle; repeated lines usually mean process-poll retry churn, not model progress",
  };
  expect(warnSpy).toHaveBeenCalledWith("codex dynamic tool call timed out", expectedWarnContext);
});
|
|
|
|
it("emits normalized tool progress around app-server dynamic tool requests", async () => {
  // Shape of the dynamic tool response returned to the app-server.
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  // Subset of per-run agent event fields inspected by this test.
  type RunAgentEvent = {
    data?: {
      args?: Record<string, unknown>;
      isError?: boolean;
      name?: string;
      phase?: string;
      result?: { success?: boolean };
      toolCallId?: string;
    };
    stream?: string;
  };
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  const isToolExecutionDiagnostic = (
    event: DiagnosticEventPayload,
  ): event is ToolExecutionDiagnostic => event.type.startsWith("tool.execution.");

  // Raw secret passed as a tool argument; it must never appear in emitted events.
  const secretToken = "plain-secret-value-12345";

  const harness = createStartedThreadHarness();
  const runEventSpy = vi.fn();
  const phaseSpy = vi.fn();
  const busAgentEvents: AgentEventPayload[] = [];
  const diagnostics: DiagnosticEventPayload[] = [];
  onAgentEvent((event) => busAgentEvents.push(event));
  const stopDiagnostics = onInternalDiagnosticEvent((event) => diagnostics.push(event));

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.onAgentEvent = runEventSpy;
  attemptParams.onExecutionPhase = phaseSpy;

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  // "lookup" is not a registered tool in this run, so the call is expected to fail.
  const response = (await harness.handleServerRequest({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "lookup",
      arguments: {
        action: "search",
        token: secretToken,
        text: "hello",
      },
    },
  })) as DynamicToolResponse;
  const firstItem = response.contentItems?.[0];
  expect(response.success).toBe(false);
  expect(firstItem?.type).toBe("inputText");
  expect(firstItem?.text).toMatch(/^Unknown OpenClaw tool: lookup$/u);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
  await flushDiagnosticEvents();
  stopDiagnostics();

  // Per-run agent events: start event carries redacted args.
  const runEvents = runEventSpy.mock.calls.map(([event]) => event) as RunAgentEvent[];
  const toolStart = runEvents.find(
    (event) => event.stream === "tool" && event.data?.phase === "start",
  );
  const startData = toolStart?.data;
  expect(startData?.name).toBe("lookup");
  expect(startData?.toolCallId).toBe("call-1");
  expect(startData?.args?.action).toBe("search");
  expect(startData?.args?.token).toBe("plain-…2345");
  expect(startData?.args?.text).toBe("hello");

  // Per-run agent events: result event is flagged as an error.
  const toolResultEvent = runEvents.find(
    (event) =>
      event.stream === "tool" &&
      event.data?.phase === "result" &&
      event.data.result !== undefined,
  );
  const resultData = toolResultEvent?.data;
  expect(resultData?.name).toBe("lookup");
  expect(resultData?.toolCallId).toBe("call-1");
  expect(resultData?.isError).toBe(true);
  expect(resultData?.result?.success).toBe(false);
  expect(JSON.stringify(runEvents)).not.toContain(secretToken);

  // Global agent event bus receives the same start event with run metadata.
  const busToolStart = busAgentEvents.find(
    (event) => event.stream === "tool" && event.data.phase === "start",
  );
  expect(busToolStart?.runId).toBe("run-1");
  expect(busToolStart?.sessionKey).toBe("agent:main:session-1");
  expect(busToolStart?.data.name).toBe("lookup");

  // Execution phase callbacks share the same backend identification fields.
  const backendInfo = {
    provider: "codex",
    model: "gpt-5.4-codex",
    backend: "codex-app-server",
  };
  expect(phaseSpy).toHaveBeenCalledWith({
    phase: "turn_accepted",
    ...backendInfo,
  });
  expect(phaseSpy).toHaveBeenCalledWith({
    phase: "tool_execution_started",
    ...backendInfo,
    tool: "lookup",
    toolCallId: "call-1",
  });

  // Diagnostics: exactly one started event followed by one error event.
  const diagnosticSummaries = diagnostics.filter(isToolExecutionDiagnostic).map((event) => ({
    type: event.type,
    toolName: event.toolName,
    toolCallId: event.toolCallId,
  }));
  expect(diagnosticSummaries).toEqual([
    {
      type: "tool.execution.started",
      toolName: "lookup",
      toolCallId: "call-1",
    },
    {
      type: "tool.execution.error",
      toolName: "lookup",
      toolCallId: "call-1",
    },
  ]);
  expect(activeDiagnosticToolKeys(diagnostics)).toEqual(new Set());
});
|
|
|
|
it("clears dynamic tool diagnostics after successful app-server tool responses", async () => {
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  const isToolExecutionDiagnostic = (
    event: DiagnosticEventPayload,
  ): event is ToolExecutionDiagnostic => event.type.startsWith("tool.execution.");
  const echoCallId = "call-echo-1";

  const harness = createStartedThreadHarness();
  const diagnostics: DiagnosticEventPayload[] = [];
  const stopDiagnostics = onInternalDiagnosticEvent((event) => diagnostics.push(event));
  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("echo")]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  const response = (await harness.handleServerRequest({
    id: "request-echo-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: echoCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;

  expect(response.success).toBe(true);
  expect(response.contentItems?.[0]).toEqual({
    type: "inputText",
    text: "echo done",
  });
  // Diagnostics are collected right after the tool response, before the
  // item/completed notification and turn completion below.
  await flushDiagnosticEvents();
  stopDiagnostics();

  const summaries = diagnostics.filter(isToolExecutionDiagnostic).map((event) => ({
    type: event.type,
    toolName: event.toolName,
    toolCallId: event.toolCallId,
  }));
  expect(summaries).toContainEqual({
    type: "tool.execution.started",
    toolName: "echo",
    toolCallId: echoCallId,
  });
  expect(summaries.at(-1)).toEqual({
    type: "tool.execution.completed",
    toolName: "echo",
    toolCallId: echoCallId,
  });
  const startedSummaries = summaries.filter((event) => event.type === "tool.execution.started");
  expect(startedSummaries).toHaveLength(1);
  expect(activeDiagnosticToolKeys(diagnostics)).toEqual(new Set());

  // Deliver the app-server's own completion notification for the same call,
  // then finish the turn so the attempt can settle.
  await harness.notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      completedAtMs: Date.now(),
      item: {
        type: "dynamicToolCall",
        id: echoCallId,
        namespace: null,
        tool: "echo",
        arguments: {},
        status: "completed",
        contentItems: [{ type: "inputText", text: "echo done" }],
        success: true,
        durationMs: 1,
      },
    },
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
});
|
|
|
|
it("emits request-boundary terminal diagnostics when a wrapped dynamic tool does not", async () => {
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  const isToolExecutionDiagnostic = (
    event: DiagnosticEventPayload,
  ): event is ToolExecutionDiagnostic => event.type.startsWith("tool.execution.");
  const toolCallId = "call-echo-unobserved-terminal";

  const harness = createStartedThreadHarness();
  const diagnostics: DiagnosticEventPayload[] = [];
  const stopDiagnostics = onInternalDiagnosticEvent((event) => diagnostics.push(event));

  // Default implementation simply returns the echo payload.
  const executeSpy = vi.fn(async () => ({
    content: [{ type: "text" as const, text: "echo done" }],
    details: {},
  }));
  const rawTool = {
    name: "echo",
    description: "echo test tool",
    parameters: {
      type: "object",
      properties: {},
      additionalProperties: false,
    },
    execute: executeSpy,
  };
  // First invocation additionally emits a completed diagnostic for the same
  // tool call id but attributed to a different run id.
  executeSpy.mockImplementationOnce(async () => {
    emitTrustedDiagnosticEvent({
      type: "tool.execution.completed",
      runId: "other-run",
      sessionId: "session-1",
      sessionKey: "agent:main:session-1",
      toolName: "echo",
      toolCallId,
      durationMs: 1,
    });
    return {
      content: [{ type: "text" as const, text: "echo done" }],
      details: {},
    };
  });
  // Keep whatever the hook wrapper adds to the tool object, but swap the
  // execute function back to the raw mock.
  const wrappedButUnhooked = {
    ...wrapToolWithBeforeToolCallHook(rawTool as never),
    execute: rawTool.execute,
  };
  testing.setOpenClawCodingToolsFactoryForTests(() => [wrappedButUnhooked as never]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  const response = (await harness.handleServerRequest({
    id: "request-echo-unobserved-terminal-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: toolCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  expect(response.success).toBe(true);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
  await flushDiagnosticEvents();
  stopDiagnostics();

  // The foreign-run completion must not stand in for this run's terminal
  // event: a run-1 completion is still expected after it.
  const summaries = diagnostics.filter(isToolExecutionDiagnostic).map((event) => ({
    runId: event.runId,
    type: event.type,
    toolName: event.toolName,
    toolCallId: event.toolCallId,
  }));
  expect(summaries).toEqual([
    {
      runId: "run-1",
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId,
    },
    {
      runId: "other-run",
      type: "tool.execution.completed",
      toolName: "echo",
      toolCallId,
    },
    {
      runId: "run-1",
      type: "tool.execution.completed",
      toolName: "echo",
      toolCallId,
    },
  ]);
});
|
|
|
|
it("does not duplicate terminal diagnostics for wrapped dynamic tool blocks", async () => {
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    {
      type:
        | "tool.execution.blocked"
        | "tool.execution.started"
        | "tool.execution.completed"
        | "tool.execution.error";
    }
  >;
  const isToolExecutionDiagnostic = (
    event: DiagnosticEventPayload,
  ): event is ToolExecutionDiagnostic => event.type.startsWith("tool.execution.");
  const blockedCallId = "call-echo-blocked";

  const harness = createStartedThreadHarness();
  const diagnostics: DiagnosticEventPayload[] = [];
  const stopDiagnostics = onInternalDiagnosticEvent((event) => diagnostics.push(event));

  // before_tool_call hook that vetoes every tool call.
  const blockingHook = vi.fn(async () => ({
    block: true,
    blockReason: "blocked by policy",
  }));
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "before_tool_call", handler: blockingHook }]),
  );

  const executeSpy = vi.fn(async () => ({
    content: [{ type: "text" as const, text: "echo done" }],
    details: {},
  }));
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    {
      name: "echo",
      description: "echo test tool",
      parameters: {
        type: "object",
        properties: {},
        additionalProperties: false,
      },
      execute: executeSpy,
    } as never,
  ]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  const response = (await harness.handleServerRequest({
    id: "request-echo-blocked-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: blockedCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  expect(response.success).toBe(false);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
  await flushDiagnosticEvents();
  stopDiagnostics();

  // The hook ran once and the tool body was never reached.
  expect(blockingHook).toHaveBeenCalledTimes(1);
  expect(executeSpy).not.toHaveBeenCalled();

  // Only started + blocked: no extra completed/error event for the same call.
  const summaries = diagnostics.filter(isToolExecutionDiagnostic).map((event) => ({
    type: event.type,
    toolName: event.toolName,
    toolCallId: event.toolCallId,
  }));
  expect(summaries).toEqual([
    {
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: blockedCallId,
    },
    {
      type: "tool.execution.blocked",
      toolName: "echo",
      toolCallId: blockedCallId,
    },
  ]);
});
|
|
|
|
it("does not duplicate terminal diagnostics for wrapped dynamic tool errors", async () => {
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  const isToolExecutionDiagnostic = (
    event: DiagnosticEventPayload,
  ): event is ToolExecutionDiagnostic => event.type.startsWith("tool.execution.");
  const failingCallId = "call-echo-error";

  const harness = createStartedThreadHarness();
  const diagnostics: DiagnosticEventPayload[] = [];
  const stopDiagnostics = onInternalDiagnosticEvent((event) => diagnostics.push(event));

  // Tool body that always rejects.
  const executeSpy = vi.fn(async () => {
    throw new Error("wrapped tool failed");
  });
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    {
      name: "echo",
      description: "echo test tool",
      parameters: {
        type: "object",
        properties: {},
        additionalProperties: false,
      },
      execute: executeSpy,
    } as never,
  ]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");
  // Queue a diagnostic backlog before the tool request is handled.
  emitAsyncDiagnosticBacklog(150);

  const response = (await harness.handleServerRequest({
    id: "request-echo-error-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: failingCallId,
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  })) as DynamicToolResponse;
  // The thrown error message is surfaced as the failed response text.
  expect(response).toEqual({
    success: false,
    contentItems: [{ type: "inputText", text: "wrapped tool failed" }],
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
  await flushDiagnosticEvents();
  stopDiagnostics();

  expect(executeSpy).toHaveBeenCalledTimes(1);

  // Exactly one started and one error event for the failing call.
  const summaries = diagnostics.filter(isToolExecutionDiagnostic).map((event) => ({
    type: event.type,
    toolName: event.toolName,
    toolCallId: event.toolCallId,
  }));
  expect(summaries).toEqual([
    {
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: failingCallId,
    },
    {
      type: "tool.execution.error",
      toolName: "echo",
      toolCallId: failingCallId,
    },
  ]);
});
|
|
|
|
it("does not duplicate terminal diagnostics for wrapped dynamic tool timeout fallbacks", async () => {
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };
  type ToolExecutionDiagnostic = Extract<
    DiagnosticEventPayload,
    { type: "tool.execution.started" | "tool.execution.completed" | "tool.execution.error" }
  >;
  const isToolExecutionDiagnostic = (
    event: DiagnosticEventPayload,
  ): event is ToolExecutionDiagnostic => event.type.startsWith("tool.execution.");
  const timedOutCallId = "call-echo-timeout";

  const harness = createStartedThreadHarness();
  const diagnostics: DiagnosticEventPayload[] = [];
  const stopDiagnostics = onInternalDiagnosticEvent((event) => diagnostics.push(event));

  // Tool body that never settles, so the 1ms timeout from the call arguments
  // is the only way the request can finish.
  const executeSpy = vi.fn(async () => new Promise<never>(() => {}));
  testing.setOpenClawCodingToolsFactoryForTests(() => [
    {
      name: "echo",
      description: "echo test tool",
      parameters: {
        type: "object",
        properties: {},
        additionalProperties: true,
      },
      execute: executeSpy,
    } as never,
  ]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  const response = (await harness.handleServerRequest({
    id: "request-echo-timeout-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: timedOutCallId,
      namespace: null,
      tool: "echo",
      arguments: { timeoutMs: 1 },
    },
  })) as DynamicToolResponse;
  expect(response).toEqual({
    success: false,
    contentItems: [
      {
        type: "inputText",
        text: "OpenClaw dynamic tool call timed out after 1ms while running tool echo.",
      },
    ],
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
  await flushDiagnosticEvents();
  stopDiagnostics();

  expect(executeSpy).toHaveBeenCalledTimes(1);

  // The timeout fallback yields a single error event after the started event.
  const summaries = diagnostics.filter(isToolExecutionDiagnostic).map((event) => ({
    type: event.type,
    toolName: event.toolName,
    toolCallId: event.toolCallId,
  }));
  expect(summaries).toEqual([
    {
      type: "tool.execution.started",
      toolName: "echo",
      toolCallId: timedOutCallId,
    },
    {
      type: "tool.execution.error",
      toolName: "echo",
      toolCallId: timedOutCallId,
    },
  ]);
});
|
|
|
|
it("passes normalized channel context to app-server dynamic tool result hooks", async () => {
  const harness = createStartedThreadHarness();

  // Register an after_tool_call hook so its invocation arguments can be inspected.
  const afterToolCallSpy = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "after_tool_call", handler: afterToolCallSpy }]),
  );
  testing.setOpenClawCodingToolsFactoryForTests(() => [createRuntimeDynamicTool("echo")]);

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.disableTools = false;
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  attemptParams.messageChannel = "telegram";
  attemptParams.messageProvider = "telegram";
  // Channel id is given with a "telegram:" prefix; the hook is expected to see
  // it without that prefix (asserted below).
  attemptParams.currentChannelId = "telegram:-100123";

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  await harness.handleServerRequest({
    id: "request-echo-tool",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-echo-1",
      namespace: null,
      tool: "echo",
      arguments: {},
    },
  });

  // Poll until the hook has fired exactly once.
  await vi.waitFor(() => {
    expect(afterToolCallSpy).toHaveBeenCalledTimes(1);
  });

  // Second argument of the first hook invocation is the context under test.
  const hookContext = afterToolCallSpy.mock.calls[0]?.[1];
  expect(hookContext).toEqual(
    expect.objectContaining({
      agentId: "main",
      sessionId: "session-1",
      sessionKey: "agent:main:session-1",
      runId: "run-1",
      channelId: "-100123",
      toolName: "echo",
      toolCallId: "call-echo-1",
    }),
  );

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;
});
|
|
|
|
it("suppresses normalized tool progress for activity-log dynamic bash requests", async () => {
  const harness = createStartedThreadHarness();
  const runEventSpy = vi.fn();

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.onAgentEvent = runEventSpy;

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("thread/start");

  // A bash call whose command invokes the workspace log_activity.sh script.
  await harness.handleServerRequest({
    id: "request-tool-activity-log",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-log-activity-1",
      namespace: null,
      tool: "bash",
      arguments: {
        command:
          '/bin/bash -lc \'/home/openclaw/.openclaw/workspace/bin/log_activity.sh "web_search" "Grilled salmon research"\'',
      },
    },
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await pendingRun;

  // No event on the "tool" stream may have reached the run callback.
  const toolStreamCalls = runEventSpy.mock.calls.filter(
    ([event]) => (event as { stream?: string }).stream === "tool",
  );
  expect(toolStreamCalls).toHaveLength(0);
});
|
|
|
|
it("releases the session when Codex never completes after a dynamic tool response", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;
  type DynamicToolResponse = {
    contentItems?: Array<{ text?: string; type?: string }>;
    success?: boolean;
  };

  // Filled in once the attempt registers its server-request handler on the fake client.
  let serverRequestHandler: ServerRequestHandler | undefined;

  // Fake client request(): starts a thread and an in-progress turn, and
  // answers everything else with an empty object.
  const requestSpy = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestSpy,
        // No notifications are ever delivered, so turn/completed never arrives.
        addNotificationHandler: () => () => undefined,
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 200;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    pluginConfig: { appServer: { turnCompletionIdleTimeoutMs: 5 } },
  });
  await vi.waitFor(() => expect(serverRequestHandler).toBeTypeOf("function"), fastWait);

  const response = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as DynamicToolResponse;
  const firstItem = response.contentItems?.[0];
  expect(response.success).toBe(false);
  expect(firstItem?.type).toBe("inputText");
  // Either failure text is accepted.
  expect(firstItem?.text).toMatch(
    /^(Unknown OpenClaw tool: message|Action send requires a target\.)$/u,
  );

  // With no turn/completed forthcoming, the attempt should end as an idle timeout.
  const outcome = await pendingRun;
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );

  // The stuck turn is interrupted on the app-server.
  await vi.waitFor(
    () =>
      expect(requestSpy).toHaveBeenCalledWith(
        "turn/interrupt",
        {
          threadId: "thread-1",
          turnId: "turn-1",
        },
        { timeoutMs: 5_000 },
      ),
    { interval: 1 },
  );

  // The session no longer accepts queued messages for an active run.
  const queuedAfterTimeout = queueActiveRunMessageForTest("session-1", "after timeout");
  expect(queuedAfterTimeout).toBe(false);
});
|
|
|
|
it("closes the app-server client when the active turn goes idle past the attempt timeout", async () => {
  const closeSpy = vi.fn();

  // Fake client request(): the turn starts but never completes, and the
  // interrupt request itself never settles either.
  const requestSpy = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      case "turn/interrupt":
        return new Promise<never>(() => undefined);
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestSpy,
        close: closeSpy,
        addNotificationHandler: () => () => undefined,
        addRequestHandler: () => () => undefined,
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 250;

  const outcome = await runCodexAppServerAttempt(attemptParams);

  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(true);
  expect(outcome.promptError).toBe(
    "codex app-server turn idle timed out waiting for turn/completed",
  );

  // An interrupt was attempted for the stuck turn...
  expect(requestSpy).toHaveBeenCalledWith(
    "turn/interrupt",
    {
      threadId: "thread-1",
      turnId: "turn-1",
    },
    { timeoutMs: 5_000 },
  );
  // ...and the client was closed exactly once.
  expect(closeSpy).toHaveBeenCalledTimes(1);

  const queuedAfterTimeout = queueActiveRunMessageForTest("session-1", "after timeout");
  expect(queuedAfterTimeout).toBe(false);
});
|
|
|
|
it("keeps a progressing active turn alive beyond the original attempt timeout", async () => {
  const harness = createStartedThreadHarness();

  // Real-timer pause used to space out the progress notifications.
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
  // True once the attempt has asked the app-server to interrupt the turn.
  const interruptRequested = () =>
    harness.request.mock.calls.some(([method]) => method === "turn/interrupt");
  // Sends one completed raw assistant message for the active turn.
  const notifyAssistantProgress = (id: string, text: string) =>
    harness.notify({
      method: "rawResponseItem/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "message",
          id,
          role: "assistant",
          content: [{ type: "output_text", text }],
        },
      },
    });

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 100;
  const progressSpy = vi.fn();
  attemptParams.onRunProgress = progressSpy;

  const idleTimeoutMs = 300;
  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: idleTimeoutMs,
    turnAssistantCompletionIdleTimeoutMs: idleTimeoutMs,
    turnTerminalIdleTimeoutMs: idleTimeoutMs,
  });
  await harness.waitForMethod("turn/start");
  await vi.waitFor(
    () =>
      expect(progressSpy).toHaveBeenCalledWith(
        expect.objectContaining({ reason: "turn:start" }),
      ),
    fastWait,
  );

  // Two 60ms pauses add up to more than the 100ms attempt timeout, with a
  // progress notification after each one.
  await pause(60);
  await notifyAssistantProgress("raw-progress-1", "Still working.");
  await pause(60);
  await notifyAssistantProgress("raw-progress-2", "Almost done.");

  expect(interruptRequested()).toBe(false);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });

  const outcome = await pendingRun;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
  expect(outcome.promptError).toBeNull();
  expect(interruptRequested()).toBe(false);

  // Progress callback saw the turn start plus both raw-item notifications.
  const progressReasons = progressSpy.mock.calls.map(([info]) => info.reason);
  expect(progressReasons).toContain("turn:start");
  const rawItemReasons = progressReasons.filter(
    (reason) => reason === "notification:rawResponseItem/completed",
  );
  expect(rawItemReasons).toHaveLength(2);
});
|
|
|
|
it("does not count non-turn app-server requests as turn attempt progress", async () => {
  // A server request carrying no thread/turn params arrives mid-turn. The
  // 100ms attempt timeout is still expected to fire, with "turn:start" logged
  // as the last activity and no extra onRunProgress call.
  type IdleWarnData = { lastActivityReason?: string; timeoutMs?: number };
  const harness = createStartedThreadHarness();
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const onRunProgress = vi.fn();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 100;
  attemptParams.onRunProgress = onRunProgress;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 500,
    turnAssistantCompletionIdleTimeoutMs: 500,
    turnTerminalIdleTimeoutMs: 500,
  });
  await harness.waitForMethod("turn/start");
  await vi.waitFor(() => {
    expect(onRunProgress).toHaveBeenCalledWith(expect.objectContaining({ reason: "turn:start" }));
  }, fastWait);

  await new Promise((resolve) => setTimeout(resolve, 60));
  await harness.handleServerRequest({
    id: "request-account-refresh",
    method: "account/nonTurnRefresh",
    params: {},
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  // The second argument of the matching warn call carries the watchdog details.
  const progressWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for progress",
  )?.[1] as IdleWarnData | undefined;
  expect(progressWarning?.timeoutMs).toBe(100);
  expect(progressWarning?.lastActivityReason).toBe("turn:start");
  expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(true);
  expect(onRunProgress.mock.calls.map(([info]) => info.reason)).toEqual(["turn:start"]);
});
|
|
|
|
it("keeps the turn attempt timeout armed while non-turn requests are pending", async () => {
  // The auth-token refresh is stubbed with a promise that never settles, so
  // the server request stays pending for the rest of the test. The 100ms
  // attempt timeout must still interrupt the turn.
  type IdleWarnData = { lastActivityReason?: string; timeoutMs?: number };
  const harness = createStartedThreadHarness();
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const refreshTokens = vi
    .spyOn(authBridge, "refreshCodexAppServerAuthTokens")
    .mockImplementation(() => new Promise<never>(() => undefined));
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 100;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 500,
    turnAssistantCompletionIdleTimeoutMs: 500,
    turnTerminalIdleTimeoutMs: 500,
  });
  await harness.waitForMethod("turn/start");

  await new Promise((resolve) => setTimeout(resolve, 60));
  // Deliberately not awaited: the handler can never finish with the stub above.
  void harness.handleServerRequest({
    id: "request-auth-refresh",
    method: "account/chatgptAuthTokens/refresh",
    params: {},
  });
  await vi.waitFor(() => {
    expect(refreshTokens).toHaveBeenCalledTimes(1);
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const progressWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for progress",
  )?.[1] as IdleWarnData | undefined;
  expect(progressWarning?.timeoutMs).toBe(100);
  expect(progressWarning?.lastActivityReason).toBe("turn:start");
  expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(true);
});
|
|
|
|
it("counts handled nullable-turn elicitations as turn attempt progress", async () => {
  // An elicitation request for the active thread with `turnId: null` is
  // handled (the bridge is stubbed to accept). After a further 60ms, which is
  // past the 100ms attempt timeout measured from turn start, no interrupt may
  // have been sent and the run must complete normally.
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
  const harness = createStartedThreadHarness();
  vi.spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest").mockResolvedValue({
    action: "accept",
    content: null,
    _meta: null,
  });
  const onRunProgress = vi.fn();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 100;
  attemptParams.onRunProgress = onRunProgress;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 300,
    turnAssistantCompletionIdleTimeoutMs: 300,
    turnTerminalIdleTimeoutMs: 300,
  });
  await harness.waitForMethod("turn/start");
  await vi.waitFor(() => {
    expect(onRunProgress).toHaveBeenCalledWith(expect.objectContaining({ reason: "turn:start" }));
  }, fastWait);

  await pause(60);
  await harness.handleServerRequest({
    id: "request-null-turn-elicitation",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: null,
      mode: "form",
      message: "Approve?",
      requestedSchema: { type: "object", properties: {} },
      serverName: "server-1",
      _meta: null,
    },
  });
  await pause(60);

  const interruptSent = harness.request.mock.calls.some(([method]) => method === "turn/interrupt");
  expect(interruptSent).toBe(false);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });

  const outcome = await pendingRun;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
  expect(outcome.promptError).toBeNull();
});
|
|
|
|
it("counts pending user input requests as turn attempt progress", async () => {
  // A requestUserInput server request for the active turn stays pending while
  // the test waits past the 250ms attempt timeout (75ms + 125ms plus setup).
  // The turn must not be interrupted, and the queued reply "2" is expected to
  // resolve the request with the second listed option ("Deep").
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
  const harness = createStartedThreadHarness();
  const onBlockReply = vi.fn();
  const onRunProgress = vi.fn();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 250;
  attemptParams.onBlockReply = onBlockReply;
  attemptParams.onRunProgress = onRunProgress;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 600,
    turnAssistantCompletionIdleTimeoutMs: 600,
    turnTerminalIdleTimeoutMs: 600,
  });
  await harness.waitForMethod("turn/start");
  await vi.waitFor(() => {
    expect(onRunProgress).toHaveBeenCalledWith(expect.objectContaining({ reason: "turn:start" }));
  }, fastWait);

  await pause(75);
  // Not awaited here: the request only resolves once a reply is queued below.
  const pendingAnswer = harness.handleServerRequest({
    id: "request-user-input",
    method: "item/tool/requestUserInput",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "input-1",
      questions: [
        {
          id: "mode",
          header: "Mode",
          question: "Pick a mode",
          isOther: false,
          isSecret: false,
          options: [
            { label: "Fast", description: "Use less reasoning" },
            { label: "Deep", description: "Use more reasoning" },
          ],
        },
      ],
    },
  });
  await vi.waitFor(() => {
    expect(onBlockReply).toHaveBeenCalledTimes(1);
  }, fastWait);
  await pause(125);

  const interruptSent = harness.request.mock.calls.some(([method]) => method === "turn/interrupt");
  expect(interruptSent).toBe(false);
  expect(queueActiveRunMessageForTest("session-1", "2")).toBe(true);
  await expect(pendingAnswer).resolves.toEqual({
    answers: { mode: { answers: ["Deep"] } },
  });
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });

  const outcome = await pendingRun;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
  expect(outcome.promptError).toBeNull();
});
|
|
|
|
it("does not count mismatched turn-scoped requests as turn attempt progress", async () => {
  // Three server requests target "turn-other" rather than the active "turn-1".
  // The 100ms attempt timeout is expected to fire regardless, still reporting
  // "turn:start" as the last activity.
  type IdleWarnData = { lastActivityReason?: string; timeoutMs?: number };
  const harness = createStartedThreadHarness();
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 100;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 500,
    turnAssistantCompletionIdleTimeoutMs: 500,
    turnTerminalIdleTimeoutMs: 500,
  });
  await harness.waitForMethod("turn/start");

  await new Promise((resolve) => setTimeout(resolve, 60));
  // Elicitation addressed to a different turn.
  await harness.handleServerRequest({
    id: "request-foreign-elicitation",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-other",
      mode: "form",
      message: "Approve?",
      requestedSchema: { type: "object", properties: {} },
      serverName: "server-1",
      _meta: null,
    },
  });
  // User-input request addressed to a different turn.
  await harness.handleServerRequest({
    id: "request-foreign-user-input",
    method: "item/tool/requestUserInput",
    params: {
      threadId: "thread-1",
      turnId: "turn-other",
      itemId: "input-1",
      questions: [],
    },
  });
  // Command approval addressed to a different turn.
  await harness.handleServerRequest({
    id: "request-foreign-approval",
    method: "item/commandExecution/requestApproval",
    params: {
      threadId: "thread-1",
      turnId: "turn-other",
      itemId: "command-1",
    },
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const progressWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for progress",
  )?.[1] as IdleWarnData | undefined;
  expect(progressWarning?.timeoutMs).toBe(100);
  expect(progressWarning?.lastActivityReason).toBe("turn:start");
  expect(harness.request.mock.calls.some(([method]) => method === "turn/interrupt")).toBe(true);
});
|
|
|
|
it("does not count account rate-limit updates as turn completion activity", async () => {
  // After a dynamic tool call is answered, a rate-limit notification arrives.
  // The 5ms completion idle watchdog is still expected to fire, and its warning
  // should name the tool-call response as the last activity.
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;
  type IdleWarnData = { lastActivityReason?: string; timeoutMs?: number };

  // Both handlers are captured when the attempt registers them on the fake client.
  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 60_000,
  });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  }, fastWait);

  const toolResult = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await emitNotification(rateLimitsUpdated(Math.ceil(Date.now() / 1000) + 120));

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const completionWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for completion",
  )?.[1] as IdleWarnData | undefined;
  expect(completionWarning?.timeoutMs).toBe(5);
  expect(completionWarning?.lastActivityReason).toBe("request:item/tool/call:response");
});
|
|
|
|
it("keeps the post-tool completion watchdog armed across dynamic tool completion bookkeeping", async () => {
  // After the tool call is answered, an item/completed notification for the
  // same dynamic tool call arrives. The 5ms completion watchdog is expected to
  // be the one that fires; the 200ms terminal-event warning must not appear.
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;

  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const warned = (text: string) => warn.mock.calls.some(([message]) => message === text);
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 200,
  });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  }, fastWait);

  const toolResult = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await emitNotification({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "call-1",
        tool: "message",
      },
    },
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");
  expect(warned("codex app-server turn idle timed out waiting for completion")).toBe(true);
  expect(warned("codex app-server turn idle timed out waiting for terminal event")).toBe(false);
});
|
|
|
|
it("keeps the post-tool completion watchdog armed across raw tool-output completion", async () => {
  // After the tool call is answered, a raw `custom_tool_call_output` item
  // arrives. The 5ms completion watchdog is expected to fire with that raw
  // item recorded as the last activity; no terminal-event warning may appear.
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;
  type CompletionWarnData = {
    lastActivityReason?: string;
    lastNotificationItemType?: string;
    timeoutMs?: number;
  };

  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 200,
  });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  }, fastWait);

  const toolResult = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await emitNotification({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "custom_tool_call_output",
        id: "call-1",
        call_id: "call-1",
        output: "already sent",
      },
    },
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const completionWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for completion",
  )?.[1] as CompletionWarnData | undefined;
  expect(completionWarning?.timeoutMs).toBe(5);
  expect(completionWarning?.lastActivityReason).toBe("notification:rawResponseItem/completed");
  expect(completionWarning?.lastNotificationItemType).toBe("custom_tool_call_output");

  const terminalWarned = warn.mock.calls.some(
    ([message]) => message === "codex app-server turn idle timed out waiting for terminal event",
  );
  expect(terminalWarned).toBe(false);
});
|
|
|
|
it("keeps waiting when Codex emits a raw assistant item after a dynamic tool response", async () => {
  // A raw assistant message follows the tool response. Although the completion
  // idle timeout is only 5ms, the turn must still be alive 20ms later (the
  // assistant/terminal timeouts are 200ms) and finish once turn/completed arrives.
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;

  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  const interruptRequested = () =>
    request.mock.calls.some(([method]) => method === "turn/interrupt");
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnAssistantCompletionIdleTimeoutMs: 200,
    turnTerminalIdleTimeoutMs: 200,
  });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  }, fastWait);

  const toolResult = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await emitNotification({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm writing the report now." }],
      },
    },
  });
  // Wait well past the 5ms completion timeout before checking for an interrupt.
  await new Promise((resolve) => setTimeout(resolve, 20));
  expect(interruptRequested()).toBe(false);

  await emitNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });

  const outcome = await pendingRun;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
  expect(outcome.promptError).toBeNull();
  expect(interruptRequested()).toBe(false);
});
|
|
|
|
it("times out post-tool raw assistant progress after the assistant idle timeout", async () => {
  // A raw assistant message follows the tool response and then nothing else
  // arrives. With a 5ms assistant idle timeout the run is expected to time out
  // and send turn/interrupt for the active turn.
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;

  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 50,
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 500,
  });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  }, fastWait);

  const toolResult = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await emitNotification({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm writing the report now." }],
      },
    },
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("times out post-native-tool raw assistant progress after the assistant idle timeout", async () => {
  // A commandExecution item starts and completes, then a raw assistant message
  // arrives and the stream goes quiet. The 5ms assistant idle timeout is
  // expected to end the run and trigger turn/interrupt.
  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        // No server requests are issued in this scenario.
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 100,
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 500,
  });
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );

  await emitNotification({
    method: "item/started",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "commandExecution", id: "cmd-1", status: "inProgress" },
    },
  });
  await emitNotification({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "commandExecution", id: "cmd-1", status: "completed" },
    },
  });
  await emitNotification({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm summarizing command output." }],
      },
    },
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", activeTurn, { timeoutMs: 5_000 });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("logs raw assistant item context when the terminal watchdog fires", async () => {
  // With a 5ms terminal idle timeout and a 500ms assistant idle timeout, a raw
  // assistant message after the tool response should lead to the
  // terminal-event warning. That warning is expected to describe the last
  // notification, and the completion warning must not be logged.
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;
  type TerminalWarnData = {
    lastActivityReason?: string;
    lastAssistantTextPreview?: string;
    lastNotificationItemId?: string;
    lastNotificationItemRole?: string;
    lastNotificationItemType?: string;
    lastNotificationMethod?: string;
    threadId?: string;
    timeoutMs?: number;
    turnId?: string;
  };

  let emitNotification: (notification: CodexServerNotification) => Promise<void> = async () =>
    undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const warn = vi.spyOn(embeddedAgentLog, "warn").mockImplementation(() => undefined);
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        addNotificationHandler: (handler: typeof emitNotification) => {
          emitNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnAssistantCompletionIdleTimeoutMs: 500,
    turnTerminalIdleTimeoutMs: 5,
  });
  await vi.waitFor(() => {
    expect(serverRequestHandler).toBeTypeOf("function");
  }, fastWait);

  const toolResult = (await serverRequestHandler?.({
    id: "request-tool-1",
    method: "item/tool/call",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      callId: "call-1",
      namespace: null,
      tool: "message",
      arguments: { action: "send", text: "already sent" },
    },
  })) as { success?: boolean };
  expect(toolResult.success).toBe(false);
  await emitNotification({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-status-1",
        role: "assistant",
        content: [{ type: "output_text", text: "I'm writing the report now." }],
      },
    },
  });

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const terminalWarning = warn.mock.calls.find(
    ([message]) => message === "codex app-server turn idle timed out waiting for terminal event",
  )?.[1] as TerminalWarnData | undefined;
  expect(terminalWarning?.threadId).toBe("thread-1");
  expect(terminalWarning?.turnId).toBe("turn-1");
  expect(terminalWarning?.timeoutMs).toBe(5);
  expect(terminalWarning?.lastActivityReason).toBe("notification:rawResponseItem/completed");
  expect(terminalWarning?.lastNotificationMethod).toBe("rawResponseItem/completed");
  expect(terminalWarning?.lastNotificationItemId).toBe("raw-status-1");
  expect(terminalWarning?.lastNotificationItemType).toBe("message");
  expect(terminalWarning?.lastNotificationItemRole).toBe("assistant");
  expect(terminalWarning?.lastAssistantTextPreview).toBe("I'm writing the report now.");

  const completionWarned = warn.mock.calls.some(
    ([message]) => message === "codex app-server turn idle timed out waiting for completion",
  );
  expect(completionWarned).toBe(false);
});
|
|
|
|
it("releases the session when Codex accepts a turn but never sends progress", async () => {
  // The turn starts and nothing follows. The 5ms completion idle timeout is
  // expected to end the run, send turn/interrupt, and leave no active run for
  // "session-1" to queue a message onto.
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams, { turnCompletionIdleTimeoutMs: 5 });
  await harness.waitForMethod("turn/start");

  const { aborted, timedOut, promptError } = await pendingRun;
  expect(aborted).toBe(true);
  expect(timedOut).toBe(true);
  expect(promptError).toBe("codex app-server turn idle timed out waiting for turn/completed");

  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(harness.request).toHaveBeenCalledWith("turn/interrupt", activeTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
  expect(queueActiveRunMessageForTest("session-1", "after silent turn")).toBe(false);
});
|
|
|
|
it("does not treat global rate-limit notifications as turn progress", async () => {
  // A rate-limit notification is delivered right after the turn starts. The
  // 15ms completion idle timeout is still expected to end the run and send
  // turn/interrupt.
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 200;

  const pendingRun = runCodexAppServerAttempt(attemptParams, { turnCompletionIdleTimeoutMs: 15 });
  await harness.waitForMethod("turn/start");
  // NOTE(review): another test in this file passes epoch seconds
  // (Math.ceil(Date.now() / 1000) + 120) to rateLimitsUpdated, while this call
  // passes a millisecond timestamp. Confirm which unit the helper expects.
  await harness.notify(rateLimitsUpdated(Date.now() + 60_000));
  await new Promise((resolve) => setTimeout(resolve, 20));

  const outcome = await pendingRun;
  const summary = {
    aborted: outcome.aborted,
    timedOut: outcome.timedOut,
    promptError: outcome.promptError,
  };
  expect(summary).toEqual({
    aborted: true,
    timedOut: true,
    promptError: "codex app-server turn idle timed out waiting for turn/completed",
  });

  const activeTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(harness.request).toHaveBeenCalledWith("turn/interrupt", activeTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("yields a macrotask before processing queued app-server notifications", async () => {
  // Delivering a notification must not apply it within the same microtask
  // turn: after a single `await Promise.resolve()` the recent rate limits are
  // still unset, and they only appear once the notify promise has settled.
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 1_000;

  const pendingRun = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");

  // NOTE(review): another test in this file passes epoch seconds to
  // rateLimitsUpdated; this call passes milliseconds. Confirm the unit.
  const rateLimitNotification = rateLimitsUpdated(Date.now() + 60_000);
  const delivery = harness.notify(rateLimitNotification);
  await Promise.resolve();

  expect(readRecentCodexRateLimits()).toBeUndefined();
  await delivery;
  expect(readRecentCodexRateLimits()).toEqual(rateLimitNotification.params);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await expect(pendingRun).resolves.toMatchObject({ aborted: false, timedOut: false });
});
|
|
|
|
it("releases the session when a completed agent message item goes quiet", async () => {
  // Only a completed agentMessage item is delivered (no turn/completed). The
  // attempt is expected to resolve cleanly with that text and then interrupt
  // the still-open turn.
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  let deliver: NotificationHandler = async () => undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request,
      // Capture whichever handler gets registered so the test can push notifications.
      addNotificationHandler: (handler: NotificationHandler) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 200;

  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "agentMessage", id: "msg-final-1", text: "Done." },
    },
  });

  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });

  const interruptedTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("releases the session when a real completed agent message omits text", async () => {
  // The text arrives only through a delta; the completed item carries no
  // `text`. The result is still expected to contain "Done.".
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  let deliver: NotificationHandler = async () => undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request,
      addNotificationHandler: (handler: NotificationHandler) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 200;

  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "item/agentMessage/delta",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "msg-final-1",
      delta: "Done.",
    },
  });
  await deliver({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "agentMessage", id: "msg-final-1" },
    },
  });

  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });

  const interruptedTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("keeps the completed assistant release armed across bookkeeping notifications", async () => {
  // A turn/plan/updated notification follows the completed agent message; the
  // attempt is still expected to resolve with "Done." and interrupt the turn.
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  let deliver: NotificationHandler = async () => undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request,
      addNotificationHandler: (handler: NotificationHandler) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 200;

  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "item/agentMessage/delta",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "msg-final-1",
      delta: "Done.",
    },
  });
  await deliver({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "agentMessage", id: "msg-final-1" },
    },
  });
  // Bookkeeping notification delivered after the assistant message completed.
  await deliver({
    method: "turn/plan/updated",
    params: { threadId: "thread-1", turnId: "turn-1", plan: [] },
  });

  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });

  const interruptedTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("does not release commentary agent message items", async () => {
  // Delivers a completed agentMessage with phase "commentary", waits past the
  // 5 ms assistant-completion idle window, and checks that no interrupt was
  // requested. A later "final_answer" item is then expected to be the only
  // assistant text in the result.
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start") {
      return threadStartResult("thread-1");
    }
    if (method === "turn/start") {
      return turnStartResult("turn-1", "inProgress");
    }
    return {};
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        // Capture whichever handler gets registered so the test can push notifications.
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 200;

  const run = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything()),
    { interval: 1 },
  );
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-commentary-1",
        phase: "commentary",
        text: "I am checking the workspace.",
      },
    },
  });
  await new Promise((resolve) => setTimeout(resolve, 20));

  // Match on the method name only. Interrupt requests in this suite are made
  // with three arguments (method, turn ref, options), so a two-argument
  // `not.toHaveBeenCalledWith("turn/interrupt", expect.anything())` can never
  // match and would pass even if an interrupt had been sent.
  expect(request.mock.calls.map(([method]) => method)).not.toContain("turn/interrupt");
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-final-1",
        phase: "final_answer",
        text: "Done.",
      },
    },
  });

  const result = await run;
  expect({
    aborted: result.aborted,
    timedOut: result.timedOut,
    promptError: result.promptError,
    assistantTexts: result.assistantTexts,
  }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
});
|
|
|
|
it("does not release or return commentary raw assistant response items", async () => {
  // Delivers a raw assistant message with phase "commentary", waits past the
  // 5 ms assistant-completion idle window, and checks that no interrupt was
  // requested. After an explicit turn/completed, the result is expected to
  // contain no assistant texts.
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start") {
      return threadStartResult("thread-1");
    }
    if (method === "turn/start") {
      return turnStartResult("turn-1", "inProgress");
    }
    return {};
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        // Capture whichever handler gets registered so the test can push notifications.
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 200;

  const run = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
  });
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything()),
    { interval: 1 },
  );
  await notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-commentary-1",
        role: "assistant",
        phase: "commentary",
        content: [{ type: "output_text", text: "I am checking the workspace." }],
      },
    },
  });
  await new Promise((resolve) => setTimeout(resolve, 20));

  // Match on the method name only. Interrupt requests in this suite are made
  // with three arguments (method, turn ref, options), so a two-argument
  // `not.toHaveBeenCalledWith("turn/interrupt", expect.anything())` can never
  // match and would pass even if an interrupt had been sent.
  expect(request.mock.calls.map(([method]) => method)).not.toContain("turn/interrupt");
  await notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });

  const result = await run;
  expect({
    aborted: result.aborted,
    timedOut: result.timedOut,
    promptError: result.promptError,
    assistantTexts: result.assistantTexts,
  }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: [],
  });
});
|
|
|
|
it("releases the session after a raw assistant response item without turn completion", async () => {
  // Only a raw assistant message is delivered (no phase, no turn/completed).
  // The attempt is expected to resolve with its text and interrupt the turn.
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  let deliver: NotificationHandler = async () => undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request,
      addNotificationHandler: (handler: NotificationHandler) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 200;

  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 15,
  });
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "raw-final-1",
        role: "assistant",
        content: [{ type: "output_text", text: "Done." }],
      },
    },
  });

  const { aborted, timedOut, promptError, assistantTexts } = await attempt;
  expect({ aborted, timedOut, promptError, assistantTexts }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });

  const interruptedTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("keeps waiting when a current-turn item is still active", async () => {
  // A commandExecution item is started and left in progress while an agent
  // message completes. After waiting past the 5 ms assistant-completion idle
  // window, no interrupt may have been requested. Once the command item
  // completes, the attempt is expected to resolve with "Done.".
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  const request = vi.fn(async (method: string) => {
    if (method === "thread/start") {
      return threadStartResult("thread-1");
    }
    if (method === "turn/start") {
      return turnStartResult("turn-1", "inProgress");
    }
    return {};
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request,
        // Capture whichever handler gets registered so the test can push notifications.
        addNotificationHandler: (handler: typeof notify) => {
          notify = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.timeoutMs = 200;

  const run = runCodexAppServerAttempt(params, {
    turnAssistantCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 50,
  });
  await vi.waitFor(
    () =>
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything()),
    { interval: 1 },
  );
  await notify({
    method: "item/started",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "commandExecution", id: "cmd-1", status: "inProgress" },
    },
  });
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "agentMessage",
        id: "msg-final-1",
        text: "Done.",
      },
    },
  });
  await new Promise((resolve) => setTimeout(resolve, 20));

  // Match on the method name only. Interrupt requests in this suite are made
  // with three arguments (method, turn ref, options), so a two-argument
  // `not.toHaveBeenCalledWith("turn/interrupt", expect.anything())` can never
  // match and would pass even if an interrupt had been sent.
  expect(request.mock.calls.map(([method]) => method)).not.toContain("turn/interrupt");
  await notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: { type: "commandExecution", id: "cmd-1", status: "completed" },
    },
  });

  const result = await run;
  expect({
    aborted: result.aborted,
    timedOut: result.timedOut,
    promptError: result.promptError,
    assistantTexts: result.assistantTexts,
  }).toEqual({
    aborted: false,
    timedOut: false,
    promptError: null,
    assistantTexts: ["Done."],
  });
});
|
|
|
|
it("times out promptly when the last completed non-assistant current-turn item is not followed by turn completion", async () => {
  // A dynamicToolCall item starts and completes, then nothing else arrives.
  // With a 5 ms completion idle timeout (and a 60 s terminal idle timeout) the
  // attempt is expected to report the idle-timeout error and interrupt the turn.
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  let deliver: NotificationHandler = async () => undefined;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(async () => {
    const fakeClient = {
      request,
      addNotificationHandler: (handler: NotificationHandler) => {
        deliver = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    };
    return fakeClient as never;
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 200;

  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 60_000,
  });
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/start", expect.anything(), expect.anything());
    },
    { interval: 1 },
  );
  await deliver({
    method: "item/started",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        tool: "sessions_list",
        arguments: {},
        status: "inProgress",
      },
    },
  });
  await deliver({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        tool: "sessions_list",
        arguments: {},
        status: "completed",
        success: true,
        contentItems: [],
      },
    },
  });

  const outcome = await attempt;
  expect(outcome).toMatchObject({
    aborted: true,
    timedOut: true,
    promptError: "codex app-server turn idle timed out waiting for turn/completed",
  });

  const interruptedTurn = { threadId: "thread-1", turnId: "turn-1" };
  await vi.waitFor(
    () => {
      expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, {
        timeoutMs: 5_000,
      });
    },
    { interval: 1 },
  );
});
|
|
|
|
it("applies before_prompt_build to Codex developer instructions and turn input", async () => {
  // Registers a before_prompt_build hook, runs one turn, then inspects both
  // the hook's arguments and the thread/start and turn/start request params.
  const beforePromptBuild = vi.fn(async () => {
    return {
      systemPrompt: "custom codex system",
      prependSystemContext: "pre system",
      appendSystemContext: "post system",
      prependContext: "queued context",
    };
  });
  const pluginRegistry = createMockPluginRegistry([
    { hookName: "before_prompt_build", handler: beforePromptBuild },
  ]);
  initializeGlobalHookRunner(pluginRegistry);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Seed the session with one prior assistant message.
  SessionManager.open(sessionFile).appendMessage(assistantMessage("previous turn", Date.now()));
  const threadHarness = createStartedThreadHarness();
  const requestParamsFor = (method: string) =>
    threadHarness.requests.find((entry) => entry.method === method)?.params;

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await threadHarness.waitForMethod("turn/start");
  await new Promise<void>((next) => setImmediate(next));
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  expect(beforePromptBuild).toHaveBeenCalledOnce();
  const [hookInput, hookContext] = mockCall(beforePromptBuild, "before_prompt_build") as [
    { messages?: Array<{ role?: string }>; prompt?: string },
    { runId?: string; sessionId?: string },
  ];
  expect(hookInput.prompt).toBe("hello");
  expect(hookInput.messages?.[0]?.role).toBe("assistant");
  expect(hookContext.runId).toBe("run-1");
  expect(hookContext.sessionId).toBe("session-1");

  const threadStartParams = requestParamsFor("thread/start") as
    | { developerInstructions?: string }
    | undefined;
  expect(threadStartParams?.developerInstructions).toContain("pre system\n\ncustom codex system");

  const turnStartParams = requestParamsFor("turn/start") as
    | { input?: Array<{ text?: string; text_elements?: unknown[]; type?: string }> }
    | undefined;
  expect(turnStartParams?.input).toEqual([
    { type: "text", text: "queued context\n\nhello", text_elements: [] },
  ]);
});
|
|
|
|
it("projects mirrored history when starting Codex without a native thread binding", async () => {
  // Seeds the session file with a user and an assistant message, runs a turn
  // with a new prompt, and checks the first turn/start input text.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const history = SessionManager.open(sessionFile);
  history.appendMessage(userMessage("we are fixing the Opik default project", Date.now()));
  history.appendMessage(assistantMessage("Opik default project context", Date.now() + 1));
  const threadHarness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.prompt = "make the default webpage openclaw";

  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("turn/start");
  await new Promise<void>((next) => setImmediate(next));
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const turnStart = threadHarness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStart?.params as { input?: Array<{ text?: string }> } | undefined;
  const inputText = turnStartParams?.input?.[0]?.text ?? "";

  const expectedFragments = [
    "OpenClaw assembled context for this turn:",
    "we are fixing the Opik default project",
    "Opik default project context",
    "Current user request:",
    "make the default webpage openclaw",
  ];
  for (const fragment of expectedFragments) {
    expect(inputText).toContain(fragment);
  }
});
|
|
|
|
it("passes stable workspace files as Codex developer instructions and keeps MEMORY.md as turn context", async () => {
  // Writes seven workspace files, runs one turn, then checks which file
  // contents appear in thread/start developer instructions, which appear in
  // the turn input text, and what the system prompt report records per file.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentsGuidance = "Follow AGENTS guidance.";
  const soulGuidance = "Soul voice goes here.";
  const identityGuidance = "Identity guidance goes here.";
  const toolGuidance = "Tool guidance goes here.";
  const userProfile = "User profile goes here.";
  const heartbeatChecklist = "Heartbeat checklist goes here.";
  const memorySummary = "Memory summary goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  const workspaceFiles: Array<[fileName: string, content: string]> = [
    ["AGENTS.md", agentsGuidance],
    ["SOUL.md", soulGuidance],
    ["IDENTITY.md", identityGuidance],
    ["TOOLS.md", toolGuidance],
    ["USER.md", userProfile],
    ["HEARTBEAT.md", heartbeatChecklist],
    ["MEMORY.md", memorySummary],
  ];
  // Written one at a time, in this order.
  for (const [fileName, content] of workspaceFiles) {
    await fs.writeFile(path.join(workspaceDir, fileName), content);
  }
  const threadHarness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await threadHarness.waitForMethod("turn/start");
  await new Promise<void>((next) => setImmediate(next));
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await attempt;

  const threadStart = threadHarness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStart?.params as {
    config?: { instructions?: string };
    developerInstructions?: string;
  };
  const { config, developerInstructions } = threadStartParams;

  for (const present of [
    "OpenClaw Agent Soul",
    "They define who you are, how you work",
    soulGuidance,
    identityGuidance,
    toolGuidance,
    userProfile,
  ]) {
    expect(developerInstructions).toContain(present);
  }
  for (const absent of [
    heartbeatChecklist,
    memorySummary,
    "Codex loads AGENTS.md natively",
    agentsGuidance,
  ]) {
    expect(developerInstructions).not.toContain(absent);
  }
  expect(config?.instructions).toBeUndefined();

  const turnStart = threadHarness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStart?.params as {
    input?: Array<{ text?: string }>;
  };
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  for (const present of [
    "OpenClaw runtime context for this turn:",
    memorySummary,
    "Codex loads AGENTS.md natively",
    "Current user request:\nhello",
  ]) {
    expect(inputText).toContain(present);
  }
  for (const absent of [
    "does not override Codex system/developer instructions",
    "not developer policy",
    soulGuidance,
    identityGuidance,
    toolGuidance,
    userProfile,
    heartbeatChecklist,
    agentsGuidance,
  ]) {
    expect(inputText).not.toContain(absent);
  }

  const fileStats = new Map(
    result.systemPromptReport?.injectedWorkspaceFiles.map((file) => [file.name, file]) ?? [],
  );
  const expectFileStats = (stats: unknown, rawChars: number, injectedChars: number) => {
    expect(stats).toMatchObject({ rawChars, injectedChars, truncated: false });
  };
  expectFileStats(fileStats.get("SOUL.md"), soulGuidance.length, soulGuidance.length);
  expectFileStats(fileStats.get("IDENTITY.md"), identityGuidance.length, identityGuidance.length);
  expectFileStats(fileStats.get("TOOLS.md"), toolGuidance.length, toolGuidance.length);
  expectFileStats(fileStats.get("USER.md"), userProfile.length, userProfile.length);
  expectFileStats(fileStats.get("MEMORY.md"), memorySummary.length, memorySummary.length);
  // HEARTBEAT.md is the one file expected to report zero injected characters.
  expectFileStats(fileStats.get("HEARTBEAT.md"), heartbeatChecklist.length, 0);
  expectFileStats(fileStats.get("AGENTS.md"), agentsGuidance.length, agentsGuidance.length);
});
|
|
|
|
it("points heartbeat Codex turns at HEARTBEAT.md without injecting its contents", async () => {
  // Runs a heartbeat-triggered turn with a non-empty HEARTBEAT.md and checks
  // that the file's path, but never its body, shows up in the collaboration
  // mode developer instructions.
  const heartbeatBody = "Heartbeat checklist goes here.";
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const heartbeatPath = path.join(workspaceDir, "HEARTBEAT.md");
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(heartbeatPath, heartbeatBody);
  const threadHarness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "heartbeat";
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "heartbeat";

  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("turn/start");
  await new Promise<void>((next) => setImmediate(next));
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStart = threadHarness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStart?.params as {
    developerInstructions?: string;
  };
  expect(threadStartParams.developerInstructions).not.toContain(heartbeatBody);

  const turnStart = threadHarness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStart?.params as {
    input?: Array<{ text?: string }>;
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };
  const inputText = turnStartParams.input?.[0]?.text ?? "";
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";

  expect(inputText).not.toContain(heartbeatBody);
  for (const pointerText of [
    "HEARTBEAT.md exists",
    "Read it before proceeding with this heartbeat",
    heartbeatPath,
  ]) {
    expect(collaborationInstructions).toContain(pointerText);
  }
  expect(collaborationInstructions).not.toContain(heartbeatBody);
});
|
|
|
|
it("omits heartbeat Codex workspace pointers for empty HEARTBEAT.md files", async () => {
  // HEARTBEAT.md holds only newlines here; the heartbeat instructions are
  // expected to be present without the "HEARTBEAT.md exists" pointer.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await fs.mkdir(workspaceDir, { recursive: true });
  const heartbeatPath = path.join(workspaceDir, "HEARTBEAT.md");
  await fs.writeFile(heartbeatPath, "\n\n");
  const threadHarness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "heartbeat";
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "heartbeat";

  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("turn/start");
  await new Promise<void>((next) => setImmediate(next));
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const turnStart = threadHarness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStart?.params as {
    collaborationMode?: {
      settings?: {
        developer_instructions?: string | null;
      };
    };
  };
  const collaborationInstructions =
    turnStartParams.collaborationMode?.settings?.developer_instructions ?? "";

  expect(collaborationInstructions).toContain("This is an OpenClaw heartbeat turn");
  expect(collaborationInstructions).not.toContain("HEARTBEAT.md exists");
});
|
|
|
|
it("remaps Codex bootstrap files under dot-prefixed workspace directories", () => {
  const workspaceDirs = {
    sourceWorkspaceDir: "/real/workspace",
    targetWorkspaceDir: "/sandbox/workspace",
  };

  // A file under a "..context" directory inside the source workspace is
  // expected to be re-rooted under the target workspace.
  const remappedInside = testing.remapCodexContextFilePath({
    file: {
      path: "/real/workspace/..context/SOUL.md",
      content: "Soul voice goes here.",
    },
    ...workspaceDirs,
  });
  expect(remappedInside).toEqual({
    path: "/sandbox/workspace/..context/SOUL.md",
    content: "Soul voice goes here.",
  });

  // A file outside the source workspace is expected to come back unchanged.
  const remappedOutside = testing.remapCodexContextFilePath({
    file: {
      path: "/outside/SOUL.md",
      content: "outside",
    },
    ...workspaceDirs,
  });
  expect(remappedOutside).toEqual({
    path: "/outside/SOUL.md",
    content: "outside",
  });
});
|
|
|
|
it("keeps lightweight cron Codex turns out of OpenClaw bootstrap context", async () => {
  // Runs a cron-triggered lightweight turn and checks that workspace files and
  // the skills prompt stay out of the developer instructions, and that the
  // turn input is exactly the prompt.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const exactCommand =
    "cd /Users/phaedrus/Projects/openclaw && /Users/phaedrus/clawd/scripts/clawsweeper-related-scan.py";
  const agentsBody = "Follow AGENTS guidance.";
  const soulBody = "Soul voice goes here.";
  await fs.mkdir(workspaceDir, { recursive: true });
  await fs.writeFile(path.join(workspaceDir, "AGENTS.md"), agentsBody);
  await fs.writeFile(path.join(workspaceDir, "SOUL.md"), soulBody);
  const threadHarness = createStartedThreadHarness();
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.trigger = "cron";
  attemptParams.prompt = exactCommand;
  attemptParams.bootstrapContextMode = "lightweight";
  attemptParams.bootstrapContextRunKind = "cron";
  attemptParams.skillsSnapshot = {
    prompt: "<available_skills><skill><name>demo</name></skill></available_skills>",
    skills: [],
  };

  const attempt = runCodexAppServerAttempt(attemptParams);
  await threadHarness.waitForMethod("turn/start");
  await new Promise<void>((next) => setImmediate(next));
  await threadHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await attempt;

  const threadStart = threadHarness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStart?.params as {
    developerInstructions?: string;
    config?: Record<string, unknown>;
  };
  expect(threadStartParams.config?.project_doc_max_bytes).toBe(0);
  for (const absent of [soulBody, agentsBody, "<available_skills>"]) {
    expect(threadStartParams.developerInstructions).not.toContain(absent);
  }

  const turnStart = threadHarness.requests.find((entry) => entry.method === "turn/start");
  const turnStartParams = turnStart?.params as {
    input?: Array<{ text?: string }>;
  };
  expect(turnStartParams.input?.[0]?.text).toBe(exactCommand);
  expect(result.systemPromptReport?.skills).toEqual({ promptChars: 0, entries: [] });
});
|
|
|
|
it("fires llm_input, llm_output, and agent_end hooks for codex turns", async () => {
  // Local shapes for the payloads this test inspects.
  type RunAgentEvent = {
    data: {
      endedAt?: number;
      phase?: string;
      startedAt?: number;
      text?: string;
    };
    stream: string;
  };
  type ContextWindowFields = {
    contextTokenBudget?: number;
    contextWindowSource?: string;
    contextWindowReferenceTokens?: number;
  };
  const isLifecycleStart = (event: RunAgentEvent) =>
    event.stream === "lifecycle" && event.data.phase === "start";
  const isLifecycleEnd = (event: RunAgentEvent) =>
    event.stream === "lifecycle" && event.data.phase === "end";
  const isAssistant = (event: RunAgentEvent) => event.stream === "assistant";

  // Register hook spies plus a per-run and a global agent-event listener.
  const llmInput = vi.fn();
  const llmOutput = vi.fn();
  const agentEnd = vi.fn();
  const onRunAgentEvent = vi.fn();
  const globalAgentEvents: AgentEventPayload[] = [];
  onAgentEvent((event) => globalAgentEvents.push(event));
  const registry = createMockPluginRegistry([
    { hookName: "llm_input", handler: llmInput },
    { hookName: "llm_output", handler: llmOutput },
    { hookName: "agent_end", handler: agentEnd },
  ]);
  initializeGlobalHookRunner(registry);

  // Seed the session with one assistant message so history is non-empty.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionManager = SessionManager.open(sessionFile);
  sessionManager.appendMessage(assistantMessage("existing context", Date.now()));
  const harness = createStartedThreadHarness();

  const params = createParams(sessionFile, workspaceDir);
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.onAgentEvent = onRunAgentEvent;
  const attempt = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");
  expect(llmInput).toHaveBeenCalled();
  await new Promise<void>((resolve) => setImmediate(resolve));

  // llm_input payload and context.
  const [inputPayload, inputContext] = mockCall(llmInput, "llm_input") as [
    {
      historyMessages?: Array<{ role?: string }>;
      imagesCount?: number;
      model?: string;
      prompt?: string;
      provider?: string;
      runId?: string;
      sessionId?: string;
      systemPrompt?: string;
    },
    { runId?: string; sessionId?: string; sessionKey?: string },
  ];
  expect(inputPayload.runId).toBe("run-1");
  expect(inputPayload.sessionId).toBe("session-1");
  expect(inputPayload.provider).toBe("codex");
  expect(inputPayload.model).toBe("gpt-5.4-codex");
  expect(inputPayload.prompt).toBe("hello");
  expect(inputPayload.imagesCount).toBe(0);
  expect(inputPayload.historyMessages?.[0]?.role).toBe("assistant");
  expect(inputPayload.systemPrompt).toContain(
    "You are a personal agent running inside OpenClaw.",
  );
  expect(inputPayload.systemPrompt).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);
  expect(inputContext.runId).toBe("run-1");
  expect(inputContext.sessionId).toBe("session-1");
  expect(inputContext.sessionKey).toBe("agent:main:session-1");

  // Stream one assistant delta, then finish the turn.
  await harness.notify({
    method: "item/agentMessage/delta",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "msg-1",
      delta: "hello back",
    },
  });
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  const result = await attempt;

  expect(result.assistantTexts).toEqual(["hello back"]);
  expect(llmOutput).toHaveBeenCalledTimes(1);
  expect(agentEnd).toHaveBeenCalledTimes(1);

  // Per-run agent events: lifecycle start -> assistant -> lifecycle end.
  const runEvents = onRunAgentEvent.mock.calls.map(([event]) => event) as Array<RunAgentEvent>;
  const lifecycleStart = runEvents.find(isLifecycleStart);
  expect(typeof lifecycleStart?.data.startedAt).toBe("number");
  const assistantEvent = runEvents.find(isAssistant);
  expect(assistantEvent?.data).toEqual({ text: "hello back" });
  const lifecycleEnd = runEvents.find(isLifecycleEnd);
  expect(typeof lifecycleEnd?.data.startedAt).toBe("number");
  expect(typeof lifecycleEnd?.data.endedAt).toBe("number");
  const startIndex = runEvents.findIndex(isLifecycleStart);
  const assistantIndex = runEvents.findIndex(isAssistant);
  const endIndex = runEvents.findIndex(isLifecycleEnd);
  expect(startIndex).toBeGreaterThanOrEqual(0);
  expect(assistantIndex).toBeGreaterThan(startIndex);
  expect(endIndex).toBeGreaterThan(assistantIndex);

  // Global agent events carry the run id and session key.
  const globalAssistantEvent = globalAgentEvents.find((event) => event.stream === "assistant");
  expect(globalAssistantEvent?.runId).toBe("run-1");
  expect(globalAssistantEvent?.sessionKey).toBe("agent:main:session-1");
  expect(globalAssistantEvent?.data).toEqual({ text: "hello back" });
  const globalEndEvent = globalAgentEvents.find(
    (event) => event.stream === "lifecycle" && event.data.phase === "end",
  );
  expect(globalEndEvent?.runId).toBe("run-1");
  expect(globalEndEvent?.sessionKey).toBe("agent:main:session-1");

  // llm_output payload and context.
  const [outputPayload, outputContext] = mockCall(llmOutput, "llm_output") as [
    {
      assistantTexts?: string[];
      harnessId?: string;
      lastAssistant?: { role?: string };
      model?: string;
      provider?: string;
      resolvedRef?: string;
      runId?: string;
      sessionId?: string;
    } & ContextWindowFields,
    { runId?: string; sessionId?: string } & ContextWindowFields,
  ];
  expect(outputPayload.runId).toBe("run-1");
  expect(outputPayload.sessionId).toBe("session-1");
  expect(outputPayload.provider).toBe("codex");
  expect(outputPayload.model).toBe("gpt-5.4-codex");
  expect(outputPayload.contextTokenBudget).toBe(150_000);
  expect(outputPayload.contextWindowSource).toBe("agentContextTokens");
  expect(outputPayload.contextWindowReferenceTokens).toBe(200_000);
  expect(outputPayload.resolvedRef).toBe("codex/gpt-5.4-codex");
  expect(outputPayload.harnessId).toBe("codex");
  expect(outputPayload.assistantTexts).toEqual(["hello back"]);
  expect(outputPayload.lastAssistant?.role).toBe("assistant");
  expect(outputContext.runId).toBe("run-1");
  expect(outputContext.sessionId).toBe("session-1");
  expect(outputContext.contextTokenBudget).toBe(150_000);
  expect(outputContext.contextWindowSource).toBe("agentContextTokens");
  expect(outputContext.contextWindowReferenceTokens).toBe(200_000);

  // agent_end payload and context.
  const [endPayload, endContext] = mockCall(agentEnd, "agent_end") as [
    { messages?: Array<{ role?: string }>; success?: boolean },
    { runId?: string; sessionId?: string },
  ];
  expect(endPayload.success).toBe(true);
  expect(endPayload.messages?.some((message) => message.role === "user")).toBe(true);
  expect(endPayload.messages?.some((message) => message.role === "assistant")).toBe(true);
  expect(endContext.runId).toBe("run-1");
  expect(endContext.sessionId).toBe("session-1");
});
|
|
|
|
it("forwards Codex app-server verbose tool summaries and completed output", async () => {
  // With verboseLevel "full", a dynamic tool call is reported to onToolResult
  // both when it starts and when it completes with output.
  const onToolResult = vi.fn();
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.verboseLevel = "full";
  params.onToolResult = onToolResult;

  const attempt = runCodexAppServerAttempt(params);
  await harness.waitForMethod("turn/start");

  // Tool call begins.
  await harness.notify({
    method: "item/started",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        namespace: null,
        tool: "read",
        arguments: { path: "README.md" },
        status: "inProgress",
        contentItems: null,
        success: null,
        durationMs: null,
      },
    },
  });
  // Tool call finishes with text output.
  await harness.notify({
    method: "item/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        type: "dynamicToolCall",
        id: "tool-1",
        namespace: null,
        tool: "read",
        arguments: { path: "README.md" },
        status: "completed",
        contentItems: [{ type: "inputText", text: "file contents" }],
        success: true,
        durationMs: 12,
      },
    },
  });
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  expect(onToolResult).toHaveBeenCalledTimes(2);
  expect(onToolResult).toHaveBeenNthCalledWith(1, {
    text: "📖 Read: `from README.md`",
  });
  expect(onToolResult).toHaveBeenNthCalledWith(2, {
    text: "📖 Read: `from README.md`\n```txt\nfile contents\n```",
  });
});
|
|
|
|
it("registers native hook relay config for an enabled Codex turn and cleans it up", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  const relayOptions = {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
      gatewayTimeoutMs: 4321,
      hookTimeoutSec: 9,
    },
  };
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), relayOptions);
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const startRequest = harness.requests.find((request) => request.method === "thread/start");
  const startConfig = (startRequest?.params as { config?: Record<string, unknown> } | undefined)
    ?.config;
  expect(startConfig?.["features.hooks"]).toBe(true);

  // The PreToolUse hook is a command carrying the configured timeouts.
  const preToolUseHooks = startConfig?.["hooks.PreToolUse"] as
    | Array<{ hooks?: Array<{ command?: string; timeout?: number; type?: string }> }>
    | undefined;
  const firstHook = preToolUseHooks?.[0]?.hooks?.[0];
  expect(firstHook?.type).toBe("command");
  expect(firstHook?.timeout).toBe(9);
  expect(firstHook?.command).toContain("--event pre_tool_use --timeout 4321");

  // The hook state entry is enabled and carries a sha256 trusted hash.
  const hookState = startConfig?.["hooks.state"] as Record<
    string,
    { enabled?: unknown; trusted_hash?: unknown }
  >;
  const preToolUseState = hookState?.["/<session-flags>/config.toml:pre_tool_use:0:0"];
  expect(preToolUseState?.enabled).toBe(true);
  expect(preToolUseState?.trusted_hash).toMatch(/^sha256:[a-f0-9]{64}$/);

  // After the run finishes, the relay registration is gone.
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("forwards command approval requests through the active native hook relay", async () => {
  // The approval bridge is stubbed to always decline.
  const approvalSpy = vi
    .spyOn(approvalBridge, "handleCodexAppServerApprovalRequest")
    .mockResolvedValue({ decision: "decline" });
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.messageChannel = "discord";
  params.currentChannelId = "channel:target";

  const attempt = runCodexAppServerAttempt(params, {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await harness.waitForMethod("turn/start");
  const startRequest = harness.requests.find((request) => request.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(startRequest?.params);
  const lookupRelay = () => nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  expect(lookupRelay()).toBeDefined();

  // Simulate the server asking for approval of a shell command.
  const response = await harness.handleServerRequest({
    id: "request-command-approval",
    method: "item/commandExecution/requestApproval",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "cmd-1",
      command: "/bin/bash -lc 'node -v'",
      cwd: workspaceDir,
    },
  });

  expect(response).toEqual({ decision: "decline" });
  expect(approvalSpy).toHaveBeenCalledTimes(1);
  const bridgeArgs = approvalSpy.mock.calls[0]?.[0];
  expect(bridgeArgs).toMatchObject({
    method: "item/commandExecution/requestApproval",
    requestParams: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "cmd-1",
      command: "/bin/bash -lc 'node -v'",
      cwd: workspaceDir,
    },
    threadId: "thread-1",
    turnId: "turn-1",
  });
  expect(bridgeArgs?.nativeHookRelay).toMatchObject({
    relayId,
    allowedEvents: expect.arrayContaining(["pre_tool_use"]),
  });
  expect(lookupRelay()).toMatchObject({
    channelId: "target",
  });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(lookupRelay()).toBeUndefined();
});
|
|
|
|
it("promotes implicit Codex yolo approval policy when OpenClaw tool policy exists", async () => {
  // A registered before_tool_call hook stands in for an OpenClaw tool policy.
  const registry = createMockPluginRegistry([{ hookName: "before_tool_call", handler: vi.fn() }]);
  initializeGlobalHookRunner(registry);
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const threadStartParams = threadStart?.params as Record<string, unknown> | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("untrusted");
  expect(threadStartParams?.sandbox).toBe("danger-full-access");
});
|
|
|
|
it("keeps implicit Codex yolo approval policy when untrusted approvals are disallowed", () => {
  // Promotion is requested, but the "untrusted" policy is not permitted.
  const runtimeOptions = resolveCodexAppServerRuntimeOptions({ env: {}, requirementsToml: null });

  const outcome = testing.resolveCodexAppServerForOpenClawToolPolicy({
    appServer: runtimeOptions,
    pluginConfig: readCodexPluginConfig({}),
    env: {},
    shouldPromote: true,
    canUseUntrustedApprovalPolicy: false,
  });

  expect(outcome.approvalPolicy).toBe("never");
});
|
|
|
|
it("keeps explicit Codex yolo mode unpromoted when OpenClaw tool policy exists", async () => {
  // A before_tool_call hook is registered, yet the plugin config pins yolo mode.
  const registry = createMockPluginRegistry([{ hookName: "before_tool_call", handler: vi.fn() }]);
  initializeGlobalHookRunner(registry);
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const threadStartParams = threadStart?.params as Record<string, unknown> | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("never");
  expect(threadStartParams?.sandbox).toBe("danger-full-access");
});
|
|
|
|
it("ignores invalid Codex app-server env overrides when promoting tool policy approval", async () => {
  const registry = createMockPluginRegistry([{ hookName: "before_tool_call", handler: vi.fn() }]);
  initializeGlobalHookRunner(registry);
  // Blank mode and an unrecognized approval policy value in the environment.
  vi.stubEnv("OPENCLAW_CODEX_APP_SERVER_MODE", " ");
  vi.stubEnv("OPENCLAW_CODEX_APP_SERVER_APPROVAL_POLICY", "always");
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const threadStartParams = threadStart?.params as Record<string, unknown> | undefined;
  expect(threadStartParams?.approvalPolicy).toBe("untrusted");
});
|
|
|
|
it("keeps the native hook relay default floor for short Codex turns", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();
  const relayFloorMs = 30 * 60_000;

  const startedAtMs = Date.now();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await harness.waitForMethod("turn/start");

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  if (!registration) {
    throw new Error("Expected native hook relay registration");
  }
  // Expiry lands at or just above the floor, with 10s of slack for test latency.
  const relayLifetimeMs = registration.expiresAtMs - startedAtMs;
  expect(relayLifetimeMs).toBeGreaterThanOrEqual(relayFloorMs);
  expect(relayLifetimeMs).toBeLessThan(relayFloorMs + 10_000);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("throttles default native hook relay renewal on current-turn progress", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await harness.waitForMethod("turn/start");

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  const currentExpiry = () =>
    nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)?.expiresAtMs;
  const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  if (!registration) {
    throw new Error("Expected native hook relay registration");
  }
  const firstExpiresAtMs = registration.expiresAtMs;

  // Back-to-back progress on the active turn leaves the expiry unchanged.
  for (const id of ["raw-progress-1", "raw-progress-2"]) {
    await harness.notify({
      method: "rawResponseItem/completed",
      params: {
        threadId: "thread-1",
        turnId: "turn-1",
        item: {
          type: "message",
          id,
          role: "assistant",
          content: [{ type: "output_text", text: "Still working." }],
        },
      },
    });
    expect(currentExpiry()).toBe(firstExpiresAtMs);
  }

  // Progress reported for a different thread also leaves the expiry unchanged.
  await harness.notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "foreign-thread",
      turnId: "turn-1",
      item: {
        type: "message",
        id: "foreign-progress",
        role: "assistant",
        content: [{ type: "output_text", text: "Wrong thread." }],
      },
    },
  });
  expect(currentExpiry()).toBe(firstExpiresAtMs);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("preserves an explicit native hook relay ttl", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();
  const explicitTtlMs = 123_456;

  const startedAtMs = Date.now();
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
      ttlMs: explicitTtlMs,
    },
  });
  await harness.waitForMethod("turn/start");

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
  if (!registration) {
    throw new Error("Expected native hook relay registration");
  }
  // Expiry tracks the configured ttl, with 10s of slack for test latency.
  const relayLifetimeMs = registration.expiresAtMs - startedAtMs;
  expect(relayLifetimeMs).toBeGreaterThanOrEqual(explicitTtlMs);
  expect(relayLifetimeMs).toBeLessThan(explicitTtlMs + 10_000);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("lets Codex app-server approval modes own native permission requests by default", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  // Guardian mode with no explicit relay event list.
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: {
      appServer: {
        mode: "guardian",
      },
    },
  });
  await harness.waitForMethod("turn/start");

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const hookConfig = (threadStart?.params as { config?: Record<string, unknown> } | undefined)
    ?.config;
  expect(hookConfig?.["features.hooks"]).toBe(true);
  expect(Array.isArray(hookConfig?.["hooks.PreToolUse"])).toBe(true);
  expect(hookConfig?.["hooks.PostToolUse"]).toEqual([]);
  expect(hookConfig?.["hooks.Stop"]).toEqual([]);
  // No PermissionRequest hook key is sent at all.
  expect(hookConfig).not.toHaveProperty("hooks.PermissionRequest");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  expect(
    nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)?.allowedEvents,
  ).toEqual(["pre_tool_use", "post_tool_use", "before_agent_finalize"]);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("preserves explicit native permission request relay events in app-server approval modes", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  // Guardian mode, with permission_request explicitly listed as a relay event.
  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    pluginConfig: {
      appServer: {
        mode: "guardian",
      },
    },
    nativeHookRelay: {
      enabled: true,
      events: ["permission_request"],
    },
  });
  await harness.waitForMethod("turn/start");

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const hookConfig = (threadStart?.params as { config?: Record<string, unknown> } | undefined)
    ?.config;
  expect(hookConfig?.["features.hooks"]).toBe(true);
  expect(Array.isArray(hookConfig?.["hooks.PermissionRequest"])).toBe(true);
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  expect(
    nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)?.allowedEvents,
  ).toEqual(["permission_request"]);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("keeps native hook relays alive across startup and long Codex turn timeouts", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  const abortController = new AbortController();

  // Expected ttl = attempt + startup + turn-start timeouts + cleanup grace;
  // startup and turn-start are taken to equal the attempt timeout here.
  const attemptTimeoutMs = 45 * 60_000;
  const cleanupGraceMs = 5 * 60_000;
  const startupTimeoutMs = attemptTimeoutMs;
  const turnStartTimeoutMs = attemptTimeoutMs;
  const expectedRelayTtlMs =
    attemptTimeoutMs + startupTimeoutMs + turnStartTimeoutMs + cleanupGraceMs;
  params.timeoutMs = attemptTimeoutMs;
  params.abortSignal = abortController.signal;

  const startedAtMs = Date.now();
  const attempt = runCodexAppServerAttempt(params, {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  let turnCompleted = false;
  let relayId: string | undefined;
  try {
    await harness.waitForMethod("turn/start");

    const threadStart = harness.requests.find((request) => request.method === "thread/start");
    relayId = extractRelayIdFromThreadRequest(threadStart?.params);
    const registration = nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId);
    if (!registration) {
      throw new Error("Expected native hook relay registration");
    }
    expect(registration.expiresAtMs - startedAtMs).toBeGreaterThanOrEqual(expectedRelayTtlMs);

    await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
    turnCompleted = true;
    await attempt;
    expect(
      nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId),
    ).toBeUndefined();
  } finally {
    // If an assertion failed before the turn completed, finish and abort the
    // run so the long-timeout attempt does not stay pending.
    if (!turnCompleted) {
      await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" }).catch(() => {});
      abortController.abort(new Error("test cleanup"));
      await attempt.catch(() => {});
    }
  }
});
|
|
|
|
it("reuses the Codex native hook relay id across runs for the same session", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const lookupRelay = (id: Parameters<
    typeof nativeHookRelayTesting.getNativeHookRelayRegistrationForTests
  >[0]) => nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(id);

  // First run: starts a fresh thread and finishes.
  const firstHarness = createStartedThreadHarness();
  const firstRun = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await firstHarness.waitForMethod("turn/start");
  await firstHarness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await firstRun;

  const firstStartRequest = firstHarness.requests.find(
    (request) => request.method === "thread/start",
  );
  const firstRelayId = extractRelayIdFromThreadRequest(firstStartRequest?.params);
  expect(lookupRelay(firstRelayId)).toBeUndefined();

  // Second run: resumes the same session under a new run id.
  const secondHarness = createResumeHarness();
  const secondParams = createParams(sessionFile, workspaceDir);
  secondParams.runId = "run-2";
  const secondRun = runCodexAppServerAttempt(secondParams, {
    nativeHookRelay: {
      enabled: true,
      events: ["pre_tool_use"],
    },
  });
  await secondHarness.waitForMethod("turn/start");

  const resumeRequest = secondHarness.requests.find(
    (request) => request.method === "thread/resume",
  );
  const secondRelayId = extractRelayIdFromThreadRequest(resumeRequest?.params);
  expect(secondRelayId).toBe(firstRelayId);
  const resumedRegistration = lookupRelay(firstRelayId);
  expect(resumedRegistration?.runId).toBe("run-2");
  expect(resumedRegistration?.allowedEvents).toEqual(["pre_tool_use"]);

  await secondHarness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await secondRun;
  expect(lookupRelay(firstRelayId)).toBeUndefined();
});
|
|
|
|
it("builds deterministic opaque Codex native hook relay ids", () => {
  const identity = {
    agentId: "dev-codex",
    sessionId: "cu-pr-relay-smoke",
    sessionKey: "agent:dev-codex:cu-pr-relay-smoke",
  };
  const relayId = testing.buildCodexNativeHookRelayId(identity);

  // The id is a fixed value for these inputs and exposes neither the agent id
  // nor the session id.
  expect(relayId).toBe("codex-8810b5252975550c887ff0def512b25e944bac39");
  for (const leaked of ["dev-codex", "cu-pr-relay-smoke"]) {
    expect(relayId).not.toContain(leaked);
  }
});
|
|
|
|
it("sends clearing Codex native hook config when the relay is disabled", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  const harness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: { enabled: false },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const hookConfig = (threadStart?.params as { config?: Record<string, unknown> } | undefined)
    ?.config;
  // Hooks are switched off and every hook list is sent explicitly empty.
  expect(hookConfig?.["features.hooks"]).toBe(false);
  for (const hookKey of [
    "hooks.PreToolUse",
    "hooks.PostToolUse",
    "hooks.PermissionRequest",
    "hooks.Stop",
  ]) {
    expect(hookConfig?.[hookKey]).toEqual([]);
  }
});
|
|
|
|
it("cleans up native hook relay state when turn/start fails", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // The harness rejects turn/start; every other method gets no override.
  const harness = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    throw new Error("turn start exploded");
  });

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: { enabled: true },
  });
  await expect(attempt).rejects.toThrow("turn start exploded");

  const threadStart = harness.requests.find((request) => request.method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("preserves a healthy binding when invalid image cleanup hits a transient thread", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: JSON.stringify([{ name: "message" }]),
  });
  // thread/start yields a different thread id; turn/start then fails with an
  // invalid-image error.
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-transient");
      case "turn/start":
        throw new Error("invalid image_url base64 payload");
      default:
        return undefined;
    }
  });

  await expect(runCodexAppServerAttempt(createParams(sessionFile, workspaceDir))).rejects.toThrow(
    "invalid image_url base64 payload",
  );

  const requestedMethods = harness.requests.map((request) => request.method);
  expect(requestedMethods).toEqual(["thread/start", "turn/start"]);
  // The stored binding still points at the pre-existing thread.
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.threadId).toBe("thread-existing");
});
|
|
|
|
it("preserves a healthy binding when the server rejects unsupported image input", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  // Resuming succeeds on the existing thread; turn/start is rejected.
  const harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/resume":
        return threadStartResult("thread-existing");
      case "turn/start":
        throw new Error("unsupported image input");
      default:
        return {};
    }
  });

  await expect(runCodexAppServerAttempt(createParams(sessionFile, workspaceDir))).rejects.toThrow(
    "unsupported image input",
  );

  const requestedMethods = harness.requests.map((request) => request.method);
  expect(requestedMethods).toEqual(["thread/resume", "turn/start"]);
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.threadId).toBe("thread-existing");
});
|
|
|
|
it("recognizes invalid image payload errors without matching unsupported image input", () => {
  // Each row pairs an error message with the classification the predicate must return.
  const cases: Array<[message: string, expected: boolean]> = [
    ["invalid_image_url", true],
    ["malformed-base64 image payload", true],
    ["unsupported image input", false],
  ];

  for (const [message, expected] of cases) {
    expect(testing.isInvalidCodexImagePayloadError(message)).toBe(expected);
  }
});
|
|
|
|
it("preserves Codex usage-limit reset details when turn/start fails", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Reset timestamp in epoch seconds, 120 seconds after "now".
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;
  const authProfileId = "openai-codex:work";

  // Holder that lets the request stub reach the harness it is registered on.
  const harnessRef: { current?: ReturnType<typeof createStartedThreadHarness> } = {};
  harnessRef.current = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    const activeHarness = harnessRef.current;
    if (!activeHarness) {
      throw new Error("Expected Codex app-server harness to be initialized");
    }
    // Fire the rate-limit notification without awaiting it, then fail the turn/start call.
    void activeHarness.notify(rateLimitsUpdated(resetsAt));
    throw Object.assign(new Error("You've reached your usage limit."), {
      data: { codexErrorInfo: "usageLimitExceeded" },
    });
  });

  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.authProfileId = authProfileId;
  attemptParams.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access",
        refresh: "refresh",
        expires: Date.now() + 60_000,
      },
    },
  };

  const outcome = await runCodexAppServerAttempt(attemptParams);

  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
});
|
|
|
|
it("uses a recent Codex rate-limit snapshot when turn/start omits reset details", async () => {
  const authProfileId = "openai-codex:work";
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Reset timestamp in epoch seconds, 120 seconds after "now".
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;

  // Record a snapshot before the attempt starts; the stub below never sends one.
  rememberCodexRateLimits({
    rateLimits: {
      limitId: "codex",
      limitName: "Codex",
      primary: { usedPercent: 100, windowDurationMins: 300, resetsAt },
      secondary: null,
      credits: null,
      planType: "plus",
      rateLimitReachedType: "rate_limit_reached",
    },
    rateLimitsByLimitId: null,
  });
  const harness = createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    throw Object.assign(new Error("You've reached your usage limit."), {
      data: { codexErrorInfo: "usageLimitExceeded" },
    });
  });

  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.authProfileId = authProfileId;
  attemptParams.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access",
        refresh: "refresh",
        expires: Date.now() + 60_000,
      },
    },
  };

  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");

  const outcome = await attempt;
  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
  // No block timestamp is expected on the profile's usage stats.
  expect(attemptParams.authProfileStore.usageStats?.[authProfileId]?.blockedUntil).toBeUndefined();
});
|
|
|
|
it("refreshes Codex account rate limits when turn/start omits reset details", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // Reset timestamp in epoch seconds, 120 seconds after "now".
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;

  // turn/start fails without reset details; the rate-limit read returns them.
  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "turn/start":
        throw Object.assign(new Error("You've reached your usage limit."), {
          data: { codexErrorInfo: "usageLimitExceeded" },
        });
      case "account/rateLimits/read":
        return rateLimitsUpdated(resetsAt).params;
      default:
        return undefined;
    }
  });

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("account/rateLimits/read");

  const outcome = await attempt;
  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
  expect(outcome.promptError).not.toContain("Codex did not return a reset time");
});
|
|
|
|
it("cleans up native hook relay state when the Codex turn aborts", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const { requests, waitForMethod } = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir), {
    nativeHookRelay: { enabled: true },
  });
  await waitForMethod("turn/start");

  // Recover the relay id from the recorded thread/start request before aborting the run.
  const threadStart = requests.find(({ method }) => method === "thread/start");
  const relayId = extractRelayIdFromThreadRequest(threadStart?.params);
  expect(abortAgentHarnessRun("session-1")).toBe(true);

  const outcome = await attempt;

  expect(outcome.aborted).toBe(true);
  // After the aborted run settles, the relay registration must be gone.
  expect(nativeHookRelayTesting.getNativeHookRelayRegistrationForTests(relayId)).toBeUndefined();
});
|
|
|
|
it("refreshes Codex account rate limits when a failed turn omits reset details", async () => {
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  // Reset timestamp in epoch seconds, 120 seconds after "now".
  const resetsAt = Math.ceil(Date.now() / 1000) + 120;
  const harness = createStartedThreadHarness(async (method) =>
    method === "account/rateLimits/read" ? rateLimitsUpdated(resetsAt).params : undefined,
  );

  const attempt = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await harness.waitForMethod("turn/start");
  // Fail the turn through a notification that carries no reset information.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: {
        id: "turn-1",
        status: "failed",
        error: {
          message: "You've reached your usage limit.",
          codexErrorInfo: "usageLimitExceeded",
        },
      },
    },
  });

  const outcome = await attempt;

  expect(outcome.promptError).toContain("You've reached your Codex subscription usage limit.");
  expect(outcome.promptError).toContain("Next reset in");
  expect(outcome.promptError).not.toContain("Codex did not return a reset time");
  const readRateLimits = harness.requests.some(
    ({ method }) => method === "account/rateLimits/read",
  );
  expect(readRateLimits).toBe(true);
});
|
|
|
|
it("fires agent_end with failure metadata when the codex turn fails", async () => {
  type RecordedAgentEvent = {
    data: { endedAt?: number; error?: string; phase?: string; startedAt?: number };
    stream: string;
  };

  const agentEnd = vi.fn();
  const onRunAgentEvent = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "agent_end", handler: agentEnd }]),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const harness = createStartedThreadHarness();

  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.onAgentEvent = onRunAgentEvent;
  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: {
        id: "turn-1",
        status: "failed",
        error: { message: "codex exploded" },
      },
    },
  });

  const outcome = await attempt;

  expect(outcome.promptError).toBe("codex exploded");
  expect(agentEnd).toHaveBeenCalledTimes(1);

  // Inspect the per-run event stream: lifecycle start and error events, no assistant output.
  const recorded = onRunAgentEvent.mock.calls.map(([event]) => event) as RecordedAgentEvent[];
  const findLifecycle = (phase: string) =>
    recorded.find((event) => event.stream === "lifecycle" && event.data.phase === phase);

  const started = findLifecycle("start");
  expect(typeof started?.data.startedAt).toBe("number");
  const failed = findLifecycle("error");
  expect(typeof failed?.data.startedAt).toBe("number");
  expect(typeof failed?.data.endedAt).toBe("number");
  expect(failed?.data.error).toBe("codex exploded");
  expect(recorded.some((event) => event.stream === "assistant")).toBe(false);

  // The hook receives the failure payload plus the run/session identifiers as context.
  const [hookPayload, hookContext] = mockCall(agentEnd, "agent_end") as [
    { error?: string; success?: boolean },
    { runId?: string; sessionId?: string },
  ];
  expect(hookPayload.success).toBe(false);
  expect(hookPayload.error).toBe("codex exploded");
  expect(hookContext.runId).toBe("run-1");
  expect(hookContext.sessionId).toBe("session-1");
});
|
|
|
|
it("fires llm_output and agent_end when turn/start fails", async () => {
  type LlmOutputPayload = {
    assistantTexts?: string[];
    harnessId?: string;
    model?: string;
    provider?: string;
    resolvedRef?: string;
    runId?: string;
    sessionId?: string;
  };
  type AgentEndPayload = {
    error?: string;
    messages?: Array<{ role?: string }>;
    success?: boolean;
  };
  type RecordedUserMessage = {
    content?: unknown;
    provenance?: unknown;
    role?: string;
    senderId?: unknown;
    senderLabel?: unknown;
    senderName?: unknown;
    senderUsername?: unknown;
    sourceChannel?: unknown;
  };

  const llmInput = vi.fn();
  const llmOutput = vi.fn();
  const agentEnd = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([
      { hookName: "llm_input", handler: llmInput },
      { hookName: "llm_output", handler: llmOutput },
      { hookName: "agent_end", handler: agentEnd },
    ]),
  );
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Put one assistant message into the session file before the attempt runs.
  SessionManager.open(sessionFile).appendMessage(
    assistantMessage("existing context", Date.now()),
  );
  createStartedThreadHarness(async (method) => {
    if (method !== "turn/start") {
      return undefined;
    }
    throw new Error("turn start exploded");
  });

  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.runtimePlan = createCodexRuntimePlanFixture();
  attemptParams.messageChannel = "discord";
  attemptParams.messageProvider = "discord-voice";
  attemptParams.senderId = "user-123";
  attemptParams.senderName = "Test User";
  attemptParams.senderUsername = "testuser";
  attemptParams.inputProvenance = {
    kind: "external_user",
    sourceChannel: "discord",
  };

  await expect(runCodexAppServerAttempt(attemptParams)).rejects.toThrow("turn start exploded");

  // Every hook fires exactly once even though the turn never started.
  expect(llmInput).toHaveBeenCalledTimes(1);
  expect(llmOutput).toHaveBeenCalledTimes(1);
  expect(agentEnd).toHaveBeenCalledTimes(1);

  const [outputPayload] = mockCall(llmOutput, "llm_output") as [LlmOutputPayload, unknown];
  expect(outputPayload.assistantTexts).toEqual([]);
  expect(outputPayload.model).toBe("gpt-5.4-codex");
  expect(outputPayload.provider).toBe("codex");
  expect(outputPayload.resolvedRef).toBe("codex/gpt-5.4-codex");
  expect(outputPayload.harnessId).toBe("codex");
  expect(outputPayload.runId).toBe("run-1");
  expect(outputPayload.sessionId).toBe("session-1");

  const [endPayload] = mockCall(agentEnd, "agent_end") as [AgentEndPayload, unknown];
  expect(endPayload.success).toBe(false);
  expect(endPayload.error).toBe("turn start exploded");
  expect(endPayload.messages?.some((message) => message.role === "assistant")).toBe(true);

  // The user message handed to agent_end carries the sender and provenance set on the params.
  const recordedUser = endPayload.messages?.find((message) => message.role === "user") as
    | RecordedUserMessage
    | undefined;
  expect(recordedUser).toMatchObject({
    role: "user",
    content: "hello",
    sourceChannel: "discord",
    senderId: "user-123",
    senderName: "Test User",
    senderUsername: "testuser",
    senderLabel: "Test User (user-123)",
    provenance: {
      kind: "external_user",
      sourceChannel: "discord",
    },
  });
});
|
|
|
|
it("fires agent_end with success false when the codex turn is aborted", async () => {
  const agentEnd = vi.fn();
  initializeGlobalHookRunner(
    createMockPluginRegistry([{ hookName: "agent_end", handler: agentEnd }]),
  );
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { pluginConfig: { appServer: { mode: "yolo" } } },
  );

  // Abort only once the turn has actually been requested.
  await harness.waitForMethod("turn/start");
  expect(abortAgentHarnessRun("session-1")).toBe(true);

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);
  expect(agentEnd).toHaveBeenCalledTimes(1);
  const [hookPayload] = mockCall(agentEnd, "agent_end") as [{ success?: boolean }, unknown];
  expect(hookPayload.success).toBe(false);
});
|
|
|
|
it("forwards queued user input and aborts the active app-server turn", async () => {
  type ThreadStartParams = {
    approvalPolicy?: string;
    approvalsReviewer?: string;
    developerInstructions?: string;
    model?: string;
    sandbox?: string;
  };

  const harness = createStartedThreadHarness();
  const methodsSeen = () => harness.requests.map((entry) => entry.method);
  const findRequest = (method: string) =>
    harness.requests.find((entry) => entry.method === method);

  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { pluginConfig: { appServer: { mode: "yolo" } } },
  );
  await harness.waitForMethod("turn/start");

  // Queue a follow-up message and wait until it shows up as a turn/steer request.
  expect(queueActiveRunMessageForTest("session-1", "more context", { debounceMs: 1 })).toBe(true);
  await vi.waitFor(() => expect(methodsSeen()).toContain("turn/steer"), {
    interval: 1,
  });
  // Abort the run and wait until it shows up as a turn/interrupt request.
  expect(abortAgentHarnessRun("session-1")).toBe(true);
  await vi.waitFor(() => expect(methodsSeen()).toContain("turn/interrupt"), { interval: 1 });

  const outcome = await attempt;
  expect(outcome.aborted).toBe(true);

  const startParams = findRequest("thread/start")?.params as ThreadStartParams | undefined;
  expect(startParams?.model).toBe("gpt-5.4-codex");
  expect(startParams?.approvalPolicy).toBe("never");
  expect(startParams?.sandbox).toBe("danger-full-access");
  expect(startParams?.approvalsReviewer).toBe("user");
  expect(startParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);

  expect(findRequest("turn/steer")?.params).toEqual({
    threadId: "thread-1",
    expectedTurnId: "turn-1",
    input: [{ type: "text", text: "more context", text_elements: [] }],
  });
  expect(findRequest("turn/interrupt")?.params).toEqual({
    threadId: "thread-1",
    turnId: "turn-1",
  });
});
|
|
|
|
it("batches default queued steering before sending turn/steer", async () => {
  const harness = createStartedThreadHarness();
  const steerRequests = () =>
    harness.requests.filter((entry) => entry.method === "turn/steer");
  // Both queued messages are expected to arrive in one turn/steer request.
  const expectedSteer = {
    method: "turn/steer",
    params: {
      threadId: "thread-1",
      expectedTurnId: "turn-1",
      input: [
        { type: "text", text: "first", text_elements: [] },
        { type: "text", text: "second", text_elements: [] },
      ],
    },
  };

  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await harness.waitForMethod("turn/start");

  expect(queueActiveRunMessageForTest("session-1", "first", { debounceMs: 5 })).toBe(true);
  expect(queueActiveRunMessageForTest("session-1", "second", { debounceMs: 5 })).toBe(true);

  await vi.waitFor(() => expect(steerRequests()).toEqual([expectedSteer]), { interval: 1 });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
});
|
|
|
|
it("resolves queued steering only after turn/steer is accepted", async () => {
  // Client stub whose request call always resolves.
  const clientRequest = vi.fn(async () => ({ turnId: "turn-1" }));
  const steering = testing.createCodexSteeringQueue({
    client: { request: clientRequest } as never,
    threadId: "thread-1",
    turnId: "turn-1",
    answerPendingUserInput: () => false,
    signal: new AbortController().signal,
  });

  const queued = steering.queue("accepted", { debounceMs: 0 });
  await expect(queued).resolves.toBeUndefined();

  expect(clientRequest).toHaveBeenCalledWith("turn/steer", {
    threadId: "thread-1",
    expectedTurnId: "turn-1",
    input: [{ type: "text", text: "accepted", text_elements: [] }],
  });
});
|
|
|
|
it("rejects queued steering when turn/steer is rejected", async () => {
  // Client stub whose request call always fails.
  const clientRequest = vi.fn(async () => {
    throw new Error("cannot steer a compact turn");
  });
  const steering = testing.createCodexSteeringQueue({
    client: { request: clientRequest } as never,
    threadId: "thread-1",
    turnId: "turn-1",
    answerPendingUserInput: () => false,
    signal: new AbortController().signal,
  });

  const queued = steering.queue("rejected", { debounceMs: 0 });
  await expect(queued).rejects.toThrow("cannot steer a compact turn");

  // The steer request was still issued with the queued text.
  expect(clientRequest).toHaveBeenCalledWith("turn/steer", {
    threadId: "thread-1",
    expectedTurnId: "turn-1",
    input: [{ type: "text", text: "rejected", text_elements: [] }],
  });
});
|
|
|
|
it("rejects queued steering when the run aborts before debounce flush", async () => {
  const abortController = new AbortController();
  const clientRequest = vi.fn(async () => ({ turnId: "turn-1" }));
  const steering = testing.createCodexSteeringQueue({
    client: { request: clientRequest } as never,
    threadId: "thread-1",
    turnId: "turn-1",
    answerPendingUserInput: () => false,
    signal: abortController.signal,
  });

  // Attach the rejection expectation before aborting so the rejection always has a handler.
  const pending = steering.queue("aborted", { debounceMs: 0 });
  const rejection = expect(pending).rejects.toThrow("codex app-server steering queue aborted");
  abortController.abort();

  await rejection;
  // The abort happened before any request went out.
  expect(clientRequest).not.toHaveBeenCalled();
});
|
|
|
|
it("flushes pending default queued steering during normal turn cleanup", async () => {
  const harness = createStartedThreadHarness();

  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await harness.waitForMethod("turn/start");

  // Queue with a 30 s debounce and then complete the turn straight away.
  const accepted = queueActiveRunMessageForTest("session-1", "late steer", {
    debounceMs: 30_000,
  });
  expect(accepted).toBe(true);

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // The queued message must have been sent as a single turn/steer request.
  const steerRequests = harness.requests.filter((entry) => entry.method === "turn/steer");
  expect(steerRequests).toEqual([
    {
      method: "turn/steer",
      params: {
        threadId: "thread-1",
        expectedTurnId: "turn-1",
        input: [{ type: "text", text: "late steer", text_elements: [] }],
      },
    },
  ]);
});
|
|
|
|
it("batches explicit all-mode steering before sending turn/steer", async () => {
  const harness = createStartedThreadHarness();
  const steerRequests = () =>
    harness.requests.filter((entry) => entry.method === "turn/steer");
  // Both queued messages are expected to arrive in one turn/steer request.
  const expectedSteer = {
    method: "turn/steer",
    params: {
      threadId: "thread-1",
      expectedTurnId: "turn-1",
      input: [
        { type: "text", text: "first", text_elements: [] },
        { type: "text", text: "second", text_elements: [] },
      ],
    },
  };

  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await harness.waitForMethod("turn/start");

  expect(queueActiveRunMessageForTest("session-1", "first", { steeringMode: "all" })).toBe(true);
  expect(queueActiveRunMessageForTest("session-1", "second", { steeringMode: "all" })).toBe(true);

  await vi.waitFor(() => expect(steerRequests()).toEqual([expectedSteer]), { interval: 1 });

  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;
});
|
|
|
|
it("routes request_user_input prompts through the active run follow-up queue", async () => {
  type ServerRequest = { id: string; method: string; params?: unknown };
  type ServerRequestHandler = (request: ServerRequest) => Promise<unknown>;
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;

  // Handlers registered on the fake client are captured here so the test can drive them.
  let deliverNotification: NotificationHandler = async () => undefined;
  let serverRequestHandler: ServerRequestHandler | undefined;
  const clientRequest = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        return turnStartResult();
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: clientRequest,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          serverRequestHandler = handler;
          return () => undefined;
        },
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.onBlockReply = vi.fn();
  const attempt = runCodexAppServerAttempt(attemptParams);
  await vi.waitFor(
    () => expect(clientRequest.mock.calls.map(([method]) => method)).toContain("turn/start"),
    { interval: 1 },
  );
  await vi.waitFor(() => expect(serverRequestHandler).toBeTypeOf("function"), fastWait);

  // Send a request-user-input server request with a single two-option question.
  const pendingResponse = serverRequestHandler?.({
    id: "request-input-1",
    method: "item/tool/requestUserInput",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      itemId: "ask-1",
      questions: [
        {
          id: "mode",
          header: "Mode",
          question: "Pick a mode",
          isOther: false,
          isSecret: false,
          options: [
            { label: "Fast", description: "Use less reasoning" },
            { label: "Deep", description: "Use more reasoning" },
          ],
        },
      ],
    },
  });

  await vi.waitFor(() => expect(attemptParams.onBlockReply).toHaveBeenCalledTimes(1), fastWait);
  // Queueing "2" must resolve the pending request with the second option, "Deep".
  expect(queueActiveRunMessageForTest("session-1", "2")).toBe(true);
  await expect(pendingResponse).resolves.toEqual({
    answers: { mode: { answers: ["Deep"] } },
  });

  // The queued text must not also have been sent as a turn/steer for this turn.
  const recordedCalls = clientRequest.mock.calls as unknown as Array<[string, unknown]>;
  const steeredActiveTurn = recordedCalls.some(
    ([method, callParams]) =>
      method === "turn/steer" &&
      (callParams as { expectedTurnId?: string } | undefined)?.expectedTurnId === "turn-1",
  );
  expect(steeredActiveTurn).toBe(false);

  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await attempt;
});
|
|
|
|
it("does not leak unhandled rejections when shutdown closes before interrupt", async () => {
  // Collect every unhandled rejection raised while the attempt runs.
  const leaked: unknown[] = [];
  const recordRejection = (reason: unknown) => {
    leaked.push(reason);
  };
  process.on("unhandledRejection", recordRejection);
  try {
    // turn/interrupt fails with a "client is closed" error.
    const harness = createStartedThreadHarness(async (method) => {
      if (method === "turn/interrupt") {
        throw new Error("codex app-server client is closed");
      }
    });
    const shutdown = new AbortController();
    const attemptParams = createParams(
      path.join(tempDir, "session.jsonl"),
      path.join(tempDir, "workspace"),
    );
    attemptParams.abortSignal = shutdown.signal;

    const attempt = runCodexAppServerAttempt(attemptParams);
    await harness.waitForMethod("turn/start");
    shutdown.abort("shutdown");

    const outcome = await attempt;
    expect(outcome.aborted).toBe(true);
    // Wait one setImmediate turn before checking what was collected.
    await new Promise((resolve) => setImmediate(resolve));
    expect(leaked).toStrictEqual([]);
  } finally {
    process.off("unhandledRejection", recordRejection);
  }
});
|
|
|
|
it("forwards image attachments to the app-server turn input", async () => {
  type TurnInputItem = { text?: string; text_elements?: unknown[]; type?: string; url?: string };

  const pngBase64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=";
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.model = createCodexTestModel("codex", ["text", "image"]);
  attemptParams.images = [
    {
      type: "image",
      mimeType: "image/png",
      data: pngBase64,
    },
  ];

  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await attempt;

  // The image must follow the text item as a base64 data URL.
  const turnStart = harness.requests.find(({ method }) => method === "turn/start");
  const sentParams = turnStart?.params as { input?: TurnInputItem[] } | undefined;
  expect(sentParams?.input).toEqual([
    { type: "text", text: "hello", text_elements: [] },
    { type: "image", url: `data:image/png;base64,${pngBase64}` },
  ]);
});
|
|
|
|
it("does not drop turn completion notifications emitted while turn/start is in flight", async () => {
  // Declared before assignment so the request stub can call back into the harness.
  let harness: ReturnType<typeof createAppServerHarness>;
  harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        // Emit the completion before the turn/start response itself is returned.
        await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
        return turnStartResult("turn-1", "completed");
      default:
        return {};
    }
  });

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const outcome = await runCodexAppServerAttempt(attemptParams);
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("does not fail when a buffered terminal notification is followed by client close", async () => {
  // Declared before assignment so the request stub can call back into the harness.
  let harness: ReturnType<typeof createAppServerHarness>;
  // Resolved by the stub once both notifications have been sent during turn/start.
  let markTerminalSent!: () => void;
  const terminalSent = new Promise<void>((resolve) => {
    markTerminalSent = resolve;
  });
  harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        await harness.notify({
          method: "item/started",
          params: {
            threadId: "thread-1",
            turnId: "turn-1",
            item: { id: "tool-1", type: "commandExecution" },
          },
        });
        await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
        markTerminalSent();
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });

  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { turnTerminalIdleTimeoutMs: 60_000 },
  );
  await terminalSent;
  // Wait one setImmediate turn, then close the client.
  await new Promise<void>((resolve) => setImmediate(resolve));
  harness.close();

  const outcome = await attempt;
  expect(outcome.promptError ?? undefined).toBeUndefined();
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("does not time out when turn progress arrives before turn/start returns", async () => {
  // Declared before assignment so the request stub can call back into the harness.
  let harness: ReturnType<typeof createAppServerHarness>;
  harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        // Send turn/started before the turn/start response itself is returned.
        await harness.notify({
          method: "turn/started",
          params: {
            threadId: "thread-1",
            turnId: "turn-1",
            turn: { id: "turn-1", status: "inProgress" },
          },
        });
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.timeoutMs = 60_000;

  const attempt = runCodexAppServerAttempt(attemptParams, {
    turnCompletionIdleTimeoutMs: 5,
    turnTerminalIdleTimeoutMs: 60_000,
  });
  await harness.waitForMethod("turn/start");
  // Wait 20 ms, longer than the 5 ms completion idle timeout, and confirm no interrupt was sent.
  await new Promise((resolve) => setTimeout(resolve, 20));
  const interrupted = harness.request.mock.calls.some(([method]) => method === "turn/interrupt");
  expect(interrupted).toBe(false);
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });

  const outcome = await attempt;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("completes when turn/start returns a terminal turn without a follow-up notification", async () => {
  // No notification is ever sent here; the turn/start response is already completed.
  const harness = createAppServerHarness(async (method) => {
    switch (method) {
      case "thread/start":
        return threadStartResult();
      case "turn/start":
        return {
          turn: {
            id: "turn-1",
            status: "completed",
            items: [{ type: "agentMessage", id: "msg-1", text: "done from response" }],
          },
        };
      default:
        return {};
    }
  });

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const outcome = await runCodexAppServerAttempt(attemptParams);

  const methodsSeen = harness.requests.map(({ method }) => method);
  expect(methodsSeen).toContain("turn/start");
  expect(outcome.assistantTexts).toEqual(["done from response"]);
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("surfaces Codex-native image generation saved paths as reply media", async () => {
  const savedImagePath = "/tmp/codex-home/generated_images/session-1/ig_123.png";
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );

  const attempt = runCodexAppServerAttempt(attemptParams);
  await harness.waitForMethod("turn/start");
  // Complete the turn with a single image-generation item and no assistant message.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [
          {
            type: "imageGeneration",
            id: "ig_123",
            status: "completed",
            revisedPrompt: "A tiny blue square",
            result: "Zm9v",
            savedPath: savedImagePath,
          },
        ],
      },
    },
  });

  const outcome = await attempt;
  expect(outcome.assistantTexts).toEqual([]);
  expect(outcome.toolMediaUrls).toEqual([savedImagePath]);
});
|
|
|
|
it("does not complete on unscoped turn/completed notifications", async () => {
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  // Flag that flips once the attempt promise resolves.
  let settled = false;
  void attempt.then(() => {
    settled = true;
  });

  await harness.waitForMethod("turn/start");
  // First completion has no threadId; the attempt must stay pending.
  await harness.notify({
    method: "turn/completed",
    params: {
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-wrong", text: "wrong completion" }],
      },
    },
  });
  await new Promise<void>((resolve) => setImmediate(resolve));
  expect(settled).toBe(false);

  // Second completion carries the threadId and supplies the final text.
  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-right", text: "final completion" }],
      },
    },
  });

  const outcome = await attempt;
  expect(outcome.assistantTexts).toEqual(["final completion"]);
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("releases completion when Codex raw-events an interrupted turn marker", async () => {
  const abortMarkerText =
    "<turn_aborted>\nThe user interrupted the previous turn on purpose. Any running unified exec processes may still be running in the background. If any tools/commands were aborted, they may have partially executed.\n</turn_aborted>";
  const harness = createStartedThreadHarness();
  const attempt = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
    { turnTerminalIdleTimeoutMs: 60_000 },
  );
  // Flag that flips once the attempt promise resolves.
  let settled = false;
  void attempt.then(() => {
    settled = true;
  });

  await harness.waitForMethod("turn/start");
  // Only a raw response item is sent: a user message holding the <turn_aborted> marker.
  await harness.notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        id: "abort-marker-1",
        type: "message",
        role: "user",
        content: [
          {
            type: "input_text",
            text: abortMarkerText,
          },
        ],
      },
    },
  });

  const outcome = await attempt;
  expect(settled).toBe(true);
  expect(outcome.aborted).toBe(true);
  expect(outcome.timedOut).toBe(false);
  expect(outcome.promptError).toBeNull();
  // No turn/interrupt request was sent to the server.
  const interrupted = harness.request.mock.calls.some(([method]) => method === "turn/interrupt");
  expect(interrupted).toBe(false);
});
|
|
|
|
it("releases completion when the app-server client closes during an active turn", async () => {
  // Closing the client while the turn is still running must settle the run
  // with a prompt error, not as an abort or a timeout.
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const run = runCodexAppServerAttempt(attemptParams, { turnTerminalIdleTimeoutMs: 60_000 });

  await harness.waitForMethod("turn/start");
  // Yield once so work queued by turn/start can run before the close.
  await new Promise<void>((resolve) => setImmediate(resolve));
  harness.close();

  const outcome = await run;
  expect(outcome.promptError).toBe("codex app-server client closed before turn completed");
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("does not fail a turn when the client closes after terminal completion is queued", async () => {
  const harness = createStartedThreadHarness();
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  const run = runCodexAppServerAttempt(attemptParams, { turnTerminalIdleTimeoutMs: 60_000 });

  await harness.waitForMethod("turn/start");
  // Start delivering the completion first, close the client, and only then
  // await the delivery: the close lands while completion is already queued.
  const completionDelivery = harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  harness.close();
  await completionDelivery;

  const outcome = await run;
  // promptError may be null or undefined; both count as "no error" here.
  expect(outcome.promptError ?? undefined).toBeUndefined();
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("does not treat a user prompt containing the interrupted marker as terminal", async () => {
  // When the user's own prompt is the <turn_aborted> marker text, echoing it
  // back as a raw user item must not end the turn; only turn/completed does.
  const harness = createStartedThreadHarness();
  const markerPrompt =
    "<turn_aborted>\nThe user interrupted the previous turn on purpose. Any running unified exec processes may still be running in the background. If any tools/commands were aborted, they may have partially executed.\n</turn_aborted>";
  const params = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  params.prompt = markerPrompt;
  const run = runCodexAppServerAttempt(params, { turnTerminalIdleTimeoutMs: 60_000 });

  let resolved = false;
  // Track fulfilment only. The no-op rejection branch keeps the derived promise
  // from surfacing as an unhandled rejection; a failing run is still reported
  // by the `await run` below.
  void run.then(
    () => {
      resolved = true;
    },
    () => undefined,
  );

  await harness.waitForMethod("turn/start");
  await harness.notify({
    method: "rawResponseItem/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      item: {
        id: "user-prompt-1",
        type: "message",
        role: "user",
        content: [
          {
            type: "input_text",
            text: markerPrompt,
          },
        ],
      },
    },
  });
  // Give queued callbacks a chance to run before checking the flag.
  await new Promise<void>((resolve) => setImmediate(resolve));
  expect(resolved).toBe(false);

  await harness.notify({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ type: "agentMessage", id: "msg-1", text: "It marks an interrupted turn." }],
      },
    },
  });

  const result = await run;
  expect(resolved).toBe(true);
  expect(result.aborted).toBe(false);
  expect(result.timedOut).toBe(false);
  expect(result.assistantTexts).toEqual(["It marks an interrupted turn."]);
});
|
|
|
|
it("releases completion when a projector callback throws during turn/completed", async () => {
  // Regression for openclaw/openclaw#67996: a throw inside the projector's
  // turn/completed handler must not strand resolveCompletion, otherwise the
  // gateway session lane stays locked and every follow-up message queues
  // behind a run that will never resolve.
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  let deliverNotification: NotificationHandler = async () => undefined;

  const requestMock = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });

  // Minimal fake client: records requests and captures the notification handler.
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: () => () => undefined,
      }) as never,
  );

  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  // Downstream consumer that always throws.
  attemptParams.onAgentEvent = () => {
    throw new Error("downstream consumer exploded");
  };

  const run = runCodexAppServerAttempt(attemptParams);
  await vi.waitFor(() => {
    const requestedMethods = requestMock.mock.calls.map(([method]) => method);
    expect(requestedMethods).toContain("turn/start");
  });

  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turn: {
        id: "turn-1",
        status: "completed",
        items: [{ id: "plan-1", type: "plan", text: "step one\nstep two" }],
      },
    },
  });

  const outcome = await run;
  expect(outcome.aborted).toBe(false);
  expect(outcome.timedOut).toBe(false);
});
|
|
|
|
it("routes MCP approval elicitations through the native bridge", async () => {
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  type ServerRequestHandler = (request: {
    id: string;
    method: string;
    params?: unknown;
  }) => Promise<unknown>;

  let deliverNotification: NotificationHandler = async () => undefined;
  let handleRequest: ServerRequestHandler | undefined;

  // Stub the bridge so the test controls the elicitation response.
  const bridgeSpy = vi
    .spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest")
    .mockResolvedValue({
      action: "accept",
      content: { approve: true },
      _meta: null,
    });

  const requestMock = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });

  // Fake client that captures both the notification and the request handler.
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          handleRequest = handler;
          return () => undefined;
        },
      }) as never,
  );

  const run = runCodexAppServerAttempt(
    createParams(path.join(tempDir, "session.jsonl"), path.join(tempDir, "workspace")),
  );
  await vi.waitFor(() => {
    expect(handleRequest).toBeTypeOf("function");
  });

  const elicitationResponse = await handleRequest?.({
    id: "request-elicitation-1",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      serverName: "codex_apps__github",
      mode: "form",
    },
  });

  // The handler returns exactly what the bridge resolved with.
  expect(elicitationResponse).toEqual({
    action: "accept",
    content: { approve: true },
    _meta: null,
  });
  const [bridgeArgs] = mockCall(bridgeSpy, "elicitation bridge") as [
    { threadId?: string; turnId?: string },
  ];
  expect(bridgeArgs.threadId).toBe("thread-1");
  expect(bridgeArgs.turnId).toBe("turn-1");

  // Finish the turn so the attempt settles.
  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await run;
});
|
|
|
|
it("passes session plugin app policy context to elicitation handling", async () => {
  type NotificationHandler = (notification: CodexServerNotification) => Promise<void>;
  type ServerRequestHandler = (request: {
    id: string;
    method: string;
    params?: unknown;
  }) => Promise<unknown>;
  type GranularApprovalParams =
    | { approvalPolicy?: { granular?: { mcp_elicitations?: boolean } } }
    | undefined;

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };

  // Fixture factories: each call builds a fresh object, so every response is
  // an independent literal.
  const calendarPluginSummary = () => ({
    id: "google-calendar",
    name: "google-calendar",
    source: { type: "remote" },
    installed: true,
    enabled: true,
    installPolicy: "AVAILABLE",
    authPolicy: "ON_USE",
    availability: "AVAILABLE",
    interface: null,
  });
  const pluginListResponse = () => ({
    marketplaces: [
      {
        name: "openai-curated",
        path: "/marketplaces/openai-curated",
        interface: null,
        plugins: [calendarPluginSummary()],
      },
    ],
    marketplaceLoadErrors: [],
    featuredPluginIds: [],
  });
  const pluginReadResponse = () => ({
    plugin: {
      marketplaceName: "openai-curated",
      marketplacePath: "/marketplaces/openai-curated",
      summary: calendarPluginSummary(),
      description: null,
      skills: [],
      apps: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          installUrl: null,
          needsAuth: false,
        },
      ],
      mcpServers: ["google-calendar"],
    },
  });

  // Seed the shared app inventory cache under the key built for this agent dir.
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });

  let deliverNotification: NotificationHandler = async () => undefined;
  let handleRequest: ServerRequestHandler | undefined;
  const bridgeSpy = vi
    .spyOn(elicitationBridge, "handleCodexAppServerElicitationRequest")
    .mockResolvedValue({
      action: "decline",
      content: null,
      _meta: null,
    });

  const requestMock = vi.fn(async (method: string) => {
    switch (method) {
      case "plugin/list":
        return pluginListResponse();
      case "plugin/read":
        return pluginReadResponse();
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      default:
        return {};
    }
  });
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: (handler: NotificationHandler) => {
          deliverNotification = handler;
          return () => undefined;
        },
        addRequestHandler: (handler: ServerRequestHandler) => {
          handleRequest = handler;
          return () => undefined;
        },
      }) as never,
  );

  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  const run = runCodexAppServerAttempt(params, { pluginConfig });
  await vi.waitFor(() => {
    expect(handleRequest).toBeTypeOf("function");
  });

  const elicitationResponse = await handleRequest?.({
    id: "request-elicitation-1",
    method: "mcpServer/elicitation/request",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      serverName: "google-calendar",
      mode: "form",
    },
  });

  expect(elicitationResponse).toEqual({
    action: "decline",
    content: null,
    _meta: null,
  });

  // The bridge must receive the thread/turn ids plus the per-app policy context.
  const [bridgeArgs] = mockCall(bridgeSpy, "elicitation bridge") as [
    {
      pluginAppPolicyContext?: {
        apps?: Record<string, { mcpServerNames?: string[]; pluginName?: string }>;
      };
      threadId?: string;
      turnId?: string;
    },
  ];
  expect(bridgeArgs.threadId).toBe("thread-1");
  expect(bridgeArgs.turnId).toBe("turn-1");
  const calendarPolicy = bridgeArgs.pluginAppPolicyContext?.apps?.["google-calendar-app"];
  expect(calendarPolicy?.pluginName).toBe("google-calendar");
  expect(calendarPolicy?.mcpServerNames).toEqual(["google-calendar"]);

  // Both thread/start and turn/start must opt in to MCP elicitations.
  const recordedCalls = requestMock.mock.calls as unknown as Array<[string, unknown, unknown?]>;
  const threadStartCall = recordedCalls.find(([method]) => method === "thread/start");
  const threadStartParams = threadStartCall?.[1] as GranularApprovalParams;
  expect(threadStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);
  const turnStartCall = recordedCalls.find(([method]) => method === "turn/start");
  const turnStartParams = turnStartCall?.[1] as GranularApprovalParams;
  expect(turnStartParams?.approvalPolicy?.granular?.mcp_elicitations).toBe(true);

  // Finish the turn so the attempt settles.
  await deliverNotification({
    method: "turn/completed",
    params: {
      threadId: "thread-1",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  });
  await run;
});
|
|
|
|
it("keys plugin app inventory by the resolved Codex account", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const authProfileId = "openai-codex:work";
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };

  // Fixture factory: a fresh summary object per call.
  const calendarPluginSummary = () => ({
    id: "google-calendar",
    name: "google-calendar",
    source: { type: "remote" },
    installed: true,
    enabled: true,
    installPolicy: "AVAILABLE",
    authPolicy: "ON_USE",
    availability: "AVAILABLE",
    interface: null,
  });

  // Seed the cache under a key that includes the auth profile and account id.
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
      authProfileId,
      accountId: "account-work",
    }),
    request: async () => ({
      data: [
        {
          id: "google-calendar-app",
          name: "Google Calendar",
          description: null,
          logoUrl: null,
          logoUrlDark: null,
          distributionChannel: null,
          branding: null,
          appMetadata: null,
          labels: null,
          installUrl: null,
          isAccessible: true,
          isEnabled: true,
          pluginDisplayNames: [],
        },
      ],
      nextCursor: null,
    }),
  });

  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [calendarPluginSummary()],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: calendarPluginSummary(),
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      case "app/list":
        // The seeded cache entry should make a live inventory fetch unnecessary.
        throw new Error("app/list should use the account-keyed cache entry");
      default:
        return undefined;
    }
  });

  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  params.authProfileId = authProfileId;
  params.authProfileStore = {
    version: 1,
    profiles: {
      [authProfileId]: {
        type: "oauth",
        provider: "openai-codex",
        access: "access-token",
        refresh: "refresh-token",
        expires: Date.now() + 60_000,
        accountId: "account-work",
        email: "work@example.test",
      },
    },
  };

  const run = runCodexAppServerAttempt(params, { pluginConfig });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const threadStartEntry = harness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStartEntry?.params as
    | { config?: { apps?: Record<string, { enabled?: boolean }> } }
    | undefined;
  expect(threadStartParams?.config?.apps?.["google-calendar-app"]?.enabled).toBe(true);
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).not.toContain("app/list");
});
|
|
|
|
it("keys plugin app inventory by inherited API key fallback credentials", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const pluginConfig = {
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  };

  // Fixture factories: a fresh object per call.
  const calendarInventoryPage = () => ({
    data: [
      {
        id: "google-calendar-app",
        name: "Google Calendar",
        description: null,
        logoUrl: null,
        logoUrlDark: null,
        distributionChannel: null,
        branding: null,
        appMetadata: null,
        labels: null,
        installUrl: null,
        isAccessible: true,
        isEnabled: true,
        pluginDisplayNames: [],
      },
    ],
    nextCursor: null,
  });
  const calendarPluginSummary = () => ({
    id: "google-calendar",
    name: "google-calendar",
    source: { type: "remote" },
    installed: true,
    enabled: true,
    installPolicy: "AVAILABLE",
    authPolicy: "ON_USE",
    availability: "AVAILABLE",
    interface: null,
  });

  // Seed the cache under the fingerprint of the OLD env API key.
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: readCodexPluginConfig(pluginConfig),
  });
  defaultCodexAppInventoryCache.clear();
  await defaultCodexAppInventoryCache.refreshNow({
    key: buildCodexPluginAppCacheKey({
      appServer,
      agentDir,
      envApiKeyFingerprint: resolveCodexAppServerEnvApiKeyCacheKey({
        startOptions: appServer.start,
        baseEnv: { CODEX_API_KEY: "old-codex-env-key" },
      }),
    }),
    request: async () => calendarInventoryPage(),
  });

  // Run with a different env key; the assertions below expect a live app/list.
  // NOTE(review): no cleanup for these env stubs is visible in this test —
  // presumably a suite-level hook unstubs them; confirm.
  vi.stubEnv("CODEX_API_KEY", "new-codex-env-key");
  vi.stubEnv("OPENAI_API_KEY", "");

  const harness = createStartedThreadHarness(async (method) => {
    switch (method) {
      case "app/list":
        return calendarInventoryPage();
      case "plugin/list":
        return {
          marketplaces: [
            {
              name: "openai-curated",
              path: "/marketplaces/openai-curated",
              interface: null,
              plugins: [calendarPluginSummary()],
            },
          ],
          marketplaceLoadErrors: [],
          featuredPluginIds: [],
        };
      case "plugin/read":
        return {
          plugin: {
            marketplaceName: "openai-curated",
            marketplacePath: "/marketplaces/openai-curated",
            summary: calendarPluginSummary(),
            description: null,
            skills: [],
            apps: [
              {
                id: "google-calendar-app",
                name: "Google Calendar",
                description: null,
                installUrl: null,
                needsAuth: false,
              },
            ],
            mcpServers: ["google-calendar"],
          },
        };
      default:
        return undefined;
    }
  });

  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;

  const run = runCodexAppServerAttempt(params, { pluginConfig });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("app/list");
  const threadStartEntry = harness.requests.find((entry) => entry.method === "thread/start");
  const threadStartParams = threadStartEntry?.params as
    | { config?: { apps?: Record<string, { enabled?: boolean }> } }
    | undefined;
  expect(threadStartParams?.config?.apps?.["google-calendar-app"]?.enabled).toBe(true);
});
|
|
|
|
it("times out app-server startup before thread setup can hang forever", async () => {
  // The client factory returns a promise that never settles.
  const neverSettles = () => new Promise<never>(() => undefined);
  setCodexAppServerClientFactoryForTest(neverSettles);

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 1;

  const attempt = runCodexAppServerAttempt(attemptParams, { startupTimeoutFloorMs: 1 });
  await expect(attempt).rejects.toThrow("codex app-server startup timed out");

  // After the timeout, queueing a message for the session must be refused.
  expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
|
|
|
|
it("passes the selected auth profile into app-server startup", async () => {
  // Record what the harness's onStart hook is called with.
  const seenAuthProfileIds: Array<string | undefined> = [];
  const seenAgentDirs: Array<string | undefined> = [];
  const harness = createStartedThreadHarness(undefined, {
    onStart: (authProfileId, agentDir) => {
      seenAuthProfileIds.push(authProfileId);
      seenAgentDirs.push(agentDir);
    },
  });

  const expectedAgentDir = path.join(tempDir, "agent");
  const attemptParams = createParams(
    path.join(tempDir, "session.jsonl"),
    path.join(tempDir, "workspace"),
  );
  attemptParams.authProfileId = "openai-codex:work";
  attemptParams.agentDir = expectedAgentDir;

  const run = runCodexAppServerAttempt(attemptParams);
  await vi.waitFor(
    () => {
      expect(seenAuthProfileIds).toEqual(["openai-codex:work"]);
    },
    { interval: 1 },
  );
  await harness.waitForMethod("turn/start");
  await new Promise<void>((resolve) => setImmediate(resolve));
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  // onStart ran exactly once, with the selected profile and agent dir.
  expect(seenAuthProfileIds).toEqual(["openai-codex:work"]);
  expect(seenAgentDirs).toEqual([expectedAgentDir]);
  expect(harness.requests.map((entry) => entry.method)).toContain("turn/start");
});
|
|
|
|
it("times out turn start before the active run handle is installed", async () => {
  // thread/start succeeds; turn/start rejects after the timeout the caller
  // passes in the request options (0 ms when none is given).
  const requestMock = vi.fn(
    async (method: string, _params?: unknown, options?: { timeoutMs?: number }) => {
      switch (method) {
        case "thread/start":
          return threadStartResult("thread-1");
        case "turn/start":
          return await new Promise<never>((_, reject) => {
            setTimeout(() => reject(new Error("turn/start timed out")), options?.timeoutMs ?? 0);
          });
        default:
          return {};
      }
    },
  );
  setCodexAppServerClientFactoryForTest(
    async () =>
      ({
        request: requestMock,
        addNotificationHandler: () => () => undefined,
        addRequestHandler: () => () => undefined,
      }) as never,
  );

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionFile, workspaceDir);
  attemptParams.timeoutMs = 1;

  await expect(runCodexAppServerAttempt(attemptParams)).rejects.toThrow("turn/start timed out");
  // After the failure, queueing a message for the session must be refused.
  expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
|
|
|
|
it("keeps extended history enabled when resuming a bound Codex thread", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  // Start from an existing binding so the attempt resumes.
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const harness = createResumeHarness();

  const attemptParams = createParams(sessionFile, workspaceDir);
  const run = runCodexAppServerAttempt(attemptParams, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await run;

  expectResumeRequest(harness.requests, {
    threadId: "thread-existing",
    model: "gpt-5.4-codex",
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: "danger-full-access",
    persistExtendedHistory: true,
  });

  // The resume request's developer instructions must not include the contract.
  const resumeEntry = harness.requests.find((request) => request.method === "thread/resume");
  const resumeParams = resumeEntry?.params as Record<string, unknown> | undefined;
  expect(resumeParams?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);
});
|
|
|
|
it("starts a fresh Codex thread before resume when the native rollout is over budget", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // Session index next to the session file, reporting 12k total tokens.
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // Native rollout for the bound thread, reporting 70k total tokens.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountEvent = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 70_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountEvent)}\n`,
  );

  const harness = createStartedThreadHarness();
  const params = createParams(sessionFile, workspaceDir);
  params.agentDir = agentDir;
  params.config = {
    agents: {
      defaults: {
        compaction: {
          truncateAfterCompaction: true,
          maxActiveTranscriptBytes: "1mb",
        },
      },
    },
  } as never;

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  // A new thread is started instead of resuming, and the binding follows it.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.threadId).toBe("thread-1");
});
|
|
|
|
it("preserves bound auth when rotating an over-budget native rollout", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  // The existing binding carries the auth profile; the params below do not.
  await writeExistingBinding(sessionFile, workspaceDir, {
    authProfileId: "openai-codex:work",
    dynamicToolsFingerprint: "[]",
  });

  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // Native rollout for the bound thread, reporting 70k total tokens.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountEvent = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 70_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountEvent)}\n`,
  );

  const seenAuthProfileIds: Array<string | undefined> = [];
  const harness = createStartedThreadHarness(undefined, {
    onStart: (authProfileId) => {
      seenAuthProfileIds.push(authProfileId);
    },
  });

  const params = createParams(sessionFile, workspaceDir);
  delete params.authProfileId;
  params.agentDir = agentDir;
  params.config = {
    agents: {
      defaults: {
        compaction: {
          truncateAfterCompaction: true,
          maxActiveTranscriptBytes: "1mb",
        },
      },
    },
  } as never;

  const run = runCodexAppServerAttempt(params, {
    pluginConfig: { appServer: { mode: "yolo" } },
  });
  await vi.waitFor(
    () => {
      expect(seenAuthProfileIds).toEqual(["openai-codex:work"]);
    },
    { interval: 1 },
  );
  await harness.waitForMethod("turn/start");
  await harness.completeTurn({ threadId: "thread-1", turnId: "turn-1" });
  await run;

  // A fresh thread is started, and both startup and the saved binding keep
  // the auth profile from the original binding.
  const requestedMethods = harness.requests.map((entry) => entry.method);
  expect(requestedMethods).toContain("thread/start");
  expect(requestedMethods).not.toContain("thread/resume");
  expect(seenAuthProfileIds).toEqual(["openai-codex:work"]);
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.authProfileId).toBe("openai-codex:work");
  expect(savedBinding?.threadId).toBe("thread-1");
});
|
|
|
|
it("does not use a default byte limit when maxActiveTranscriptBytes is unset", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // A 2,000,000-character rollout file with no byte limit configured.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000_000));

  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const binding = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: {
      agents: {
        defaults: {
          compaction: {
            truncateAfterCompaction: true,
          },
        },
      },
    } as never,
  });

  // The binding is kept, both in the return value and on disk.
  expect(binding?.threadId).toBe("thread-existing");
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding?.threadId).toBe("thread-existing");
});
|
|
|
|
it("honors shorthand byte units for native rollout limits", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // A 2,000-character rollout file against a "1k" limit.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000));

  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const binding = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    config: {
      agents: {
        defaults: {
          compaction: {
            truncateAfterCompaction: true,
            maxActiveTranscriptBytes: "1k",
          },
        },
      },
    } as never,
  });

  // The binding is dropped, both in the return value and on disk.
  expect(binding).toBeUndefined();
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding).toBeUndefined();
});
|
|
|
|
it("honors custom Codex home rollout files for native rollout limits", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  const codexHome = path.join(tempDir, "custom-codex-home");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // The rollout lives under the custom Codex home, not under the agent dir.
  const rolloutDir = path.join(codexHome, "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000));

  const existingBinding = await readCodexAppServerBinding(sessionFile);
  const binding = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: existingBinding,
    sessionFile,
    agentDir,
    codexHome,
    config: {
      agents: {
        defaults: {
          compaction: {
            truncateAfterCompaction: true,
            maxActiveTranscriptBytes: 1_000,
          },
        },
      },
    } as never,
  });

  // The binding is dropped, both in the return value and on disk.
  expect(binding).toBeUndefined();
  const savedBinding = await readCodexAppServerBinding(sessionFile);
  expect(savedBinding).toBeUndefined();
});
|
|
|
|
it("uses current rollout token usage before cumulative usage", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // A single token_count event: total_token_usage reports 70,000 tokens while
  // last_token_usage reports 12,000.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountEvent = {
    payload: {
      type: "token_count",
      info: {
        total_token_usage: {
          total_tokens: 70_000,
        },
        last_token_usage: {
          total_tokens: 12_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountEvent)}\n`,
  );

  // Act
  const startupBinding = await readCodexAppServerBinding(sessionFile);
  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: "1mb",
  };
  const kept = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: startupBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  // Assert: the existing thread binding is returned and still persisted.
  expect(kept?.threadId).toBe("thread-existing");
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-existing");
});
|
|
|
|
it("ignores stale session token totals for native rollout rotation", async () => {
  // Arrange: the session index carries a 70,000-token total flagged as not fresh.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 70_000,
      totalTokensFresh: false,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // The rollout itself reports 12,000 tokens in last_token_usage.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const tokenCountEvent = {
    payload: {
      type: "token_count",
      info: {
        last_token_usage: {
          total_tokens: 12_000,
        },
      },
    },
  };
  await fs.writeFile(
    path.join(rolloutDir, "rollout-thread-existing.jsonl"),
    `${JSON.stringify(tokenCountEvent)}\n`,
  );

  // Act
  const startupBinding = await readCodexAppServerBinding(sessionFile);
  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: "1mb",
  };
  const kept = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: startupBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  // Assert: the existing thread binding is returned and still persisted.
  expect(kept?.threadId).toBe("thread-existing");
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.threadId).toBe("thread-existing");
});
|
|
|
|
it("streams rollout token scans without reading the whole file", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // The rollout reports 70,000 tokens in last_token_usage.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  const tokenCountEvent = {
    payload: {
      type: "token_count",
      info: {
        last_token_usage: {
          total_tokens: 70_000,
        },
      },
    },
  };
  await fs.writeFile(rolloutFile, `${JSON.stringify(tokenCountEvent)}\n`);

  // Spy only after the fixtures are written so setup calls are not recorded.
  const readFileSpy = vi.spyOn(fs, "readFile");

  // Act
  const startupBinding = await readCodexAppServerBinding(sessionFile);
  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: "1mb",
  };
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: startupBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  // Assert: binding cleared, and fs.readFile was never called on the rollout.
  expect(rotated).toBeUndefined();
  const rolloutWasRead = readFileSpy.mock.calls.some((call) => call[0] === rolloutFile);
  expect(rolloutWasRead).toBe(false);
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted).toBeUndefined();
});
|
|
|
|
it("clears byte-oversized rollouts before reading their contents", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // 2,000 bytes of filler against a 1,000-byte limit.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutFile = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutFile, "x".repeat(2_000));

  // Spy only after the fixtures are written so setup calls are not recorded.
  const readFileSpy = vi.spyOn(fs, "readFile");

  // Act
  const startupBinding = await readCodexAppServerBinding(sessionFile);
  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: 1_000,
  };
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: startupBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  // Assert: binding cleared, and fs.readFile was never called on the rollout.
  expect(rotated).toBeUndefined();
  const rolloutWasRead = readFileSpy.mock.calls.some((call) => call[0] === rolloutFile);
  expect(rolloutWasRead).toBe(false);
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted).toBeUndefined();
});
|
|
|
|
it("clears native rollouts at the configured byte limit", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const agentDir = path.join(tempDir, "agent");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const sessionIndex = {
    "agent:main:session-1": {
      sessionFile,
      totalTokens: 12_000,
    },
  };
  await fs.writeFile(
    path.join(path.dirname(sessionFile), "sessions.json"),
    JSON.stringify(sessionIndex),
  );

  // The rollout is exactly 1,000 bytes, equal to the configured limit.
  const rolloutDir = path.join(agentDir, "codex-home", "sessions");
  await fs.mkdir(rolloutDir, { recursive: true });
  const rolloutPath = path.join(rolloutDir, "rollout-thread-existing.jsonl");
  await fs.writeFile(rolloutPath, "x".repeat(1_000));

  // Act
  const startupBinding = await readCodexAppServerBinding(sessionFile);
  const compaction = {
    truncateAfterCompaction: true,
    maxActiveTranscriptBytes: 1_000,
  };
  const rotated = await testing.rotateOversizedCodexAppServerStartupBinding({
    binding: startupBinding,
    sessionFile,
    agentDir,
    config: { agents: { defaults: { compaction } } } as never,
  });

  // Assert: no binding is returned and none remains persisted.
  expect(rotated).toBeUndefined();
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted).toBeUndefined();
});
|
|
|
|
it("resumes a bound Codex thread when only dynamic tool descriptions change", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Both lifecycle methods answer with the same thread id; anything else fails.
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
      case "thread/resume":
        return threadStartResult("thread-existing");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });

  // First call uses the message tool with a Slack-flavoured description.
  const slackTools = [
    createMessageDynamicTool("Send and manage messages for the current Slack thread."),
  ];
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: slackTools,
    appServer,
  });

  // Second call differs only in the tool description text.
  const discordTools = [
    createMessageDynamicTool("Send and manage messages for the current Discord channel."),
  ];
  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: discordTools,
    appServer,
  });

  expect(binding.threadId).toBe("thread-existing");
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
});
|
|
|
|
it("resumes a bound Codex thread when dynamic tools are reordered", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Both lifecycle methods answer with the same thread id; anything else fails.
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
      case "thread/resume":
        return threadStartResult("thread-existing");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });

  // First call: tools in the order wiki_status, diffs.
  const initialOrder = [createNamedDynamicTool("wiki_status"), createNamedDynamicTool("diffs")];
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: initialOrder,
    appServer,
  });

  // Second call: the same two tools, swapped.
  const swappedOrder = [createNamedDynamicTool("diffs"), createNamedDynamicTool("wiki_status")];
  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: swappedOrder,
    appServer,
  });

  expect(binding.threadId).toBe("thread-existing");
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
});
|
|
|
|
it("starts a fresh Codex thread for legacy context-engine sidecars without metadata", async () => {
  // Arrange: the stored binding has no contextEngine metadata.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // The run itself has a context engine and a token budget configured.
  const params = createParams(sessionFile, workspaceDir);
  params.contextEngine = {
    info: { id: "lossless-claw", name: "Lossless Claw", ownsCompaction: true },
    assemble: vi.fn(),
    compact: vi.fn(),
  } as never;
  params.contextTokenBudget = 400_000;
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is expected; any other method fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-fresh");
  });

  // Act
  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  // Assert: a new thread was started and flagged as a context-engine rotation.
  expect(binding.threadId).toBe("thread-fresh");
  expect(binding.lifecycle).toEqual({
    action: "started",
    rotatedContextEngineBinding: true,
  });
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start"]);

  // The persisted binding now carries the engine id and the budget in its fingerprint.
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.contextEngine?.engineId).toBe("lossless-claw");
  expect(persisted?.contextEngine?.policyFingerprint).toContain('"contextTokenBudget":400000');
});
|
|
|
|
it("resumes a Codex thread when context-engine sidecar metadata is compatible", async () => {
  // Arrange: the stored binding already carries context-engine metadata.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const contextEngine = {
    schemaVersion: 1 as const,
    engineId: "lossless-claw",
    policyFingerprint:
      '{"schemaVersion":1,"engineId":"lossless-claw","ownsCompaction":true,"contextTokenBudget":400000,"projectionMaxChars":1000000}',
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    contextEngine,
  });

  // The run uses the same engine id, ownsCompaction flag, and token budget.
  const params = createParams(sessionFile, workspaceDir);
  params.contextEngine = {
    info: { id: "lossless-claw", name: "Lossless Claw", ownsCompaction: true },
    assemble: vi.fn(),
    compact: vi.fn(),
  } as never;
  params.contextTokenBudget = 400_000;
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/resume is expected; any other method fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  });

  // Act
  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  // Assert
  expect(binding.threadId).toBe("thread-existing");
  expect(binding.lifecycle).toEqual({ action: "resumed" });
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/resume"]);
});
|
|
|
|
it("starts a fresh Codex thread when context-engine sidecar metadata is no longer active", async () => {
  // Arrange: the stored binding has context-engine metadata...
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const storedContextEngine = {
    schemaVersion: 1,
    engineId: "lossless-claw",
    policyFingerprint:
      '{"schemaVersion":1,"engineId":"lossless-claw","ownsCompaction":true,"contextTokenBudget":400000,"projectionMaxChars":1000000}',
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    contextEngine: storedContextEngine,
  });

  // ...but this test sets no context engine on the run params.
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is expected; any other method fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-fresh");
  });

  // Act
  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  // Assert: fresh thread, rotation flagged, and no context-engine metadata persisted.
  expect(binding.threadId).toBe("thread-fresh");
  expect(binding.lifecycle).toEqual({
    action: "started",
    rotatedContextEngineBinding: true,
  });
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start"]);
  const persisted = await readCodexAppServerBinding(sessionFile);
  expect(persisted?.contextEngine).toBeUndefined();
});
|
|
|
|
it("starts a fresh Codex thread when context-engine policy metadata changes", async () => {
  // Arrange: the stored fingerprint records engineVersion 1.0.0.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const storedContextEngine = {
    schemaVersion: 1,
    engineId: "lossless-claw",
    policyFingerprint:
      '{"schemaVersion":1,"engineId":"lossless-claw","engineVersion":"1.0.0","ownsCompaction":true,"turnMaintenanceMode":"foreground","citationsMode":"inline","contextTokenBudget":400000,"projectionMaxChars":1000000}',
  };
  await writeExistingBinding(sessionFile, workspaceDir, {
    dynamicToolsFingerprint: "[]",
    contextEngine: storedContextEngine,
  });

  // The run reports version 1.0.1 for the same engine id.
  const params = createParams(sessionFile, workspaceDir);
  params.contextEngine = {
    info: {
      id: "lossless-claw",
      name: "Lossless Claw",
      version: "1.0.1",
      ownsCompaction: true,
      turnMaintenanceMode: "foreground",
    },
    assemble: vi.fn(),
    compact: vi.fn(),
  } as never;
  params.config = { memory: { citations: "inline" } } as never;
  params.contextTokenBudget = 400_000;
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is expected; any other method fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-fresh");
  });

  // Act
  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  // Assert: fresh thread with the rotation flag set.
  expect(binding.threadId).toBe("thread-fresh");
  expect(binding.lifecycle).toEqual({
    action: "started",
    rotatedContextEngineBinding: true,
  });
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start"]);

  // The persisted fingerprint reflects the new version and the unchanged policy fields.
  const persisted = await readCodexAppServerBinding(sessionFile);
  const expectedFragments = [
    '"engineVersion":"1.0.1"',
    '"turnMaintenanceMode":"foreground"',
    '"citationsMode":"inline"',
  ];
  for (const fragment of expectedFragments) {
    expect(persisted?.contextEngine?.policyFingerprint).toContain(fragment);
  }
});
|
|
|
|
it("keeps the previous dynamic tool fingerprint for transient no-tool maintenance turns", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Each thread/start hands out the next sequential id; resume always answers thread-1.
  let nextThread = 1;
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start": {
        const threadId = `thread-${nextThread}`;
        nextThread += 1;
        return threadStartResult(threadId);
      }
      case "thread/resume":
        return threadStartResult("thread-1");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });

  // Turn 1: with the message tool. Capture the fingerprint it persists.
  const firstTurnTools = [createMessageDynamicTool("Send and manage messages.")];
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: firstTurnTools,
    appServer,
  });
  const bindingAfterFirstTurn = await readCodexAppServerBinding(sessionFile);
  const fingerprint = bindingAfterFirstTurn?.dynamicToolsFingerprint;

  // Turn 2: no dynamic tools at all.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });

  // Turn 3: the message tool is back.
  const thirdTurnTools = [createMessageDynamicTool("Send and manage messages.")];
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: thirdTurnTools,
    appServer,
  });

  // The persisted binding still matches turn 1, and turn 3 resumed rather than started.
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.dynamicToolsFingerprint).toBe(fingerprint);
  expect(binding?.threadId).toBe("thread-1");
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start", "thread/start", "thread/resume"]);
});
|
|
|
|
it("keeps plugin app bindings across transient native-tool-disabled turns", async () => {
  // Arrange: an existing binding that already records plugin app fingerprints.
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const pluginAppPolicyContext = createPluginAppPolicyContext();
  await writeExistingBinding(sessionFile, workspaceDir, {
    pluginAppsFingerprint: "plugin-apps-config-1",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext,
  });
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // thread/start yields a different id than thread/resume so the two are distinguishable.
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-transient");
      case "thread/resume":
        return threadStartResult("thread-existing");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });

  // Config patch that disables every app by default; built fresh on each use.
  const denyAllAppsPatch = () => ({
    apps: {
      _default: {
        enabled: false,
        destructive_enabled: false,
        open_world_enabled: false,
      },
    },
  });
  const buildDenyAllPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: denyAllAppsPatch(),
    fingerprint: "plugin-apps-deny-all",
    inputFingerprint: "plugin-apps-input-deny-all",
    policyContext: { fingerprint: "plugin-policy-deny-all", apps: {}, pluginAppIds: {} },
    diagnostics: [],
  }));
  const buildEnabledPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: pluginAppPolicyContext,
    diagnostics: [],
  }));

  // Turn 1: native code mode disabled, deny-all plugin thread config.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    nativeCodeModeEnabled: false,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-deny-all",
      enabledPluginConfigKeys: [],
      build: buildDenyAllPluginThreadConfig,
    },
  });
  const savedAfterDeny = await readCodexAppServerBinding(sessionFile);

  // The persisted binding is unchanged by the deny-all turn.
  expect(savedAfterDeny?.threadId).toBe("thread-existing");
  expect(savedAfterDeny?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(savedAfterDeny?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");

  // Turn 2: plugin apps enabled again with the original input fingerprint.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildEnabledPluginThreadConfig,
    },
  });

  // Each builder ran exactly once; turn 1 started a thread, turn 2 resumed.
  expect(buildDenyAllPluginThreadConfig).toHaveBeenCalledTimes(1);
  expect(buildEnabledPluginThreadConfig).toHaveBeenCalledTimes(1);
  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
  // The thread/start request carried the deny-all app defaults.
  expect(requestCalls[0]?.[1].config).toMatchObject(denyAllAppsPatch());

  const savedAfterAllowed = await readCodexAppServerBinding(sessionFile);
  expect(savedAfterAllowed?.threadId).toBe("thread-existing");
  expect(savedAfterAllowed?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(savedAfterAllowed?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
  expect(savedAfterAllowed?.pluginAppPolicyContext).toEqual(pluginAppPolicyContext);
});
|
|
|
|
it("preserves the binding when the app-server closes during thread resume", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const appServer = createThreadLifecycleAppServerOptions();
  // thread/resume fails with the closed-client error; any other method is unexpected.
  const request = vi.fn(async (method: string) => {
    const message =
      method === "thread/resume"
        ? "codex app-server client is closed"
        : `unexpected method: ${method}`;
    throw new Error(message);
  });

  // Act + assert: the closed-client error propagates to the caller.
  const attempt = startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });
  await expect(attempt).rejects.toThrow("codex app-server client is closed");

  // Only one resume was attempted, and the stored binding is still there.
  const methodsCalled = request.mock.calls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/resume"]);
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.threadId).toBe("thread-existing");
});
|
|
|
|
it("restarts the app-server once when a shared client closes during startup", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // requests[i] collects the methods sent to the i-th client the factory creates.
  const requests: string[][] = [];
  let starts = 0;
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  setCodexAppServerClientFactoryForTest(async () => {
    const startIndex = starts;
    starts += 1;
    const methods: string[] = [];
    requests.push(methods);
    return {
      request: vi.fn(async (method: string) => {
        methods.push(method);
        switch (method) {
          case "thread/resume":
            // Only the first client rejects the resume.
            if (startIndex === 0) {
              throw new Error("codex app-server client is closed");
            }
            return threadStartResult("thread-existing");
          case "turn/start":
            return turnStartResult();
          default:
            return {};
        }
      }),
      addNotificationHandler: (handler: typeof notify) => {
        // Keep the most recently registered handler so the test can push notifications.
        notify = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    } as never;
  });

  // Act: start the attempt, wait until the second client starts a turn, then complete it.
  const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await vi.waitFor(() => expect(requests[1]).toContain("turn/start"), fastWait);
  const turnCompleted = {
    method: "turn/completed",
    params: {
      threadId: "thread-existing",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  };
  await notify(turnCompleted);

  // Assert
  const result = await run;
  expect(result.aborted).toBe(false);
  expect(requests).toEqual([["thread/resume"], ["thread/resume", "turn/start"]]);
});
|
|
|
|
it("tolerates a second app-server close while retrying startup", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });

  // requests[i] collects the methods sent to the i-th client the factory creates.
  const requests: string[][] = [];
  let starts = 0;
  let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
  setCodexAppServerClientFactoryForTest(async () => {
    const startIndex = starts;
    starts += 1;
    const methods: string[] = [];
    requests.push(methods);
    return {
      request: vi.fn(async (method: string) => {
        methods.push(method);
        switch (method) {
          case "thread/resume":
            // The first two clients reject the resume; the third accepts it.
            if (startIndex < 2) {
              throw new Error("codex app-server client is closed");
            }
            return threadStartResult("thread-existing");
          case "turn/start":
            return turnStartResult();
          default:
            return {};
        }
      }),
      addNotificationHandler: (handler: typeof notify) => {
        // Keep the most recently registered handler so the test can push notifications.
        notify = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    } as never;
  });

  // Act: start the attempt, wait until the third client starts a turn, then complete it.
  const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
  await vi.waitFor(() => expect(requests[2]).toContain("turn/start"), fastWait);
  const turnCompleted = {
    method: "turn/completed",
    params: {
      threadId: "thread-existing",
      turnId: "turn-1",
      turn: { id: "turn-1", status: "completed" },
    },
  };
  await notify(turnCompleted);

  // Assert
  const result = await run;
  expect(result.aborted).toBe(false);
  expect(requests).toEqual([
    ["thread/resume"],
    ["thread/resume"],
    ["thread/resume", "turn/start"],
  ]);
});
|
|
|
|
it("passes native hook relay config on thread start and resume", async () => {
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Both lifecycle methods answer with the same thread id; anything else fails.
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
      case "thread/resume":
        return threadStartResult("thread-existing");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });
  const config = {
    "features.hooks": true,
    "hooks.PreToolUse": [],
  };
  // The requests are expected to carry the caller's config plus the code-mode flags.
  const expectedConfig = {
    ...config,
    "features.code_mode": true,
    "features.code_mode_only": false,
  };

  // Same arguments twice: the first call starts the thread, the second resumes it.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config,
  });
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config,
  });

  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
  const [startCall, resumeCall] = requestCalls;
  expect(startCall?.[1].config).toEqual(expectedConfig);
  expect(resumeCall?.[1].config).toEqual(expectedConfig);
});
|
|
|
|
it("merges native hook relay config with plugin app config when starting a thread", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Only thread/start is expected; any other method fails the test.
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-plugins");
  });
  const pluginAppPolicyContext = createPluginAppPolicyContext();
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: pluginAppPolicyContext,
    diagnostics: [],
  }));

  // Act: pass a hook config alongside a plugin thread config.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": true, hooks: { PreToolUse: [] } },
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildPluginThreadConfig,
    },
  });

  // Assert: one build, one thread/start, with hook, code-mode, and plugin app keys merged.
  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start"]);
  expect(requestCalls[0]?.[1].config).toEqual({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    hooks: { PreToolUse: [] },
    ...createPluginAppConfigPatch(),
  });

  // The plugin app fingerprints and policy context are persisted with the new thread.
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding?.threadId).toBe("thread-plugins");
  expect(binding?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(binding?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
  expect(binding?.pluginAppPolicyContext).toEqual(pluginAppPolicyContext);
});
|
|
|
|
it("keeps native hook relay config as the final thread config patch", async () => {
  // Arrange
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  const appServer = createThreadLifecycleAppServerOptions();
  // Both lifecycle methods answer with the same thread id; anything else fails.
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
      case "thread/resume":
        return threadStartResult("thread-hooks");
      default:
        throw new Error(`unexpected method: ${method}`);
    }
  });
  const pluginAppPolicyContext = createPluginAppPolicyContext();
  // The final patch enables hooks and installs the relay command.
  const finalConfigPatch = {
    "features.hooks": true,
    "hooks.PreToolUse": [
      {
        hooks: [{ type: "command", command: "openclaw-native-hook-relay", timeout: 5 }],
      },
    ],
  };
  // The plugin config patch sets conflicting values for the same hook keys.
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: {
      "features.hooks": false,
      "hooks.PreToolUse": [],
      ...createPluginAppConfigPatch(),
    },
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: pluginAppPolicyContext,
    diagnostics: [],
  }));
  const pluginThreadConfig = {
    enabled: true,
    inputFingerprint: "plugin-apps-input-1",
    build: buildPluginThreadConfig,
  };

  // Act: the base config also disables hooks; the first call starts the thread.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": false },
    finalConfigPatch,
    pluginThreadConfig,
  });
  // The second call adds enabledPluginConfigKeys and resumes the thread.
  await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
    config: { "features.hooks": false },
    finalConfigPatch,
    pluginThreadConfig: {
      ...pluginThreadConfig,
      enabledPluginConfigKeys: ["google-calendar"],
    },
  });

  // Assert: in both requests the hook keys carry the final patch's values.
  const requestCalls = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  const methodsCalled = requestCalls.map((call) => call[0]);
  expect(methodsCalled).toEqual(["thread/start", "thread/resume"]);
  const [startCall, resumeCall] = requestCalls;
  expect(startCall?.[1].config).toMatchObject({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    "hooks.PreToolUse": finalConfigPatch["hooks.PreToolUse"],
    ...createPluginAppConfigPatch(),
  });
  expect(resumeCall?.[1].config).toMatchObject({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    "hooks.PreToolUse": finalConfigPatch["hooks.PreToolUse"],
  });
});
|
|
|
|
it("revalidates compatible plugin app bindings without resending app config", async () => {
  // Scenario: a thread is started and then resumed with the same plugin input
  // fingerprint. The plugin config builder is expected to run for both calls,
  // but only the thread/start request should include the plugin app config.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start" && method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-plugins");
  });
  const policyContext = createPluginAppPolicyContext();
  const inputFingerprint = "plugin-apps-input-1";
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint,
    policyContext,
    diagnostics: [],
  }));
  // Fresh option objects per call, mirroring two independent invocations.
  const commonThreadOptions = () => ({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    config: { "features.hooks": true },
  });

  await startOrResumeThread({
    ...commonThreadOptions(),
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint,
      build: buildPluginThreadConfig,
    },
  });
  const resumedBinding = await startOrResumeThread({
    ...commonThreadOptions(),
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint,
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildPluginThreadConfig,
    },
  });

  expect(resumedBinding.pluginAppPolicyContext).toEqual(policyContext);
  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(2);
  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/start", "thread/resume"]);
  const [startCall, resumeCall] = recorded;
  expect(startCall?.[1].config).toEqual({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
    ...createPluginAppConfigPatch(),
  });
  // The resume config is asserted exactly, so no plugin app keys may be present.
  expect(resumeCall?.[1].config).toEqual({
    "features.hooks": true,
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
});
|
|
|
|
it("starts a new plugin app thread when full binding revalidation removes an app", async () => {
  // Scenario: a stored binding references a plugin app, but the rebuilt plugin
  // config only contains the disabled `_default` app entry. A thread/start is
  // expected (the mock rejects any other method) and the stored binding should
  // then reflect the empty fingerprint and policy context.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const storedPolicyContext = createPluginAppPolicyContext();
  await writeExistingBinding(sessionPath, workspaceRoot, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-config-1",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: storedPolicyContext,
  });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-revalidated");
  });
  const emptyPolicyContext = { fingerprint: "plugin-policy-empty", apps: {}, pluginAppIds: {} };
  // Config patch that leaves only a fully disabled default app entry.
  const disabledAppsPatch = () => ({
    apps: {
      _default: {
        enabled: false,
        destructive_enabled: false,
        open_world_enabled: false,
      },
    },
  });
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: disabledAppsPatch(),
    fingerprint: "plugin-apps-empty",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: emptyPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildPluginThreadConfig,
    },
  });

  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/start"]);
  const [startCall] = recorded;
  expect(startCall?.[1].config).toEqual({
    "features.code_mode": true,
    "features.code_mode_only": false,
    ...disabledAppsPatch(),
  });
  const storedBinding = await readCodexAppServerBinding(sessionPath);
  expect(storedBinding?.threadId).toBe("thread-revalidated");
  expect(storedBinding?.pluginAppsFingerprint).toBe("plugin-apps-empty");
  expect(storedBinding?.pluginAppPolicyContext).toEqual(emptyPolicyContext);
});
|
|
|
|
it("keeps the existing plugin app binding when revalidation fails", async () => {
  // Scenario: the plugin config builder throws. The existing thread is expected
  // to be resumed (the mock rejects any other method) and the stored binding's
  // plugin metadata should be unchanged afterwards.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const storedPolicyContext = createPluginAppPolicyContext();
  await writeExistingBinding(sessionPath, workspaceRoot, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-config-1",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: storedPolicyContext,
  });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  });

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      // Simulated revalidation failure.
      build: async () => {
        throw new Error("plugin inventory unavailable");
      },
    },
  });

  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/resume"]);
  const [resumeCall] = recorded;
  expect(resumeCall?.[1].config).toEqual({
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
  const storedBinding = await readCodexAppServerBinding(sessionPath);
  expect(storedBinding?.threadId).toBe("thread-existing");
  expect(storedBinding?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(storedBinding?.pluginAppsInputFingerprint).toBe("plugin-apps-input-1");
  expect(storedBinding?.pluginAppPolicyContext).toEqual(storedPolicyContext);
});
|
|
|
|
it("rebuilds an empty plugin app binding after app inventory recovers", async () => {
  // Scenario: the stored binding has an empty plugin policy context, and the
  // builder now returns a populated plugin app config. A thread/start carrying
  // that config is expected, and the stored binding should be updated.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionPath, workspaceRoot, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-empty",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: { fingerprint: "plugin-policy-empty", apps: {}, pluginAppIds: {} },
  });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-recovered");
  });
  const recoveredPolicyContext = createPluginAppPolicyContext();
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-1",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: recoveredPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: buildPluginThreadConfig,
    },
  });

  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/start"]);
  const [startCall] = recorded;
  expect(startCall?.[1].config).toEqual({
    ...createPluginAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
  const storedBinding = await readCodexAppServerBinding(sessionPath);
  expect(storedBinding?.threadId).toBe("thread-recovered");
  expect(storedBinding?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(storedBinding?.pluginAppPolicyContext).toEqual(recoveredPolicyContext);
});
|
|
|
|
it("keeps an empty plugin app binding when recovery still produces the same config", async () => {
  // Scenario: the stored binding is empty and the builder returns the same
  // empty fingerprint again. The existing thread is expected to be resumed
  // (the mock rejects any other method) without plugin app config attached.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const emptyPolicyContext = { fingerprint: "plugin-policy-empty", apps: {}, pluginAppIds: {} };
  await writeExistingBinding(sessionPath, workspaceRoot, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-empty",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: emptyPolicyContext,
  });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  });
  // Builder output matches the stored fingerprint and policy context.
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: {
      apps: {
        _default: {
          enabled: false,
          destructive_enabled: false,
          open_world_enabled: false,
        },
      },
    },
    fingerprint: "plugin-apps-empty",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: emptyPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: buildPluginThreadConfig,
    },
  });

  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/resume"]);
  const [resumeCall] = recorded;
  expect(resumeCall?.[1].config).toEqual({
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
});
|
|
|
|
it("rebuilds a partial plugin app binding after another plugin recovers", async () => {
  // Scenario: the stored binding was built with a "partial" fingerprint, two
  // plugin config keys are enabled, and the builder now returns a two-plugin
  // config. A thread/start with that config is expected, and the stored binding
  // should switch to the new fingerprint and policy context.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const storedPolicyContext = createPluginAppPolicyContext();
  await writeExistingBinding(sessionPath, workspaceRoot, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-partial",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: storedPolicyContext,
  });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-recovered");
  });
  const recoveredPolicyContext = createTwoPluginAppPolicyContext();
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createTwoPluginAppConfigPatch(),
    fingerprint: "plugin-apps-config-2",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: recoveredPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar", "gmail"],
      build: buildPluginThreadConfig,
    },
  });

  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/start"]);
  const [startCall] = recorded;
  expect(startCall?.[1].config).toEqual({
    ...createTwoPluginAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
  const storedBinding = await readCodexAppServerBinding(sessionPath);
  expect(storedBinding?.threadId).toBe("thread-recovered");
  expect(storedBinding?.pluginAppsFingerprint).toBe("plugin-apps-config-2");
  expect(storedBinding?.pluginAppPolicyContext).toEqual(recoveredPolicyContext);
});
|
|
|
|
it("rebuilds a partial plugin app binding after another app from the same plugin recovers", async () => {
  // Scenario: the stored policy context lists two app ids for the
  // "google-calendar" plugin under a "partial" fingerprint. The builder now
  // returns a two-calendar-app config; a thread/start with that config is
  // expected, and the stored binding should be updated to match.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const storedPolicyContext = {
    ...createPluginAppPolicyContext(),
    pluginAppIds: {
      "google-calendar": ["google-calendar-app", "google-calendar-secondary-app"],
    },
  };
  await writeExistingBinding(sessionPath, workspaceRoot, {
    dynamicToolsFingerprint: "[]",
    pluginAppsFingerprint: "plugin-apps-partial",
    pluginAppsInputFingerprint: "plugin-apps-input-1",
    pluginAppPolicyContext: storedPolicyContext,
  });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-recovered");
  });
  const recoveredPolicyContext = createTwoCalendarAppPolicyContext();
  const buildPluginThreadConfig = vi.fn(async () => ({
    enabled: true,
    configPatch: createTwoCalendarAppConfigPatch(),
    fingerprint: "plugin-apps-config-calendar-2",
    inputFingerprint: "plugin-apps-input-1",
    policyContext: recoveredPolicyContext,
    diagnostics: [],
  }));

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      enabledPluginConfigKeys: ["google-calendar"],
      build: buildPluginThreadConfig,
    },
  });

  expect(buildPluginThreadConfig).toHaveBeenCalledTimes(1);
  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/start"]);
  const [startCall] = recorded;
  expect(startCall?.[1].config).toEqual({
    ...createTwoCalendarAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
  const storedBinding = await readCodexAppServerBinding(sessionPath);
  expect(storedBinding?.threadId).toBe("thread-recovered");
  expect(storedBinding?.pluginAppsFingerprint).toBe("plugin-apps-config-calendar-2");
  expect(storedBinding?.pluginAppPolicyContext).toEqual(recoveredPolicyContext);
});
|
|
|
|
it("starts a new configured thread for legacy bindings missing plugin app metadata", async () => {
  // Scenario: the stored binding only has a dynamic tools fingerprint and no
  // plugin app fields. With plugin thread config enabled, a thread/start that
  // carries the plugin app config is expected, and the stored binding should
  // gain the plugin fingerprint and policy context.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionPath, workspaceRoot, { dynamicToolsFingerprint: "[]" });
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-plugins");
  });
  const policyContext = createPluginAppPolicyContext();

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [],
    appServer: appServerOptions,
    pluginThreadConfig: {
      enabled: true,
      inputFingerprint: "plugin-apps-input-1",
      build: async () => ({
        enabled: true,
        configPatch: createPluginAppConfigPatch(),
        fingerprint: "plugin-apps-config-1",
        inputFingerprint: "plugin-apps-input-1",
        policyContext,
        diagnostics: [],
      }),
    },
  });

  const recorded = request.mock.calls as unknown as Array<[string, { config?: unknown }]>;
  expect(recorded.map((call) => call[0])).toEqual(["thread/start"]);
  const [startCall] = recorded;
  expect(startCall?.[1].config).toEqual({
    ...createPluginAppConfigPatch(),
    "features.code_mode": true,
    "features.code_mode_only": false,
  });
  const storedBinding = await readCodexAppServerBinding(sessionPath);
  expect(storedBinding?.threadId).toBe("thread-plugins");
  expect(storedBinding?.pluginAppsFingerprint).toBe("plugin-apps-config-1");
  expect(storedBinding?.pluginAppPolicyContext).toEqual(policyContext);
});
|
|
|
|
it("starts a new Codex thread when dynamic tool schemas change", async () => {
  // Scenario: the same session is run twice with a message tool whose action
  // list differs between runs. Both runs are expected to issue thread/start
  // (the mock rejects thread/resume), and the second binding should point at
  // the second thread id handed out by the mock.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  const attemptParams = createParams(sessionPath, workspaceRoot);
  const appServerOptions = createThreadLifecycleAppServerOptions();
  let startedThreads = 0;
  const request = vi.fn(async (method: string) => {
    if (method !== "thread/start") {
      throw new Error(`unexpected method: ${method}`);
    }
    // Each started thread gets the next sequential id: thread-1, thread-2, ...
    startedThreads += 1;
    return threadStartResult(`thread-${startedThreads}`);
  });

  await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [createMessageDynamicTool("Send and manage messages.", ["send"])],
    appServer: appServerOptions,
  });
  const secondBinding = await startOrResumeThread({
    client: { request } as never,
    params: attemptParams,
    cwd: workspaceRoot,
    dynamicTools: [createMessageDynamicTool("Send and manage messages.", ["send", "read"])],
    appServer: appServerOptions,
  });

  expect(secondBinding.threadId).toBe("thread-2");
  const requestedMethods = request.mock.calls.map(([method]) => method);
  expect(requestedMethods).toEqual(["thread/start", "thread/start"]);
});
|
|
|
|
it("passes configured app-server policy, sandbox, service tier, and model on resume", async () => {
  // Scenario: a binding stored with model "gpt-5.2" is resumed with explicit
  // app-server plugin config. The assertions expect the resume and turn
  // requests to use model "gpt-5.4-codex" and the configured approval and
  // sandbox values, and the configured "fast" tier to be sent as "priority".
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionPath, workspaceRoot, { model: "gpt-5.2" });
  const { requests, waitForMethod, completeTurn } = createResumeHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionPath, workspaceRoot), {
    pluginConfig: {
      appServer: {
        approvalPolicy: "on-request",
        approvalsReviewer: "guardian_subagent",
        sandbox: "danger-full-access",
        serviceTier: "fast",
      },
    },
  });
  // Let the attempt reach its turn, then finish the turn so the run resolves.
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  expectResumeRequest(requests, {
    threadId: "thread-existing",
    model: "gpt-5.4-codex",
    approvalPolicy: "on-request",
    approvalsReviewer: "guardian_subagent",
    sandbox: "danger-full-access",
    serviceTier: "priority",
    persistExtendedHistory: true,
  });

  const resumeEntry = requests.find((entry) => entry.method === "thread/resume");
  const resumeFields = resumeEntry?.params as Record<string, unknown> | undefined;
  const resumeConfig = resumeFields?.config as Record<string, unknown> | undefined;
  expect(resumeConfig?.["features.hooks"]).toBe(true);
  expect(resumeConfig?.["features.code_mode"]).toBe(true);
  expect(resumeConfig?.["features.code_mode_only"]).toBe(false);
  expect(resumeFields?.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);

  const turnEntry = requests.find((entry) => entry.method === "turn/start");
  const turnFields = turnEntry?.params as Record<string, unknown> | undefined;
  expect(turnFields?.approvalPolicy).toBe("on-request");
  expect(turnFields?.approvalsReviewer).toBe("guardian_subagent");
  expect(turnFields?.sandboxPolicy).toEqual({ type: "dangerFullAccess" });
  expect(turnFields?.serviceTier).toBe("priority");
  expect(turnFields?.model).toBe("gpt-5.4-codex");
});
|
|
|
|
it("maps active OpenClaw sandbox egress into Codex workspace-write turns", () => {
  // Exercises the sandbox-policy resolver against several OpenClaw sandbox
  // descriptors and pins the expected Codex policy (or `undefined`) for each.
  const appServer = resolveCodexAppServerRuntimeOptions({
    pluginConfig: {
      appServer: {
        approvalPolicy: "never",
        sandbox: "danger-full-access",
      },
    },
  });
  const workspace = "/tmp/workspace";
  // Expected workspace-write policy; cases differ only in roots and network access.
  const workspaceWrite = (networkAccess: boolean, extraRoots: string[] = []) => ({
    type: "workspaceWrite",
    writableRoots: [workspace, ...extraRoots],
    networkAccess,
    excludeTmpdirEnvVar: false,
    excludeSlashTmp: false,
  });

  // Docker backend with network "none".
  const isolatedDocker = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    {
      enabled: true,
      backendId: "docker",
      docker: { network: "none" },
    } as never,
    workspace,
  );
  expect(isolatedDocker).toEqual(workspaceWrite(false));

  // Docker backend with network "bridge" and an explicit workspace-write sandbox.
  const bridgedDocker = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    { ...appServer, sandbox: "workspace-write" },
    {
      enabled: true,
      backendId: "docker",
      docker: { network: "bridge" },
    } as never,
    workspace,
  );
  expect(bridgedDocker).toEqual(workspaceWrite(true));

  // Docker binds: only the `:rw` bind's host path is expected as an extra root.
  const dockerWithBinds = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    {
      enabled: true,
      backendId: "docker",
      docker: {
        network: "bridge",
        binds: [
          "/tmp/openclaw-writable-data:/data:rw",
          "/tmp/openclaw-readonly-data:/readonly:ro",
        ],
      },
    } as never,
    workspace,
  );
  expect(dockerWithBinds).toEqual(
    workspaceWrite(true, [path.resolve("/tmp/openclaw-writable-data")]),
  );

  // Non-docker backend ("ssh").
  const sshBackend = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    {
      enabled: true,
      backendId: "ssh",
    } as never,
    workspace,
  );
  expect(sshBackend).toEqual(workspaceWrite(true));

  // No OpenClaw sandbox: nothing to map.
  const withoutSandbox = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    appServer,
    null,
    workspace,
  );
  expect(withoutSandbox).toBeUndefined();

  // A read-only Codex sandbox is expected to yield no policy either.
  const readOnlySandbox = testing.resolveCodexAppServerSandboxPolicyForOpenClawSandbox(
    { ...appServer, sandbox: "read-only" },
    { enabled: true } as never,
    workspace,
  );
  expect(readOnlySandbox).toBeUndefined();
});
|
|
|
|
it("passes current Codex service tier request values through app-server resume and turn requests", async () => {
  // Scenario: the plugin config already uses the "priority" service tier; both
  // the thread/resume and turn/start requests are expected to carry it as-is.
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspaceRoot = path.join(tempDir, "workspace");
  await writeExistingBinding(sessionPath, workspaceRoot, { model: "gpt-5.2" });
  const { requests, waitForMethod, completeTurn } = createResumeHarness();

  const attempt = runCodexAppServerAttempt(createParams(sessionPath, workspaceRoot), {
    pluginConfig: {
      appServer: {
        approvalPolicy: "on-request",
        sandbox: "danger-full-access",
        serviceTier: "priority",
      },
    },
  });
  // Let the attempt reach its turn, then finish the turn so the run resolves.
  await waitForMethod("turn/start");
  await completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  const resumeEntry = requests.find((entry) => entry.method === "thread/resume");
  const resumeFields = resumeEntry?.params as Record<string, unknown> | undefined;
  expect(resumeFields?.serviceTier).toBe("priority");

  const turnEntry = requests.find((entry) => entry.method === "turn/start");
  const turnFields = turnEntry?.params as Record<string, unknown> | undefined;
  expect(turnFields?.serviceTier).toBe("priority");
});
|
|
|
|
it("keys plugin app inventory by websocket credentials without exposing them", () => {
  // Two websocket endpoints that differ only in credentials are expected to
  // resolve to different cache endpoints, neither of which contains the raw
  // token or Authorization header value.
  const endpointFor = (authToken: string, authorization: string) =>
    resolveCodexPluginAppCacheEndpoint({
      start: {
        transport: "websocket",
        command: "codex",
        args: [],
        url: "ws://127.0.0.1:39175",
        authToken,
        headers: { Authorization: authorization },
      },
    });

  const first = endpointFor("token-first", "Bearer first");
  const second = endpointFor("token-second", "Bearer second");

  expect(first).not.toEqual(second);
  for (const secret of ["token-first", "Bearer first"]) {
    expect(first).not.toContain(secret);
  }
  for (const secret of ["token-second", "Bearer second"]) {
    expect(second).not.toContain(secret);
  }
});
|
|
|
|
it("redacts plugin thread config eligibility log data", () => {
  // Builds eligibility log data from app-server options that contain secrets
  // (auth token, header values, env values) and checks that the log data keeps
  // the expected non-secret fields while none of the secret strings appear in
  // its JSON serialization.
  const appServerOptions = {
    start: {
      transport: "websocket" as const,
      command: "codex",
      commandSource: "config" as const,
      args: [],
      url: "ws://127.0.0.1:39175",
      authToken: "token-secret",
      headers: {
        Authorization: "Bearer secret",
        "X-Test-Token": "header-secret",
      },
      env: {
        CODEX_HOME: "/tmp/codex-home",
        OPENAI_API_KEY: "env-secret",
      },
    },
    codeModeOnly: false,
    requestTimeoutMs: 60_000,
    turnCompletionIdleTimeoutMs: 60_000,
    approvalPolicy: "never" as const,
    approvalsReviewer: "user" as const,
    sandbox: "danger-full-access" as const,
    serviceTier: "priority" as const,
  };
  const pluginPolicy = resolveCodexPluginsPolicy({
    codexPlugins: {
      enabled: true,
      plugins: {
        "google-calendar": {
          marketplaceName: "openai-curated",
          pluginName: "google-calendar",
        },
      },
    },
  });
  const appCacheKey = buildCodexPluginAppCacheKey({
    appServer: appServerOptions,
    agentDir: "/tmp/agent",
    authProfileId: "openai-codex:work",
    accountId: "account-work",
    envApiKeyFingerprint: "env-key",
  });

  const eligibilityLog = testing.buildCodexPluginThreadConfigEligibilityLogData({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    pluginThreadConfigRequired: true,
    resolvedPluginPolicy: pluginPolicy,
    enabledPluginConfigKeys: ["google-calendar"],
    pluginAppCacheKey: appCacheKey,
    startupAuthProfileId: "openai-codex:work",
    appServer: appServerOptions,
  });

  expect(eligibilityLog).toEqual(
    expect.objectContaining({
      sessionId: "session-1",
      sessionKey: "agent:main:session-1",
      enabled: true,
      policyConfigured: true,
      policyEnabled: true,
      pluginConfigKeys: ["google-calendar"],
      enabledPluginConfigKeys: ["google-calendar"],
      appCacheKeyFingerprint: expect.stringMatching(/^sha256:/),
      authProfileId: "openai-codex:work",
      appServerTransport: "websocket",
      appServerCommandSource: "config",
    }),
  );
  expect(eligibilityLog).not.toHaveProperty("appCacheKeyInput");

  // None of the secret-bearing inputs may leak into the serialized log data.
  const serialized = JSON.stringify(eligibilityLog);
  const forbiddenFragments = [
    "token-secret",
    "Bearer secret",
    "header-secret",
    "env-secret",
    "/tmp/codex-home",
  ];
  for (const fragment of forbiddenFragments) {
    expect(serialized).not.toContain(fragment);
  }
});
|
|
|
|
it("builds resume and turn params from the currently selected OpenClaw model", () => {
  // Pins the exact shape of the thread/resume params and the key fields of the
  // turn/start params produced for a stdio app-server configuration.
  const workspace = "/tmp/workspace";
  const threadId = "thread-1";
  const attemptParams = createParams("/tmp/session.jsonl", workspace);
  const appServer = {
    start: {
      transport: "stdio" as const,
      command: "codex",
      args: ["app-server", "--listen", "stdio://"],
      headers: {},
    },
    codeModeOnly: false,
    requestTimeoutMs: 60_000,
    turnCompletionIdleTimeoutMs: 60_000,
    approvalPolicy: "on-request" as const,
    approvalsReviewer: "guardian_subagent" as const,
    sandbox: "danger-full-access" as const,
    serviceTier: "flex" as const,
  };

  const resumeParams = buildThreadResumeParams(attemptParams, { threadId, appServer });
  expect(resumeParams).toEqual({
    threadId: "thread-1",
    model: "gpt-5.4-codex",
    approvalPolicy: "on-request",
    approvalsReviewer: "guardian_subagent",
    config: {
      "features.code_mode": true,
      "features.code_mode_only": false,
    },
    sandbox: "danger-full-access",
    serviceTier: "flex",
    // Compared against itself; its content is only checked by the assertion below.
    developerInstructions: resumeParams.developerInstructions,
    persistExtendedHistory: true,
  });
  expect(resumeParams.developerInstructions).not.toContain(CODEX_GPT5_BEHAVIOR_CONTRACT);

  const turnParams = buildTurnStartParams(attemptParams, {
    threadId,
    cwd: workspace,
    appServer,
  });
  expect(turnParams.threadId).toBe("thread-1");
  expect(turnParams.cwd).toBe("/tmp/workspace");
  expect(turnParams.model).toBe("gpt-5.4-codex");
  expect(turnParams.approvalPolicy).toBe("on-request");
  expect(turnParams.approvalsReviewer).toBe("guardian_subagent");
  expect(turnParams.sandboxPolicy).toEqual({ type: "dangerFullAccess" });
  expect(turnParams.serviceTier).toBe("flex");
  expect(turnParams.collaborationMode).toEqual({
    mode: "default",
    settings: {
      model: "gpt-5.4-codex",
      reasoning_effort: "medium",
      developer_instructions: null,
    },
  });
});
|
|
|
|
it("uses turn-scoped collaboration instructions for heartbeat Codex turns", () => {
  // With trigger "heartbeat" the collaboration mode is expected to carry
  // heartbeat developer instructions, including the caller-supplied
  // HEARTBEAT.md note. With trigger "user" the same options are expected to
  // produce null developer instructions.
  const heartbeatNote =
    "HEARTBEAT.md exists at /tmp/workspace/HEARTBEAT.md. Read it before proceeding.";
  const attemptParams = createParams("/tmp/session.jsonl", "/tmp/workspace");
  attemptParams.trigger = "heartbeat";

  const { mode, settings } = buildTurnCollaborationMode(attemptParams, {
    heartbeatCollaborationInstructions: heartbeatNote,
  });
  expect(mode).toBe("default");
  expect(settings.model).toBe("gpt-5.4-codex");
  expect(settings.reasoning_effort).toBe("medium");
  const expectedFragments = [
    "This is an OpenClaw heartbeat turn. Apply these instructions only to this heartbeat wake",
    "Use heartbeats to create useful proactive progress",
    "If `heartbeat_respond` is not already available and `tool_search` is available",
    "HEARTBEAT.md exists at /tmp/workspace/HEARTBEAT.md.",
  ];
  for (const fragment of expectedFragments) {
    expect(settings.developer_instructions).toContain(fragment);
  }

  // Same params object, now marked as a user-triggered turn.
  attemptParams.trigger = "user";
  const userTurnMode = buildTurnCollaborationMode(attemptParams, {
    heartbeatCollaborationInstructions: heartbeatNote,
  });
  expect(userTurnMode.settings.developer_instructions).toBeNull();
});
|
|
|
|
it("uses turn-scoped collaboration instructions for cron Codex turns", () => {
  const params = createParams("/tmp/session.jsonl", "/tmp/workspace");
  params.trigger = "cron";

  // No options argument is passed for the cron case.
  const { mode, settings } = buildTurnCollaborationMode(params);

  expect(mode).toBe("default");
  expect(settings.model).toBe("gpt-5.4-codex");
  expect(settings.reasoning_effort).toBe("medium");

  // Each fragment must appear somewhere in the generated developer instructions.
  const expectedFragments = [
    "This is an OpenClaw cron automation turn",
    "If it asks you to run an exact command, run that command before doing any investigation",
    "Use context already provided by the runtime",
  ];
  for (const fragment of expectedFragments) {
    expect(settings.developer_instructions).toContain(fragment);
  }
});
|
|
|
|
it("preserves the bound auth profile when resume params omit authProfileId", async () => {
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspacePath = path.join(tempDir, "workspace");

  // Seed a binding that already carries an auth profile.
  await writeExistingBinding(sessionPath, workspacePath, {
    authProfileId: "openai-codex:bound",
  });

  // Run params deliberately omit authProfileId.
  const params = createParams(sessionPath, workspacePath);
  delete params.authProfileId;
  params.agentDir = path.join(tempDir, "agent");

  // Stub client: only "thread/resume" is answered; any other method fails the test.
  const request = async (method: string) => {
    if (method !== "thread/resume") {
      throw new Error(`unexpected method: ${method}`);
    }
    return threadStartResult("thread-existing");
  };

  const binding = await startOrResumeThread({
    client: { request } as never,
    params,
    cwd: workspacePath,
    dynamicTools: [],
    appServer: {
      start: {
        transport: "stdio",
        command: "codex",
        args: ["app-server"],
        headers: {},
      },
      codeModeOnly: false,
      requestTimeoutMs: 60_000,
      turnCompletionIdleTimeoutMs: 60_000,
      approvalPolicy: "never",
      approvalsReviewer: "user",
      sandbox: "workspace-write",
    },
  });

  // The returned binding must still report the seeded profile.
  expect(binding.authProfileId).toBe("openai-codex:bound");
});
|
|
|
|
it("reuses the bound auth profile for app-server startup when params omit it", async () => {
  const sessionPath = path.join(tempDir, "session.jsonl");
  const workspacePath = path.join(tempDir, "workspace");
  const agentDirPath = path.join(tempDir, "agent");

  // Seed a binding that already carries an auth profile and a dynamic-tools fingerprint.
  await writeExistingBinding(sessionPath, workspacePath, {
    authProfileId: "openai-codex:bound",
    dynamicToolsFingerprint: "[]",
  });

  // Arguments observed by the harness `onStart` callback, in call order.
  const startedAuthProfileIds: Array<string | undefined> = [];
  const startedAgentDirs: Array<string | undefined> = [];

  const { requests, waitForMethod, completeTurn } = createAppServerHarness(
    async (method: string) => {
      switch (method) {
        case "thread/resume":
          return threadStartResult("thread-existing");
        case "turn/start":
          return turnStartResult();
        default:
          throw new Error(`unexpected method: ${method}`);
      }
    },
    {
      onStart: (authProfileId, agentDir) => {
        startedAuthProfileIds.push(authProfileId);
        startedAgentDirs.push(agentDir);
      },
    },
  );

  // Run params deliberately omit authProfileId.
  const params = createParams(sessionPath, workspacePath);
  delete params.authProfileId;
  params.agentDir = agentDirPath;

  const attempt = runCodexAppServerAttempt(params);

  // Poll until onStart has been observed with the bound profile.
  await vi.waitFor(() => expect(startedAuthProfileIds).toEqual(["openai-codex:bound"]), {
    interval: 1,
  });
  await waitForMethod("turn/start");
  // Yield one setImmediate tick before completing the turn.
  await new Promise<void>((done) => setImmediate(done));
  await completeTurn({ threadId: "thread-existing", turnId: "turn-1" });
  await attempt;

  // onStart must have been recorded exactly once, with the bound profile and the agent dir.
  expect(startedAuthProfileIds).toEqual(["openai-codex:bound"]);
  expect(startedAgentDirs).toEqual([agentDirPath]);
  const requestedMethods = requests.map(({ method }) => method);
  expect(requestedMethods).toContain("turn/start");
});
|
|
});
|