fix: restart closed codex app-server clients

This commit is contained in:
Peter Steinberger
2026-05-02 07:38:59 +01:00
parent d3f9bed1c3
commit 33a26cd807
5 changed files with 133 additions and 20 deletions

View File

@@ -28,6 +28,7 @@ Docs: https://docs.openclaw.ai
- Active Memory: use the configured recall timeout as the blocking prompt-build hook budget by default and move cold-start setup grace behind explicit `setupGraceTimeoutMs` config, so the plugin no longer silently extends 15000 ms configs to 45000 ms on the main lane. Fixes #75843. Thanks @vishutdhar.
- Agents/sandbox: preserve existing workspace file modes when sandbox edits atomically replace files, so 0644 files do not collapse to 0600 after Write/Edit/apply_patch. Fixes #44077. Thanks @patosullivan.
- Agents/models: keep legacy CLI runtime model refs such as `claude-cli/*` in the configured allowlist after canonical runtime migration, so cron `payload.model` overrides keep working. Fixes #75753. Thanks @RyanSandoval.
- Codex/app-server: restart the shared Codex app-server client once when it closes while resuming a thread during startup, preserving the existing thread binding instead of falling back to `thread/start` on the closed client. Thanks @vincentkoc.
- Gateway/watch: keep colored subsystem log prefixes in the managed tmux pane even when the parent shell exports `NO_COLOR`, while preserving explicit `FORCE_COLOR=0` opt-out. Thanks @vincentkoc.
- Agents/compaction: submit a non-empty runtime-event marker for pre-compaction memory flush turns, so strict Anthropic providers no longer reject the silent flush as an empty user message. Fixes #75305. Thanks @sableassistant3777-source.
- Plugin SDK: re-export `isPrivateIpAddress` from `plugin-sdk/ssrf-runtime`, restoring source-checkout builds for SearXNG and Firecrawl private-network guards. Thanks @vincentkoc.

View File

@@ -48,6 +48,16 @@ export class CodexAppServerRpcError extends Error {
}
}
/**
 * Reports whether `error` indicates that the Codex app-server connection is gone.
 *
 * Only `Error` instances are considered. The message must either equal
 * "codex app-server client is closed" exactly or begin with
 * "codex app-server exited:"; anything else yields `false`.
 *
 * @param error - Any thrown value.
 * @returns `true` when the error message matches one of the two closed-connection forms.
 */
export function isCodexAppServerConnectionClosedError(error: unknown): boolean {
  if (error instanceof Error) {
    if (error.message === "codex app-server client is closed") {
      return true;
    }
    return error.message.startsWith("codex app-server exited:");
  }
  return false;
}
/**
 * Callback signature for handling a single RPC request.
 *
 * The handler receives the request's `id` and `method` (both required) plus
 * optional `params`, and may answer with a `JsonValue` or `undefined`, either
 * synchronously or through a promise.
 * NOTE(review): presumably used for requests initiated by the Codex app-server — confirm against the client.
 */
type CodexServerRequestHandler = (
request: Required<Pick<RpcRequest, "id" | "method">> & { params?: JsonValue },
) => Promise<JsonValue | undefined> | JsonValue | undefined;

View File

@@ -25,7 +25,7 @@ import { CODEX_GPT5_BEHAVIOR_CONTRACT } from "../../prompt-overlay.js";
import * as elicitationBridge from "./elicitation-bridge.js";
import type { CodexServerNotification } from "./protocol.js";
import { runCodexAppServerAttempt, __testing } from "./run-attempt.js";
import { writeCodexAppServerBinding } from "./session-binding.js";
import { readCodexAppServerBinding, writeCodexAppServerBinding } from "./session-binding.js";
import { createCodexTestModel } from "./test-support.js";
import {
buildThreadResumeParams,
@@ -1941,6 +1941,82 @@ describe("runCodexAppServerAttempt", () => {
expect(request.mock.calls.map(([method]) => method)).toEqual(["thread/start", "thread/resume"]);
});
it("preserves the binding when the app-server closes during thread resume", async () => {
  // Seed an existing binding for the session before attempting to resume.
  const workspaceDir = path.join(tempDir, "workspace");
  const sessionFile = path.join(tempDir, "session.jsonl");
  await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
  const appServer = createThreadLifecycleAppServerOptions();

  // Every request rejects; only thread/resume rejects with the closed-client message.
  const request = vi.fn(async (method: string) => {
    const message =
      method === "thread/resume"
        ? "codex app-server client is closed"
        : `unexpected method: ${method}`;
    throw new Error(message);
  });

  const resumeAttempt = startOrResumeThread({
    client: { request } as never,
    params: createParams(sessionFile, workspaceDir),
    cwd: workspaceDir,
    dynamicTools: [],
    appServer,
  });
  await expect(resumeAttempt).rejects.toThrow("codex app-server client is closed");

  // The closed-client error must propagate without any further request being issued...
  const calledMethods = request.mock.calls.map(([method]) => method);
  expect(calledMethods).toEqual(["thread/resume"]);

  // ...and the stored binding must still point at the original thread.
  const binding = await readCodexAppServerBinding(sessionFile);
  expect(binding).toMatchObject({
    threadId: "thread-existing",
  });
});
// Verifies that a closed-client error on the first thread/resume makes
// runCodexAppServerAttempt request a second client from the factory and resume
// the same thread on it, without ever calling thread/start.
it("restarts the app-server once when a shared client closes during startup", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
// requests[i] collects the RPC methods received by the i-th client the factory created.
const requests: string[][] = [];
let starts = 0;
// Replaced below with whichever notification handler the run registers on a fake client.
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
__testing.setCodexAppServerClientFactoryForTests(async () => {
const startIndex = starts++;
const methods: string[] = [];
requests.push(methods);
return {
request: vi.fn(async (method: string) => {
methods.push(method);
// Only the first client fails: it reports the connection as closed on resume.
if (method === "thread/resume" && startIndex === 0) {
throw new Error("codex app-server client is closed");
}
// Later clients resume the existing thread successfully.
if (method === "thread/resume") {
return threadStartResult("thread-existing");
}
if (method === "turn/start") {
return turnStartResult();
}
return {};
}),
addNotificationHandler: (handler: typeof notify) => {
notify = handler;
return () => undefined;
},
addRequestHandler: () => () => undefined,
} as never;
});
const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
// Wait until the second client has been asked to start a turn before completing it.
await vi.waitFor(() => expect(requests[1]).toContain("turn/start"), { interval: 1 });
// Deliver the completion notification so the pending run can settle.
await notify({
method: "turn/completed",
params: {
threadId: "thread-existing",
turnId: "turn-1",
turn: { id: "turn-1", status: "completed" },
},
});
await expect(run).resolves.toMatchObject({ aborted: false });
// First client saw only the failed resume; second client resumed and started the turn.
expect(requests).toEqual([["thread/resume"], ["thread/resume", "turn/start"]]);
});
it("passes native hook relay config on thread start and resume", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");

View File

@@ -42,7 +42,11 @@ import {
createCodexAppServerClientFactoryTestHooks,
defaultCodexAppServerClientFactory,
} from "./client-factory.js";
import { isCodexAppServerApprovalRequest, type CodexAppServerClient } from "./client.js";
import {
isCodexAppServerApprovalRequest,
isCodexAppServerConnectionClosedError,
type CodexAppServerClient,
} from "./client.js";
import { ensureCodexComputerUse } from "./computer-use.js";
import {
readCodexPluginConfig,
@@ -512,23 +516,42 @@ export async function runCodexAppServerAttempt(
timeoutFloorMs: options.startupTimeoutFloorMs,
signal: runAbortController.signal,
operation: async () => {
const startupClient = await clientFactory(appServer.start, startupAuthProfileId, agentDir);
await ensureCodexComputerUse({
client: startupClient,
pluginConfig: options.pluginConfig,
timeoutMs: appServer.requestTimeoutMs,
signal: runAbortController.signal,
});
const startupThread = await startOrResumeThread({
client: startupClient,
params,
cwd: effectiveWorkspace,
dynamicTools: toolBridge.specs,
appServer,
developerInstructions: promptBuild.developerInstructions,
config: nativeHookRelayConfig,
});
return { client: startupClient, thread: startupThread };
const startupAttempt = async () => {
const startupClient = await clientFactory(
appServer.start,
startupAuthProfileId,
agentDir,
);
await ensureCodexComputerUse({
client: startupClient,
pluginConfig: options.pluginConfig,
timeoutMs: appServer.requestTimeoutMs,
signal: runAbortController.signal,
});
const startupThread = await startOrResumeThread({
client: startupClient,
params,
cwd: effectiveWorkspace,
dynamicTools: toolBridge.specs,
appServer,
developerInstructions: promptBuild.developerInstructions,
config: nativeHookRelayConfig,
});
return { client: startupClient, thread: startupThread };
};
try {
return await startupAttempt();
} catch (error) {
if (runAbortController.signal.aborted || !isCodexAppServerConnectionClosedError(error)) {
throw error;
}
embeddedAgentLog.warn(
"codex app-server connection closed during startup; restarting app-server and retrying",
{ error },
);
clearSharedCodexAppServerClient();
return await startupAttempt();
}
},
}));
emitCodexAppServerEvent(params, {

View File

@@ -4,7 +4,7 @@ import {
} from "openclaw/plugin-sdk/agent-harness-runtime";
import { renderCodexPromptOverlay } from "../../prompt-overlay.js";
import { isModernCodexModel } from "../../provider.js";
import type { CodexAppServerClient } from "./client.js";
import { isCodexAppServerConnectionClosedError, type CodexAppServerClient } from "./client.js";
import { codexSandboxPolicyForTurn, type CodexAppServerRuntimeOptions } from "./config.js";
import {
assertCodexThreadResumeResponse,
@@ -86,6 +86,9 @@ export async function startOrResumeThread(params: {
dynamicToolsFingerprint,
};
} catch (error) {
if (isCodexAppServerConnectionClosedError(error)) {
throw error;
}
embeddedAgentLog.warn("codex app-server thread resume failed; starting a new thread", {
error,
});