mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:50:43 +00:00
fix: retry codex app-server startup closes
This commit is contained in:
@@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Memory Wiki: accept relative Markdown links that include the `.md` suffix during broken-wikilink validation, avoiding false positives for native render-mode links. Thanks @Kenneth8128.
|
||||
- OpenAI Codex: show the device-pairing code in the interactive SSH/headless prompt while keeping the short-lived code out of persistent runtime logs. Fixes #74212. Thanks @da22le123.
|
||||
- QA Lab: stop gateway children when the suite parent disappears, so interrupted local QA runs cannot leave hot orphaned gateways behind.
|
||||
- Codex/app-server: tolerate a second connection close during startup recovery and include retry counts plus stringified errors in the restart warning, so concurrent lanes do not fail after one shared-client race.
|
||||
- Plugins/CLI: cache plugin CLI registration entries per command program so completion state generation does not repeat the full plugin sweep in one invocation. Thanks @ScientificProgrammer.
|
||||
- Plugins: reuse gateway-bindable plugin loader cache entries for later default-mode loads without serving default-built registries to gateway-bound requests, reducing repeated plugin registration during dispatch. Refs #61756. Thanks @DmitryPogodaev.
|
||||
- Gateway/secrets: include the caught error message in `secrets.reload` and `secrets.resolve` warning logs while keeping RPC errors generic, so operators can diagnose reload and permission failures. Thanks @davidangularme.
|
||||
|
||||
@@ -2017,6 +2017,58 @@ describe("runCodexAppServerAttempt", () => {
|
||||
expect(requests).toEqual([["thread/resume"], ["thread/resume", "turn/start"]]);
|
||||
});
|
||||
|
||||
// Regression test: the first two app-server clients reject "thread/resume" with a
// "client is closed" error; the run is expected to succeed on the third client.
it("tolerates a second app-server close while retrying startup", async () => {
|
||||
const sessionFile = path.join(tempDir, "session.jsonl");
|
||||
const workspaceDir = path.join(tempDir, "workspace");
|
||||
await writeExistingBinding(sessionFile, workspaceDir, { dynamicToolsFingerprint: "[]" });
|
||||
// One inner array per client created by the factory, holding the methods it was asked for.
const requests: string[][] = [];
|
||||
// Counts factory invocations so each fake client knows its creation index.
let starts = 0;
|
||||
// Replaced by whatever handler the code under test registers via addNotificationHandler.
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
|
||||
__testing.setCodexAppServerClientFactoryForTests(async () => {
|
||||
const startIndex = starts++;
|
||||
const methods: string[] = [];
|
||||
requests.push(methods);
|
||||
return {
|
||||
request: vi.fn(async (method: string) => {
|
||||
methods.push(method);
|
||||
// Clients with creation index 0 and 1 fail the resume call with a closed-client error.
if (method === "thread/resume" && startIndex < 2) {
|
||||
throw new Error("codex app-server client is closed");
|
||||
}
|
||||
if (method === "thread/resume") {
|
||||
return threadStartResult("thread-existing");
|
||||
}
|
||||
if (method === "turn/start") {
|
||||
return turnStartResult();
|
||||
}
|
||||
return {};
|
||||
}),
|
||||
// Capture the handler so the test can deliver notifications itself; the returned
// function is a no-op.
addNotificationHandler: (handler: typeof notify) => {
|
||||
notify = handler;
|
||||
return () => undefined;
|
||||
},
|
||||
addRequestHandler: () => () => undefined,
|
||||
} as never;
|
||||
});
|
||||
|
||||
const run = runCodexAppServerAttempt(createParams(sessionFile, workspaceDir));
|
||||
// Wait until the third client (index 2) has been asked to start a turn.
await vi.waitFor(() => expect(requests[2]).toContain("turn/start"), { interval: 1 });
|
||||
// Deliver a "turn/completed" notification through the captured handler.
await notify({
|
||||
method: "turn/completed",
|
||||
params: {
|
||||
threadId: "thread-existing",
|
||||
turnId: "turn-1",
|
||||
turn: { id: "turn-1", status: "completed" },
|
||||
},
|
||||
});
|
||||
|
||||
await expect(run).resolves.toMatchObject({ aborted: false });
|
||||
// The two failed clients only saw "thread/resume"; the third resumed and then started the turn.
expect(requests).toEqual([
|
||||
["thread/resume"],
|
||||
["thread/resume"],
|
||||
["thread/resume", "turn/start"],
|
||||
]);
|
||||
});
|
||||
|
||||
it("passes native hook relay config on thread start and resume", async () => {
|
||||
const sessionFile = path.join(tempDir, "session.jsonl");
|
||||
const workspaceDir = path.join(tempDir, "workspace");
|
||||
|
||||
@@ -95,6 +95,7 @@ import { createCodexUserInputBridge } from "./user-input-bridge.js";
|
||||
import { filterToolsForVisionInputs } from "./vision-tools.js";
|
||||
|
||||
const CODEX_DYNAMIC_TOOL_TIMEOUT_MS = 30_000;
|
||||
// Upper bound for the startup retry loop's `attempt` counter (which starts at 1): when a
// connection-closed error occurs on the attempt equal to this value, the loop logs
// "retries exhausted" and rethrows instead of retrying.
const CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS = 3;
|
||||
const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000;
|
||||
const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000;
|
||||
const CODEX_STEER_ALL_DEBOUNCE_MS = 500;
|
||||
@@ -543,24 +544,51 @@ export async function runCodexAppServerAttempt(
|
||||
});
|
||||
return { client: startupClient, thread: startupThread };
|
||||
};
|
||||
try {
|
||||
return await startupAttempt();
|
||||
} catch (error) {
|
||||
if (runAbortController.signal.aborted || !isCodexAppServerConnectionClosedError(error)) {
|
||||
throw error;
|
||||
for (
|
||||
let attempt = 1;
|
||||
attempt <= CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS;
|
||||
attempt += 1
|
||||
) {
|
||||
try {
|
||||
return await startupAttempt();
|
||||
} catch (error) {
|
||||
if (
|
||||
runAbortController.signal.aborted ||
|
||||
!isCodexAppServerConnectionClosedError(error)
|
||||
) {
|
||||
throw error;
|
||||
}
|
||||
const failedClient = attemptedClient;
|
||||
const clearedSharedClient = clearSharedCodexAppServerClientIfCurrent(failedClient);
|
||||
if (startupClientForCleanup === failedClient) {
|
||||
startupClientForCleanup = undefined;
|
||||
}
|
||||
attemptedClient = undefined;
|
||||
if (attempt >= CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS) {
|
||||
embeddedAgentLog.warn(
|
||||
"codex app-server connection closed during startup; retries exhausted",
|
||||
{
|
||||
attempt,
|
||||
maxAttempts: CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS,
|
||||
clearedSharedClient,
|
||||
error: formatErrorMessage(error),
|
||||
},
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
embeddedAgentLog.warn(
|
||||
"codex app-server connection closed during startup; restarting app-server and retrying",
|
||||
{
|
||||
attempt,
|
||||
nextAttempt: attempt + 1,
|
||||
maxAttempts: CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS,
|
||||
clearedSharedClient,
|
||||
error: formatErrorMessage(error),
|
||||
},
|
||||
);
|
||||
}
|
||||
embeddedAgentLog.warn(
|
||||
"codex app-server connection closed during startup; restarting app-server and retrying",
|
||||
{ error },
|
||||
);
|
||||
const failedClient = attemptedClient;
|
||||
clearSharedCodexAppServerClientIfCurrent(failedClient);
|
||||
if (startupClientForCleanup === failedClient) {
|
||||
startupClientForCleanup = undefined;
|
||||
}
|
||||
attemptedClient = undefined;
|
||||
return await startupAttempt();
|
||||
}
|
||||
throw new Error("codex app-server startup retry loop exited unexpectedly");
|
||||
},
|
||||
}));
|
||||
startupClientForCleanup = undefined;
|
||||
|
||||
Reference in New Issue
Block a user