fix: codex and similar processes keep dying on pty, solved by refactoring process spawning (#14257)

* exec: clean up PTY resources on timeout and exit

* cli: harden resume cleanup and watchdog stalled runs

* cli: productionize PTY and resume reliability paths

* docs: add PTY process supervision architecture plan

* docs: rewrite PTY supervision plan as pre-rewrite baseline

* docs: switch PTY supervision plan to one-go execution

* docs: add one-line root cause to PTY supervision plan

* docs: add OS contracts and test matrix to PTY supervision plan

* docs: define process-supervisor package placement and scope

* docs: tie supervisor plan to existing CI lanes

* docs: place PTY supervisor plan under src/process

* refactor(process): route exec and cli runs through supervisor

* docs(process): refresh PTY supervision plan

* wip

* fix(process): harden supervisor timeout and PTY termination

* fix(process): harden supervisor adapters env and wait handling

* ci: avoid failing formal conformance on comment permissions

* test(ui): fix cron request mock argument typing

* fix(ui): remove leftover conflict marker

* fix: supervise PTY processes (#14257) (openclaw#14257) (thanks @onutc)
This commit is contained in:
Onur
2026-02-16 09:32:05 +08:00
committed by GitHub
parent a73e7786e7
commit cd44a0d01e
32 changed files with 2759 additions and 855 deletions

View File

@@ -1,5 +1,9 @@
import type { OpenClawConfig } from "../config/config.js";
import type { CliBackendConfig } from "../config/types.js";
import {
CLI_FRESH_WATCHDOG_DEFAULTS,
CLI_RESUME_WATCHDOG_DEFAULTS,
} from "./cli-watchdog-defaults.js";
import { normalizeProviderId } from "./model-selection.js";
export type ResolvedCliBackend = {
@@ -49,6 +53,12 @@ const DEFAULT_CLAUDE_BACKEND: CliBackendConfig = {
systemPromptMode: "append",
systemPromptWhen: "first",
clearEnv: ["ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY_OLD"],
reliability: {
watchdog: {
fresh: { ...CLI_FRESH_WATCHDOG_DEFAULTS },
resume: { ...CLI_RESUME_WATCHDOG_DEFAULTS },
},
},
serialize: true,
};
@@ -73,6 +83,12 @@ const DEFAULT_CODEX_BACKEND: CliBackendConfig = {
sessionMode: "existing",
imageArg: "--image",
imageMode: "repeat",
reliability: {
watchdog: {
fresh: { ...CLI_FRESH_WATCHDOG_DEFAULTS },
resume: { ...CLI_RESUME_WATCHDOG_DEFAULTS },
},
},
serialize: true,
};
@@ -96,6 +112,10 @@ function mergeBackendConfig(base: CliBackendConfig, override?: CliBackendConfig)
if (!override) {
return { ...base };
}
const baseFresh = base.reliability?.watchdog?.fresh ?? {};
const baseResume = base.reliability?.watchdog?.resume ?? {};
const overrideFresh = override.reliability?.watchdog?.fresh ?? {};
const overrideResume = override.reliability?.watchdog?.resume ?? {};
return {
...base,
...override,
@@ -106,6 +126,22 @@ function mergeBackendConfig(base: CliBackendConfig, override?: CliBackendConfig)
sessionIdFields: override.sessionIdFields ?? base.sessionIdFields,
sessionArgs: override.sessionArgs ?? base.sessionArgs,
resumeArgs: override.resumeArgs ?? base.resumeArgs,
reliability: {
...base.reliability,
...override.reliability,
watchdog: {
...base.reliability?.watchdog,
...override.reliability?.watchdog,
fresh: {
...baseFresh,
...overrideFresh,
},
resume: {
...baseResume,
...overrideResume,
},
},
},
};
}