fix: code/cli acpx reliability 20260304 (#34020)

* agents: switch claude-cli defaults to bypassPermissions

* agents: add claude-cli default args coverage

* agents: emit watchdog stall system event for cli runs

* agents: test cli watchdog stall system event

* acpx: fallback to sessions new when ensure returns no ids

* acpx tests: mock sessions new fallback path

* acpx tests: cover ensure-empty fallback flow

* skills: clarify claude print mode without pty

* docs: update cli-backends claude default args

* docs: refresh cli live test default args

* gateway tests: align live claude args defaults

* changelog: credit claude/acpx reliability fixes

* Agents: normalize legacy Claude permission flag overrides

* Tests: cover legacy Claude permission override normalization

* Changelog: note legacy Claude permission flag auto-normalization

* ACPX: fail fast when ensure/new return no session IDs

* ACPX tests: support empty sessions new fixture output

* ACPX tests: assert ensureSession failure when IDs missing

* CLI runner: scope watchdog heartbeat wake to session

* CLI runner tests: assert session-scoped watchdog wake

* Update CHANGELOG.md
This commit is contained in:
Vincent Koc
2026-03-03 22:15:28 -08:00
committed by GitHub
parent dfb4cb87f9
commit 4d183af0cf
12 changed files with 362 additions and 30 deletions

View File

@@ -7,6 +7,8 @@ import { runCliAgent } from "./cli-runner.js";
import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
const supervisorSpawnMock = vi.fn();
const enqueueSystemEventMock = vi.fn();
const requestHeartbeatNowMock = vi.fn();
vi.mock("../process/supervisor/index.js", () => ({
getProcessSupervisor: () => ({
@@ -18,6 +20,14 @@ vi.mock("../process/supervisor/index.js", () => ({
}),
}));
vi.mock("../infra/system-events.js", () => ({
enqueueSystemEvent: (...args: unknown[]) => enqueueSystemEventMock(...args),
}));
vi.mock("../infra/heartbeat-wake.js", () => ({
requestHeartbeatNow: (...args: unknown[]) => requestHeartbeatNowMock(...args),
}));
type MockRunExit = {
reason:
| "manual-cancel"
@@ -49,6 +59,8 @@ function createManagedRun(exit: MockRunExit, pid = 1234) {
describe("runCliAgent with process supervisor", () => {
beforeEach(() => {
supervisorSpawnMock.mockClear();
enqueueSystemEventMock.mockClear();
requestHeartbeatNowMock.mockClear();
});
it("runs CLI through supervisor and returns payload", async () => {
@@ -124,6 +136,46 @@ describe("runCliAgent with process supervisor", () => {
).rejects.toThrow("produced no output");
});
it("enqueues a system event and heartbeat wake on no-output watchdog timeout for session runs", async () => {
supervisorSpawnMock.mockResolvedValueOnce(
createManagedRun({
reason: "no-output-timeout",
exitCode: null,
exitSignal: "SIGKILL",
durationMs: 200,
stdout: "",
stderr: "",
timedOut: true,
noOutputTimedOut: true,
}),
);
await expect(
runCliAgent({
sessionId: "s1",
sessionKey: "agent:main:main",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp",
prompt: "hi",
provider: "codex-cli",
model: "gpt-5.2-codex",
timeoutMs: 1_000,
runId: "run-2b",
cliSessionId: "thread-123",
}),
).rejects.toThrow("produced no output");
expect(enqueueSystemEventMock).toHaveBeenCalledTimes(1);
const [notice, opts] = enqueueSystemEventMock.mock.calls[0] ?? [];
expect(String(notice)).toContain("produced no output");
expect(String(notice)).toContain("interactive input or an approval prompt");
expect(opts).toMatchObject({ sessionKey: "agent:main:main" });
expect(requestHeartbeatNowMock).toHaveBeenCalledWith({
reason: "cli:watchdog:stall",
sessionKey: "agent:main:main",
});
});
it("fails with timeout when overall timeout trips", async () => {
supervisorSpawnMock.mockResolvedValueOnce(
createManagedRun({