fix: handle CLI session expired errors gracefully instead of crashing gateway (#31090)

* fix: handle CLI session expired errors gracefully

- Add session_expired to FailoverReason type
- Add isCliSessionExpiredErrorMessage to detect expired CLI sessions
- Modify runCliAgent to retry with new session when session expires
- Update agentCommand to clear expired session IDs from session store
- Add proper error handling to prevent gateway crashes on expired sessions

Fixes #30986

* fix: add session_expired to AuthProfileFailureReason and missing log import

* fix: type cli-runner usage field to match EmbeddedPiAgentMeta

* fix: harden CLI session-expiry recovery handling

* build: regenerate host env security policy swift

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Frank Yang
2026-03-02 09:11:05 +08:00
committed by GitHub
parent a95c8077e8
commit ed86252aa5
9 changed files with 481 additions and 206 deletions

View File

@@ -153,6 +153,50 @@ describe("runCliAgent with process supervisor", () => {
).rejects.toThrow("exceeded timeout");
});
it("rethrows the retry failure when session-expired recovery retry also fails", async () => {
supervisorSpawnMock.mockResolvedValueOnce(
createManagedRun({
reason: "exit",
exitCode: 1,
exitSignal: null,
durationMs: 150,
stdout: "",
stderr: "session expired",
timedOut: false,
noOutputTimedOut: false,
}),
);
supervisorSpawnMock.mockResolvedValueOnce(
createManagedRun({
reason: "exit",
exitCode: 1,
exitSignal: null,
durationMs: 150,
stdout: "",
stderr: "rate limit exceeded",
timedOut: false,
noOutputTimedOut: false,
}),
);
await expect(
runCliAgent({
sessionId: "s1",
sessionKey: "agent:main:subagent:retry",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp",
prompt: "hi",
provider: "codex-cli",
model: "gpt-5.2-codex",
timeoutMs: 1_000,
runId: "run-retry-failure",
cliSessionId: "thread-123",
}),
).rejects.toThrow("rate limit exceeded");
expect(supervisorSpawnMock).toHaveBeenCalledTimes(2);
});
it("falls back to per-agent workspace when workspaceDir is missing", async () => {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cli-runner-"));
const fallbackWorkspace = path.join(tempDir, "workspace-main");