feat(agents): implement state-aware failover and lane suspension

Summary:
- Persist quota-suspension state transitions and reload fresh suspension state before failover handoff injection.
- Restore suspended lanes to configured concurrency and share failover-to-suspension reason mapping across fallback and embedded runner paths.
- Export model.failover diagnostics via OTLP and cover queueing/resume behavior with regression tests.

Verification:
- pnpm test src/config/sessions/store.pruning.integration.test.ts src/process/command-queue.test.ts src/agents/session-suspension.test.ts src/agents/model-fallback.test.ts extensions/diagnostics-otel/src/service.test.ts
- git diff --check
- pnpm exec oxfmt --check --threads=1 on changed TypeScript files
- GitHub checks: 92 successful, 0 pending, 0 failed on head 962146be88
- Review threads: none unresolved
This commit is contained in:
Mert Başar
2026-05-08 02:34:05 +03:00
committed by GitHub
parent e29f4ff6b8
commit 029ca8c268
24 changed files with 817 additions and 26 deletions

View File

@@ -364,6 +364,35 @@ describe("command queue", () => {
}
});
// Regression test: a lane whose concurrency is set to 0 must hold enqueued
// work without running it, and must drain that work once the concurrency is
// raised again.
it("keeps work queued while a lane has zero concurrency and drains after resume", async () => {
  // Unique lane name so this test does not share a lane with other tests.
  const laneName = `suspended-lane-${Date.now()}-${Math.random().toString(16).slice(2)}`;
  let executed = false;

  // Drop the lane to zero concurrency before enqueueing anything.
  setCommandLaneConcurrency(laneName, 0);
  const pending = enqueueCommandInLane(laneName, async () => {
    executed = true;
    return "resumed";
  });

  // Yield one microtask tick before asserting the task has not started.
  await Promise.resolve();
  expect(executed).toBe(false);
  const whileSuspended = getCommandLaneSnapshot(laneName);
  expect(whileSuspended).toMatchObject({
    activeCount: 0,
    queuedCount: 1,
    maxConcurrent: 0,
  });

  // Raising the concurrency back to 1 should let the queued task run to completion.
  setCommandLaneConcurrency(laneName, 1);
  await expect(pending).resolves.toBe("resumed");
  expect(executed).toBe(true);
  const afterResume = getCommandLaneSnapshot(laneName);
  expect(afterResume).toMatchObject({
    activeCount: 0,
    queuedCount: 0,
    maxConcurrent: 1,
  });
});
it("getCommandLaneSnapshot reports active and queued work for one lane", async () => {
const lane = `snapshot-lane-${Date.now()}-${Math.random().toString(16).slice(2)}`;
setCommandLaneConcurrency(lane, 1);