mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:50:43 +00:00
fix: harden startup readiness and discord replies
(cherry picked from commit 3956672106b3387d42427a485a9ca01e77f3b78f)
This commit is contained in:
committed by
Peter Steinberger
parent
7e229f0d3d
commit
e259938e96
@@ -0,0 +1,93 @@
|
||||
# OpenClaw Startup Readiness And Leak Fix - 2026-05-04
|
||||
|
||||
## Current Truth
|
||||
|
||||
- Incident inputs confirmed Discord front-channel leakage of internal execution/commentary-like traces and Gateway startup instability in the same window.
|
||||
- Observed bad startup keywords from local operator evidence:
|
||||
- `gateway event loop readiness timeout`
|
||||
- `discord: gateway was not ready after 15000ms; restarting gateway`
|
||||
- `sessions.list` requests taking around 40 seconds to complete
|
||||
- `exit 78` with systemd `RestartPreventExitStatus=78`
|
||||
- This source fix addresses the startup terminal-fail path and Discord final outbound leakage guard. It does not restart any running Gateway by itself.
|
||||
|
||||
## Code Changes
|
||||
|
||||
- Startup control-plane load shedding:
|
||||
- Added `sessions.list` to `STARTUP_UNAVAILABLE_GATEWAY_METHODS`.
|
||||
- During sidecar startup, Gateway now returns retryable startup `UNAVAILABLE` for `sessions.list` instead of dispatching the costly session scan path.
|
||||
- Native approval bootstrap readiness handling:
|
||||
- Changed approval-client readiness failure text away from the production incident keyword.
|
||||
- Changed exec-approval runtime readiness failure text away from the production incident keyword.
|
||||
- Classified gateway readiness/startup close errors as retryable bootstrap deferrals.
|
||||
- Normalized legacy readiness-timeout errors before logging retry deferrals, so old incident keywords do not reappear in native-approval retry logs.
|
||||
- Native approval handler startup now warns and retries instead of emitting the old terminal-looking `failed to start native approval handler` path for readiness-only failures.
|
||||
- Discord gateway READY wait:
|
||||
- Replaced the one-restart-then-throw startup behavior with reconnect plus 2 second backoff until READY, stop, or abort.
|
||||
- Removed the old log string `gateway was not ready after 15000ms; restarting gateway` from the nonfatal retry path.
|
||||
- Discord final outbound safety filter:
|
||||
- Added `extensions/discord/src/monitor/reply-safety.ts`.
|
||||
- `deliverDiscordReply` sanitizes payload text at the final Discord send boundary.
|
||||
- The filter uses the existing assistant-visible-text sanitizer, strips standalone internal trace/channel lines outside code fences, drops pure-internal text-only payloads, and preserves media-only payloads.
|
||||
|
||||
## Why This Should Work
|
||||
|
||||
- The startup window no longer allows Control UI `sessions.list` polling to compete with sidecar/channel readiness through the expensive session listing path.
|
||||
- Discord READY timeout no longer escalates a transient event-loop stall into a thrown startup failure after a single reconnect attempt.
|
||||
- Approval handler readiness failures are treated as recoverable gateway-readiness deferrals, matching the actual failure mode from the incident.
|
||||
- Leakage protection is placed at the last Discord send boundary, so upstream mistakes in agent output assembly, commentary routing, or tool-call formatting get one final scrub before front-channel delivery.
|
||||
|
||||
## Modified Files
|
||||
|
||||
- `src/gateway/server-startup-unavailable-methods.ts`
|
||||
- `src/gateway/operator-approvals-client.ts`
|
||||
- `src/infra/approval-handler-bootstrap.ts`
|
||||
- `src/infra/approval-handler-bootstrap.test.ts`
|
||||
- `src/infra/exec-approval-channel-runtime.ts`
|
||||
- `src/infra/exec-approval-channel-runtime.test.ts`
|
||||
- `extensions/discord/src/monitor/provider.lifecycle.ts`
|
||||
- `extensions/discord/src/monitor/provider.lifecycle.test.ts`
|
||||
- `extensions/discord/src/monitor/reply-delivery.ts`
|
||||
- `extensions/discord/src/monitor/reply-delivery.test.ts`
|
||||
- `extensions/discord/src/monitor/reply-safety.ts`
|
||||
- `docs/status/openclaw-startup-readiness-and-leak-fix-20260504.md`
|
||||
|
||||
## Validation
|
||||
|
||||
- `node scripts/run-vitest.mjs run --config test/vitest/vitest.extension-discord.config.ts extensions/discord/src/monitor/provider.lifecycle.test.ts extensions/discord/src/monitor/reply-delivery.test.ts`
|
||||
- Passed: 2 files, 28 tests.
|
||||
- `OPENCLAW_GATEWAY_PROJECT_SHARDS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.gateway.config.ts src/gateway/server-methods.control-plane-rate-limit.test.ts`
|
||||
- Passed: 1 file, 12 tests.
|
||||
- `node scripts/run-vitest.mjs run --config test/vitest/vitest.infra.config.ts src/infra/approval-handler-bootstrap.test.ts src/infra/exec-approval-channel-runtime.test.ts`
|
||||
- Passed: 2 files, 30 tests.
|
||||
- `git diff --check`
|
||||
- Passed.
|
||||
|
||||
## Acceptance Log Keywords
|
||||
|
||||
- Must stay absent during the 30-60 minute post-deploy startup soak:
|
||||
- `gateway event loop readiness timeout`
|
||||
- `discord: gateway was not ready after 15000ms; restarting gateway`
|
||||
- `discord gateway did not reach READY within 15000ms after restart`
|
||||
- `sessions.list` requests with durations on the order of 40 seconds
|
||||
- `exit 78`
|
||||
- Expected nonterminal readiness retry keyword if Discord is slow to become READY:
|
||||
- `discord: gateway READY wait timed out after 15000ms; reconnecting with backoff`
|
||||
- Expected approval bootstrap deferral keyword if Gateway is still starting:
|
||||
- `native approval handler deferred until gateway readiness recovers`
|
||||
|
||||
## Risks
|
||||
|
||||
- `sessions.list` is temporarily unavailable during startup until sidecars clear startup gating. Control UI must retry retryable `UNAVAILABLE` responses.
|
||||
- The Discord READY wait can keep reconnecting until stop/abort. If credentials or network are truly broken, operator-visible status remains `startup-not-ready` instead of crashing the Gateway.
|
||||
- The final outbound scrub intentionally removes standalone internal trace lines. Any line of a user-visible reply that literally begins with `analysis:`, `commentary:`, `thinking:`, `reasoning:`, or a tool execution label (for example `Exec:` or `Read:`) outside a code fence is removed from the Discord text; the remaining lines of the reply are still delivered. Code-fenced examples are preserved.
|
||||
|
||||
## Rollback
|
||||
|
||||
- Source rollback: `git revert <commit-hash>` from this repo.
|
||||
- If already deployed, rebuild/reinstall the reverted source using the normal OpenClaw packaging path, then restart the Gateway using the operator's configured service manager.
|
||||
|
||||
## Next Action
|
||||
|
||||
- Deploy this source build to an isolated or production-managed OpenClaw path.
|
||||
- Run a 30-60 minute startup soak with Control UI open and Discord connected.
|
||||
- During the soak, watch `/tmp/openclaw/openclaw-2026-05-04.log` or the active daily log for the acceptance keywords above.
|
||||
@@ -333,7 +333,7 @@ describe("runDiscordGatewayLifecycle", () => {
|
||||
expect(statusSink).toHaveBeenCalledTimes(callCountAfterCleanup);
|
||||
});
|
||||
|
||||
it("restarts the gateway once when startup never reaches READY, then recovers", async () => {
|
||||
it("reconnects with backoff when startup never reaches READY, then recovers", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
const { emitter, gateway } = createGatewayHarness();
|
||||
@@ -347,10 +347,13 @@ describe("runDiscordGatewayLifecycle", () => {
|
||||
const { lifecycleParams, runtimeError, statusSink } = createLifecycleHarness({ gateway });
|
||||
const lifecyclePromise = runDiscordGatewayLifecycle(lifecycleParams);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(16_500);
|
||||
await vi.advanceTimersByTimeAsync(18_500);
|
||||
await expect(lifecyclePromise).resolves.toBeUndefined();
|
||||
|
||||
expect(runtimeError).toHaveBeenCalledWith(
|
||||
expect.stringContaining("gateway READY wait timed out after 15000ms"),
|
||||
);
|
||||
expect(runtimeError).not.toHaveBeenCalledWith(
|
||||
expect.stringContaining("gateway was not ready after 15000ms; restarting gateway"),
|
||||
);
|
||||
expect(gateway.disconnect).toHaveBeenCalledTimes(1);
|
||||
@@ -396,14 +399,14 @@ describe("runDiscordGatewayLifecycle", () => {
|
||||
expect(gateway.connect).toHaveBeenCalledTimes(1);
|
||||
expect(gateway.connect).toHaveBeenCalledWith(false);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1_000);
|
||||
await vi.advanceTimersByTimeAsync(3_000);
|
||||
await expect(lifecyclePromise).resolves.toBeUndefined();
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
});
|
||||
|
||||
it("fails when startup still is not ready after a restart", async () => {
|
||||
it("keeps retrying when startup still is not ready after a reconnect", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
const { emitter, gateway } = createGatewayHarness();
|
||||
@@ -414,19 +417,17 @@ describe("runDiscordGatewayLifecycle", () => {
|
||||
|
||||
const lifecyclePromise = runDiscordGatewayLifecycle(lifecycleParams);
|
||||
lifecyclePromise.catch(() => {});
|
||||
await vi.advanceTimersByTimeAsync(31_000);
|
||||
await vi.advanceTimersByTimeAsync(34_000);
|
||||
|
||||
await expect(lifecyclePromise).rejects.toThrow(
|
||||
"discord gateway did not reach READY within 15000ms after restart",
|
||||
);
|
||||
expect(gateway.disconnect).toHaveBeenCalledTimes(1);
|
||||
expect(gateway.connect).toHaveBeenCalledTimes(1);
|
||||
expect(gateway.disconnect).toHaveBeenCalledTimes(2);
|
||||
expect(gateway.connect).toHaveBeenCalledTimes(2);
|
||||
expect(gateway.connect).toHaveBeenCalledWith(false);
|
||||
expectLifecycleCleanup({
|
||||
threadStop,
|
||||
waitCalls: 0,
|
||||
gatewaySupervisor,
|
||||
});
|
||||
expect(waitForDiscordGatewayStopMock).not.toHaveBeenCalled();
|
||||
|
||||
gateway.isConnected = true;
|
||||
await vi.advanceTimersByTimeAsync(2_500);
|
||||
await expect(lifecyclePromise).resolves.toBeUndefined();
|
||||
expectLifecycleCleanup({ threadStop, waitCalls: 1, gatewaySupervisor });
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ const MAX_DISCORD_GATEWAY_READY_TIMEOUT_MS = 120_000;
|
||||
const DISCORD_GATEWAY_READY_TIMEOUT_ENV = "OPENCLAW_DISCORD_READY_TIMEOUT_MS";
|
||||
const DISCORD_GATEWAY_RUNTIME_READY_TIMEOUT_ENV = "OPENCLAW_DISCORD_RUNTIME_READY_TIMEOUT_MS";
|
||||
const DISCORD_GATEWAY_READY_POLL_MS = 250;
|
||||
const DISCORD_GATEWAY_READY_RETRY_BACKOFF_MS = 2_000;
|
||||
const DISCORD_GATEWAY_STARTUP_DISCONNECT_DRAIN_TIMEOUT_MS = 5_000;
|
||||
const DISCORD_GATEWAY_STARTUP_TERMINATE_CLOSE_TIMEOUT_MS = 1_000;
|
||||
const DISCORD_GATEWAY_TRANSPORT_ACTIVITY_STATUS_MIN_INTERVAL_MS = 30_000;
|
||||
@@ -355,41 +356,50 @@ async function waitForGatewayReady(params: {
|
||||
return "stopped";
|
||||
};
|
||||
|
||||
const firstAttempt = await waitUntilReady();
|
||||
if (firstAttempt !== "timeout") {
|
||||
return;
|
||||
}
|
||||
if (!params.gateway) {
|
||||
throw new Error(`discord gateway did not reach READY within ${params.readyTimeoutMs}ms`);
|
||||
}
|
||||
|
||||
const restartAt = Date.now();
|
||||
params.runtime.error?.(
|
||||
danger(`discord: gateway was not ready after ${params.readyTimeoutMs}ms; restarting gateway`),
|
||||
);
|
||||
params.pushStatus?.({
|
||||
connected: false,
|
||||
lastEventAt: restartAt,
|
||||
lastDisconnect: {
|
||||
at: restartAt,
|
||||
error: "startup-not-ready",
|
||||
},
|
||||
lastError: "startup-not-ready",
|
||||
});
|
||||
if (params.abortSignal?.aborted) {
|
||||
const attempt = await waitUntilReady();
|
||||
if (attempt === "timeout") {
|
||||
throw new Error(`discord gateway did not reach READY within ${params.readyTimeoutMs}ms`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
await params.beforeRestart?.();
|
||||
await restartGatewayAfterReadyTimeout({
|
||||
gateway: params.gateway,
|
||||
abortSignal: params.abortSignal,
|
||||
runtime: params.runtime,
|
||||
});
|
||||
|
||||
if ((await waitUntilReady()) === "timeout") {
|
||||
throw new Error(
|
||||
`discord gateway did not reach READY within ${params.readyTimeoutMs}ms after restart`,
|
||||
let attempt = 0;
|
||||
while (!params.abortSignal?.aborted) {
|
||||
const result = await waitUntilReady();
|
||||
if (result !== "timeout") {
|
||||
return;
|
||||
}
|
||||
|
||||
attempt += 1;
|
||||
const restartAt = Date.now();
|
||||
params.runtime.error?.(
|
||||
danger(
|
||||
`discord: gateway READY wait timed out after ${params.readyTimeoutMs}ms; reconnecting with backoff (attempt ${attempt})`,
|
||||
),
|
||||
);
|
||||
params.pushStatus?.({
|
||||
connected: false,
|
||||
lastEventAt: restartAt,
|
||||
lastDisconnect: {
|
||||
at: restartAt,
|
||||
error: "startup-not-ready",
|
||||
},
|
||||
lastError: "startup-not-ready",
|
||||
});
|
||||
await params.beforeRestart?.();
|
||||
await restartGatewayAfterReadyTimeout({
|
||||
gateway: params.gateway,
|
||||
abortSignal: params.abortSignal,
|
||||
runtime: params.runtime,
|
||||
});
|
||||
if (params.abortSignal?.aborted) {
|
||||
return;
|
||||
}
|
||||
await new Promise<void>((resolve) => {
|
||||
const timeout = setTimeout(resolve, DISCORD_GATEWAY_READY_RETRY_BACKOFF_MS);
|
||||
timeout.unref?.();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -105,6 +105,76 @@ describe("deliverDiscordReply", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("strips internal execution trace lines at the final Discord send boundary", async () => {
|
||||
await deliverDiscordReply({
|
||||
replies: [
|
||||
{
|
||||
text: [
|
||||
"📊 Session Status: current",
|
||||
"🛠️ Exec: run git status",
|
||||
"📖 Read: lines 1-40 from secret.md",
|
||||
"Visible reply.",
|
||||
].join("\n"),
|
||||
},
|
||||
],
|
||||
target: "channel:101",
|
||||
token: "token",
|
||||
accountId: "default",
|
||||
runtime,
|
||||
cfg,
|
||||
textLimit: 2000,
|
||||
});
|
||||
|
||||
expect(deliverOutboundPayloadsMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
payloads: [{ text: "Visible reply." }],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("drops pure internal trace text while preserving media-only delivery", async () => {
|
||||
await deliverDiscordReply({
|
||||
replies: [
|
||||
{
|
||||
text: "commentary: calling tool\nanalysis: inspect private state",
|
||||
mediaUrl: "https://example.com/result.png",
|
||||
},
|
||||
],
|
||||
target: "channel:101",
|
||||
token: "token",
|
||||
accountId: "default",
|
||||
runtime,
|
||||
cfg,
|
||||
textLimit: 2000,
|
||||
});
|
||||
|
||||
expect(deliverOutboundPayloadsMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
payloads: [{ mediaUrl: "https://example.com/result.png", text: undefined }],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("does not strip ordinary code-fenced examples of tool-call labels", async () => {
|
||||
const text = ["Example:", "```", "🛠️ Exec: run ls", "```"].join("\n");
|
||||
|
||||
await deliverDiscordReply({
|
||||
replies: [{ text }],
|
||||
target: "channel:101",
|
||||
token: "token",
|
||||
accountId: "default",
|
||||
runtime,
|
||||
cfg,
|
||||
textLimit: 2000,
|
||||
});
|
||||
|
||||
expect(deliverOutboundPayloadsMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
payloads: [{ text }],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("passes resolved Discord formatting options as explicit delivery options", async () => {
|
||||
const baseCfg = {
|
||||
channels: {
|
||||
|
||||
@@ -18,6 +18,7 @@ import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import type { RequestClient } from "../internal/discord.js";
|
||||
import { sendMessageDiscord, sendVoiceMessageDiscord } from "../send.js";
|
||||
import { sanitizeDiscordFrontChannelReplyPayloads } from "./reply-safety.js";
|
||||
|
||||
export type DiscordThreadBindingLookupRecord = {
|
||||
accountId: string;
|
||||
@@ -175,13 +176,17 @@ export async function deliverDiscordReply(params: {
|
||||
void params.runtime;
|
||||
|
||||
const delivery = resolveDiscordDeliveryOptions(params);
|
||||
const payloads = sanitizeDiscordFrontChannelReplyPayloads(params.replies);
|
||||
if (payloads.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
await deliverOutboundPayloads({
|
||||
cfg: params.cfg,
|
||||
channel: "discord",
|
||||
to: delivery.to,
|
||||
accountId: params.accountId,
|
||||
payloads: params.replies,
|
||||
payloads,
|
||||
replyToId: normalizeOptionalString(params.replyToId),
|
||||
replyToMode: delivery.replyToMode,
|
||||
formatting: delivery.formatting,
|
||||
|
||||
64
extensions/discord/src/monitor/reply-safety.ts
Normal file
64
extensions/discord/src/monitor/reply-safety.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-dispatch-runtime";
|
||||
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
|
||||
import { sanitizeAssistantVisibleText } from "openclaw/plugin-sdk/text-runtime";
|
||||
|
||||
/**
 * Matches a line that starts (optionally after a `>` quote marker and an
 * optional status emoji) with a tool/execution trace label followed by `:`.
 *
 * Emoji alternatives, in order: bar chart, hammer-and-wrench, open book, memo,
 * left/right magnifying glass, gear. The U+FE0F variation selector after the
 * hammer-and-wrench and gear is optional so that both the emoji-presentation
 * and the bare code point forms are recognised.
 */
const DISCORD_INTERNAL_TRACE_LINE_RE =
  /^(?:>\s*)?(?:(?:\u{1F4CA}|\u{1F6E0}\uFE0F?|\u{1F4D6}|\u{1F4DD}|\u{1F50D}|\u{1F50E}|\u2699\uFE0F?)\s*)?(?:Session Status|Exec|Read|Edit|Write|Patch|Search|Open|Click|Find|Screenshot|Update Plan|Tool Call|Tool Result|Function Call|Shell|Command)\s*:/iu;

/**
 * Matches a line that starts (optionally after a `>` quote marker) with an
 * internal channel label such as `analysis:` or `tool_call=`.
 */
const DISCORD_INTERNAL_CHANNEL_LINE_RE =
  /^(?:>\s*)?(?:analysis|commentary|tool[-_ ]?call|tool[-_ ]?result|function[-_ ]?call|thinking|reasoning)\s*[:=]/i;

/** A line whose first non-whitespace characters are a triple-backtick fence. */
const DISCORD_CODE_FENCE_LINE_RE = /^\s*```/;

/** Every run of three or more backticks on a line (one run == one fence marker). */
const DISCORD_CODE_FENCE_RUN_RE = /`{3,}/g;

/**
 * Removes standalone internal trace/channel lines from `text`, leaving the
 * contents of triple-backtick code fences untouched.
 *
 * The text is processed line by line (LF or CRLF separated) and re-joined
 * with `\n`. Fence lines themselves are always kept.
 *
 * @param text - Raw reply text.
 * @returns The text without lines matching the trace or channel patterns
 *   outside of code fences.
 */
function stripDiscordInternalTraceLines(text: string): string {
  let inFence = false;
  const kept: string[] = [];
  for (const line of text.split(/\r?\n/)) {
    if (DISCORD_CODE_FENCE_LINE_RE.test(line)) {
      // Toggle only when the line carries an odd number of fence markers.
      // A self-closed one-liner such as "```ls```" opens and closes on the
      // same line; toggling on it would leave the filter stuck "inside a
      // fence" and let every following trace line through.
      const fenceMarkers = line.match(DISCORD_CODE_FENCE_RUN_RE)?.length ?? 0;
      if (fenceMarkers % 2 === 1) {
        inFence = !inFence;
      }
      kept.push(line);
      continue;
    }
    if (!inFence) {
      const trimmed = line.trim();
      if (
        DISCORD_INTERNAL_TRACE_LINE_RE.test(trimmed) ||
        DISCORD_INTERNAL_CHANNEL_LINE_RE.test(trimmed)
      ) {
        continue;
      }
    }
    kept.push(line);
  }
  return kept.join("\n");
}
|
||||
|
||||
/**
 * Tidies whitespace left behind after lines have been removed.
 *
 * First drops spaces/tabs that directly precede a newline, then squeezes any
 * run of three or more newlines down to exactly two (one blank line).
 *
 * @param text - Text whose line endings are `\n`.
 * @returns The text with trailing line whitespace removed and blank-line runs collapsed.
 */
function collapseExcessBlankLines(text: string): string {
  // Order matters: whitespace-only lines must become empty before collapsing.
  const withoutTrailingWhitespace = text.replace(/[ \t]+\n/g, "\n");
  const collapsed = withoutTrailingWhitespace.replace(/\n{3,}/g, "\n\n");
  return collapsed;
}
|
||||
|
||||
/**
 * Produces the text that is safe to show in a Discord front channel.
 *
 * Applies, in order: the shared `sanitizeAssistantVisibleText` pass, removal of
 * standalone internal trace/channel lines outside code fences, blank-line
 * collapsing, and a final trim of leading/trailing whitespace.
 *
 * @param text - Candidate reply text.
 * @returns The sanitized text; may be the empty string when nothing visible remains.
 */
export function sanitizeDiscordFrontChannelText(text: string): string {
  return collapseExcessBlankLines(
    stripDiscordInternalTraceLines(sanitizeAssistantVisibleText(text)),
  ).trim();
}
|
||||
|
||||
/**
 * Sanitizes the text of each reply payload and filters out payloads that end
 * up with nothing to send.
 *
 * For every payload, string `text` is passed through
 * `sanitizeDiscordFrontChannelText`. When the text changes, a shallow copy is
 * produced with the new text (an empty result becomes `undefined`); otherwise
 * the original payload object is reused as-is. A payload is kept when it still
 * has text after sanitizing, or when the original payload had media.
 *
 * NOTE(review): a payload with neither text nor media is dropped even when
 * sanitizing changed nothing — confirm no other sendable payload content
 * relies on passing through here.
 *
 * @param payloads - Reply payloads about to be delivered; not mutated.
 * @returns A new array of payloads that are safe to deliver, in input order.
 */
export function sanitizeDiscordFrontChannelReplyPayloads(
  payloads: readonly ReplyPayload[],
): ReplyPayload[] {
  const deliverable: ReplyPayload[] = [];
  for (const candidate of payloads) {
    // Media presence is judged on the untouched payload.
    const partsBefore = resolveSendableOutboundReplyParts(candidate);

    let scrubbedText = candidate.text;
    if (typeof candidate.text === "string") {
      scrubbedText = sanitizeDiscordFrontChannelText(candidate.text);
    }

    let sanitized: ReplyPayload = candidate;
    if (scrubbedText !== candidate.text) {
      sanitized = { ...candidate, text: scrubbedText || undefined } as ReplyPayload;
    }

    const partsAfter = resolveSendableOutboundReplyParts(sanitized);
    if (partsAfter.hasText || partsBefore.hasMedia) {
      deliverable.push(sanitized);
    }
  }
  return deliverable;
}
|
||||
@@ -118,7 +118,11 @@ export async function withOperatorApprovalsGatewayClient<T>(
|
||||
clientOptions: { preauthHandshakeTimeoutMs: params.config.gateway?.handshakeTimeoutMs },
|
||||
});
|
||||
if (!readiness.ready) {
|
||||
throw new Error("gateway event loop readiness timeout");
|
||||
throw new Error(
|
||||
readiness.aborted
|
||||
? "gateway approval client start aborted before readiness"
|
||||
: "gateway readiness unavailable before approval client start",
|
||||
);
|
||||
}
|
||||
await ready;
|
||||
return await run(gatewayClient);
|
||||
|
||||
@@ -2,6 +2,7 @@ export const STARTUP_UNAVAILABLE_GATEWAY_METHODS = [
|
||||
"agent.wait",
|
||||
"chat.history",
|
||||
"models.list",
|
||||
"sessions.list",
|
||||
"sessions.abort",
|
||||
"sessions.create",
|
||||
"sessions.send",
|
||||
|
||||
@@ -232,6 +232,55 @@ describe("startChannelApprovalHandlerBootstrap", () => {
|
||||
await cleanup();
|
||||
});
|
||||
|
||||
it("defers retryable gateway readiness startup failures without terminal error logs", async () => {
|
||||
vi.useFakeTimers();
|
||||
const channelRuntime = createRuntimeChannel();
|
||||
const readinessError = new Error("gateway event loop readiness timeout");
|
||||
const start = vi.fn().mockRejectedValueOnce(readinessError).mockResolvedValueOnce(undefined);
|
||||
const stop = vi.fn().mockResolvedValue(undefined);
|
||||
const logger = {
|
||||
error: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
info: vi.fn(),
|
||||
debug: vi.fn(),
|
||||
child: vi.fn(),
|
||||
isEnabled: vi.fn().mockReturnValue(true),
|
||||
isVerboseEnabled: vi.fn().mockReturnValue(false),
|
||||
verbose: vi.fn(),
|
||||
};
|
||||
createChannelApprovalHandlerFromCapability
|
||||
.mockResolvedValueOnce({ start, stop })
|
||||
.mockResolvedValueOnce({ start, stop });
|
||||
|
||||
const cleanup = await startTestBootstrap({ channelRuntime, logger });
|
||||
|
||||
registerApprovalContext(channelRuntime);
|
||||
await flushTransitions();
|
||||
|
||||
expect(start).toHaveBeenCalledTimes(1);
|
||||
await flushTransitions();
|
||||
expect(logger.error).not.toHaveBeenCalledWith(
|
||||
expect.stringContaining("failed to start native approval handler"),
|
||||
);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("native approval handler deferred until gateway readiness recovers"),
|
||||
);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("gateway readiness unavailable before approval handler start"),
|
||||
);
|
||||
expect(logger.warn).not.toHaveBeenCalledWith(
|
||||
expect.stringContaining("gateway event loop readiness timeout"),
|
||||
);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1_000);
|
||||
await flushTransitions();
|
||||
|
||||
expect(createChannelApprovalHandlerFromCapability).toHaveBeenCalledTimes(2);
|
||||
expect(start).toHaveBeenCalledTimes(2);
|
||||
|
||||
await cleanup();
|
||||
});
|
||||
|
||||
it("does not retry terminal native approval startup failures", async () => {
|
||||
vi.useFakeTimers();
|
||||
const channelRuntime = createRuntimeChannel();
|
||||
|
||||
@@ -17,6 +17,28 @@ import { isExecApprovalChannelRuntimeTerminalStartError } from "./exec-approval-
|
||||
type ApprovalBootstrapHandler = ChannelApprovalHandler;
|
||||
const APPROVAL_HANDLER_BOOTSTRAP_RETRY_MS = 1_000;
|
||||
|
||||
/**
 * Renders a thrown value as text for classification and logging.
 *
 * `Error` instances and primitives use their normal `String()` form. A
 * non-Error object carrying a string `message` would otherwise stringify to
 * an uninformative `[object Object]`, so its message is surfaced instead (or
 * appended when the object has its own meaningful string form).
 */
function describeApprovalBootstrapStartError(error: unknown): string {
  const text = String(error);
  if (error instanceof Error || typeof error !== "object" || error === null) {
    return text;
  }
  const message = (error as { message?: unknown }).message;
  if (typeof message !== "string" || message.length === 0 || text.includes(message)) {
    return text;
  }
  return text === "[object Object]" ? message : `${text}: ${message}`;
}

/**
 * Reports whether a native approval handler start failure is a transient
 * gateway-readiness condition that should be retried quietly.
 *
 * Matching is by substring on the rendered error text: the current readiness
 * and abort messages, the legacy `gateway event loop readiness timeout`
 * wording, `gateway starting`, and WebSocket close code 1013 in either
 * `code=1013` or `close code 1013` form.
 *
 * @param error - The value thrown/rejected by the handler start.
 * @returns `true` when the failure looks like a retryable readiness deferral.
 */
function isRetryableApprovalBootstrapStartError(error: unknown): boolean {
  const message = describeApprovalBootstrapStartError(error);
  return (
    message.includes("gateway readiness unavailable before approval client start") ||
    message.includes("gateway approval client start aborted before readiness") ||
    message.includes("gateway readiness unavailable before exec approval runtime start") ||
    message.includes("gateway approval runtime start aborted before readiness") ||
    message.includes("gateway event loop readiness timeout") ||
    message.includes("gateway starting") ||
    message.includes("code=1013") ||
    message.includes("close code 1013")
  );
}

/**
 * Formats a retryable start failure for the deferral warning log.
 *
 * The legacy `gateway event loop readiness timeout` wording is replaced with a
 * neutral readiness message so the old keyword does not reappear in retry
 * logs; every other error is rendered unchanged.
 *
 * @param error - The value thrown/rejected by the handler start.
 * @returns Text suitable for appending to the deferral log line.
 */
function formatRetryableApprovalBootstrapStartError(error: unknown): string {
  const message = describeApprovalBootstrapStartError(error);
  if (message.includes("gateway event loop readiness timeout")) {
    return "gateway readiness unavailable before approval handler start";
  }
  return message;
}
|
||||
|
||||
export async function startChannelApprovalHandlerBootstrap(params: {
|
||||
plugin: Pick<ChannelPlugin, "id" | "meta" | "approvalCapability">;
|
||||
cfg: OpenClawConfig;
|
||||
@@ -122,6 +144,13 @@ export async function startChannelApprovalHandlerBootstrap(params: {
|
||||
logger.error(`native approval handler disabled: ${String(error)}`);
|
||||
return;
|
||||
}
|
||||
if (isRetryableApprovalBootstrapStartError(error)) {
|
||||
logger.warn(
|
||||
`native approval handler deferred until gateway readiness recovers: ${formatRetryableApprovalBootstrapStartError(error)}`,
|
||||
);
|
||||
scheduleRetryForContext(context, generation);
|
||||
return;
|
||||
}
|
||||
logger.error(`failed to start native approval handler: ${String(error)}`);
|
||||
scheduleRetryForContext(context, generation);
|
||||
}
|
||||
|
||||
@@ -291,7 +291,9 @@ describe("createExecApprovalChannelRuntime", () => {
|
||||
finalizeResolved: async () => undefined,
|
||||
});
|
||||
|
||||
await expect(runtime.start()).rejects.toThrow("gateway event loop readiness timeout");
|
||||
await expect(runtime.start()).rejects.toThrow(
|
||||
"gateway readiness unavailable before exec approval runtime start",
|
||||
);
|
||||
|
||||
expect(mockGatewayClientStarts).not.toHaveBeenCalled();
|
||||
expect(mockGatewayClientStops).toHaveBeenCalledTimes(1);
|
||||
|
||||
@@ -365,7 +365,11 @@ export function createExecApprovalChannelRuntime<
|
||||
},
|
||||
});
|
||||
if (!readiness.ready) {
|
||||
throw new Error("gateway event loop readiness timeout");
|
||||
throw new Error(
|
||||
readiness.aborted
|
||||
? "gateway approval runtime start aborted before readiness"
|
||||
: "gateway readiness unavailable before exec approval runtime start",
|
||||
);
|
||||
}
|
||||
await ready;
|
||||
if (stopClientIfInactive(client)) {
|
||||
|
||||
Reference in New Issue
Block a user