fix(qqbot): derive outbound watchdog from configured timeouts (#85267) (#86500)

Summary:
- The branch replaces QQBot's hardcoded outbound response watchdog with a resolver based on existing agent/provider `timeoutSeconds` settings, adds regression tests, and updates the changelog.
- PR surface: Source +113, Tests +116, Docs +1. Total +230 across 5 files.
- Reproducibility: yes. at source level: current main and the latest release use a hardcoded 300000 ms QQBot o ... s an 1800s provider timeout. I did not run the reporter's live QQBot/Ollama setup in this read-only review.

Automerge notes:
- PR branch already contained follow-up commit before automerge: test(qqbot): cover slow provider response watchdog
- PR branch already contained follow-up commit before automerge: fix(qqbot): derive outbound watchdog from configured timeouts (#85267)
- PR branch already contained follow-up commit before automerge: fix(clawsweeper): address review for automerge-openclaw-openclaw-8527…

Validation:
- ClawSweeper review passed for head 7bd829292a.
- Required merge gates passed before the squash merge.

Prepared head SHA: 7bd829292a
Review: https://github.com/openclaw/openclaw/pull/86500#issuecomment-4534669816

Co-authored-by: SymbolStar <symbolstar@users.noreply.github.com>
Co-authored-by: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: osolmaz
Co-authored-by: osolmaz <2453968+osolmaz@users.noreply.github.com>
This commit is contained in:
clawsweeper[bot]
2026-05-25 14:52:42 +00:00
committed by GitHub
parent 6f695c1864
commit aa702cf3db
5 changed files with 232 additions and 2 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
- Cron: seed active scheduled and manual cron task rows with a progress summary so status surfaces do not look blank while jobs run. (#86313) Thanks @ferminquant.
- Updater: exclude prerelease git tags from stable channel resolution so source updates do not check out newer alpha/rc/preview/canary tags. (#86260) Thanks @stevenepalmer.
- Security/Audit: flag webhook `hooks.token` reuse of active Gateway password auth in `openclaw security audit` while keeping password-mode startup compatibility. (#84338) Thanks @coygeek.
- QQBot: derive the outbound reply watchdog from configured agent and provider timeouts so slow local model replies are not cut off at five minutes. Fixes #85267. (#85271) Thanks @SymbolStar.
- Agents/heartbeat: stop heartbeat turns after the first valid `heartbeat_respond` so repeated response loops do not burn tokens. (#86357) Thanks @udaymanish6.
- Tasks: keep retained lost tasks out of default status health counts, explain their cleanup window during maintenance, and prune lost task records after 24 hours instead of the general 7-day terminal retention.
- Memory-core: keep REM dreaming focused on live light-staged memories and mark staged entries as considered so old recall history no longer dominates fresh candidates. (#86302) Thanks @SebTardif.

View File

@@ -171,6 +171,47 @@ describe("dispatchOutbound", () => {
vi.clearAllMocks();
});
// Regression test for issue #85267: with a 1800s provider timeout configured,
// dispatchOutbound must not give up at the former 300s cutoff.
it("keeps waiting past 300s when a slow provider timeout is configured", async () => {
// Fake timers let the test step through 301s of simulated delay instantly.
vi.useFakeTimers();
try {
const runtime = makeRuntime({
onDeliver: async (deliver) => {
// Simulate a slow model: the reply is delivered 1s after the 300s mark.
await new Promise<void>((resolve) => setTimeout(resolve, 301_000));
await deliver({ text: "late answer" }, { kind: "block" });
},
});
// Flipped by .finally() once the dispatch promise resolves or rejects.
let settled = false;
const dispatchPromise = dispatchOutbound(makeInbound(), {
runtime,
cfg: {
// The only timeout configured; 1800s is well above the 300s mark.
models: { providers: { ollama: { timeoutSeconds: 1800 } } },
},
account,
}).finally(() => {
settled = true;
});
// At exactly 300s the dispatch must still be pending and nothing sent yet.
await vi.advanceTimersByTimeAsync(300_000);
expect(settled).toBe(false);
expect(sendTextMock).not.toHaveBeenCalled();
// One more second releases the 301s timer inside onDeliver.
await vi.advanceTimersByTimeAsync(1_000);
await dispatchPromise;
// The late reply text must have been passed as the second sendText argument.
expect(sendTextMock).toHaveBeenCalledWith(
expect.anything(),
"late answer",
expect.anything(),
expect.anything(),
);
} finally {
// Drop any still-pending fake timers and restore real timers so later
// tests are unaffected.
vi.clearAllTimers();
vi.useRealTimers();
}
});
it("marks voice-only inbound as audio without adding voice paths to MediaPaths", async () => {
let finalized: Record<string, unknown> | undefined;
const runtime = makeRuntime({ onFinalize: (ctx) => (finalized = ctx) });

View File

@@ -33,6 +33,7 @@ import {
import { StreamingController, shouldUseOfficialC2cStream } from "../messaging/streaming-c2c.js";
import { audioFileToSilkBase64 } from "../utils/audio.js";
import type { InboundContext } from "./inbound-context.js";
import { resolveResponseTimeoutMs } from "./response-timeout.js";
import type {
GatewayAccount,
EngineLogger,
@@ -42,7 +43,12 @@ import type {
// ============ Config ============
const RESPONSE_TIMEOUT = 300_000;
// The QQBot outbound response watchdog no longer has a constant in this file.
// Its wait budget comes from `resolveResponseTimeoutMs(cfg)`, which takes the
// longest of the historical 5-minute floor, `agents.defaults.timeoutSeconds`,
// and `models.providers.<id>.timeoutSeconds`. See issue #85267, where a slow
// local ollama/qwen3.5:27b turn was capped at 5 min despite a configured
// 1800s provider timeout.
// This note documents that watchdog; it does not describe `TOOL_ONLY_TIMEOUT`.
const TOOL_ONLY_TIMEOUT = 60_000;
const MAX_TOOL_RENEWALS = 3;
const TOOL_MEDIA_SEND_TIMEOUT = 45_000;
@@ -149,12 +155,16 @@ export async function dispatchOutbound(
};
// ---- Timeout promise ----
// #85267: derive watchdog from existing agent / provider timeout config so
// a longer configured ceiling (e.g. slow local ollama models) is not
// silently undercut by a plugin-local 5-minute cap.
const responseTimeoutMs = resolveResponseTimeoutMs(cfg);
const timeoutPromise = new Promise<void>((_, reject) => {
timeoutId = setTimeout(() => {
if (!hasResponse) {
reject(new Error("Response timeout"));
}
}, RESPONSE_TIMEOUT);
}, responseTimeoutMs);
});
// ---- Deliver deps ----

View File

@@ -0,0 +1,75 @@
import { describe, expect, it } from "vitest";
import {
DEFAULT_RESPONSE_TIMEOUT_MS,
resolveResponseTimeoutMs,
} from "./response-timeout.js";
describe("resolveResponseTimeoutMs", () => {
  // Unconfigured or missing config keeps the historical 5-minute watchdog.
  it("falls back to the historical 5-minute floor when no timeouts configured", () => {
    expect(resolveResponseTimeoutMs({})).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
    expect(resolveResponseTimeoutMs(undefined)).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
    expect(resolveResponseTimeoutMs(null)).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
  });

  it("honors longer agents.defaults.timeoutSeconds", () => {
    expect(
      resolveResponseTimeoutMs({ agents: { defaults: { timeoutSeconds: 900 } } }),
    ).toBe(900_000);
  });

  it("ignores agents.defaults.timeoutSeconds shorter than the historical floor", () => {
    // Issue #85267: a configured 60s agent timeout must not undercut the
    // historical 5-minute watchdog floor for previously-working setups.
    expect(
      resolveResponseTimeoutMs({ agents: { defaults: { timeoutSeconds: 60 } } }),
    ).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
  });

  it("honors models.providers.<id>.timeoutSeconds for slow local providers (#85267)", () => {
    // Direct repro shape: ollama + qwen3.5:27b with 1800s timeout. Without
    // this fix, QQBot capped at 300s and surfaced "LLM request timed out".
    expect(
      resolveResponseTimeoutMs({
        models: { providers: { ollama: { timeoutSeconds: 1800 } } },
      }),
    ).toBe(1_800_000);
  });

  it("takes the maximum across multiple configured providers and agents", () => {
    expect(
      resolveResponseTimeoutMs({
        agents: { defaults: { timeoutSeconds: 600 } },
        models: {
          providers: {
            ollama: { timeoutSeconds: 1800 },
            "lm-studio": { timeoutSeconds: 900 },
            openai: { timeoutSeconds: 60 },
          },
        },
      }),
    ).toBe(1_800_000);
  });

  it("ignores non-positive or non-numeric timeout values", () => {
    // The resolver accepts `unknown`, so malformed values need no cast here.
    expect(
      resolveResponseTimeoutMs({
        agents: { defaults: { timeoutSeconds: -1 } },
        models: {
          providers: {
            ollama: { timeoutSeconds: 0 },
            broken: { timeoutSeconds: "1800" },
            naN: { timeoutSeconds: Number.NaN },
          },
        },
      }),
    ).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
  });

  it("skips provider entries that are null or undefined", () => {
    // A missing entry must neither throw nor hide a valid sibling entry.
    expect(
      resolveResponseTimeoutMs({
        models: {
          providers: {
            missing: undefined,
            empty: null,
            ollama: { timeoutSeconds: 1800 },
          },
        },
      }),
    ).toBe(1_800_000);
  });

  it("clamps to MAX_SAFE_TIMEOUT_MS for absurd inputs", () => {
    // 10_000_000 s is 1e10 ms, far above the ceiling, so the result must be
    // exactly the ceiling rather than merely "at most" the ceiling.
    expect(
      resolveResponseTimeoutMs({
        models: { providers: { ollama: { timeoutSeconds: 10_000_000 } } },
      }),
    ).toBe(2_147_000_000);
  });

  it("passes through values just below MAX_SAFE_TIMEOUT_MS unchanged", () => {
    // 2_000_000 s is 2e9 ms, still under the 2_147_000_000 ms ceiling.
    expect(
      resolveResponseTimeoutMs({
        models: { providers: { ollama: { timeoutSeconds: 2_000_000 } } },
      }),
    ).toBe(2_000_000_000);
  });
});

View File

@@ -0,0 +1,103 @@
/**
* QQBot outbound response watchdog timeout resolver.
*
* Background — issue #85267:
* The reporter ran openclaw + ollama + `qwen3.5:27b` (a slow local model)
* with `models.providers.ollama.timeoutSeconds: 1800` and saw the
* QQBot reply path abort at ~5 minutes with "LLM request timed out",
* despite the direct ollama call to the same model working. The
* embedded-runner / idle-timeout layer already honors longer
* provider timeouts (see `src/agents/pi-embedded-runner/run/llm-idle-timeout.ts`),
* but the QQBot outbound dispatcher held an independent hardcoded
* `RESPONSE_TIMEOUT = 300_000` watchdog that quietly undercut the
* configured ceiling.
*
* Fix shape (clawsweeper `clawsweeper:fix-shape-clear`):
* Don't add a new QQBot-only knob. Instead derive the QQBot wait
* budget from the existing agent/provider timeout settings the user
* already configured:
* - `agents.defaults.timeoutSeconds`
* - `models.providers.<id>.timeoutSeconds` (max across configured providers)
* Take the maximum and clamp to `[DEFAULT_RESPONSE_TIMEOUT_MS, MAX_SAFE_TIMEOUT_MS]`.
* The default floor preserves the existing 5-minute guard for users
* that have not configured any longer ceiling — i.e. a no-op for
* typical cloud-model deployments.
*/
/**
 * Default QQBot outbound response watchdog, in milliseconds (5 minutes),
 * used when no config override is present. Preserves the historical
 * 5-minute guard for unconfigured deployments and acts as the lower bound
 * of the resolved watchdog.
 */
export const DEFAULT_RESPONSE_TIMEOUT_MS = 300_000;
/**
 * Upper bound, in milliseconds, that keeps the watchdog inside the safe
 * `setTimeout` range (approximately 24.8 days). The limit for a timer delay
 * is 2_147_483_647 ms (2^31 - 1); this value is rounded down slightly from
 * it. Node.js documents that a delay above that limit is reset to 1 ms, so
 * an unclamped value would fire the watchdog almost immediately.
 * Mirrors `MAX_SAFE_TIMEOUT_MS` in
 * `src/agents/pi-embedded-runner/run/llm-idle-timeout.ts`.
 */
const MAX_SAFE_TIMEOUT_MS = 2_147_000_000;
/**
 * One `models.providers.<id>` entry. Only `timeoutSeconds` is read, and it is
 * typed `unknown` because the value is validated at runtime, not trusted.
 */
type ProviderEntryLike = {
  timeoutSeconds?: unknown;
};

/** The `models` block: a map from provider id to its (possibly absent) entry. */
type ModelsBlockLike = {
  providers?: Record<string, ProviderEntryLike | undefined> | undefined;
};

/** The `agents.defaults` block; only `timeoutSeconds` is read. */
type AgentsDefaultsLike = {
  timeoutSeconds?: unknown;
};

/** The `agents` block; only `defaults` is read. */
type AgentsBlockLike = {
  defaults?: AgentsDefaultsLike;
};

/**
 * Minimal structural view of the config object, covering only the two
 * branches the watchdog resolver inspects. Every level is optional.
 */
type CfgShape = {
  agents?: AgentsBlockLike;
  models?: ModelsBlockLike;
};
/**
 * Convert a configured timeout in seconds to whole milliseconds.
 *
 * @param value - Raw config value; may be of any type.
 * @returns The value times 1000, rounded down, when `value` is a finite
 *   number greater than zero; otherwise `undefined` so the caller can skip it.
 */
function positiveSecondsToMs(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    // Round down so fractional seconds never yield a fractional delay.
    return Math.floor(value * 1000);
  }
  return undefined;
}
/**
 * Compute the QQBot outbound response watchdog delay, in milliseconds.
 *
 * Starts from `DEFAULT_RESPONSE_TIMEOUT_MS` (the 5-minute historical floor)
 * and raises it to the largest usable timeout found in:
 *   - `cfg.agents.defaults.timeoutSeconds`
 *   - every `cfg.models.providers.<id>.timeoutSeconds`
 * Values that are not finite positive numbers are ignored.
 *
 * @param cfg - Config object of unknown shape; `null` and `undefined` are
 *   treated as an empty config.
 * @returns A delay between `DEFAULT_RESPONSE_TIMEOUT_MS` and
 *   `MAX_SAFE_TIMEOUT_MS`, so it is always a safe `setTimeout` argument.
 */
export function resolveResponseTimeoutMs(cfg: unknown): number {
  const shape = (cfg ?? {}) as CfgShape;

  // Collect the raw second values first: agent default, then each provider.
  const rawSeconds: unknown[] = [shape.agents?.defaults?.timeoutSeconds];
  const providerMap = shape.models?.providers;
  if (providerMap && typeof providerMap === "object") {
    for (const providerEntry of Object.values(providerMap)) {
      rawSeconds.push(providerEntry?.timeoutSeconds);
    }
  }

  // Running maximum, seeded with the floor so shorter values never win.
  let longestMs = DEFAULT_RESPONSE_TIMEOUT_MS;
  for (const seconds of rawSeconds) {
    const ms = positiveSecondsToMs(seconds);
    if (ms !== undefined && ms > longestMs) {
      longestMs = ms;
    }
  }

  return Math.min(longestMs, MAX_SAFE_TIMEOUT_MS);
}