fix(qqbot): derive outbound watchdog from configured timeouts (#85267) (#86500)

Summary:
- The branch replaces QQBot's hardcoded outbound response watchdog with a resolver based on existing agent/provider `timeoutSeconds` settings, adds regression tests, and updates the changelog.
- PR surface: Source +113, Tests +116, Docs +1. Total +230 across 5 files.
- Reproducibility: yes, at source level: current main and the latest release use a hardcoded 300000 ms QQBot o ... s an 1800s provider timeout. I did not run the reporter's live QQBot/Ollama setup in this read-only review.

Automerge notes:
- PR branch already contained follow-up commit before automerge: test(qqbot): cover slow provider response watchdog
- PR branch already contained follow-up commit before automerge: fix(qqbot): derive outbound watchdog from configured timeouts (#85267)
- PR branch already contained follow-up commit before automerge: fix(clawsweeper): address review for automerge-openclaw-openclaw-8527…

Validation:
- ClawSweeper review passed for head 7bd829292a.
- Required merge gates passed before the squash merge.

Prepared head SHA: 7bd829292a
Review: https://github.com/openclaw/openclaw/pull/86500#issuecomment-4534669816

Co-authored-by: SymbolStar <symbolstar@users.noreply.github.com>
Co-authored-by: Onur Solmaz <2453968+osolmaz@users.noreply.github.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: osolmaz
Co-authored-by: osolmaz <2453968+osolmaz@users.noreply.github.com>
2026-07-12 16:16:02 +00:00 · 2026-05-25 14:52:42 +00:00
parent 6f695c1864
commit aa702cf3db
5 changed files with 232 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
 - Cron: seed active scheduled and manual cron task rows with a progress summary so status surfaces do not look blank while jobs run. (#86313) Thanks @ferminquant.
 - Updater: exclude prerelease git tags from stable channel resolution so source updates do not check out newer alpha/rc/preview/canary tags. (#86260) Thanks @stevenepalmer.
 - Security/Audit: flag webhook `hooks.token` reuse of active Gateway password auth in `openclaw security audit` while keeping password-mode startup compatibility. (#84338) Thanks @coygeek.
+- QQBot: derive the outbound reply watchdog from configured agent and provider timeouts so slow local model replies are not cut off at five minutes. Fixes #85267. (#85271) Thanks @SymbolStar.
 - Agents/heartbeat: stop heartbeat turns after the first valid `heartbeat_respond` so repeated response loops do not burn tokens. (#86357) Thanks @udaymanish6.
 - Tasks: keep retained lost tasks out of default status health counts, explain their cleanup window during maintenance, and prune lost task records after 24 hours instead of the general 7-day terminal retention.
 - Memory-core: keep REM dreaming focused on live light-staged memories and mark staged entries as considered so old recall history no longer dominates fresh candidates. (#86302) Thanks @SebTardif.
--- a/extensions/qqbot/src/engine/gateway/outbound-dispatch.test.ts
+++ b/extensions/qqbot/src/engine/gateway/outbound-dispatch.test.ts
@@ -171,6 +171,47 @@ describe("dispatchOutbound", () => {
    vi.clearAllMocks();
  });

+  it("keeps waiting past 300s when a slow provider timeout is configured", async () => {
+    vi.useFakeTimers();
+    try {
+      const runtime = makeRuntime({
+        onDeliver: async (deliver) => {
+          await new Promise<void>((resolve) => setTimeout(resolve, 301_000));
+          await deliver({ text: "late answer" }, { kind: "block" });
+        },
+      });
+      let settled = false;
+
+      const dispatchPromise = dispatchOutbound(makeInbound(), {
+        runtime,
+        cfg: {
+          models: { providers: { ollama: { timeoutSeconds: 1800 } } },
+        },
+        account,
+      }).finally(() => {
+        settled = true;
+      });
+
+      await vi.advanceTimersByTimeAsync(300_000);
+
+      expect(settled).toBe(false);
+      expect(sendTextMock).not.toHaveBeenCalled();
+
+      await vi.advanceTimersByTimeAsync(1_000);
+      await dispatchPromise;
+
+      expect(sendTextMock).toHaveBeenCalledWith(
+        expect.anything(),
+        "late answer",
+        expect.anything(),
+        expect.anything(),
+      );
+    } finally {
+      vi.clearAllTimers();
+      vi.useRealTimers();
+    }
+  });
+
  it("marks voice-only inbound as audio without adding voice paths to MediaPaths", async () => {
    let finalized: Record<string, unknown> | undefined;
    const runtime = makeRuntime({ onFinalize: (ctx) => (finalized = ctx) });
--- a/extensions/qqbot/src/engine/gateway/outbound-dispatch.ts
+++ b/extensions/qqbot/src/engine/gateway/outbound-dispatch.ts
@@ -33,6 +33,7 @@ import {
 import { StreamingController, shouldUseOfficialC2cStream } from "../messaging/streaming-c2c.js";
 import { audioFileToSilkBase64 } from "../utils/audio.js";
 import type { InboundContext } from "./inbound-context.js";
+import { resolveResponseTimeoutMs } from "./response-timeout.js";
 import type {
  GatewayAccount,
  EngineLogger,
@@ -42,7 +43,12 @@ import type {

 // ============ Config ============

-const RESPONSE_TIMEOUT = 300_000;
+// The QQBot outbound response watchdog is no longer a hardcoded constant
+// here (it used to be `RESPONSE_TIMEOUT = 300_000`). It is now derived from
+// `agents.defaults.timeoutSeconds` and `models.providers.<id>.timeoutSeconds`
+// via `resolveResponseTimeoutMs(cfg)`, with a 5-minute floor — see issue
+// #85267, where a slow local ollama/qwen3.5:27b turn was capped at 5 min
+// despite a configured 1800s provider timeout.
 const TOOL_ONLY_TIMEOUT = 60_000;
 const MAX_TOOL_RENEWALS = 3;
 const TOOL_MEDIA_SEND_TIMEOUT = 45_000;
@@ -149,12 +155,16 @@ export async function dispatchOutbound(
  };

  // ---- Timeout promise ----
+  // #85267: derive watchdog from existing agent / provider timeout config so
+  // a longer configured ceiling (e.g. slow local ollama models) is not
+  // silently undercut by a plugin-local 5-minute cap.
+  const responseTimeoutMs = resolveResponseTimeoutMs(cfg);
  const timeoutPromise = new Promise<void>((_, reject) => {
    timeoutId = setTimeout(() => {
      if (!hasResponse) {
        reject(new Error("Response timeout"));
      }
-    }, RESPONSE_TIMEOUT);
+    }, responseTimeoutMs);
  });

  // ---- Deliver deps ----
--- a/extensions/qqbot/src/engine/gateway/response-timeout.test.ts
+++ b/extensions/qqbot/src/engine/gateway/response-timeout.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, it } from "vitest";
+import {
+  DEFAULT_RESPONSE_TIMEOUT_MS,
+  resolveResponseTimeoutMs,
+} from "./response-timeout.js";
+
+describe("resolveResponseTimeoutMs", () => {
+  it("falls back to the historical 5-minute floor when no timeouts configured", () => {
+    expect(resolveResponseTimeoutMs({})).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
+    expect(resolveResponseTimeoutMs(undefined)).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
+    expect(resolveResponseTimeoutMs(null)).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
+  });
+
+  it("honors longer agents.defaults.timeoutSeconds", () => {
+    expect(
+      resolveResponseTimeoutMs({ agents: { defaults: { timeoutSeconds: 900 } } }),
+    ).toBe(900_000);
+  });
+
+  it("ignores agents.defaults.timeoutSeconds shorter than the historical floor", () => {
+    // Issue #85267: a configured 60s agent timeout must not undercut the
+    // historical 5-minute watchdog floor for previously-working setups.
+    expect(
+      resolveResponseTimeoutMs({ agents: { defaults: { timeoutSeconds: 60 } } }),
+    ).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
+  });
+
+  it("honors models.providers.<id>.timeoutSeconds for slow local providers (#85267)", () => {
+    // Direct repro shape: ollama + qwen3.5:27b with 1800s timeout. Without
+    // this fix, QQBot capped at 300s and surfaced "LLM request timed out".
+    expect(
+      resolveResponseTimeoutMs({
+        models: { providers: { ollama: { timeoutSeconds: 1800 } } },
+      }),
+    ).toBe(1_800_000);
+  });
+
+  it("takes the maximum across multiple configured providers and agents", () => {
+    expect(
+      resolveResponseTimeoutMs({
+        agents: { defaults: { timeoutSeconds: 600 } },
+        models: {
+          providers: {
+            ollama: { timeoutSeconds: 1800 },
+            "lm-studio": { timeoutSeconds: 900 },
+            openai: { timeoutSeconds: 60 },
+          },
+        },
+      }),
+    ).toBe(1_800_000);
+  });
+
+  it("ignores non-positive or non-numeric timeout values", () => {
+    expect(
+      resolveResponseTimeoutMs({
+        agents: { defaults: { timeoutSeconds: -1 } },
+        models: {
+          providers: {
+            ollama: { timeoutSeconds: 0 },
+            broken: { timeoutSeconds: "1800" as unknown as number },
+            naN: { timeoutSeconds: Number.NaN },
+          },
+        },
+      }),
+    ).toBe(DEFAULT_RESPONSE_TIMEOUT_MS);
+  });
+
+  it("clamps to MAX_SAFE_TIMEOUT_MS for absurd inputs", () => {
+    const huge = resolveResponseTimeoutMs({
+      models: { providers: { ollama: { timeoutSeconds: 10_000_000 } } },
+    });
+    expect(huge).toBeLessThanOrEqual(2_147_000_000);
+    expect(huge).toBeGreaterThan(DEFAULT_RESPONSE_TIMEOUT_MS);
+  });
+});
--- a/extensions/qqbot/src/engine/gateway/response-timeout.ts
+++ b/extensions/qqbot/src/engine/gateway/response-timeout.ts
@@ -0,0 +1,103 @@
+/**
+ * QQBot outbound response watchdog timeout resolver.
+ *
+ * Background — issue #85267:
+ *   The reporter ran openclaw + ollama + `qwen3.5:27b` (a slow local model)
+ *   with `models.providers.ollama.timeoutSeconds: 1800` and saw the
+ *   QQBot reply path abort at ~5 minutes with "LLM request timed out",
+ *   despite the direct ollama call to the same model working. The
+ *   embedded-runner / idle-timeout layer already honors longer
+ *   provider timeouts (see `src/agents/pi-embedded-runner/run/llm-idle-timeout.ts`),
+ *   but the QQBot outbound dispatcher held an independent hardcoded
+ *   `RESPONSE_TIMEOUT = 300_000` watchdog that quietly undercut the
+ *   configured ceiling.
+ *
+ * Fix shape (clawsweeper `clawsweeper:fix-shape-clear`):
+ *   Don't add a new QQBot-only knob. Instead derive the QQBot wait
+ *   budget from the existing agent/provider timeout settings the user
+ *   already configured:
+ *     - `agents.defaults.timeoutSeconds`
+ *     - `models.providers.<id>.timeoutSeconds` (max across configured providers)
+ *   Take the maximum and clamp to `[DEFAULT_RESPONSE_TIMEOUT_MS, MAX_SAFE_TIMEOUT_MS]`.
+ *   The default floor preserves the existing 5-minute guard for users
+ *   that have not configured any longer ceiling — i.e. a no-op for
+ *   typical cloud-model deployments.
+ */
+
+/**
+ * Default QQBot outbound response watchdog when no config override is
+ * present. Preserves the historical 5-minute guard for unconfigured
+ * deployments.
+ */
+export const DEFAULT_RESPONSE_TIMEOUT_MS = 300_000;
+
+/**
+ * Upper bound that keeps the watchdog below the signed 32-bit `setTimeout`
+ * delay limit (2_147_483_647 ms, about 24.8 days). Mirrors
+ * `MAX_SAFE_TIMEOUT_MS` in `src/agents/pi-embedded-runner/run/llm-idle-timeout.ts`.
+ */
+const MAX_SAFE_TIMEOUT_MS = 2_147_000_000;
+
+interface AgentsDefaultsLike {
+  timeoutSeconds?: unknown;
+}
+
+interface AgentsBlockLike {
+  defaults?: AgentsDefaultsLike;
+}
+
+interface ProviderEntryLike {
+  timeoutSeconds?: unknown;
+}
+
+interface ModelsBlockLike {
+  providers?: Record<string, ProviderEntryLike | undefined> | undefined;
+}
+
+interface CfgShape {
+  agents?: AgentsBlockLike;
+  models?: ModelsBlockLike;
+}
+
+function positiveSecondsToMs(value: unknown): number | undefined {
+  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
+    return undefined;
+  }
+  return Math.floor(value * 1000);
+}
+
+/**
+ * Resolve the QQBot outbound response watchdog (ms).
+ *
+ * The watchdog is the longest of:
+ *   - `DEFAULT_RESPONSE_TIMEOUT_MS` (5 min, historical floor)
+ *   - `cfg.agents.defaults.timeoutSeconds` converted to ms
+ *   - the largest `cfg.models.providers.<id>.timeoutSeconds`, converted to ms
+ *
+ * Non-numeric, non-finite, or non-positive `timeoutSeconds` values are
+ * ignored. The result is capped at `MAX_SAFE_TIMEOUT_MS` so it is always
+ * a safe `setTimeout` argument.
+ */
+export function resolveResponseTimeoutMs(cfg: unknown): number {
+  const candidates: number[] = [DEFAULT_RESPONSE_TIMEOUT_MS];
+
+  const typed = (cfg ?? {}) as CfgShape;
+
+  const agentDefaultMs = positiveSecondsToMs(typed.agents?.defaults?.timeoutSeconds);
+  if (agentDefaultMs !== undefined) {
+    candidates.push(agentDefaultMs);
+  }
+
+  const providers = typed.models?.providers;
+  if (providers && typeof providers === "object") {
+    for (const entry of Object.values(providers)) {
+      const providerMs = positiveSecondsToMs(entry?.timeoutSeconds);
+      if (providerMs !== undefined) {
+        candidates.push(providerMs);
+      }
+    }
+  }
+
+  const chosen = Math.max(...candidates);
+  return Math.min(chosen, MAX_SAFE_TIMEOUT_MS);
+}