docs: document embedded failover image tests

2026-06-28 10:33:34 +00:00 · 2026-06-04 14:51:49 -04:00
parent c7c67fc790
commit d91f645d28
8 changed files with 62 additions and 0 deletions
--- a/src/agents/embedded-agent-runner/run/failover-observation.test.ts
+++ b/src/agents/embedded-agent-runner/run/failover-observation.test.ts
@@ -1,3 +1,5 @@
+// Failover observation tests pin the warning payloads emitted when embedded
+// runs decide whether to retry, rotate profiles, fall back, or surface errors.
 import { afterEach, describe, expect, it, vi } from "vitest";
 import { log } from "../logger.js";
 import {
@@ -8,6 +10,8 @@ import {
 function normalizeObservation(
  overrides: Partial<Parameters<typeof normalizeFailoverDecisionObservationBase>[0]>,
 ) {
+  // Keep the base case boring so each test only states the failure dimension
+  // whose log metadata should change.
  return normalizeFailoverDecisionObservationBase({
    stage: "assistant",
    runId: "run:base",
@@ -41,6 +45,8 @@ function firstWarnDetails(warnSpy: { mock: { calls: unknown[][] } }): {
  sourceModel?: string;
  sourceProvider?: string;
 } {
+  // The logger intentionally records structured details separate from the
+  // console message, so assertions can cover both machine and human evidence.
  return firstWarnCall(warnSpy)[1] as {
    consoleMessage?: string;
    model?: string;
@@ -159,6 +165,8 @@ describe("createFailoverDecisionLogger", () => {
    logDecision("rotate_profile");

    const observation = firstWarnDetails(warnSpy);
+    // Raw provider bodies stay in structured preview fields; console output
+    // must not dump HTML auth pages into user-visible retry diagnostics.
    expect(observation.providerRuntimeFailureKind).toBe("auth_html");
    expect(observation.rawErrorPreview).toBe(
      "401 <!DOCTYPE html><html><body>Unauthorized</body></html>",
--- a/src/agents/embedded-agent-runner/run/failover-policy.test.ts
+++ b/src/agents/embedded-agent-runner/run/failover-policy.test.ts
@@ -1,8 +1,12 @@
+// Failover policy tests cover the embedded run decision table for retry,
+// profile rotation, fallback model escalation, and user-visible errors.
 import { describe, expect, it } from "vitest";
 import { mergeRetryFailoverReason, resolveRunFailoverDecision } from "./failover-policy.js";

 describe("resolveRunFailoverDecision", () => {
  it("escalates retry-limit exhaustion for replay-safe failover reasons", () => {
+    // Retry-limit exhaustion is only a model-fallback signal when the carried
+    // reason is known to be safe to replay against a different model.
    expect(
      resolveRunFailoverDecision({
        stage: "retry_limit",
@@ -28,6 +32,8 @@ describe("resolveRunFailoverDecision", () => {
  });

  it("prefers prompt-side profile rotation before fallback", () => {
+    // Prompt construction can fail before any model output exists, so rotate
+    // the current provider profile before spending the configured fallback.
    expect(
      resolveRunFailoverDecision({
        stage: "prompt",
@@ -97,6 +103,8 @@ describe("resolveRunFailoverDecision", () => {
  });

  it("ignores stale classified assistant-side 429 text without error stopReason", () => {
+    // Classifiers may see old assistant text in the transcript. Without an
+    // actual failure signal, stale billing/rate-limit text must not trigger failover.
    expect(
      resolveRunFailoverDecision({
        stage: "assistant",
@@ -299,6 +307,8 @@ describe("resolveRunFailoverDecision", () => {
  });

  it("does not rotate harness-owned assistant timeouts", () => {
+    // Harness-owned transports already implement their own retry envelope;
+    // core failover should not double-rotate on those synthetic timeouts.
    expect(
      resolveRunFailoverDecision({
        stage: "assistant",
--- a/src/agents/embedded-agent-runner/run/fallbacks.test.ts
+++ b/src/agents/embedded-agent-runner/run/fallbacks.test.ts
@@ -1,3 +1,5 @@
+// Fallback configuration tests pin how embedded runs detect model fallback
+// availability from explicit overrides versus normal agent config.
 import { describe, expect, it } from "vitest";
 import type { OpenClawConfig } from "../../../config/types.openclaw.js";
 import { hasEmbeddedRunConfiguredModelFallbacks } from "./fallbacks.js";
@@ -13,6 +15,8 @@ describe("hasEmbeddedRunConfiguredModelFallbacks", () => {
  });

  it("treats explicit empty modelFallbacksOverride as disabling fallbacks", () => {
+    // An explicit empty override is a caller decision, not a request to fall
+    // back to defaults from the persisted OpenClaw config.
    const cfg: OpenClawConfig = {
      agents: {
        defaults: {
--- a/src/agents/embedded-agent-runner/run/helpers.resolve-error-context.test.ts
+++ b/src/agents/embedded-agent-runner/run/helpers.resolve-error-context.test.ts
@@ -1,3 +1,5 @@
+// Error-context helper tests keep failure metadata pointed at the model that
+// actually failed, even when the embedded harness wraps the provider call.
 import { describe, expect, it } from "vitest";
 import { resolveActiveErrorContext } from "./helpers.js";

@@ -24,6 +26,8 @@ describe("resolveActiveErrorContext", () => {
  });

  it("ignores the embedded OpenClaw harness provider when the model provider is known", () => {
+    // The OpenClaw harness id is a transport wrapper, not the provider users
+    // need in diagnostics when a concrete upstream model ref is available.
    const result = resolveActiveErrorContext({
      provider: "openrouter",
      model: "openai/gpt-5.4",
--- a/src/agents/embedded-agent-runner/run/helpers.test.ts
+++ b/src/agents/embedded-agent-runner/run/helpers.test.ts
@@ -1,3 +1,5 @@
+// Embedded run helper tests cover final assistant text extraction and error
+// metadata assembly shared by normal exits and failure paths.
 import type { AssistantMessage } from "openclaw/plugin-sdk/llm";
 import { describe, expect, it } from "vitest";
 import { createUsageAccumulator } from "../usage-accumulator.js";
@@ -11,6 +13,8 @@ function makeAssistantMessage(
  content: AssistantMessage["content"],
  phase?: string,
 ): AssistantMessage {
+  // Minimal assistant fixture with usage fields required by the SDK type; the
+  // tested helpers only care about content, phase, and final metadata.
  return {
    api: "responses",
    provider: "openai",
@@ -33,6 +37,8 @@ function makeAssistantMessage(

 describe("resolveFinalAssistantVisibleText", () => {
  it("prefers final_answer text over commentary blocks", () => {
+    // Commentary can be streamed before the final answer; user-visible result
+    // extraction must choose the signed final phase when present.
    const lastAssistant = makeAssistantMessage([
      {
        type: "text",
@@ -81,6 +87,8 @@ describe("resolveFinalAssistantVisibleText", () => {

 describe("buildErrorAgentMeta", () => {
  it("preserves active session file for error exits after transcript rotation", () => {
+    // Error metadata follows the active session after transcript rotation so
+    // diagnostics and resume links point at the file that contains the failure.
    expect(
      buildErrorAgentMeta({
        sessionId: "session-rotated",
--- a/src/agents/embedded-agent-runner/run/history-image-prune.test.ts
+++ b/src/agents/embedded-agent-runner/run/history-image-prune.test.ts
@@ -1,3 +1,5 @@
+// History image prune tests keep provider replay compact by replacing stale
+// image bytes and media references while preserving recent user context.
 import type { AgentMessage } from "openclaw/plugin-sdk/agent-core";
 import type { ImageContent } from "openclaw/plugin-sdk/llm";
 import { describe, expect, it } from "vitest";
@@ -62,6 +64,8 @@ function expectImageMessagePreserved(messages: AgentMessage[], errorMessage: str
 }

 function oldEnoughTail(): AgentMessage[] {
+  // Four assistant turns make the first message old enough to prune while
+  // keeping each test focused on content rewriting instead of turn counting.
  const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" });
  const userText = () => castAgentMessage({ role: "user", content: "more" });
  return [
@@ -100,6 +104,8 @@ describe("pruneProcessedHistoryImages", () => {
  });

  it("scrubs old media attachment markers from text blocks", () => {
+    // Text references are scrubbed alongside image blocks so old paths and
+    // media URIs cannot rehydrate stale images on a later replay.
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "user",
@@ -224,6 +230,8 @@ describe("pruneProcessedHistoryImages", () => {
  });

  it("does not count multiple assistant messages from one tool loop as separate turns", () => {
+    // Tool-call assistant messages belong to one model turn; counting each
+    // message separately would prune images too aggressively inside tool loops.
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "user",
@@ -337,6 +345,8 @@ describe("installHistoryImagePruneContextTransform", () => {
  const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };

  it("prunes the provider replay view after an existing context transform", async () => {
+    // The transform wrapper prunes only the replay view returned to providers,
+    // leaving upstream transform output and restore behavior intact.
    const messages: AgentMessage[] = [
      castAgentMessage({ role: "user", content: "fresh prompt" }),
      ...oldEnoughTail(),
--- a/src/agents/embedded-agent-runner/run/idle-timeout-breaker.test.ts
+++ b/src/agents/embedded-agent-runner/run/idle-timeout-breaker.test.ts
@@ -1,3 +1,5 @@
+// Idle-timeout breaker tests cover the outer run-loop guard that stops
+// repeated silent provider attempts from spinning forever.
 import { describe, expect, it } from "vitest";
 import {
  MAX_CONSECUTIVE_IDLE_TIMEOUTS_BEFORE_OUTPUT,
@@ -25,6 +27,8 @@ describe("stepIdleTimeoutBreaker (#76293)", () => {
    }>,
    options?: { cap?: number },
  ) {
+    // Drive one persistent breaker state across attempts, matching the run
+    // loop scope where profile rotation and retry sessions would otherwise reset it.
    const state = createIdleTimeoutBreakerState();
    const steps: Array<{ consecutive: number; tripped: boolean }> = [];
    for (const input of inputs) {
--- a/src/agents/embedded-agent-runner/run/images.test.ts
+++ b/src/agents/embedded-agent-runner/run/images.test.ts
@@ -1,3 +1,5 @@
+// Prompt image tests cover local reference parsing, sandbox-aware loading, and
+// attachment ordering for embedded runs that send images to vision models.
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
@@ -35,6 +37,8 @@ function expectImageReferenceCount(prompt: string, count: number) {
 }

 function expectSingleImageReference(prompt: string) {
+  // Most parser cases should find exactly one local image ref; this helper
+  // keeps failures about over-detection obvious.
  const refs = expectImageReferenceCount(prompt, 1);
  return refs[0];
 }
@@ -83,6 +87,8 @@ describe("detectImageReferences", () => {
  });

  it("ignores OpenClaw CLI image cache paths from prior prompt transcripts", () => {
+    // Cache paths from generated tool reminders are replay artifacts, not new
+    // user attachments to hydrate again.
    const refs = detectImageReferences(
      [
        '<system-reminder>Called the Read tool with {"file_path":"/Users/ada/.openclaw/workspace/.openclaw-cli-images/stale.png"}</system-reminder>',
@@ -194,6 +200,8 @@ describe("detectImageReferences", () => {
  });

  it("dedupe casing follows host filesystem conventions", () => {
+    // Windows resolves these as the same path, while POSIX hosts preserve both
+    // candidates because case can identify different files.
    const prompt = "Look at /tmp/Image.png and /tmp/image.png";
    if (process.platform === "win32") {
      expect(detectImageReferences(prompt)).toStrictEqual([
@@ -403,6 +411,8 @@ describe("modelSupportsImages", () => {

 describe("loadImageFromRef", () => {
  it("hydrates managed inbound media URIs before workspace path resolution", async () => {
+    // Managed media URIs are canonical inbound attachment handles and should
+    // work even when workspaceOnly would reject ordinary outside paths.
    const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-uri-"));
    const workspaceDir = path.join(stateDir, "workspace-agent");
    const inboundDir = path.join(stateDir, "media", "inbound");
@@ -575,6 +585,8 @@ describe("detectAndLoadPromptImages", () => {
  });

  it("preserves attachment order when offloaded refs and inline images are mixed", () => {
+    // The model receives images in the user's attachment order, not grouped by
+    // storage mechanism.
    const merged = mergePromptAttachmentImages({
      imageOrder: ["offloaded", "inline"],
      existingImages: [{ type: "image", data: "small-b", mimeType: "image/png" }],
@@ -616,6 +628,8 @@ describe("detectAndLoadPromptImages", () => {
  });

  it("blocks prompt image refs outside workspace when sandbox workspaceOnly is enabled", async () => {
+    // Sandbox workspaceOnly uses the bridge to validate mounted paths; ordinary
+    // prompt refs outside the workspace are detected but intentionally skipped.
    const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-sandbox-"));
    const sandboxRoot = path.join(stateDir, "sandbox");
    const agentRoot = path.join(stateDir, "agent");