docs: document embedded failover image tests

This commit is contained in:
Peter Steinberger
2026-06-04 14:51:49 -04:00
parent c7c67fc790
commit d91f645d28
8 changed files with 62 additions and 0 deletions

View File

@@ -1,3 +1,5 @@
// Failover observation tests pin the warning payloads emitted when embedded
// runs decide whether to retry, rotate profiles, fall back, or surface errors.
import { afterEach, describe, expect, it, vi } from "vitest";
import { log } from "../logger.js";
import {
@@ -8,6 +10,8 @@ import {
function normalizeObservation(
overrides: Partial<Parameters<typeof normalizeFailoverDecisionObservationBase>[0]>,
) {
// Keep the base case boring so each test only states the failure dimension
// whose log metadata should change.
return normalizeFailoverDecisionObservationBase({
stage: "assistant",
runId: "run:base",
@@ -41,6 +45,8 @@ function firstWarnDetails(warnSpy: { mock: { calls: unknown[][] } }): {
sourceModel?: string;
sourceProvider?: string;
} {
// The logger intentionally records structured details separate from the
// console message, so assertions can cover both machine and human evidence.
return firstWarnCall(warnSpy)[1] as {
consoleMessage?: string;
model?: string;
@@ -159,6 +165,8 @@ describe("createFailoverDecisionLogger", () => {
logDecision("rotate_profile");
const observation = firstWarnDetails(warnSpy);
// Raw provider bodies stay in structured preview fields; console output
// must not dump HTML auth pages into user-visible retry diagnostics.
expect(observation.providerRuntimeFailureKind).toBe("auth_html");
expect(observation.rawErrorPreview).toBe(
"401 <!DOCTYPE html><html><body>Unauthorized</body></html>",

View File

@@ -1,8 +1,12 @@
// Failover policy tests cover the embedded run decision table for retry,
// profile rotation, fallback model escalation, and user-visible errors.
import { describe, expect, it } from "vitest";
import { mergeRetryFailoverReason, resolveRunFailoverDecision } from "./failover-policy.js";
describe("resolveRunFailoverDecision", () => {
it("escalates retry-limit exhaustion for replay-safe failover reasons", () => {
// Retry-limit exhaustion is only a model-fallback signal when the carried
// reason is known to be safe to replay against a different model.
expect(
resolveRunFailoverDecision({
stage: "retry_limit",
@@ -28,6 +32,8 @@ describe("resolveRunFailoverDecision", () => {
});
it("prefers prompt-side profile rotation before fallback", () => {
// Prompt construction can fail before any model output exists, so rotate
// the current provider profile before spending the configured fallback.
expect(
resolveRunFailoverDecision({
stage: "prompt",
@@ -97,6 +103,8 @@ describe("resolveRunFailoverDecision", () => {
});
it("ignores stale classified assistant-side 429 text without error stopReason", () => {
// Classifiers may see old assistant text in the transcript. Without an
// actual failure signal, stale billing/rate-limit text is not a failover signal.
expect(
resolveRunFailoverDecision({
stage: "assistant",
@@ -299,6 +307,8 @@ describe("resolveRunFailoverDecision", () => {
});
it("does not rotate harness-owned assistant timeouts", () => {
// Harness-owned transports already implement their own retry envelope;
// core failover should not double-rotate on those synthetic timeouts.
expect(
resolveRunFailoverDecision({
stage: "assistant",

View File

@@ -1,3 +1,5 @@
// Fallback configuration tests pin how embedded runs detect model fallback
// availability from explicit overrides versus normal agent config.
import { describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../../../config/types.openclaw.js";
import { hasEmbeddedRunConfiguredModelFallbacks } from "./fallbacks.js";
@@ -13,6 +15,8 @@ describe("hasEmbeddedRunConfiguredModelFallbacks", () => {
});
it("treats explicit empty modelFallbacksOverride as disabling fallbacks", () => {
// An explicit empty override is a caller decision, not a request to fall
// back to defaults from the persisted OpenClaw config.
const cfg: OpenClawConfig = {
agents: {
defaults: {

View File

@@ -1,3 +1,5 @@
// Error-context helper tests keep failure metadata pointed at the model that
// actually failed, even when the embedded harness wraps the provider call.
import { describe, expect, it } from "vitest";
import { resolveActiveErrorContext } from "./helpers.js";
@@ -24,6 +26,8 @@ describe("resolveActiveErrorContext", () => {
});
it("ignores the embedded OpenClaw harness provider when the model provider is known", () => {
// The OpenClaw harness id is a transport wrapper, not the provider users
// need in diagnostics when a concrete upstream model ref is available.
const result = resolveActiveErrorContext({
provider: "openrouter",
model: "openai/gpt-5.4",

View File

@@ -1,3 +1,5 @@
// Embedded run helper tests cover final assistant text extraction and error
// metadata assembly shared by normal exits and failure paths.
import type { AssistantMessage } from "openclaw/plugin-sdk/llm";
import { describe, expect, it } from "vitest";
import { createUsageAccumulator } from "../usage-accumulator.js";
@@ -11,6 +13,8 @@ function makeAssistantMessage(
content: AssistantMessage["content"],
phase?: string,
): AssistantMessage {
// Minimal assistant fixture with usage fields required by the SDK type; the
// tested helpers only care about content, phase, and final metadata.
return {
api: "responses",
provider: "openai",
@@ -33,6 +37,8 @@ function makeAssistantMessage(
describe("resolveFinalAssistantVisibleText", () => {
it("prefers final_answer text over commentary blocks", () => {
// Commentary can be streamed before the final answer; user-visible result
// extraction must choose the signed final phase when present.
const lastAssistant = makeAssistantMessage([
{
type: "text",
@@ -81,6 +87,8 @@ describe("resolveFinalAssistantVisibleText", () => {
describe("buildErrorAgentMeta", () => {
it("preserves active session file for error exits after transcript rotation", () => {
// Error metadata follows the active session after transcript rotation so
// diagnostics and resume links point at the file that contains the failure.
expect(
buildErrorAgentMeta({
sessionId: "session-rotated",

View File

@@ -1,3 +1,5 @@
// History image prune tests keep provider replay compact by replacing stale
// image bytes and media references while preserving recent user context.
import type { AgentMessage } from "openclaw/plugin-sdk/agent-core";
import type { ImageContent } from "openclaw/plugin-sdk/llm";
import { describe, expect, it } from "vitest";
@@ -62,6 +64,8 @@ function expectImageMessagePreserved(messages: AgentMessage[], errorMessage: str
}
function oldEnoughTail(): AgentMessage[] {
// Four assistant turns make the first message old enough to prune while
// keeping each test focused on content rewriting instead of turn counting.
const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" });
const userText = () => castAgentMessage({ role: "user", content: "more" });
return [
@@ -100,6 +104,8 @@ describe("pruneProcessedHistoryImages", () => {
});
it("scrubs old media attachment markers from text blocks", () => {
// Text references are scrubbed alongside image blocks so old paths and
// media URIs cannot rehydrate stale images on a later replay.
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
@@ -224,6 +230,8 @@ describe("pruneProcessedHistoryImages", () => {
});
it("does not count multiple assistant messages from one tool loop as separate turns", () => {
// Tool-call assistant messages belong to one model turn; counting each
// message separately would prune images too aggressively inside tool loops.
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
@@ -337,6 +345,8 @@ describe("installHistoryImagePruneContextTransform", () => {
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
it("prunes the provider replay view after an existing context transform", async () => {
// The transform wrapper prunes only the replay view returned to providers,
// leaving upstream transform output and restore behavior intact.
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "fresh prompt" }),
...oldEnoughTail(),

View File

@@ -1,3 +1,5 @@
// Idle-timeout breaker tests cover the outer run-loop guard that stops
// repeated silent provider attempts from spinning forever.
import { describe, expect, it } from "vitest";
import {
MAX_CONSECUTIVE_IDLE_TIMEOUTS_BEFORE_OUTPUT,
@@ -25,6 +27,8 @@ describe("stepIdleTimeoutBreaker (#76293)", () => {
}>,
options?: { cap?: number },
) {
// Drive one persistent breaker state across attempts, matching the run
// loop scope where profile rotation and retry sessions would otherwise reset.
const state = createIdleTimeoutBreakerState();
const steps: Array<{ consecutive: number; tripped: boolean }> = [];
for (const input of inputs) {

View File

@@ -1,3 +1,5 @@
// Prompt image tests cover local reference parsing, sandbox-aware loading, and
// attachment ordering for embedded runs that send images to vision models.
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
@@ -35,6 +37,8 @@ function expectImageReferenceCount(prompt: string, count: number) {
}
function expectSingleImageReference(prompt: string) {
  // Convenience wrapper for the common parser case: delegate to
  // expectImageReferenceCount with an expected count of 1 and hand back the
  // first detected reference, so over-detection shows up at the call site.
  return expectImageReferenceCount(prompt, 1)[0];
}
@@ -83,6 +87,8 @@ describe("detectImageReferences", () => {
});
it("ignores OpenClaw CLI image cache paths from prior prompt transcripts", () => {
// Cache paths from generated tool reminders are replay artifacts, not new
// user attachments to hydrate again.
const refs = detectImageReferences(
[
'<system-reminder>Called the Read tool with {"file_path":"/Users/ada/.openclaw/workspace/.openclaw-cli-images/stale.png"}</system-reminder>',
@@ -194,6 +200,8 @@ describe("detectImageReferences", () => {
});
it("dedupe casing follows host filesystem conventions", () => {
// Windows resolves these as the same path, while POSIX hosts preserve both
// candidates because case can identify different files.
const prompt = "Look at /tmp/Image.png and /tmp/image.png";
if (process.platform === "win32") {
expect(detectImageReferences(prompt)).toStrictEqual([
@@ -403,6 +411,8 @@ describe("modelSupportsImages", () => {
describe("loadImageFromRef", () => {
it("hydrates managed inbound media URIs before workspace path resolution", async () => {
// Managed media URIs are canonical inbound attachment handles and should
// work even when workspaceOnly would reject ordinary outside paths.
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-uri-"));
const workspaceDir = path.join(stateDir, "workspace-agent");
const inboundDir = path.join(stateDir, "media", "inbound");
@@ -575,6 +585,8 @@ describe("detectAndLoadPromptImages", () => {
});
it("preserves attachment order when offloaded refs and inline images are mixed", () => {
// The model receives images in the user's attachment order, not grouped by
// storage mechanism.
const merged = mergePromptAttachmentImages({
imageOrder: ["offloaded", "inline"],
existingImages: [{ type: "image", data: "small-b", mimeType: "image/png" }],
@@ -616,6 +628,8 @@ describe("detectAndLoadPromptImages", () => {
});
it("blocks prompt image refs outside workspace when sandbox workspaceOnly is enabled", async () => {
// Sandbox workspaceOnly uses the bridge to validate mounted paths; ordinary
// prompt refs outside the workspace are detected but intentionally skipped.
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-sandbox-"));
const sandboxRoot = path.join(stateDir, "sandbox");
const agentRoot = path.join(stateDir, "agent");