mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-28 10:33:34 +00:00
docs: document embedded failover image tests
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
// Failover observation tests pin the warning payloads emitted when embedded
|
||||
// runs decide whether to retry, rotate profiles, fall back, or surface errors.
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { log } from "../logger.js";
|
||||
import {
|
||||
@@ -8,6 +10,8 @@ import {
|
||||
function normalizeObservation(
|
||||
overrides: Partial<Parameters<typeof normalizeFailoverDecisionObservationBase>[0]>,
|
||||
) {
|
||||
// Keep the base case boring so each test only states the failure dimension
|
||||
// whose log metadata should change.
|
||||
return normalizeFailoverDecisionObservationBase({
|
||||
stage: "assistant",
|
||||
runId: "run:base",
|
||||
@@ -41,6 +45,8 @@ function firstWarnDetails(warnSpy: { mock: { calls: unknown[][] } }): {
|
||||
sourceModel?: string;
|
||||
sourceProvider?: string;
|
||||
} {
|
||||
// The logger intentionally records structured details separate from the
|
||||
// console message, so assertions can cover both machine and human evidence.
|
||||
return firstWarnCall(warnSpy)[1] as {
|
||||
consoleMessage?: string;
|
||||
model?: string;
|
||||
@@ -159,6 +165,8 @@ describe("createFailoverDecisionLogger", () => {
|
||||
logDecision("rotate_profile");
|
||||
|
||||
const observation = firstWarnDetails(warnSpy);
|
||||
// Raw provider bodies stay in structured preview fields; console output
|
||||
// must not dump HTML auth pages into user-visible retry diagnostics.
|
||||
expect(observation.providerRuntimeFailureKind).toBe("auth_html");
|
||||
expect(observation.rawErrorPreview).toBe(
|
||||
"401 <!DOCTYPE html><html><body>Unauthorized</body></html>",
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
// Failover policy tests cover the embedded run decision table for retry,
|
||||
// profile rotation, fallback model escalation, and user-visible errors.
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { mergeRetryFailoverReason, resolveRunFailoverDecision } from "./failover-policy.js";
|
||||
|
||||
describe("resolveRunFailoverDecision", () => {
|
||||
it("escalates retry-limit exhaustion for replay-safe failover reasons", () => {
|
||||
// Retry-limit exhaustion is only a model-fallback signal when the carried
|
||||
// reason is known to be safe to replay against a different model.
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "retry_limit",
|
||||
@@ -28,6 +32,8 @@ describe("resolveRunFailoverDecision", () => {
|
||||
});
|
||||
|
||||
it("prefers prompt-side profile rotation before fallback", () => {
|
||||
// Prompt construction can fail before any model output exists, so rotate
|
||||
// the current provider profile before spending the configured fallback.
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "prompt",
|
||||
@@ -97,6 +103,8 @@ describe("resolveRunFailoverDecision", () => {
|
||||
});
|
||||
|
||||
it("ignores stale classified assistant-side 429 text without error stopReason", () => {
|
||||
// Classifiers may see old assistant text in the transcript. Without an
|
||||
// actual failure signal, stale billing/rate-limit text is not failover.
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
@@ -299,6 +307,8 @@ describe("resolveRunFailoverDecision", () => {
|
||||
});
|
||||
|
||||
it("does not rotate harness-owned assistant timeouts", () => {
|
||||
// Harness-owned transports already implement their own retry envelope;
|
||||
// core failover should not double-rotate on those synthetic timeouts.
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// Fallback configuration tests pin how embedded runs detect model fallback
|
||||
// availability from explicit overrides versus normal agent config.
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../../../config/types.openclaw.js";
|
||||
import { hasEmbeddedRunConfiguredModelFallbacks } from "./fallbacks.js";
|
||||
@@ -13,6 +15,8 @@ describe("hasEmbeddedRunConfiguredModelFallbacks", () => {
|
||||
});
|
||||
|
||||
it("treats explicit empty modelFallbacksOverride as disabling fallbacks", () => {
|
||||
// An explicit empty override is a caller decision, not a request to fall
|
||||
// back to defaults from the persisted OpenClaw config.
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// Error-context helper tests keep failure metadata pointed at the model that
|
||||
// actually failed, even when the embedded harness wraps the provider call.
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { resolveActiveErrorContext } from "./helpers.js";
|
||||
|
||||
@@ -24,6 +26,8 @@ describe("resolveActiveErrorContext", () => {
|
||||
});
|
||||
|
||||
it("ignores the embedded OpenClaw harness provider when the model provider is known", () => {
|
||||
// The OpenClaw harness id is a transport wrapper, not the provider users
|
||||
// need in diagnostics when a concrete upstream model ref is available.
|
||||
const result = resolveActiveErrorContext({
|
||||
provider: "openrouter",
|
||||
model: "openai/gpt-5.4",
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// Embedded run helper tests cover final assistant text extraction and error
|
||||
// metadata assembly shared by normal exits and failure paths.
|
||||
import type { AssistantMessage } from "openclaw/plugin-sdk/llm";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createUsageAccumulator } from "../usage-accumulator.js";
|
||||
@@ -11,6 +13,8 @@ function makeAssistantMessage(
|
||||
content: AssistantMessage["content"],
|
||||
phase?: string,
|
||||
): AssistantMessage {
|
||||
// Minimal assistant fixture with usage fields required by the SDK type; the
|
||||
// tested helpers only care about content, phase, and final metadata.
|
||||
return {
|
||||
api: "responses",
|
||||
provider: "openai",
|
||||
@@ -33,6 +37,8 @@ function makeAssistantMessage(
|
||||
|
||||
describe("resolveFinalAssistantVisibleText", () => {
|
||||
it("prefers final_answer text over commentary blocks", () => {
|
||||
// Commentary can be streamed before the final answer; user-visible result
|
||||
// extraction must choose the signed final phase when present.
|
||||
const lastAssistant = makeAssistantMessage([
|
||||
{
|
||||
type: "text",
|
||||
@@ -81,6 +87,8 @@ describe("resolveFinalAssistantVisibleText", () => {
|
||||
|
||||
describe("buildErrorAgentMeta", () => {
|
||||
it("preserves active session file for error exits after transcript rotation", () => {
|
||||
// Error metadata follows the active session after transcript rotation so
|
||||
// diagnostics and resume links point at the file that contains the failure.
|
||||
expect(
|
||||
buildErrorAgentMeta({
|
||||
sessionId: "session-rotated",
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// History image prune tests keep provider replay compact by replacing stale
|
||||
// image bytes and media references while preserving recent user context.
|
||||
import type { AgentMessage } from "openclaw/plugin-sdk/agent-core";
|
||||
import type { ImageContent } from "openclaw/plugin-sdk/llm";
|
||||
import { describe, expect, it } from "vitest";
|
||||
@@ -62,6 +64,8 @@ function expectImageMessagePreserved(messages: AgentMessage[], errorMessage: str
|
||||
}
|
||||
|
||||
function oldEnoughTail(): AgentMessage[] {
|
||||
// Four assistant turns make the first message old enough to prune while
|
||||
// keeping each test focused on content rewriting instead of turn counting.
|
||||
const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" });
|
||||
const userText = () => castAgentMessage({ role: "user", content: "more" });
|
||||
return [
|
||||
@@ -100,6 +104,8 @@ describe("pruneProcessedHistoryImages", () => {
|
||||
});
|
||||
|
||||
it("scrubs old media attachment markers from text blocks", () => {
|
||||
// Text references are scrubbed alongside image blocks so old paths and
|
||||
// media URIs cannot rehydrate stale images on a later replay.
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "user",
|
||||
@@ -224,6 +230,8 @@ describe("pruneProcessedHistoryImages", () => {
|
||||
});
|
||||
|
||||
it("does not count multiple assistant messages from one tool loop as separate turns", () => {
|
||||
// Tool-call assistant messages belong to one model turn; counting each
|
||||
// message separately would prune images too aggressively inside tool loops.
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "user",
|
||||
@@ -337,6 +345,8 @@ describe("installHistoryImagePruneContextTransform", () => {
|
||||
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
|
||||
|
||||
it("prunes the provider replay view after an existing context transform", async () => {
|
||||
// The transform wrapper prunes only the replay view returned to providers,
|
||||
// leaving upstream transform output and restore behavior intact.
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({ role: "user", content: "fresh prompt" }),
|
||||
...oldEnoughTail(),
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// Idle-timeout breaker tests cover the outer run-loop guard that stops
|
||||
// repeated silent provider attempts from spinning forever.
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
MAX_CONSECUTIVE_IDLE_TIMEOUTS_BEFORE_OUTPUT,
|
||||
@@ -25,6 +27,8 @@ describe("stepIdleTimeoutBreaker (#76293)", () => {
|
||||
}>,
|
||||
options?: { cap?: number },
|
||||
) {
|
||||
// Drive one persistent breaker state across attempts, matching the run
|
||||
// loop scope where profile rotation and retry sessions would otherwise reset.
|
||||
const state = createIdleTimeoutBreakerState();
|
||||
const steps: Array<{ consecutive: number; tripped: boolean }> = [];
|
||||
for (const input of inputs) {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// Prompt image tests cover local reference parsing, sandbox-aware loading, and
|
||||
// attachment ordering for embedded runs that send images to vision models.
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
@@ -35,6 +37,8 @@ function expectImageReferenceCount(prompt: string, count: number) {
|
||||
}
|
||||
|
||||
function expectSingleImageReference(prompt: string) {
|
||||
// Most parser cases should find exactly one local image ref; this helper
|
||||
// keeps failures about over-detection obvious.
|
||||
const refs = expectImageReferenceCount(prompt, 1);
|
||||
return refs[0];
|
||||
}
|
||||
@@ -83,6 +87,8 @@ describe("detectImageReferences", () => {
|
||||
});
|
||||
|
||||
it("ignores OpenClaw CLI image cache paths from prior prompt transcripts", () => {
|
||||
// Cache paths from generated tool reminders are replay artifacts, not new
|
||||
// user attachments to hydrate again.
|
||||
const refs = detectImageReferences(
|
||||
[
|
||||
'<system-reminder>Called the Read tool with {"file_path":"/Users/ada/.openclaw/workspace/.openclaw-cli-images/stale.png"}</system-reminder>',
|
||||
@@ -194,6 +200,8 @@ describe("detectImageReferences", () => {
|
||||
});
|
||||
|
||||
it("dedupe casing follows host filesystem conventions", () => {
|
||||
// Windows resolves these as the same path, while POSIX hosts preserve both
|
||||
// candidates because case can identify different files.
|
||||
const prompt = "Look at /tmp/Image.png and /tmp/image.png";
|
||||
if (process.platform === "win32") {
|
||||
expect(detectImageReferences(prompt)).toStrictEqual([
|
||||
@@ -403,6 +411,8 @@ describe("modelSupportsImages", () => {
|
||||
|
||||
describe("loadImageFromRef", () => {
|
||||
it("hydrates managed inbound media URIs before workspace path resolution", async () => {
|
||||
// Managed media URIs are canonical inbound attachment handles and should
|
||||
// work even when workspaceOnly would reject ordinary outside paths.
|
||||
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-uri-"));
|
||||
const workspaceDir = path.join(stateDir, "workspace-agent");
|
||||
const inboundDir = path.join(stateDir, "media", "inbound");
|
||||
@@ -575,6 +585,8 @@ describe("detectAndLoadPromptImages", () => {
|
||||
});
|
||||
|
||||
it("preserves attachment order when offloaded refs and inline images are mixed", () => {
|
||||
// The model receives images in the user's attachment order, not grouped by
|
||||
// storage mechanism.
|
||||
const merged = mergePromptAttachmentImages({
|
||||
imageOrder: ["offloaded", "inline"],
|
||||
existingImages: [{ type: "image", data: "small-b", mimeType: "image/png" }],
|
||||
@@ -616,6 +628,8 @@ describe("detectAndLoadPromptImages", () => {
|
||||
});
|
||||
|
||||
it("blocks prompt image refs outside workspace when sandbox workspaceOnly is enabled", async () => {
|
||||
// Sandbox workspaceOnly uses the bridge to validate mounted paths; ordinary
|
||||
// prompt refs outside the workspace are detected but intentionally skipped.
|
||||
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-sandbox-"));
|
||||
const sandboxRoot = path.join(stateDir, "sandbox");
|
||||
const agentRoot = path.join(stateDir, "agent");
|
||||
|
||||
Reference in New Issue
Block a user