docs: document embedded runner cache tests

This commit is contained in:
Peter Steinberger
2026-06-04 14:36:52 -04:00
parent 810f29b5f6
commit cd26595d6f
7 changed files with 39 additions and 7 deletions

View File

@@ -1,3 +1,4 @@
// Coverage for OpenRouter model capability loading and cache invalidation.
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
@@ -6,6 +7,8 @@ import { importFreshModule } from "openclaw/plugin-sdk/test-fixtures";
import { afterEach, describe, expect, it, vi } from "vitest";
async function withOpenRouterStateDir(run: (stateDir: string) => Promise<void>) {
// Each case gets an isolated state dir because the module persists capability
// rows through the plugin state store across imports.
const stateDir = mkdtempSync(join(tmpdir(), "openclaw-openrouter-capabilities-"));
resetPluginStateStoreForTests();
process.env.OPENCLAW_STATE_DIR = stateDir;
@@ -28,6 +31,7 @@ async function withOpenRouterStateDir(run: (stateDir: string) => Promise<void>)
}
async function importOpenRouterModelCapabilities(scope: string) {
// Import fresh per scope so module-level caches cannot mask persistence bugs.
return await importFreshModule<typeof import("./openrouter-model-capabilities.js")>(
import.meta.url,
`./openrouter-model-capabilities.js?scope=${scope}`,
@@ -139,6 +143,8 @@ describe("openrouter-model-capabilities", () => {
});
it("does not reuse retired JSON caches with precomputed OpenRouter context windows", async () => {
// Old JSON caches stored unnormalized provider context windows; force a live
// refresh so endpoint-specific caps are used instead.
await withOpenRouterStateDir(async (stateDir) => {
const modelId = "nvidia/nemotron-3-super-120b-a12b:free";
const cacheDir = join(stateDir, "cache");

View File

@@ -1,3 +1,4 @@
// Coverage for detecting repeated tool loops immediately after compaction.
import { describe, expect, it } from "vitest";
import {
createPostCompactionLoopGuard,
@@ -5,6 +6,8 @@ import {
} from "./post-compaction-loop-guard.js";
function callOutcome(toolName: string, args: unknown, result: string) {
  // Builds the outcome record fed to the guard. The guard compares stable
  // hashes rather than full payloads to keep runtime state bounded, so this
  // fixture uses the JSON-serialized args and the raw result string as the
  // stand-in hash values.
  const argsHash = JSON.stringify(args);
  return { toolName, argsHash, resultHash: result };
}
@@ -36,6 +39,8 @@ describe("createPostCompactionLoopGuard", () => {
});
it("aborts on the windowSize-th identical (tool,args,result) call within the window", () => {
// Repeating the same tool, args, and result right after compaction means the
// model likely lost progress and is stuck replaying the same recovery step.
const guard = createPostCompactionLoopGuard({ windowSize: 3 });
guard.armPostCompaction();
expect(

View File

@@ -1,3 +1,4 @@
// Coverage for prompt-cache diagnostic tracking across turns.
import { beforeEach, describe, expect, it } from "vitest";
import {
beginPromptCacheObservation,
@@ -18,6 +19,8 @@ describe("prompt cache observability", () => {
});
it("tracks cache-relevant changes and reports a real cache-read drop", () => {
// Observability only emits when a material cache-read drop follows a tracked
// cache-affecting change.
const first = beginPromptCacheObservation({
sessionId: "session-1",
sessionKey: "agent:main",
@@ -110,6 +113,8 @@ describe("prompt cache observability", () => {
});
it("treats reordered tool lists as the same diagnostics tool set", () => {
// Tool lists keep a deterministic order in payloads, but reordering an
// unchanged set must not produce a false cache-break diagnostic.
beginPromptCacheObservation({
sessionId: "session-1",
provider: "openai",
@@ -138,6 +143,8 @@ describe("prompt cache observability", () => {
});
it("tracks recurring prompt-cache affinity across rotating session ids", () => {
// Cron-style isolated runs use promptCacheKey to carry cache affinity across
// new session ids.
beginPromptCacheObservation({
sessionId: "isolated-run-1",
promptCacheKey: "openclaw-cron-stable-cache-key",

View File

@@ -1,3 +1,4 @@
// Coverage for prompt-cache retention resolution by provider and model API.
import { describe, expect, it } from "vitest";
import { isGooglePromptCacheEligible, resolveCacheRetention } from "./prompt-cache-retention.js";
@@ -31,11 +32,8 @@ describe("prompt cache retention", () => {
});
it("passes explicit cacheRetention through for openai-completions providers when supportsPromptCacheKey (issue #81281)", () => {
// Regression: openai-completions providers with prefix-caching backends
// (oMLX, llama.cpp, etc.) set compat.supportsPromptCacheKey: true and
// cacheRetention: "long" but the wrapper was silently dropping the
// user's explicit cacheRetention because the provider is neither in the
// anthropic family nor google-eligible.
// Regression: prefix-caching OpenAI-compatible backends opt in with
// supportsPromptCacheKey, so explicit user retention must pass through.
expect(
resolveCacheRetention(
{ cacheRetention: "long" },
@@ -67,8 +65,7 @@ describe("prompt cache retention", () => {
it("does not honor explicit cacheRetention for openai-completions without supportsPromptCacheKey", () => {
// Providers that route via openai-completions but do not advertise prompt
// caching (e.g. amazon-bedrock proxying amazon.* nova models) must keep
// the explicit cacheRetention from leaking into the outgoing payload.
// caching must keep retention out of outgoing payloads.
expect(
resolveCacheRetention(
{ cacheRetention: "long" },

View File

@@ -1,3 +1,4 @@
// Coverage for normalizing assistant replay content before provider requests.
import type { AgentMessage } from "openclaw/plugin-sdk/agent-core";
import { describe, expect, it } from "vitest";
import {
@@ -19,6 +20,8 @@ function bedrockAssistant(
stopReason: "error" | "stop" | "toolUse" | "length" = "error",
usageOverrides: Record<string, number> = {},
): AgentMessage {
// Bedrock fixtures cover providers that can return empty or legacy-shaped
// assistant content during streamed turns.
return {
role: "assistant",
content,
@@ -65,6 +68,8 @@ function openclawTranscriptAssistant(model: "delivery-mirror" | "gateway-injecte
describe("normalizeAssistantReplayContent", () => {
it("converts mid-turn assistant content: [] to a non-empty sentinel text block when stopReason is error", () => {
// Mid-turn failure sentinels preserve request turn ordering without
// pretending the failed assistant generated useful content.
const messages = [userMessage("hello"), bedrockAssistant([], "error"), userMessage("retry")];
const out = normalizeAssistantReplayContent(messages);
expect(out).not.toBe(messages);
@@ -229,6 +234,8 @@ describe("normalizeAssistantReplayContent", () => {
});
it("filters openclaw delivery-mirror and gateway-injected assistant messages from replay", () => {
// Gateway mirror entries are transcript artifacts, not model-authored
// assistant turns, so they must not be sent back to providers.
const messages = [
userMessage("hello"),
openclawTranscriptAssistant("delivery-mirror"),

View File

@@ -1,3 +1,4 @@
// Coverage for embedded resource loader discovery restrictions.
import { describe, expect, it, vi } from "vitest";
import { DefaultResourceLoader } from "../sessions/index.js";
import {
@@ -6,6 +7,8 @@ import {
} from "./resource-loader.js";
vi.mock("../sessions/index.js", () => ({
// Constructor mock captures options so tests can assert discovery policy
// without touching filesystem-backed session resources.
DefaultResourceLoader: vi.fn(function DefaultResourceLoaderLocal(
this: Record<string, unknown>,
options: unknown,
@@ -19,6 +22,8 @@ vi.mock("../sessions/index.js", () => ({
describe("createEmbeddedAgentResourceLoader", () => {
it("keeps inline extensions but disables filesystem discovery", () => {
// Embedded runs pass explicit extension factories; filesystem discovery is
// disabled to avoid loading ambient workspace extensions.
const settingsManager = {};
const extensionFactories = [vi.fn()];

View File

@@ -1,8 +1,11 @@
// Coverage for deciding when embedded run results should trigger model fallback.
import { describe, expect, it } from "vitest";
import { classifyEmbeddedAgentRunResultForModelFallback } from "./result-fallback-classifier.js";
describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
it("does not fallback when sessions_spawn accepted a child session", () => {
// Accepted child sessions mean the turn made progress even if the parent did
// not emit a normal assistant reply.
expect(
classifyEmbeddedAgentRunResultForModelFallback({
provider: "mock-openai",
@@ -47,6 +50,8 @@ describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
});
it("preserves hook block results with auth-like error payload text", () => {
// Hook policy blocks are intentional local decisions, not provider failures
// that should rotate models.
const result = classifyEmbeddedAgentRunResultForModelFallback({
provider: "custom",
model: "gpt-5.5",