docs: document embedded runner cache tests

2026-06-28 05:33:36 +00:00 · 2026-06-04 14:36:52 -04:00
parent 810f29b5f6
commit cd26595d6f
7 changed files with 39 additions and 7 deletions
--- a/src/agents/embedded-agent-runner/openrouter-model-capabilities.test.ts
+++ b/src/agents/embedded-agent-runner/openrouter-model-capabilities.test.ts
@@ -1,3 +1,4 @@
+// Coverage for OpenRouter model capability loading and cache invalidation.
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
@@ -6,6 +7,8 @@ import { importFreshModule } from "openclaw/plugin-sdk/test-fixtures";
 import { afterEach, describe, expect, it, vi } from "vitest";

 async function withOpenRouterStateDir(run: (stateDir: string) => Promise<void>) {
+  // Each case gets an isolated state dir because the module persists capability
+  // rows through the plugin state store across imports.
  const stateDir = mkdtempSync(join(tmpdir(), "openclaw-openrouter-capabilities-"));
  resetPluginStateStoreForTests();
  process.env.OPENCLAW_STATE_DIR = stateDir;
@@ -28,6 +31,7 @@ async function withOpenRouterStateDir(run: (stateDir: string) => Promise<void>)
 }

 async function importOpenRouterModelCapabilities(scope: string) {
+  // Import fresh per scope so module-level caches cannot mask persistence bugs.
  return await importFreshModule<typeof import("./openrouter-model-capabilities.js")>(
    import.meta.url,
    `./openrouter-model-capabilities.js?scope=${scope}`,
@@ -139,6 +143,8 @@ describe("openrouter-model-capabilities", () => {
  });

  it("does not reuse retired JSON caches with precomputed OpenRouter context windows", async () => {
+    // Old JSON caches stored unnormalized provider context windows; force a live
+    // refresh so endpoint-specific caps are used instead.
    await withOpenRouterStateDir(async (stateDir) => {
      const modelId = "nvidia/nemotron-3-super-120b-a12b:free";
      const cacheDir = join(stateDir, "cache");
--- a/src/agents/embedded-agent-runner/post-compaction-loop-guard.test.ts
+++ b/src/agents/embedded-agent-runner/post-compaction-loop-guard.test.ts
@@ -1,3 +1,4 @@
+// Coverage for detecting repeated tool loops immediately after compaction.
 import { describe, expect, it } from "vitest";
 import {
  createPostCompactionLoopGuard,
@@ -5,6 +6,8 @@ import {
 } from "./post-compaction-loop-guard.js";

 function callOutcome(toolName: string, args: unknown, result: string) {
+  // The guard compares hashes, not payloads, to keep runtime state bounded;
+  // this helper stands in JSON.stringify(args) and the raw result for hashes.
  return { toolName, argsHash: JSON.stringify(args), resultHash: result };
 }

@@ -36,6 +39,8 @@ describe("createPostCompactionLoopGuard", () => {
  });

  it("aborts on the windowSize-th identical (tool,args,result) call within the window", () => {
+    // Repeating the same tool, args, and result right after compaction means the
+    // model likely lost progress and is stuck replaying the same recovery step.
    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
    guard.armPostCompaction();
    expect(
--- a/src/agents/embedded-agent-runner/prompt-cache-observability.test.ts
+++ b/src/agents/embedded-agent-runner/prompt-cache-observability.test.ts
@@ -1,3 +1,4 @@
+// Coverage for prompt-cache diagnostic tracking across turns.
 import { beforeEach, describe, expect, it } from "vitest";
 import {
  beginPromptCacheObservation,
@@ -18,6 +19,8 @@ describe("prompt cache observability", () => {
  });

  it("tracks cache-relevant changes and reports a real cache-read drop", () => {
+    // Observability only emits when a material cache-read drop follows a tracked
+    // cache-affecting change.
    const first = beginPromptCacheObservation({
      sessionId: "session-1",
      sessionKey: "agent:main",
@@ -110,6 +113,8 @@ describe("prompt cache observability", () => {
  });

  it("treats reordered tool lists as the same diagnostics tool set", () => {
+    // Payloads keep a deterministic tool order, but reordering alone must not
+    // raise a false cache-break diagnostic when the tool set is unchanged.
    beginPromptCacheObservation({
      sessionId: "session-1",
      provider: "openai",
@@ -138,6 +143,8 @@ describe("prompt cache observability", () => {
  });

  it("tracks recurring prompt-cache affinity across rotating session ids", () => {
+    // Cron-style isolated runs use promptCacheKey to carry cache affinity across
+    // new session ids.
    beginPromptCacheObservation({
      sessionId: "isolated-run-1",
      promptCacheKey: "openclaw-cron-stable-cache-key",
--- a/src/agents/embedded-agent-runner/prompt-cache-retention.test.ts
+++ b/src/agents/embedded-agent-runner/prompt-cache-retention.test.ts
@@ -1,3 +1,4 @@
+// Coverage for prompt-cache retention resolution by provider and model API.
 import { describe, expect, it } from "vitest";
 import { isGooglePromptCacheEligible, resolveCacheRetention } from "./prompt-cache-retention.js";

@@ -31,11 +32,8 @@ describe("prompt cache retention", () => {
  });

  it("passes explicit cacheRetention through for openai-completions providers when supportsPromptCacheKey (issue #81281)", () => {
-    // Regression: openai-completions providers with prefix-caching backends
-    // (oMLX, llama.cpp, etc.) set compat.supportsPromptCacheKey: true and
-    // cacheRetention: "long" but the wrapper was silently dropping the
-    // user's explicit cacheRetention because the provider is neither in the
-    // anthropic family nor google-eligible.
+    // Regression: prefix-caching OpenAI-compatible backends opt in with
+    // supportsPromptCacheKey, so explicit user retention must pass through.
    expect(
      resolveCacheRetention(
        { cacheRetention: "long" },
@@ -67,8 +65,7 @@ describe("prompt cache retention", () => {

  it("does not honor explicit cacheRetention for openai-completions without supportsPromptCacheKey", () => {
    // Providers that route via openai-completions but do not advertise prompt
-    // caching (e.g. amazon-bedrock proxying amazon.* nova models) must keep
-    // the explicit cacheRetention from leaking into the outgoing payload.
+    // caching must keep retention out of outgoing payloads.
    expect(
      resolveCacheRetention(
        { cacheRetention: "long" },
--- a/src/agents/embedded-agent-runner/replay-history.test.ts
+++ b/src/agents/embedded-agent-runner/replay-history.test.ts
@@ -1,3 +1,4 @@
+// Coverage for normalizing assistant replay content before provider requests.
 import type { AgentMessage } from "openclaw/plugin-sdk/agent-core";
 import { describe, expect, it } from "vitest";
 import {
@@ -19,6 +20,8 @@ function bedrockAssistant(
  stopReason: "error" | "stop" | "toolUse" | "length" = "error",
  usageOverrides: Record<string, number> = {},
 ): AgentMessage {
+  // Bedrock fixtures cover providers that can return empty or legacy-shaped
+  // assistant content during streamed turns.
  return {
    role: "assistant",
    content,
@@ -65,6 +68,8 @@ function openclawTranscriptAssistant(model: "delivery-mirror" | "gateway-injecte

 describe("normalizeAssistantReplayContent", () => {
  it("converts mid-turn assistant content: [] to a non-empty sentinel text block when stopReason is error", () => {
+    // Mid-turn failure sentinels preserve request turn ordering without
+    // pretending the failed assistant generated useful content.
    const messages = [userMessage("hello"), bedrockAssistant([], "error"), userMessage("retry")];
    const out = normalizeAssistantReplayContent(messages);
    expect(out).not.toBe(messages);
@@ -229,6 +234,8 @@ describe("normalizeAssistantReplayContent", () => {
  });

  it("filters openclaw delivery-mirror and gateway-injected assistant messages from replay", () => {
+    // Delivery-mirror and gateway-injected entries are transcript artifacts,
+    // not model-authored turns, so they must not be sent back to providers.
    const messages = [
      userMessage("hello"),
      openclawTranscriptAssistant("delivery-mirror"),
--- a/src/agents/embedded-agent-runner/resource-loader.test.ts
+++ b/src/agents/embedded-agent-runner/resource-loader.test.ts
@@ -1,3 +1,4 @@
+// Coverage for embedded resource loader discovery restrictions.
 import { describe, expect, it, vi } from "vitest";
 import { DefaultResourceLoader } from "../sessions/index.js";
 import {
@@ -6,6 +7,8 @@ import {
 } from "./resource-loader.js";

 vi.mock("../sessions/index.js", () => ({
+  // Constructor mock captures options so tests can assert discovery policy
+  // without touching filesystem-backed session resources.
  DefaultResourceLoader: vi.fn(function DefaultResourceLoaderLocal(
    this: Record<string, unknown>,
    options: unknown,
@@ -19,6 +22,8 @@ vi.mock("../sessions/index.js", () => ({

 describe("createEmbeddedAgentResourceLoader", () => {
  it("keeps inline extensions but disables filesystem discovery", () => {
+    // Embedded runs pass explicit extension factories; filesystem discovery is
+    // disabled to avoid loading ambient workspace extensions.
    const settingsManager = {};
    const extensionFactories = [vi.fn()];

--- a/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts
+++ b/src/agents/embedded-agent-runner/result-fallback-classifier.test.ts
@@ -1,8 +1,11 @@
+// Coverage for deciding when embedded run results should trigger model fallback.
 import { describe, expect, it } from "vitest";
 import { classifyEmbeddedAgentRunResultForModelFallback } from "./result-fallback-classifier.js";

 describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
  it("does not fallback when sessions_spawn accepted a child session", () => {
+    // Accepted child sessions mean the turn made progress even if the parent did
+    // not emit a normal assistant reply.
    expect(
      classifyEmbeddedAgentRunResultForModelFallback({
        provider: "mock-openai",
@@ -47,6 +50,8 @@ describe("classifyEmbeddedAgentRunResultForModelFallback", () => {
  });

  it("preserves hook block results with auth-like error payload text", () => {
+    // Hook policy blocks are intentional local decisions, not provider failures
+    // that should rotate models.
    const result = classifyEmbeddedAgentRunResultForModelFallback({
      provider: "custom",
      model: "gpt-5.5",