openclaw/src/agents/code-mode.test.ts

import { afterEach, describe, expect, it, vi } from "vitest";
import { setPluginToolMeta } from "../plugins/tools.js";
import {
  applyCodeModeCatalog,
  CODE_MODE_EXEC_TOOL_NAME,
  CODE_MODE_WAIT_TOOL_NAME,
  createCodeModeTools,
  resolveCodeModeConfig,
  __testing,
} from "./code-mode.js";
import { createToolSearchCatalogRef, type ToolSearchCatalogRef } from "./tool-search.js";
import {
  TOOL_CALL_RAW_TOOL_NAME,
  TOOL_DESCRIBE_RAW_TOOL_NAME,
  TOOL_SEARCH_CODE_MODE_TOOL_NAME,
  TOOL_SEARCH_RAW_TOOL_NAME,
} from "./tool-search.js";
import { jsonResult, type AnyAgentTool } from "./tools/common.js";

function fakeTool(name: string, description: string): AnyAgentTool {
  return {
    name,
    label: name,
    description,
    parameters: {
      type: "object",
      properties: {
        value: { type: "string" },
      },
    },
    execute: vi.fn(async (_toolCallId, input) => jsonResult({ name, input })),
  };
}

function pluginTool(name: string, description: string, pluginId = "fake-code-mode"): AnyAgentTool {
  const tool = fakeTool(name, description);
  setPluginToolMeta(tool, {
    pluginId,
    optional: true,
  });
  return tool;
}

function pluginToolWithExecute(
  name: string,
  description: string,
  execute: AnyAgentTool["execute"],
): AnyAgentTool {
  const tool = pluginTool(name, description);
  tool.execute = vi.fn(execute) as AnyAgentTool["execute"];
  return tool;
}

function resultDetails(result: { details?: unknown }): Record<string, unknown> {
  expect(result.details).toBeDefined();
  expect(typeof result.details).toBe("object");
  return result.details as Record<string, unknown>;
}

function createCodeModeHarness(params: { catalogRef?: ToolSearchCatalogRef } = {}) {
  const catalogRef = params.catalogRef ?? createToolSearchCatalogRef();
  const config = { tools: { codeMode: true } } as never;
  const ctx = {
    config,
    runtimeConfig: config,
    sessionId: "session-code-mode",
    sessionKey: "agent:main:main",
    runId: "run-code-mode",
    catalogRef,
  };
  const tools = createCodeModeTools(ctx);
  return { catalogRef, config, ctx, tools };
}

async function runUntilCompleted(params: {
  execTool: AnyAgentTool;
  waitTool: AnyAgentTool;
  code: string;
  language?: "javascript" | "typescript";
}) {
  let details = resultDetails(
    await params.execTool.execute("code-call-1", {
      code: params.code,
      language: params.language,
    }),
  );
  for (let index = 0; index < 8 && details.status === "waiting"; index += 1) {
    const runId = details.runId;
    expect(typeof runId).toBe("string");
    details = resultDetails(await params.waitTool.execute(`code-wait-${index}`, { runId }));
  }
  return details;
}

describe("Code Mode", () => {
  afterEach(() => {
    __testing.activeRuns.clear();
    __testing.resumingRunIds.clear();
  });

  it("resolves object config defaults", () => {
    expect(resolveCodeModeConfig({ tools: { codeMode: true } } as never).enabled).toBe(true);
    const resolved = resolveCodeModeConfig({
      tools: {
        codeMode: {
          timeoutMs: 1234,
          languages: ["typescript"],
        },
      },
    } as never);
    expect(resolved.enabled).toBe(false);
    expect(resolveCodeModeConfig({ tools: { codeMode: { enabled: true } } } as never).enabled).toBe(
      true,
    );
    expect(resolved.runtime).toBe("quickjs-wasi");
    expect(resolved.mode).toBe("only");
    expect(resolved.timeoutMs).toBe(1234);
    expect(resolved.languages).toEqual(["typescript"]);
    const limitedSearch = resolveCodeModeConfig({
      tools: {
        codeMode: {
          enabled: true,
          maxSearchLimit: 3,
        },
      },
    } as never);
    expect(limitedSearch.searchDefaultLimit).toBe(3);
    expect(limitedSearch.maxSearchLimit).toBe(3);
  });

  it("resolves the packaged worker URL from stable and hashed dist modules", () => {
    expect(
      __testing.resolveCodeModeWorkerUrl("file:///repo/dist/agents/code-mode.js").pathname,
    ).toBe("/repo/dist/agents/code-mode.worker.js");
    expect(
      __testing.resolveCodeModeWorkerUrl("file:///repo/dist/selection-abc123.js").pathname,
    ).toBe("/repo/dist/agents/code-mode.worker.js");
  });

  it("hides all normal tools behind exec and wait", () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    const shellExec = fakeTool("exec", "Run shell command");
    const ticket = pluginTool("fake_create_ticket", "Create a fake ticket");

    const compacted = applyCodeModeCatalog({
      tools: [...codeModeTools, shellExec, ticket],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    expect(compacted.tools.map((tool) => tool.name)).toEqual([
      CODE_MODE_EXEC_TOOL_NAME,
      CODE_MODE_WAIT_TOOL_NAME,
    ]);
    expect(compacted.catalogToolCount).toBe(2);
  });

  it("uses a flat enum for the exec language schema", () => {
    const { tools } = createCodeModeHarness();
    const parameters = tools[0].parameters as {
      properties?: Record<string, Record<string, unknown>>;
    };
    const language = parameters.properties?.language;

    expect(language).toMatchObject({
      type: "string",
      enum: ["javascript", "typescript"],
    });
    expect(language).not.toHaveProperty("anyOf");
    expect(language).not.toHaveProperty("oneOf");
  });

  it("removes legacy Tool Search controls from the visible code mode surface", () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    const compacted = applyCodeModeCatalog({
      tools: [
        ...codeModeTools,
        fakeTool(TOOL_SEARCH_CODE_MODE_TOOL_NAME, "legacy code surface"),
        fakeTool(TOOL_SEARCH_RAW_TOOL_NAME, "legacy search"),
        fakeTool(TOOL_DESCRIBE_RAW_TOOL_NAME, "legacy describe"),
        fakeTool(TOOL_CALL_RAW_TOOL_NAME, "legacy call"),
        pluginTool("fake_create_ticket", "Create a fake ticket"),
      ],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    expect(compacted.tools.map((tool) => tool.name)).toEqual([
      CODE_MODE_EXEC_TOOL_NAME,
      CODE_MODE_WAIT_TOOL_NAME,
    ]);
    expect(compacted.catalogToolCount).toBe(1);
  });

  it("runs JavaScript through QuickJS-WASI and resumes nested tool calls with wait", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    const ticket = pluginTool("fake_create_ticket", "Create a fake ticket");
    applyCodeModeCatalog({
      tools: [...codeModeTools, ticket],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = await runUntilCompleted({
      execTool: codeModeTools[0],
      waitTool: codeModeTools[1],
      code: `
        const hits = await tools.search("ticket", { limit: 1 });
        const described = await tools.describe(hits[0].id);
        const called = await tools.call(described.id, { value: "ship" });
        text("created");
        return called.result.details;
      `,
    });

    expect(details.status).toBe("completed");
    expect(details.value).toEqual({
      name: "fake_create_ticket",
      input: { value: "ship" },
    });
    expect(details.output).toEqual([{ type: "text", text: "created" }]);
    expect(ticket.execute).toHaveBeenCalledTimes(1);
  });

  it("marks yield suspensions and resumes the snapshot with wait", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const first = resultDetails(
      await codeModeTools[0].execute("code-call-yield", {
        code: `
          text("before");
          await yield_control("pause");
          text("after");
          return "done";
        `,
      }),
    );

    expect(first.status).toBe("waiting");
    expect(first.reason).toBe("yield");
    expect(first.output).toEqual([{ type: "text", text: "before" }]);

    const runId = first.runId;
    expect(typeof runId).toBe("string");
    const resumed = resultDetails(await codeModeTools[1].execute("code-wait-yield", { runId }));

    expect(resumed.status).toBe("completed");
    expect(resumed.value).toBe("done");
    expect(resumed.output).toEqual([
      { type: "text", text: "before" },
      { type: "text", text: "after" },
    ]);
  });

  it("rejects wait calls from a different session scope", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const first = resultDetails(
      await codeModeTools[0].execute("code-call-wrong-session", {
        code: 'await yield_control("pause"); return "done";',
      }),
    );
    expect(first.status).toBe("waiting");
    const otherWaitTool = createCodeModeTools({
      config,
      runtimeConfig: config,
      sessionId: "other-session",
      sessionKey: "agent:other:main",
      runId: "run-code-mode",
      catalogRef,
    })[1];

    await expect(
      otherWaitTool.execute("code-wait-wrong-session", { runId: first.runId }),
    ).rejects.toThrow("different session");
  });

  it("rejects concurrent waits for the same suspended run", async () => {
    const catalogRef = createToolSearchCatalogRef();
    const config = {
      tools: {
        codeMode: {
          enabled: true,
          timeoutMs: 100,
        },
      },
    } as never;
    const ctx = {
      config,
      runtimeConfig: config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    };
    const codeModeTools = createCodeModeTools(ctx);
    applyCodeModeCatalog({
      tools: [
        ...codeModeTools,
        pluginToolWithExecute(
          "fake_slow",
          "Slow helper",
          async () => await new Promise<never>(() => undefined),
        ),
      ],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const first = resultDetails(
      await codeModeTools[0].execute("code-call-concurrent-wait", {
        code: "await tools.fake_slow({}); return 'done';",
      }),
    );
    expect(first.status).toBe("waiting");

    const firstWait = codeModeTools[1].execute("code-wait-concurrent-a", {
      runId: first.runId,
    });
    await expect(
      codeModeTools[1].execute("code-wait-concurrent-b", { runId: first.runId }),
    ).rejects.toThrow("already being resumed");
    const stillWaiting = resultDetails(await firstWait);

    expect(stillWaiting.status).toBe("waiting");
    expect(stillWaiting.runId).toBe(first.runId);
  });

  it("reports only unsettled pending tool calls when wait times out", async () => {
    const catalogRef = createToolSearchCatalogRef();
    const config = {
      tools: {
        codeMode: {
          enabled: true,
          timeoutMs: 100,
        },
      },
    } as never;
    const ctx = {
      config,
      runtimeConfig: config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    };
    const codeModeTools = createCodeModeTools(ctx);
    applyCodeModeCatalog({
      tools: [
        ...codeModeTools,
        pluginTool("fake_fast", "Fast helper"),
        pluginToolWithExecute(
          "fake_slow",
          "Slow helper",
          async () => await new Promise<never>(() => undefined),
        ),
      ],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const first = resultDetails(
      await codeModeTools[0].execute("code-call-timeout", {
        code: `
          const fast = tools.fake_fast({});
          const slow = tools.fake_slow({});
          await fast;
          await slow;
          return "done";
        `,
      }),
    );
    expect(first.status).toBe("waiting");
    expect(first.pendingToolCalls).toHaveLength(2);

    const second = resultDetails(
      await codeModeTools[1].execute("code-wait-timeout", { runId: first.runId }),
    );

    expect(second.status).toBe("waiting");
    expect(second.pendingToolCalls).toEqual([expect.objectContaining({ method: "call" })]);
  });

  it("does not load TypeScript for plain JavaScript code mode runs", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = await runUntilCompleted({
      execTool: codeModeTools[0],
      waitTool: codeModeTools[1],
      code: "return 42;",
    });

    expect(details.status).toBe("completed");
    expect(details.value).toBe(42);
    expect(__testing.getTypescriptRuntimePromise()).toBeNull();
  });

  it("allows identifiers and strings that contain import without module access", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = await runUntilCompleted({
      execTool: codeModeTools[0],
      waitTool: codeModeTools[1],
      code: `
        const important = 41;
        const message = "import docs later";
        return important + (message.includes("import") ? 1 : 0);
      `,
    });

    expect(details.status).toBe("completed");
    expect(details.value).toBe(42);
  });

  it("fails pending promises that have no host bridge work", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const beforeRunCount = __testing.activeRuns.size;
    const details = resultDetails(
      await codeModeTools[0].execute("code-call-empty-wait", {
        code: "await new Promise(() => undefined); return 'never';",
      }),
    );

    expect(details.status).toBe("failed");
    expect(String(details.error)).toContain("pending without host work");
    expect(__testing.activeRuns.size).toBe(beforeRunCount);
  });

  it("clamps omitted code-mode catalog search limits to maxSearchLimit", async () => {
    const catalogRef = createToolSearchCatalogRef();
    const config = {
      tools: {
        codeMode: {
          enabled: true,
          maxSearchLimit: 3,
        },
      },
    } as never;
    const ctx = {
      config,
      runtimeConfig: config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    };
    const codeModeTools = createCodeModeTools(ctx);
    applyCodeModeCatalog({
      tools: [
        ...codeModeTools,
        pluginTool("fake_ticket_one", "ticket helper"),
        pluginTool("fake_ticket_two", "ticket helper"),
        pluginTool("fake_ticket_three", "ticket helper"),
        pluginTool("fake_ticket_four", "ticket helper"),
        pluginTool("fake_ticket_five", "ticket helper"),
      ],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = await runUntilCompleted({
      execTool: codeModeTools[0],
      waitTool: codeModeTools[1],
      code: 'const hits = await tools.search("ticket"); return hits.length;',
    });

    expect(details.status).toBe("completed");
    expect(details.value).toBe(3);
  });

  it("supports TypeScript source transform", async () => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = await runUntilCompleted({
      execTool: codeModeTools[0],
      waitTool: codeModeTools[1],
      language: "typescript",
      code: `
        const value: number = 40 + 2;
        return { value };
      `,
    });

    expect(details.status).toBe("completed");
    expect(details.value).toEqual({ value: 42 });
  });

  it.each([
    "const fs = require('node:fs'); return fs;",
    "return import('node:fs');",
    "return import.meta.url;",
    "return `${import('node:fs')}`;",
  ])("rejects module access: %s", async (code) => {
    const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
    applyCodeModeCatalog({
      tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = resultDetails(
      await codeModeTools[0].execute("code-call-import", {
        code,
      }),
    );

    expect(details.status).toBe("failed");
    expect(String(details.error)).toContain("module access is disabled");
  });

  it("enforces output limits on completed exec calls", async () => {
    const catalogRef = createToolSearchCatalogRef();
    const config = {
      tools: {
        codeMode: {
          enabled: true,
          maxOutputBytes: 1024,
        },
      },
    } as never;
    const ctx = {
      config,
      runtimeConfig: config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    };
    const tools = createCodeModeTools(ctx);
    applyCodeModeCatalog({
      tools: [...tools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const details = resultDetails(
      await tools[0].execute("code-call-large", {
        code: "return 'x'.repeat(2048);",
      }),
    );

    expect(details.status).toBe("failed");
    expect(String(details.error)).toContain("output limit exceeded");
  });

  it("enforces output limits before suspending runs", async () => {
    const catalogRef = createToolSearchCatalogRef();
    const config = {
      tools: {
        codeMode: {
          enabled: true,
          maxOutputBytes: 1024,
        },
      },
    } as never;
    const ctx = {
      config,
      runtimeConfig: config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    };
    const tools = createCodeModeTools(ctx);
    applyCodeModeCatalog({
      tools: [...tools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const beforeRunCount = __testing.activeRuns.size;
    const details = resultDetails(
      await tools[0].execute("code-call-large-suspend", {
        code: "text('x'.repeat(2048)); await yield_control('pause'); return 1;",
      }),
    );

    expect(details.status).toBe("failed");
    expect(String(details.error)).toContain("output limit exceeded");
    expect(__testing.activeRuns.size).toBe(beforeRunCount);
  });

  it("terminates hostile infinite loops outside the main event loop", async () => {
    const catalogRef = createToolSearchCatalogRef();
    const config = {
      tools: {
        codeMode: {
          enabled: true,
          timeoutMs: 100,
        },
      },
    } as never;
    const ctx = {
      config,
      runtimeConfig: config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    };
    const tools = createCodeModeTools(ctx);
    applyCodeModeCatalog({
      tools: [...tools, pluginTool("fake_noop", "Noop")],
      config,
      sessionId: "session-code-mode",
      sessionKey: "agent:main:main",
      runId: "run-code-mode",
      catalogRef,
    });

    const heartbeat = Promise.resolve("main-event-loop-alive");
    const details = resultDetails(
      await tools[0].execute("code-call-loop", {
        code: "while (true) {}",
      }),
    );

    await expect(heartbeat).resolves.toBe("main-event-loop-alive");
    expect(details.status).toBe("failed");
    expect(String(details.error)).toContain("timeout exceeded");
  });
});