From 0844e771a8df9f59bc4fe42695ac6346bc3e3d2e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 11 May 2026 09:21:07 +0100 Subject: [PATCH] feat: add generic code mode runtime --- CHANGELOG.md | 1 + docs/docs.json | 1 + docs/reference/code-mode.md | 753 ++++++++++++++++ package.json | 1 + pnpm-lock.yaml | 3 + src/agents/code-mode.test.ts | 589 ++++++++++++ src/agents/code-mode.ts | 852 ++++++++++++++++++ src/agents/code-mode.worker.ts | 479 ++++++++++ .../openai-stream-wrappers.test.ts | 94 ++ .../openai-stream-wrappers.ts | 78 ++ .../pi-embedded-runner/run/attempt.test.ts | 17 + src/agents/pi-embedded-runner/run/attempt.ts | 149 ++- src/agents/tool-search.ts | 98 +- src/config/schema.help.ts | 22 + src/config/schema.labels.ts | 13 + src/config/schema.test.ts | 43 + src/config/types.tools.ts | 31 + src/config/zod-schema.agent-runtime.ts | 23 + tsdown.config.ts | 1 + 19 files changed, 3210 insertions(+), 38 deletions(-) create mode 100644 docs/reference/code-mode.md create mode 100644 src/agents/code-mode.test.ts create mode 100644 src/agents/code-mode.ts create mode 100644 src/agents/code-mode.worker.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 32fa9de950e..94b8ab0db82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -369,6 +369,7 @@ Docs: https://docs.openclaw.ai - Build: pin explicit oxfmt defaults in the shared formatter config to keep formatting behavior stable across upgrades. - TypeScript: enable stricter compiler checks for implicit returns, side-effect imports, overrides, and unused production code. - Logging: add targeted model transport, payload, SSE, and code-mode diagnostics with redacted URL handling. +- Agents/code mode: add opt-in generic QuickJS-WASI code mode that exposes `exec`/`wait` while hiding enabled tools behind a catalog bridge. - Agents: allow `session.agentToAgent.maxPingPongTurns` up to 20 while keeping the default at 5 for longer agent-to-agent reply chains. Fixes #52382. (#52400) Thanks @thirumaleshp. 
- Agents: add per-agent `tools.message.crossContext` overrides so sandboxed/public agents can restrict message sends to the current conversation without changing the global bot policy. - Agents: add per-agent `tools.message.actions.allow` overrides so sandboxed/public agents can expose and enforce send-only message tools. diff --git a/docs/docs.json b/docs/docs.json index 334e7e6389c..7d8ab5388f0 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1687,6 +1687,7 @@ "reference/rpc", "concepts/openclaw-sdk", "reference/openclaw-sdk-api-design", + "reference/code-mode", "reference/device-models" ] }, diff --git a/docs/reference/code-mode.md b/docs/reference/code-mode.md new file mode 100644 index 00000000000..04270001f4e --- /dev/null +++ b/docs/reference/code-mode.md @@ -0,0 +1,753 @@ +--- +summary: "OpenClaw code mode: an opt-in exec/wait tool surface backed by QuickJS-WASI and a hidden run-scoped tool catalog" +title: "Code mode" +sidebarTitle: "Code mode" +read_when: + - You want to enable OpenClaw code mode for an agent run + - You need to explain why code mode is different from Codex Code mode + - You are reviewing the exec/wait contract, QuickJS-WASI sandbox, TypeScript transform, or hidden tool-catalog bridge +--- + +Code mode is an experimental OpenClaw agent-runtime feature. It is off by +default. When you enable it, OpenClaw changes what the model sees for one run: +instead of exposing every enabled tool schema directly, the model sees only +`exec` and `wait`. + +This page documents OpenClaw code mode. It is not Codex Code mode. Codex Code +mode is part of the Codex coding harness and has its own project workspace, +runtime, tools, and execution semantics. OpenClaw code mode is an OpenClaw-owned +tool-surface adapter for generic OpenClaw runs. It uses `quickjs-wasi`, a hidden +OpenClaw tool catalog, and the normal OpenClaw tool executor. + +## What is this? 
+ +OpenClaw code mode lets the model write a small JavaScript or TypeScript program +instead of choosing directly from a long list of tools. + +When code mode is active: + +- The model-visible tool list is exactly `exec` and `wait`. +- `exec` evaluates model-generated JavaScript or TypeScript in a constrained + QuickJS-WASI worker. +- Normal OpenClaw tools are hidden from the model prompt and exposed inside the + guest program through `ALL_TOOLS` and `tools`. +- Guest code can search the hidden catalog, describe a tool, and call a tool + through the same OpenClaw execution path used by normal agent turns. +- `wait` resumes a suspended code-mode run when nested tool calls are still + pending. + +The important distinction: code mode changes the model-facing orchestration +surface. It does not replace OpenClaw tools, plugin tools, MCP tools, auth, +approval policy, channel behavior, or model selection. + +## Why is this good? + +Code mode makes large tool catalogs easier for models to use. + +- Smaller prompt surface: providers receive two control tools instead of dozens + or hundreds of full tool schemas. +- Better orchestration: the model can use loops, joins, small transforms, + conditional logic, and parallel nested tool calls inside one code cell. +- Provider neutral: it works for OpenClaw, plugin, MCP, and client tools without + depending on provider-native code execution. +- Existing policy stays in force: nested tool calls still go through OpenClaw + policy, approvals, hooks, session context, and audit paths. +- Clear failure mode: when code mode is explicitly enabled and the runtime is + unavailable, OpenClaw fails closed instead of falling back to broad direct tool + exposure. + +Code mode is especially useful for agents with a large enabled tool catalog or +for workflows where the model repeatedly needs to search, combine, and call +tools before producing an answer. 
+ +## How to enable it + +Add `tools.codeMode.enabled: true` to the agent or runtime config: + +```json5 +{ + tools: { + codeMode: { + enabled: true, + }, + }, +} +``` + +The shorthand is also accepted: + +```json5 +{ + tools: { + codeMode: true, + }, +} +``` + +Code mode remains off when `tools.codeMode` is omitted, `false`, or an object +without `enabled: true`. + +Use explicit limits when you want tighter bounds: + +```json5 +{ + tools: { + codeMode: { + enabled: true, + timeoutMs: 10000, + memoryLimitBytes: 67108864, + maxOutputBytes: 65536, + maxSnapshotBytes: 10485760, + maxPendingToolCalls: 16, + snapshotTtlSeconds: 900, + searchDefaultLimit: 8, + maxSearchLimit: 50, + }, + }, +} +``` + +To confirm the model payload shape while debugging, run the Gateway with +targeted logging: + +```bash +OPENCLAW_DEBUG_CODE_MODE=1 \ +OPENCLAW_DEBUG_MODEL_TRANSPORT=1 \ +OPENCLAW_DEBUG_MODEL_PAYLOAD=tools \ +openclaw gateway +``` + +With code mode active, the logged model-facing tool names should be `exec` and +`wait`. If you need the redacted provider payload, add +`OPENCLAW_DEBUG_MODEL_PAYLOAD=full-redacted` for a short debugging session. + +## Technical tour + +The rest of this page describes the runtime contract and implementation details. +It is intended for maintainers, plugin authors debugging tool exposure, and +operators validating high-risk deployments. + +## Runtime status + +- Runtime: [`quickjs-wasi`](https://github.com/vercel-labs/quickjs-wasi). +- Default state: disabled. +- Target surface: generic OpenClaw agent runs. +- Security posture: model code is hostile. +- User-facing promise: enabling code mode never silently falls back to broad + direct tool exposure. + +## Scope + +Code mode owns the model-facing orchestration shape for a prepared run. It does +not own model selection, channel behavior, auth, tool policy, or tool +implementations. 
+ +In scope: + +- model-visible `exec` and `wait` tool definitions +- hidden tool catalog construction +- JavaScript and TypeScript guest execution +- QuickJS-WASI worker runtime +- host callbacks for catalog search, schema describe, and tool call +- resumable state for suspended guest programs +- output, timeout, memory, pending-call, and snapshot limits +- telemetry and trajectory projection for nested tool calls + +Out of scope: + +- provider-native remote code execution +- shell execution semantics +- changing existing tool authorization +- persistent user-authored scripts +- package manager, file, network, or module access in guest code +- direct reuse of Codex Code mode internals + +Provider-owned tools such as remote Python sandboxes remain separate tools. See +[Code execution](/tools/code-execution). + +## Terms + +**Code mode** is the OpenClaw runtime mode that hides normal model tools and +exposes only `exec` and `wait`. + +**Guest runtime** is the QuickJS-WASI JavaScript VM that evaluates model code. + +**Host bridge** is the narrow JSON-compatible callback surface from guest code +back into OpenClaw. + +**Catalog** is the run-scoped list of effective tools after normal tool policy, +plugin, MCP, and client-tool resolution. + +**Nested tool call** is a tool call made from guest code through the host bridge. + +**Snapshot** is serialized QuickJS-WASI VM state saved so `wait` can continue a +suspended code-mode run. + +## Configuration + +`tools.codeMode.enabled` is the activation gate. Setting other code-mode fields +does not enable the feature. + +Supported fields: + +- `enabled`: boolean. Default `false`. Enables code mode only when `true`. +- `runtime`: `"quickjs-wasi"`. Only supported runtime. +- `mode`: `"only"`. Exposes `exec` and `wait`, hides normal model tools. +- `languages`: array of `"javascript"` and `"typescript"`. Default includes + both. +- `timeoutMs`: wall-clock cap for one `exec` or `wait`. Default `10000`. 
+ Runtime clamp: `100` to `60000`. +- `memoryLimitBytes`: QuickJS heap cap. Default `67108864`. Runtime clamp: + `1048576` to `1073741824`. +- `maxOutputBytes`: cap for returned text, JSON, and logs. Default `65536`. + Runtime clamp: `1024` to `10485760`. +- `maxSnapshotBytes`: cap for serialized VM snapshots. Default `10485760`. + Runtime clamp: `1024` to `268435456`. +- `maxPendingToolCalls`: cap for concurrent nested tool calls. Default `16`. + Runtime clamp: `1` to `128`. +- `snapshotTtlSeconds`: how long a suspended VM can be resumed. Default `900`. + Runtime clamp: `1` to `86400`. +- `searchDefaultLimit`: default hidden-catalog search result count. Default `8`. + Runtime clamps this to `maxSearchLimit`. +- `maxSearchLimit`: maximum hidden-catalog search result count. Default `50`. + Runtime clamp: `1` to `50`. + +If code mode is enabled but QuickJS-WASI cannot load, OpenClaw fails closed for +that run. It does not silently expose normal tools as a fallback. + +## Activation + +Code mode is evaluated after the effective tool policy is known and before the +final model request is assembled. + +Activation order: + +1. Resolve the agent, model, provider, sandbox, channel, sender, and run policy. +2. Build the effective OpenClaw tool list. +3. Add eligible plugin, MCP, and client tools. +4. Apply allow and deny policy. +5. If `tools.codeMode.enabled` is false, continue with normal tool exposure. +6. If enabled and tools are active for the run, register the effective tools in + the code-mode catalog. +7. Remove all normal tools from the model-visible tool list. +8. Add code-mode `exec` and `wait`. + +Runs that intentionally have no tools, such as raw model calls, `disableTools`, +or an empty allowlist, do not activate the code-mode surface even if the config +contains `tools.codeMode.enabled: true`. + +The code-mode catalog is run-scoped. It must not leak tools from another agent, +session, sender, or run. 
+ +## Model-visible tools + +When code mode is active, the model sees exactly these top-level tools: + +- `exec` +- `wait` + +All other enabled tools are hidden from the model-facing tool list and registered +in the code-mode catalog. + +The model should use `exec` for tool orchestration, data joining, loops, +parallel nested calls, and structured transformations. The model should use +`wait` only when `exec` returns a resumable `waiting` result. + +## `exec` + +`exec` starts a code-mode cell and returns one result. The input code is model +generated and must be treated as hostile. + +Input: + +```typescript +type CodeModeExecInput = { + code: string; + language?: "javascript" | "typescript"; +}; +``` + +Input rules: + +- `code` is required and must be non-empty. +- `language` defaults to `"javascript"`. +- If `language` is `"typescript"`, OpenClaw transpiles before evaluation. +- `exec` rejects `import`, `require`, dynamic import, and module-loader patterns + in v1. +- `exec` does not expose the normal shell `exec` implementation recursively. + +Result: + +```typescript +type CodeModeResult = CodeModeCompletedResult | CodeModeWaitingResult | CodeModeFailedResult; + +type CodeModeCompletedResult = { + status: "completed"; + value: unknown; + output?: CodeModeOutput[]; + telemetry: CodeModeTelemetry; +}; + +type CodeModeWaitingResult = { + status: "waiting"; + runId: string; + reason: "pending_tools" | "yield"; + pendingToolCalls?: CodeModePendingToolCall[]; + output?: CodeModeOutput[]; + telemetry: CodeModeTelemetry; +}; + +type CodeModeFailedResult = { + status: "failed"; + error: string; + code?: CodeModeErrorCode; + output?: CodeModeOutput[]; + telemetry: CodeModeTelemetry; +}; +``` + +`exec` returns `waiting` when the QuickJS VM suspends with resumable state. The +result includes a `runId` for `wait`. + +`exec` returns `completed` only when the guest VM has no pending work and the +final value is JSON-compatible after OpenClaw's output adapter runs. 
+ +## `wait` + +`wait` continues a suspended code-mode VM. + +Input: + +```typescript +type CodeModeWaitInput = { + runId: string; +}; +``` + +The output is the same `CodeModeResult` union returned by `exec`. + +`wait` exists because nested OpenClaw tools can be slow, interactive, approval +gated, or stream partial updates. The model should not need to keep one long +`exec` call open while the host waits for external work. + +QuickJS-WASI snapshot and restore is the v1 resume mechanism: + +1. `exec` evaluates code until completion, failure, or suspension. +2. On suspension, OpenClaw snapshots the QuickJS VM and records pending host + work. +3. When pending work settles, `wait` restores the VM snapshot. +4. OpenClaw re-registers host callbacks by stable names. +5. OpenClaw delivers nested tool results into the restored VM. +6. OpenClaw drains QuickJS pending jobs. +7. `wait` returns `completed`, `failed`, or another `waiting` result. + +Snapshots are runtime state, not user artifacts. They are size-limited, expired, +and scoped to the run and session that created them. + +`wait` fails when: + +- `runId` is unknown. +- the snapshot expired. +- the parent run or session was aborted. +- the caller is not in the same run/session scope. +- QuickJS-WASI restore fails. +- restoring would exceed configured limits. + +## Guest runtime API + +The guest runtime exposes a small global API: + +```typescript +declare const ALL_TOOLS: ToolCatalogEntry[]; +declare const tools: ToolCatalog; + +declare function text(value: unknown): void; +declare function json(value: unknown): void; +declare function yield_control(reason?: string): Promise<void>; +``` + +`ALL_TOOLS` is compact metadata for the run-scoped catalog. It does not contain +full schemas by default. 
+ +```typescript +type ToolCatalogEntry = { + id: string; + name: string; + label?: string; + description: string; + source: "openclaw" | "plugin" | "mcp" | "client"; + sourceName?: string; +}; +``` + +Full schema is loaded only on demand: + +```typescript +type ToolCatalogEntryWithSchema = ToolCatalogEntry & { + parameters: unknown; +}; +``` + +Catalog helpers: + +```typescript +type ToolCatalog = { + search(query: string, options?: { limit?: number }): Promise<ToolCatalogEntry[]>; + describe(id: string): Promise<ToolCatalogEntryWithSchema>; + call(id: string, input?: unknown): Promise<unknown>; + [safeToolName: string]: unknown; +}; +``` + +Convenience tool functions are installed only for unambiguous safe names: + +```typescript +const files = await tools.search("read local file"); +const fileRead = await tools.describe(files[0].id); +const content = await tools.call(fileRead.id, { path: "README.md" }); + +// If the hidden catalog has an unambiguous `web_search` entry: +const hits = await tools.web_search({ query: "OpenClaw code mode" }); +``` + +The guest runtime must not expose host objects directly. Inputs and outputs cross +the bridge as JSON-compatible values with explicit size caps. + +## Output API + +`text(value)` appends human-readable output to the `output` array. + +`json(value)` appends a structured output item after JSON-compatible +serialization. + +The guest code's final returned value becomes `value` in a `completed` result. + +Output item: + +```typescript +type CodeModeOutput = { type: "text"; text: string } | { type: "json"; value: unknown }; +``` + +Output rules: + +- output order matches guest calls +- output is capped by `maxOutputBytes` +- non-serializable values are converted to plain strings or errors +- binary values are not supported in v1 +- images and files travel through ordinary OpenClaw tools, not through the + code-mode bridge + +## Tool catalog + +The hidden catalog includes tools after effective policy filtering: + +1. OpenClaw core tools. +2. Bundled plugin tools. +3. 
External plugin tools. +4. MCP tools. +5. Client-provided tools for the current run. + +Catalog ids are stable within one run and deterministic across equivalent tool +sets when possible. + +Recommended id shape: + +```text +<source>:<sourceName>:<name> +``` + +Examples: + +```text +openclaw:core:message +plugin:browser:browser_request +mcp:github:create_issue +client:app:select_file +``` + +The catalog omits code-mode control tools: + +- `exec` +- `wait` +- `tool_search_code` +- `tool_search` +- `tool_describe` +- `tool_call` + +This prevents recursion and keeps the model-facing contract narrow. + +## Tool Search interaction + +Code mode supersedes the PI Tool Search model surface for runs where it is +active. + +When `tools.codeMode.enabled` is true and code mode activates: + +- OpenClaw does not expose `tool_search_code`, `tool_search`, `tool_describe`, + or `tool_call` as model-visible tools. +- The same cataloging idea moves inside the guest runtime. +- The guest runtime receives compact `ALL_TOOLS` metadata and search, describe, + and call helpers. +- Nested calls dispatch through the same OpenClaw executor path that Tool Search + uses. + +The existing [Tool Search](/tools/tool-search) page describes the PI compact +catalog bridge. Code mode is the generic OpenClaw alternative for runs that can +use `exec` and `wait`. + +## Tool names and collisions + +The model-visible `exec` tool is the code-mode tool. If the normal OpenClaw +shell `exec` tool is enabled, it is hidden from the model and cataloged like any +other tool. + +Inside the guest runtime: + +- `tools.call("openclaw:core:exec", input)` can call the shell exec tool if + policy allows it. +- `tools.exec(...)` is installed only if the shell exec catalog entry has an + unambiguous safe name. +- the code-mode `exec` tool is never recursively available through `tools`. + +If two tools normalize to the same safe convenience name, OpenClaw omits the +convenience function and requires `tools.call(id, input)`. 
+ +## Nested tool execution + +Every nested tool call crosses the host bridge and re-enters OpenClaw. + +Nested execution preserves: + +- active agent id +- session id and session key +- sender and channel context +- sandbox policy +- approval policy +- plugin `before_tool_call` hooks +- abort signal +- streaming updates where available +- trajectory and audit events + +Nested calls project into the transcript as real tool calls so support bundles +can show what happened. The projection identifies the parent code-mode tool call +and the nested tool id. + +Parallel nested calls are allowed up to `maxPendingToolCalls`. + +## Runtime state + +Each code-mode run has a state machine: + +- `running`: VM is executing or nested calls are in flight. +- `waiting`: VM snapshot exists and can be resumed with `wait`. +- `completed`: final value returned; snapshot deleted. +- `failed`: error returned; snapshot deleted. +- `expired`: snapshot or pending state exceeded retention; cannot resume. +- `aborted`: parent run/session cancelled; snapshot deleted. + +State is scoped by agent run, session, and tool call id. A `wait` call from a +different run or session fails. + +Snapshot storage is bounded: + +- maximum snapshot bytes per run +- maximum live snapshots per process +- snapshot TTL +- cleanup on run end +- cleanup on Gateway shutdown where persistence is not supported + +## QuickJS-WASI runtime + +OpenClaw loads `quickjs-wasi` as a direct dependency in the owning package. The +runtime does not rely on a transitive copy installed for proxy, PAC, or other +unrelated dependencies. 
+ +Runtime responsibilities: + +- compile or load the QuickJS-WASI WebAssembly module +- create one isolated VM per code-mode run or resume +- register host callbacks by stable names +- set memory and interrupt limits +- evaluate JavaScript +- drain pending jobs +- snapshot suspended VM state +- restore snapshots for `wait` +- dispose VM handles and snapshots after terminal states + +The runtime executes outside OpenClaw's main event loop in a worker. A guest +infinite loop must not block the Gateway process indefinitely. + +## TypeScript + +TypeScript support is a source transform only: + +- accepted input: one TypeScript code string +- output: JavaScript string evaluated by QuickJS-WASI +- no typechecking +- no module resolution +- no `import` or `require` in v1 +- diagnostics are returned as `failed` results + +The TypeScript compiler is loaded lazily only for TypeScript cells. Plain +JavaScript cells and disabled code mode do not load the compiler. + +The transform should preserve useful line numbers where feasible. + +## Security boundary + +Model code is hostile. The runtime uses defense in depth: + +- run QuickJS-WASI outside the main event loop +- load `quickjs-wasi` as a direct dependency, not through Codex or a transitive + package +- no filesystem, network, subprocess, module import, environment variables, or + host global objects in the guest +- use QuickJS memory and interrupt limits +- enforce parent-process wall-clock timeout +- enforce output, snapshot, log, and pending-call caps +- serialize host bridge values through a narrow JSON adapter +- convert host errors into plain guest errors, never host realm objects +- drop snapshots on timeout, abort, session end, or expiry +- reject recursive access to `exec`, `wait`, and Tool Search control tools +- prevent convenience-name collisions from shadowing catalog helpers + +The sandbox is one security layer. Operators can still need OS-level hardening +for high-risk deployments. 
+ +## Error codes + +```typescript +type CodeModeErrorCode = + | "runtime_unavailable" + | "invalid_config" + | "invalid_input" + | "unsupported_language" + | "typescript_transform_failed" + | "module_access_denied" + | "timeout" + | "memory_limit_exceeded" + | "output_limit_exceeded" + | "snapshot_limit_exceeded" + | "snapshot_expired" + | "snapshot_restore_failed" + | "too_many_pending_tool_calls" + | "nested_tool_failed" + | "aborted" + | "internal_error"; +``` + +Errors returned to the guest are plain data. Host `Error` instances, stack +objects, prototypes, and host functions do not cross into QuickJS. + +## Telemetry + +Code mode reports: + +- visible tool names sent to the model +- hidden catalog size and source breakdown +- `exec` and `wait` counts +- nested search, describe, and call counts +- nested tool ids called +- timeout, memory, snapshot, and output cap failures +- snapshot lifecycle events + +Telemetry must not include secrets, raw environment values, or unredacted tool +inputs beyond existing OpenClaw trajectory policy. + +## Debugging + +Use targeted model transport logging when code mode behaves differently from a +normal tool run: + +```bash +OPENCLAW_DEBUG_CODE_MODE=1 \ +OPENCLAW_DEBUG_MODEL_TRANSPORT=1 \ +OPENCLAW_DEBUG_MODEL_PAYLOAD=tools \ +OPENCLAW_DEBUG_SSE=events \ +openclaw gateway +``` + +For payload-shape debugging, use `OPENCLAW_DEBUG_MODEL_PAYLOAD=full-redacted`. +This logs a capped, redacted JSON snapshot of the model request; it should only +be used while debugging because prompts and message text can still appear. + +For stream debugging, use `OPENCLAW_DEBUG_SSE=peek` to log the first five +redacted SSE events. Code mode also fails closed if the final provider payload +does not contain exactly `exec` and `wait` after the code-mode surface has +activated. 
+ +## Implementation layout + +Implementation units: + +- config contract: `tools.codeMode` +- catalog builder: effective tools to compact entries and id map +- model-surface adapter: replace visible tools with `exec` and `wait` +- QuickJS-WASI runtime adapter: load, eval, snapshot, restore, dispose +- worker supervisor: timeout, abort, crash isolation +- bridge adapter: JSON-safe host callbacks and result delivery +- TypeScript transform adapter +- snapshot store: TTL, size caps, run/session scoping +- trajectory projection for nested tool calls +- telemetry counters and diagnostics + +The implementation reuses catalog and executor concepts from Tool Search, but +does not use the `node:vm` child as the sandbox. + +## Validation checklist + +Code mode coverage should prove: + +- disabled config leaves existing tool exposure unchanged +- object config without `enabled: true` leaves code mode disabled +- enabled config exposes only `exec` and `wait` to the model when tools are + active for the run +- raw no-tool runs, `disableTools`, and empty allowlists do not trigger code-mode + payload enforcement +- all effective tools appear in `ALL_TOOLS` +- denied tools do not appear in `ALL_TOOLS` +- `tools.search`, `tools.describe`, and `tools.call` work for OpenClaw tools +- Tool Search control tools are hidden from both the model surface and the hidden + catalog +- nested calls preserve approval and hook behavior +- shell `exec` is hidden from the model but callable by catalog id when allowed +- recursive code-mode `exec` and `wait` are not callable from guest code +- TypeScript input is transformed and evaluated without loading TypeScript on + disabled or JavaScript-only paths +- `import`, `require`, filesystem, network, and environment access fail +- infinite loops time out and cannot block the Gateway +- memory cap failures terminate the guest VM +- output and snapshot caps are enforced for completed and suspended calls +- `wait` resumes a suspended snapshot and returns 
the final value +- expired, aborted, wrong-session, and unknown `runId` values fail +- transcript replay and persistence preserve code-mode control calls +- transcript and telemetry show nested tool calls clearly + +## E2E test plan + +Run these as integration or end-to-end tests when changing the runtime: + +1. Start a Gateway with `tools.codeMode.enabled: false`. +2. Send an agent turn with a small direct tool set. +3. Assert the model-visible tools are unchanged. +4. Restart with `tools.codeMode.enabled: true`. +5. Send an agent turn with OpenClaw, plugin, MCP, and client test tools. +6. Assert the model-visible tool list is exactly `exec`, `wait`. +7. In `exec`, read `ALL_TOOLS` and assert the effective test tools are present. +8. In `exec`, call `tools.search`, `tools.describe`, and `tools.call`. +9. Assert denied tools are absent and cannot be called by guessed id. +10. Start a nested tool call that resolves after `exec` returns `waiting`. +11. Call `wait` and assert the restored VM receives the tool result. +12. Assert the final answer contains output produced after restore. +13. Assert timeout, abort, and snapshot expiry clean up runtime state. +14. Export trajectory and assert nested calls are visible under the parent + code-mode call. + +Docs-only changes to this page should still run `pnpm check:docs`. 
+ +## Related + +- [Tool Search](/tools/tool-search) +- [Agent runtimes](/concepts/agent-runtimes) +- [Exec tool](/tools/exec) +- [Code execution](/tools/code-execution) diff --git a/package.json b/package.json index effebd767ba..0bef158ff5b 100644 --- a/package.json +++ b/package.json @@ -1786,6 +1786,7 @@ "pdfjs-dist": "5.7.284", "playwright-core": "1.60.0", "qrcode": "1.5.4", + "quickjs-wasi": "^2.2.0", "tar": "7.5.15", "tokenjuice": "0.7.0", "tree-sitter-bash": "0.25.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1987631056a..076f7585a6e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -155,6 +155,9 @@ importers: qrcode: specifier: 1.5.4 version: 1.5.4 + quickjs-wasi: + specifier: ^2.2.0 + version: 2.2.0 tar: specifier: 7.5.15 version: 7.5.15 diff --git a/src/agents/code-mode.test.ts b/src/agents/code-mode.test.ts new file mode 100644 index 00000000000..fbdd9b7a00f --- /dev/null +++ b/src/agents/code-mode.test.ts @@ -0,0 +1,589 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { setPluginToolMeta } from "../plugins/tools.js"; +import { + applyCodeModeCatalog, + CODE_MODE_EXEC_TOOL_NAME, + CODE_MODE_WAIT_TOOL_NAME, + createCodeModeTools, + resolveCodeModeConfig, + __testing, +} from "./code-mode.js"; +import { createToolSearchCatalogRef, type ToolSearchCatalogRef } from "./tool-search.js"; +import { + TOOL_CALL_RAW_TOOL_NAME, + TOOL_DESCRIBE_RAW_TOOL_NAME, + TOOL_SEARCH_CODE_MODE_TOOL_NAME, + TOOL_SEARCH_RAW_TOOL_NAME, +} from "./tool-search.js"; +import { jsonResult, type AnyAgentTool } from "./tools/common.js"; + +function fakeTool(name: string, description: string): AnyAgentTool { + return { + name, + label: name, + description, + parameters: { + type: "object", + properties: { + value: { type: "string" }, + }, + }, + execute: vi.fn(async (_toolCallId, input) => jsonResult({ name, input })), + }; +} + +function pluginTool(name: string, description: string, pluginId = "fake-code-mode"): AnyAgentTool { + const tool = 
fakeTool(name, description); + setPluginToolMeta(tool, { + pluginId, + optional: true, + }); + return tool; +} + +function pluginToolWithExecute( + name: string, + description: string, + execute: AnyAgentTool["execute"], +): AnyAgentTool { + const tool = pluginTool(name, description); + tool.execute = vi.fn(execute) as AnyAgentTool["execute"]; + return tool; +} + +function resultDetails(result: { details?: unknown }): Record { + expect(result.details).toBeDefined(); + expect(typeof result.details).toBe("object"); + return result.details as Record; +} + +function createCodeModeHarness(params: { catalogRef?: ToolSearchCatalogRef } = {}) { + const catalogRef = params.catalogRef ?? createToolSearchCatalogRef(); + const config = { tools: { codeMode: true } } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const tools = createCodeModeTools(ctx); + return { catalogRef, config, ctx, tools }; +} + +async function runUntilCompleted(params: { + execTool: AnyAgentTool; + waitTool: AnyAgentTool; + code: string; + language?: "javascript" | "typescript"; +}) { + let details = resultDetails( + await params.execTool.execute("code-call-1", { + code: params.code, + language: params.language, + }), + ); + for (let index = 0; index < 8 && details.status === "waiting"; index += 1) { + const runId = details.runId; + expect(typeof runId).toBe("string"); + details = resultDetails(await params.waitTool.execute(`code-wait-${index}`, { runId })); + } + return details; +} + +describe("Code Mode", () => { + afterEach(() => { + __testing.activeRuns.clear(); + }); + + it("resolves object config defaults", () => { + expect(resolveCodeModeConfig({ tools: { codeMode: true } } as never).enabled).toBe(true); + const resolved = resolveCodeModeConfig({ + tools: { + codeMode: { + timeoutMs: 1234, + languages: ["typescript"], + }, + }, + } as never); + 
expect(resolved.enabled).toBe(false); + expect(resolveCodeModeConfig({ tools: { codeMode: { enabled: true } } } as never).enabled).toBe( + true, + ); + expect(resolved.runtime).toBe("quickjs-wasi"); + expect(resolved.mode).toBe("only"); + expect(resolved.timeoutMs).toBe(1234); + expect(resolved.languages).toEqual(["typescript"]); + const limitedSearch = resolveCodeModeConfig({ + tools: { + codeMode: { + enabled: true, + maxSearchLimit: 3, + }, + }, + } as never); + expect(limitedSearch.searchDefaultLimit).toBe(3); + expect(limitedSearch.maxSearchLimit).toBe(3); + }); + + it("resolves the packaged worker URL from stable and hashed dist modules", () => { + expect( + __testing.resolveCodeModeWorkerUrl("file:///repo/dist/agents/code-mode.js").pathname, + ).toBe("/repo/dist/agents/code-mode.worker.js"); + expect( + __testing.resolveCodeModeWorkerUrl("file:///repo/dist/selection-abc123.js").pathname, + ).toBe("/repo/dist/agents/code-mode.worker.js"); + }); + + it("hides all normal tools behind exec and wait", () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + const shellExec = fakeTool("exec", "Run shell command"); + const ticket = pluginTool("fake_create_ticket", "Create a fake ticket"); + + const compacted = applyCodeModeCatalog({ + tools: [...codeModeTools, shellExec, ticket], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + expect(compacted.tools.map((tool) => tool.name)).toEqual([ + CODE_MODE_EXEC_TOOL_NAME, + CODE_MODE_WAIT_TOOL_NAME, + ]); + expect(compacted.catalogToolCount).toBe(2); + }); + + it("uses a flat enum for the exec language schema", () => { + const { tools } = createCodeModeHarness(); + const parameters = tools[0].parameters as { + properties?: Record>; + }; + const language = parameters.properties?.language; + + expect(language).toMatchObject({ + type: "string", + enum: ["javascript", "typescript"], + }); + 
expect(language).not.toHaveProperty("anyOf"); + expect(language).not.toHaveProperty("oneOf"); + }); + + /* The four legacy tool-search tools are neither visible nor catalogued: only the plugin tool is counted (catalogToolCount 1). */ it("removes legacy Tool Search controls from the visible code mode surface", () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + const compacted = applyCodeModeCatalog({ + tools: [ + ...codeModeTools, + fakeTool(TOOL_SEARCH_CODE_MODE_TOOL_NAME, "legacy code surface"), + fakeTool(TOOL_SEARCH_RAW_TOOL_NAME, "legacy search"), + fakeTool(TOOL_DESCRIBE_RAW_TOOL_NAME, "legacy describe"), + fakeTool(TOOL_CALL_RAW_TOOL_NAME, "legacy call"), + pluginTool("fake_create_ticket", "Create a fake ticket"), + ], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + expect(compacted.tools.map((tool) => tool.name)).toEqual([ + CODE_MODE_EXEC_TOOL_NAME, + CODE_MODE_WAIT_TOOL_NAME, + ]); + expect(compacted.catalogToolCount).toBe(1); + }); + + /* End-to-end: the script searches, describes and calls the catalogued plugin tool; the run is driven to completion through exec plus wait. */ it("runs JavaScript through QuickJS-WASI and resumes nested tool calls with wait", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + const ticket = pluginTool("fake_create_ticket", "Create a fake ticket"); + applyCodeModeCatalog({ + tools: [...codeModeTools, ticket], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: ` + const hits = await tools.search("ticket", { limit: 1 }); + const described = await tools.describe(hits[0].id); + const called = await tools.call(described.id, { value: "ship" }); + text("created"); + return called.result.details; + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ + name: "fake_create_ticket", + input: { value: "ship" }, + }); + expect(details.output).toEqual([{ type: "text", text: "created" }]); + 
expect(ticket.execute).toHaveBeenCalledTimes(1); + }); + + /* exec must suspend at yield_control with reason "yield"; wait must resume and report the output from before and after the yield. */ it("marks yield suspensions and resumes the snapshot with wait", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const first = resultDetails( + await codeModeTools[0].execute("code-call-yield", { + code: ` + text("before"); + await yield_control("pause"); + text("after"); + return "done"; + `, + }), + ); + + expect(first.status).toBe("waiting"); + expect(first.reason).toBe("yield"); + expect(first.output).toEqual([{ type: "text", text: "before" }]); + + const runId = first.runId; + expect(typeof runId).toBe("string"); + const resumed = resultDetails(await codeModeTools[1].execute("code-wait-yield", { runId })); + + expect(resumed.status).toBe("completed"); + expect(resumed.value).toBe("done"); + expect(resumed.output).toEqual([ + { type: "text", text: "before" }, + { type: "text", text: "after" }, + ]); + }); + + /* A wait tool created with a different sessionId/sessionKey (same runId and catalogRef) must be refused. */ it("rejects wait calls from a different session scope", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const first = resultDetails( + await codeModeTools[0].execute("code-call-wrong-session", { + code: 'await yield_control("pause"); return "done";', + }), + ); + expect(first.status).toBe("waiting"); + const otherWaitTool = createCodeModeTools({ + config, + runtimeConfig: config, + sessionId: "other-session", + sessionKey: "agent:other:main", + runId: "run-code-mode", + catalogRef, + })[1]; + + await expect( + otherWaitTool.execute("code-wait-wrong-session", { runId: first.runId }), + 
).rejects.toThrow("different session"); + }); + + /* fake_slow never resolves, so wait (timeoutMs 100) must time out and list exactly one still-pending "call" entry. */ it("reports only unsettled pending tool calls when wait times out", async () => { + const catalogRef = createToolSearchCatalogRef(); + const config = { + tools: { + codeMode: { + enabled: true, + timeoutMs: 100, + }, + }, + } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const codeModeTools = createCodeModeTools(ctx); + applyCodeModeCatalog({ + tools: [ + ...codeModeTools, + pluginTool("fake_fast", "Fast helper"), + pluginToolWithExecute( + "fake_slow", + "Slow helper", + async () => await new Promise(() => undefined), + ), + ], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const first = resultDetails( + await codeModeTools[0].execute("code-call-timeout", { + code: ` + const fast = tools.fake_fast({}); + const slow = tools.fake_slow({}); + await fast; + await slow; + return "done"; + `, + }), + ); + expect(first.status).toBe("waiting"); + expect(first.pendingToolCalls).toHaveLength(2); + + const second = resultDetails( + await codeModeTools[1].execute("code-wait-timeout", { runId: first.runId }), + ); + + expect(second.status).toBe("waiting"); + expect(second.pendingToolCalls).toEqual([expect.objectContaining({ method: "call" })]); + }); + + /* A JavaScript-only run must leave the lazily created TypeScript runtime promise unset. */ it("does not load TypeScript for plain JavaScript code mode runs", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: "return 42;", + }); + + expect(details.status).toBe("completed"); + 
expect(details.value).toBe(42); + expect(__testing.getTypescriptRuntimePromise()).toBeNull(); + }); + + /* With maxSearchLimit: 3 and five matching tools, a search without an explicit limit must return 3 hits. */ it("clamps omitted code-mode catalog search limits to maxSearchLimit", async () => { + const catalogRef = createToolSearchCatalogRef(); + const config = { + tools: { + codeMode: { + enabled: true, + maxSearchLimit: 3, + }, + }, + } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const codeModeTools = createCodeModeTools(ctx); + applyCodeModeCatalog({ + tools: [ + ...codeModeTools, + pluginTool("fake_ticket_one", "ticket helper"), + pluginTool("fake_ticket_two", "ticket helper"), + pluginTool("fake_ticket_three", "ticket helper"), + pluginTool("fake_ticket_four", "ticket helper"), + pluginTool("fake_ticket_five", "ticket helper"), + ], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: 'const hits = await tools.search("ticket"); return hits.length;', + }); + + expect(details.status).toBe("completed"); + expect(details.value).toBe(3); + }); + + /* A type-annotated snippet run with language "typescript" must complete and return { value: 42 }. */ it("supports TypeScript source transform", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + language: "typescript", + code: ` + const value: number = 40 + 2; + return { value }; + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ value: 42 }); + }); + + /* Source that calls require(...) must produce a failed result mentioning "module access is disabled". */ it("rejects module access", async () => { + const { config, catalogRef, 
tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = resultDetails( + await codeModeTools[0].execute("code-call-import", { + code: "const fs = require('node:fs'); return fs;", + }), + ); + + expect(details.status).toBe("failed"); + expect(String(details.error)).toContain("module access is disabled"); + }); + + /* With maxOutputBytes: 1024, returning a 2048-character string must fail with "output limit exceeded". */ it("enforces output limits on completed exec calls", async () => { + const catalogRef = createToolSearchCatalogRef(); + const config = { + tools: { + codeMode: { + enabled: true, + maxOutputBytes: 1024, + }, + }, + } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const tools = createCodeModeTools(ctx); + applyCodeModeCatalog({ + tools: [...tools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = resultDetails( + await tools[0].execute("code-call-large", { + code: "return 'x'.repeat(2048);", + }), + ); + + expect(details.status).toBe("failed"); + expect(String(details.error)).toContain("output limit exceeded"); + }); + + /* Emitting 2048 characters of text and then yielding must fail rather than suspend (maxOutputBytes: 1024). */ it("enforces output limits before suspending runs", async () => { + const catalogRef = createToolSearchCatalogRef(); + const config = { + tools: { + codeMode: { + enabled: true, + maxOutputBytes: 1024, + }, + }, + } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const tools = createCodeModeTools(ctx); + applyCodeModeCatalog({ + tools: [...tools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: 
"agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + /* Recorded so the test can assert that the failed exec did not leave a new entry in activeRuns. */ const beforeRunCount = __testing.activeRuns.size; + const details = resultDetails( + await tools[0].execute("code-call-large-suspend", { + code: "text('x'.repeat(2048)); await yield_control('pause'); return 1;", + }), + ); + + expect(details.status).toBe("failed"); + expect(String(details.error)).toContain("output limit exceeded"); + expect(__testing.activeRuns.size).toBe(beforeRunCount); + }); + + /* With timeoutMs: 100, a "while (true) {}" script must come back as a failed result instead of hanging the test. */ it("terminates hostile infinite loops outside the main event loop", async () => { + const catalogRef = createToolSearchCatalogRef(); + const config = { + tools: { + codeMode: { + enabled: true, + timeoutMs: 100, + }, + }, + } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const tools = createCodeModeTools(ctx); + applyCodeModeCatalog({ + tools: [...tools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + /* NOTE(review): this promise is already resolved when created, so awaiting it below shows little about event-loop liveness during the run — confirm intent. */ const heartbeat = Promise.resolve("main-event-loop-alive"); + const details = resultDetails( + await tools[0].execute("code-call-loop", { + code: "while (true) {}", + }), + ); + + await expect(heartbeat).resolves.toBe("main-event-loop-alive"); + expect(details.status).toBe("failed"); + }); +}); diff --git a/src/agents/code-mode.ts b/src/agents/code-mode.ts new file mode 100644 index 00000000000..e4a21aa96f3 --- /dev/null +++ b/src/agents/code-mode.ts @@ -0,0 +1,852 @@ +import { randomUUID } from "node:crypto"; +import path from "node:path"; +import { fileURLToPath, pathToFileURL } from "node:url"; +import { Worker } from "node:worker_threads"; +import type { AgentToolUpdateCallback } from "@mariozechner/pi-agent-core"; +import type { ToolDefinition } from "@mariozechner/pi-coding-agent"; +import { Type } from "typebox"; +import type { OpenClawConfig } from 
"../config/types.openclaw.js"; +import type { HookContext } from "./pi-tools.before-tool-call.js"; +import { optionalStringEnum } from "./schema/typebox.js"; +import { + addClientToolsToToolCatalog, + applyToolCatalogCompaction, + TOOL_CALL_RAW_TOOL_NAME, + TOOL_DESCRIBE_RAW_TOOL_NAME, + TOOL_SEARCH_CODE_MODE_TOOL_NAME, + TOOL_SEARCH_RAW_TOOL_NAME, + ToolSearchRuntime, + type ToolSearchCatalogRef, + type ToolSearchCatalogToolExecutor, + type ToolSearchConfig, + type ToolSearchToolContext, +} from "./tool-search.js"; +import { + asToolParamsRecord, + jsonResult, + ToolInputError, + type AnyAgentTool, +} from "./tools/common.js"; + +/** Names of the two tools created by createCodeModeTools. */ export const CODE_MODE_EXEC_TOOL_NAME = "exec"; +export const CODE_MODE_WAIT_TOOL_NAME = "wait"; + +/** Tools registered through markCodeModeControlTool; queried by isCodeModeControlTool. */ const codeModeControlTools = new WeakSet(); + +/** Fallback values used by resolveCodeModeConfig when an option is missing or not a positive integer. */ const DEFAULT_TIMEOUT_MS = 10_000; +const DEFAULT_MEMORY_LIMIT_BYTES = 64 * 1024 * 1024; +const DEFAULT_MAX_OUTPUT_BYTES = 64 * 1024; +const DEFAULT_MAX_SNAPSHOT_BYTES = 10 * 1024 * 1024; +const DEFAULT_MAX_PENDING_TOOL_CALLS = 16; +const DEFAULT_SNAPSHOT_TTL_SECONDS = 900; +const DEFAULT_SEARCH_LIMIT = 8; +const DEFAULT_MAX_SEARCH_LIMIT = 50; + +type CodeModeLanguage = "javascript" | "typescript"; + +/** Fully resolved code mode settings, as returned by resolveCodeModeConfig. */ export type CodeModeConfig = { + enabled: boolean; + runtime: "quickjs-wasi"; + mode: "only"; + languages: CodeModeLanguage[]; + timeoutMs: number; + memoryLimitBytes: number; + maxOutputBytes: number; + maxSnapshotBytes: number; + maxPendingToolCalls: number; + snapshotTtlSeconds: number; + searchDefaultLimit: number; + maxSearchLimit: number; +}; + +/** Bridge operations dispatched by runBridgeRequest. */ type CodeModeBridgeMethod = "search" | "describe" | "call" | "yield"; + +/** A bridge request reported by the worker in a "waiting" result. */ type PendingBridgeRequest = { + id: string; + method: CodeModeBridgeMethod; + args: unknown[]; +}; + +/** Outcome of a bridge request: ok with a value, or not ok with an error message. */ type SettledBridgeRequest = { + id: string; + ok: boolean; + value?: unknown; + error?: string; +}; + +/** A pending request plus its in-flight promise; "settled" is filled in once that promise resolves. */ type PendingBridgeState = PendingBridgeRequest & { + promise: Promise; + settled?: SettledBridgeRequest; +}; + +/** State stored in activeRuns for a suspended run until wait resumes it or it expires. */ type CodeModeRunState = { + runId: string; + parentToolCallId: 
string; + ctx: ToolSearchToolContext; + config: CodeModeConfig; + snapshotBytes: Uint8Array; + pending: PendingBridgeState[]; + output: unknown[]; + createdAt: number; + expiresAt: number; + runtime: ToolSearchRuntime; +}; + +type CodeModeToolContext = ToolSearchToolContext; + +/** Result returned by runCodeModeWorker, discriminated by "status". */ type CodeModeWorkerResult = + | { + status: "completed"; + value: unknown; + output: unknown[]; + } + | { + status: "waiting"; + snapshotBytes: Uint8Array; + pendingRequests: PendingBridgeRequest[]; + output: unknown[]; + } + | { + status: "failed"; + error: string; + code: "invalid_input" | "internal_error"; + output: unknown[]; + }; + +/** Suspended runs keyed by runId. */ const activeRuns = new Map(); +/** Cached dynamic import of "typescript"; stays null until loadTypeScriptRuntime is first called. */ let typescriptRuntimePromise: Promise | null = null; + +/** True for non-null, non-array objects. */ function isRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +/** Normalizes tools.codeMode: true/false become { enabled }, a record is returned as is, anything else becomes {}. */ function readCodeModeRawConfig(config?: OpenClawConfig): Record { + const tools = isRecord(config?.tools) ? config.tools : undefined; + const codeMode = tools?.codeMode; + if (codeMode === true) { + return { enabled: true }; + } + if (codeMode === false) { + return { enabled: false }; + } + return isRecord(codeMode) ? codeMode : {}; +} + +/** Returns value when it is a boolean, otherwise fallback. */ function readBoolean(value: unknown, fallback: boolean): boolean { + return typeof value === "boolean" ? value : fallback; +} + +/** Returns value when it is an integer greater than 0, otherwise fallback. */ function readPositiveInteger(value: unknown, fallback: number): number { + return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : fallback; +} + +/** Limits value to the inclusive range [min, max]. */ function clampInteger(value: number, min: number, max: number): number { + return Math.max(min, Math.min(max, value)); +} + +/** Keeps only "javascript"/"typescript" entries, de-duplicated; falls back to both when value is not an array or no valid entry remains. */ function readLanguages(value: unknown): CodeModeLanguage[] { + if (!Array.isArray(value)) { + return ["javascript", "typescript"]; + } + const languages = value.filter( + (entry): entry is CodeModeLanguage => entry === "javascript" || entry === "typescript", + ); + return languages.length > 0 ? 
[...new Set(languages)] : ["javascript", "typescript"]; +} + +/** Resolves tools.codeMode into a complete CodeModeConfig. "enabled" defaults to false; every numeric option falls back to its default and is clamped to a fixed range. maxSearchLimit cannot exceed DEFAULT_MAX_SEARCH_LIMIT, and searchDefaultLimit cannot exceed the resolved maxSearchLimit. */ export function resolveCodeModeConfig(config?: OpenClawConfig): CodeModeConfig { + const raw = readCodeModeRawConfig(config); + const maxSearchLimit = clampInteger( + readPositiveInteger(raw.maxSearchLimit, DEFAULT_MAX_SEARCH_LIMIT), + 1, + DEFAULT_MAX_SEARCH_LIMIT, + ); + return { + enabled: readBoolean(raw.enabled, false), + runtime: "quickjs-wasi", + mode: "only", + languages: readLanguages(raw.languages), + timeoutMs: clampInteger(readPositiveInteger(raw.timeoutMs, DEFAULT_TIMEOUT_MS), 100, 60_000), + memoryLimitBytes: clampInteger( + readPositiveInteger(raw.memoryLimitBytes, DEFAULT_MEMORY_LIMIT_BYTES), + 1024 * 1024, + 1024 * 1024 * 1024, + ), + maxOutputBytes: clampInteger( + readPositiveInteger(raw.maxOutputBytes, DEFAULT_MAX_OUTPUT_BYTES), + 1024, + 10 * 1024 * 1024, + ), + maxSnapshotBytes: clampInteger( + readPositiveInteger(raw.maxSnapshotBytes, DEFAULT_MAX_SNAPSHOT_BYTES), + 1024, + 256 * 1024 * 1024, + ), + maxPendingToolCalls: clampInteger( + readPositiveInteger(raw.maxPendingToolCalls, DEFAULT_MAX_PENDING_TOOL_CALLS), + 1, + 128, + ), + snapshotTtlSeconds: clampInteger( + readPositiveInteger(raw.snapshotTtlSeconds, DEFAULT_SNAPSHOT_TTL_SECONDS), + 1, + 24 * 60 * 60, + ), + searchDefaultLimit: clampInteger( + readPositiveInteger(raw.searchDefaultLimit, DEFAULT_SEARCH_LIMIT), + 1, + maxSearchLimit, + ), + maxSearchLimit, + }; +} + +/** Builds the ToolSearchConfig handed to ToolSearchRuntime: always enabled, mode "tools", with timeout and search limits copied from the code mode config. */ function toToolSearchConfig(config: CodeModeConfig): ToolSearchConfig { + return { + enabled: true, + mode: "tools", + codeTimeoutMs: config.timeoutMs, + searchDefaultLimit: config.searchDefaultLimit, + maxSearchLimit: config.maxSearchLimit, + }; +} + +/** True when the tool object was registered through markCodeModeControlTool (identity check, not a name check). */ export function isCodeModeControlTool(tool: AnyAgentTool): boolean { + return codeModeControlTools.has(tool); +} + +/** Registers the tool object as a code mode control tool and returns it. */ function markCodeModeControlTool(tool: T): T { + codeModeControlTools.add(tool); + return tool; +} + +/** Deletes every active run whose expiresAt is at or before "now". */ function removeExpiredRuns(now = Date.now()): void { + for (const [runId, state] of activeRuns) { 
+ if (state.expiresAt <= now) { + activeRuns.delete(runId); + } + } +} + +/** Round-trips value through JSON; undefined becomes null. If the round-trip throws, Errors become { name, message }, string/number/boolean are returned as is, bigint/symbol/function are stringified, and anything else becomes its Object.prototype.toString tag. */ function toJsonSafe(value: unknown): unknown { + if (value === undefined) { + return null; + } + try { + return JSON.parse(JSON.stringify(value)) as unknown; + } catch { + if (value instanceof Error) { + return { name: value.name, message: value.message }; + } + if (value === null) { + return null; + } + switch (typeof value) { + case "string": + case "number": + case "boolean": + return value; + case "bigint": + case "symbol": + case "function": + return String(value); + default: + return Object.prototype.toString.call(value); + } + } +} + +/** UTF-8 byte length of the JSON-safe serialization of value. */ function jsonByteLength(value: unknown): number { + return Buffer.byteLength(JSON.stringify(toJsonSafe(value)) ?? "null", "utf8"); +} + +/** Throws ToolInputError when the serialized output exceeds config.maxOutputBytes. */ function enforceOutputLimit(output: unknown[], config: CodeModeConfig): void { + if (jsonByteLength(output) > config.maxOutputBytes) { + throw new ToolInputError("code mode output limit exceeded"); + } +} + +/** Applies the output limit, then checks the return value against the same maxOutputBytes separately (when a value is present). */ function enforceResultLimit(params: { + output: unknown[]; + value?: unknown; + config: CodeModeConfig; +}): void { + enforceOutputLimit(params.output, params.config); + if (params.value !== undefined && jsonByteLength(params.value) > params.config.maxOutputBytes) { + throw new ToolInputError("code mode output limit exceeded"); + } +} + +/** Validates exec arguments: "code" must be a non-blank string and "language", when given, must be javascript or typescript. The code is returned untrimmed. */ function readCode(args: unknown): { code: string; language?: CodeModeLanguage } { + const params = asToolParamsRecord(args); + const code = params.code; + if (typeof code !== "string" || !code.trim()) { + throw new ToolInputError("code must be a non-empty string."); + } + const language = params.language; + if (language !== undefined && language !== "javascript" && language !== "typescript") { + throw new ToolInputError("language must be javascript or typescript."); + } + return { code, language }; +} + +/** Reads the run id from "runId" (or "run_id" as a fallback) and returns it trimmed; throws ToolInputError when it is missing or blank. */ function readRunId(args: unknown): string { + const params = asToolParamsRecord(args); + const runId = params.runId ?? 
params.run_id; + if (typeof runId !== "string" || !runId.trim()) { + throw new ToolInputError("runId must be a non-empty string."); + } + return runId.trim(); +} + +/** Textual pre-check for module access: true when the source contains a standalone "import" keyword followed by "(", whitespace, "{" or "*", or a standalone "require" followed by "(". The keyword must not be preceded by a word character or "$". Identifiers that merely start with "import" (for example "important" or "imported") are not treated as module access. The check is purely textual, so matches inside strings or comments are rejected as well. */ function rejectsModuleAccess(code: string): boolean { + return /(^|[^\w$])import\s*(?:\(|[\s{*])|(^|[^\w$])require\s*\(/u.test(code); +} + +/** Lazily imports the "typescript" package once and reuses the cached promise on later calls. */ async function loadTypeScriptRuntime(): Promise { + typescriptRuntimePromise ??= import("typescript"); + return await typescriptRuntimePromise; +} + +/** Validates and prepares source for the worker. Throws ToolInputError when the language (default "javascript") is not enabled or when rejectsModuleAccess matches. JavaScript is returned unchanged; TypeScript is transpiled with ts.transpileModule. */ async function prepareSource(input: { + code: string; + language?: CodeModeLanguage; + config: CodeModeConfig; +}): Promise { + const language = input.language ?? "javascript"; + if (!input.config.languages.includes(language)) { + throw new ToolInputError(`code mode ${language} input is disabled.`); + } + if (rejectsModuleAccess(input.code)) { + throw new ToolInputError("code mode module access is disabled."); + } + if (language === "javascript") { + return input.code; + } + /* Only the TypeScript path loads the compiler, so plain JavaScript runs never pay for the import. */ const ts = await loadTypeScriptRuntime(); + const transformed = ts.transpileModule(input.code, { + compilerOptions: { + target: ts.ScriptTarget.ES2022, + module: ts.ModuleKind.ESNext, + importsNotUsedAsValues: ts.ImportsNotUsedAsValues.Remove, + sourceMap: false, + }, + reportDiagnostics: true, + }); + const diagnostics = transformed.diagnostics ?? 
[]; + if (diagnostics.some((diagnostic) => diagnostic.category === ts.DiagnosticCategory.Error)) { + const message = diagnostics + .map((diagnostic) => ts.flattenDiagnosticMessageText(diagnostic.messageText, "\n")) + .join("\n"); + throw new ToolInputError(`typescript transform failed: ${message}`); + } + /* The module-access check is repeated on the transpiled output. */ if (rejectsModuleAccess(transformed.outputText)) { + throw new ToolInputError("code mode module access is disabled."); + } + return transformed.outputText; +} + +/** Returns error.message for Error instances (falling back to String(error) when the message is empty) and String(error) for anything else. */ function errorMessage(error: unknown): string { + if (error instanceof Error) { + return error.message || String(error); + } + return String(error); +} + +/** Executes one bridge request against the tool-search runtime. "search", "describe" and "call" validate that their first argument is a string and delegate to the runtime; "yield" performs no runtime call. Never rejects: any thrown error is returned as { ok: false, error }. Successful values are passed through toJsonSafe. */ async function runBridgeRequest(params: { + runtime: ToolSearchRuntime; + parentToolCallId: string; + request: PendingBridgeRequest; + signal?: AbortSignal; + onUpdate?: AgentToolUpdateCallback; +}): Promise { + try { + const values = Array.isArray(params.request.args) ? params.request.args : []; + let value: unknown; + switch (params.request.method) { + case "search": { + const query = values[0]; + if (typeof query !== "string") { + throw new ToolInputError("search query must be a string."); + } + const options = isRecord(values[1]) ? values[1] : undefined; + value = await params.runtime.search(query, { + limit: typeof options?.limit === "number" ? options.limit : undefined, + }); + break; + } + case "describe": { + const id = values[0]; + if (typeof id !== "string") { + throw new ToolInputError("describe id must be a string."); + } + value = await params.runtime.describe(id); + break; + } + case "call": { + const id = values[0]; + if (typeof id !== "string") { + throw new ToolInputError("call id must be a string."); + } + value = await params.runtime.call(id, values[1] ?? {}, { + parentToolCallId: params.parentToolCallId, + signal: params.signal, + onUpdate: params.onUpdate, + }); + break; + } + case "yield": { + value = { status: "yielded", reason: values[0] ?? 
null }; + break; + } + } + return { id: params.request.id, ok: true, value: toJsonSafe(value) }; + } catch (error) { + return { id: params.request.id, ok: false, error: errorMessage(error) }; + } +} + +/** Locates the worker module. When the current module path contains a "dist" directory segment, the worker is always <dist>/agents/code-mode.worker.js (using the last such segment); otherwise it is the sibling code-mode.worker file with the current module's extension (".js" when there is none). */ function resolveCodeModeWorkerUrl(currentModuleUrl: string): URL { + const currentPath = fileURLToPath(currentModuleUrl); + const distMarker = `${path.sep}dist${path.sep}`; + const distIndex = currentPath.lastIndexOf(distMarker); + if (distIndex >= 0) { + /* Keep everything up to and including "dist", without the trailing separator. */ const distRoot = currentPath.slice(0, distIndex + distMarker.length - 1); + return pathToFileURL(path.join(distRoot, "agents", "code-mode.worker.js")); + } + const extension = path.extname(currentPath) || ".js"; + return new URL(`./code-mode.worker${extension}`, currentModuleUrl); +} + +/** Worker URL resolved relative to this module. */ function codeModeWorkerUrl(): URL { + return resolveCodeModeWorkerUrl(import.meta.url); +} + +/** Starts a worker thread with workerData and resolves with its first message. Resolves with a "failed" result instead when timeoutMs elapses, the worker emits an error, it exits with a non-zero code, or the message is not a record. The worker is terminated on timeout and after the first message; only the first outcome is used. The timer is always cleared. */ async function runCodeModeWorker( + workerData: unknown, + timeoutMs: number, +): Promise { + const worker = new Worker(codeModeWorkerUrl(), { + workerData, + }); + let timer: ReturnType | undefined; + try { + return await new Promise((resolve) => { + let settled = false; + /* Guard so that whichever of timeout/message/error/exit fires first wins. */ const finish = (result: CodeModeWorkerResult) => { + if (settled) { + return; + } + settled = true; + resolve(result); + }; + timer = setTimeout(() => { + void worker.terminate(); + finish({ + status: "failed", + error: "code mode worker timeout exceeded", + code: "internal_error", + output: [], + }); + }, timeoutMs); + worker.once("message", (message: unknown) => { + void worker.terminate(); + finish( + isRecord(message) + ? 
(message as CodeModeWorkerResult) + : { + status: "failed", + error: "invalid code mode worker response", + code: "internal_error", + output: [], + }, + ); + }); + worker.once("error", (error) => { + finish({ + status: "failed", + error: errorMessage(error), + code: "internal_error", + output: [], + }); + }); + worker.once("exit", (code) => { + /* NOTE(review): an exit with code 0 and no prior message is not handled here; the promise then only settles through the timeout timer — confirm this is intended. */ if (code !== 0) { + finish({ + status: "failed", + error: `code mode worker exited with code ${code}`, + code: "internal_error", + output: [], + }); + } + }); + }); + } finally { + if (timer) { + clearTimeout(timer); + } + } +} + +/** Suspends a run. Throws ToolInputError when the snapshot or the output exceeds its configured limit. Otherwise starts every pending bridge request immediately, stores the run in activeRuns under a fresh "cm_"-prefixed id with an expiry of snapshotTtlSeconds, and returns the "waiting" result. */ function snapshotState(params: { + pendingRequests: PendingBridgeRequest[]; + snapshotBytes: Uint8Array; + parentToolCallId: string; + ctx: ToolSearchToolContext; + config: CodeModeConfig; + runtime: ToolSearchRuntime; + output: unknown[]; + signal?: AbortSignal; + onUpdate?: AgentToolUpdateCallback; +}) { + if (params.snapshotBytes.byteLength > params.config.maxSnapshotBytes) { + throw new ToolInputError("code mode snapshot limit exceeded"); + } + enforceOutputLimit(params.output, params.config); + const runId = `cm_${randomUUID()}`; + const pending = params.pendingRequests.map((request) => { + const promise = runBridgeRequest({ + runtime: params.runtime, + parentToolCallId: params.parentToolCallId, + request, + signal: params.signal, + onUpdate: params.onUpdate, + }); + const state: PendingBridgeState = { ...request, promise }; + /* Record the outcome on the state object so later wait calls can tell settled requests from unsettled ones. */ void promise.then((settled) => { + state.settled = settled; + }); + return state; + }); + const now = Date.now(); + activeRuns.set(runId, { + runId, + parentToolCallId: params.parentToolCallId, + ctx: params.ctx, + config: params.config, + snapshotBytes: params.snapshotBytes, + pending, + output: params.output, + createdAt: now, + expiresAt: now + params.config.snapshotTtlSeconds * 1000, + runtime: params.runtime, + }); + return { + status: "waiting" as const, + runId, + reason: codeModeWaitingReason(pending), + pendingToolCalls: pendingToolCalls(pending), + output: 
params.output, + telemetry: telemetry(params.runtime), + }; +} + +/** "yield" only when there is at least one pending entry and every entry is a yield; otherwise "pending_tools". */ function codeModeWaitingReason(pending: readonly PendingBridgeState[]): "pending_tools" | "yield" { + return pending.length > 0 && pending.every((entry) => entry.method === "yield") + ? "yield" + : "pending_tools"; +} + +/** Reduces pending entries to their id and method for the tool result. */ function pendingToolCalls(pending: readonly PendingBridgeState[]) { + return pending.map((entry) => ({ id: entry.id, method: entry.method })); +} + +/** Runtime telemetry with visibleTools set to the exec and wait tool names. */ function telemetry(runtime: ToolSearchRuntime) { + return { + ...runtime.telemetry(), + visibleTools: [CODE_MODE_EXEC_TOOL_NAME, CODE_MODE_WAIT_TOOL_NAME], + }; +} + +/** Runs one exec call. Throws ToolInputError when code mode is disabled. Source preparation errors and worker/limit errors are returned as "failed" results rather than thrown. A "waiting" worker result is turned into a stored snapshot via snapshotState; other results are checked against the output limit and returned with telemetry. */ async function runExec(params: { + toolCallId: string; + ctx: CodeModeToolContext; + code: string; + language?: CodeModeLanguage; + signal?: AbortSignal; + onUpdate?: AgentToolUpdateCallback; +}) { + removeExpiredRuns(); + const config = resolveCodeModeConfig(params.ctx.runtimeConfig ?? params.ctx.config); + if (!config.enabled) { + throw new ToolInputError("code mode is disabled."); + } + const runtime = new ToolSearchRuntime(params.ctx, toToolSearchConfig(config)); + const pendingRequests: PendingBridgeRequest[] = []; + let source: string; + try { + source = await prepareSource({ code: params.code, language: params.language, config }); + } catch (error) { + return { + status: "failed" as const, + error: errorMessage(error), + code: error instanceof ToolInputError ? 
"invalid_input" : "internal_error", + output: [], + telemetry: telemetry(runtime), + }; + } + try { + /* The host-side timeout is timeoutMs + 1000, one second longer than the timeoutMs passed to the worker in config. */ const result = await runCodeModeWorker( + { + kind: "exec", + source, + config, + catalog: runtime.all(), + }, + config.timeoutMs + 1000, + ); + if (result.status === "waiting") { + return snapshotState({ + pendingRequests: result.pendingRequests, + snapshotBytes: result.snapshotBytes, + parentToolCallId: params.toolCallId, + ctx: params.ctx, + config, + runtime, + output: result.output, + signal: params.signal, + onUpdate: params.onUpdate, + }); + } + enforceResultLimit({ + output: result.output, + value: result.status === "completed" ? result.value : undefined, + config, + }); + return { + ...result, + telemetry: telemetry(runtime), + }; + } catch (error) { + return { + status: "failed" as const, + error: errorMessage(error), + code: error instanceof ToolInputError ? "invalid_input" : "internal_error", + output: [], + telemetry: telemetry(runtime), + }; + } +} + +/** Resolves true once every not-yet-settled request has finished, or false when timeoutMs elapses first. Resolves true immediately when nothing is unsettled. */ async function waitForPending(pending: PendingBridgeState[], timeoutMs: number): Promise { + const pendingPromises = pending.filter((entry) => !entry.settled).map((entry) => entry.promise); + if (pendingPromises.length === 0) { + return true; + } + let timer: ReturnType | undefined; + try { + return await Promise.race([ + Promise.all(pendingPromises).then(() => true), + new Promise((resolve) => { + timer = setTimeout(() => resolve(false), timeoutMs); + }), + ]); + } finally { + if (timer) { + clearTimeout(timer); + } + } +} + +/** Resumes a suspended run. Throws ToolInputError when the run is unknown or expired, or when the caller's runId, sessionId, sessionKey or agentId differs from the stored context (each field is only compared when both sides provide it). If the pending requests do not settle within timeoutMs, the run stays stored and a "waiting" result is returned; otherwise the run is removed from activeRuns and the snapshot is resumed in the worker with the settled requests. Errors during the resume are returned as "failed" results. */ async function runWait(params: { + toolCallId: string; + ctx: CodeModeToolContext; + runId: string; + signal?: AbortSignal; + onUpdate?: AgentToolUpdateCallback; +}) { + removeExpiredRuns(); + const state = activeRuns.get(params.runId); + if (!state) { + throw new ToolInputError("code mode run is unavailable or expired."); + } + if (state.ctx.runId && params.ctx.runId && state.ctx.runId !== params.ctx.runId) { + throw new ToolInputError("code mode run belongs to a different agent 
run."); + } + if ( + (state.ctx.sessionId && params.ctx.sessionId && state.ctx.sessionId !== params.ctx.sessionId) || + (state.ctx.sessionKey && + params.ctx.sessionKey && + state.ctx.sessionKey !== params.ctx.sessionKey) || + (state.ctx.agentId && params.ctx.agentId && state.ctx.agentId !== params.ctx.agentId) + ) { + throw new ToolInputError("code mode run belongs to a different session."); + } + const ready = await waitForPending(state.pending, state.config.timeoutMs); + if (!ready) { + const pending = state.pending.filter((entry) => !entry.settled); + return { + status: "waiting" as const, + runId: state.runId, + reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending), + pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending), + output: state.output, + telemetry: telemetry(state.runtime), + }; + } + + activeRuns.delete(state.runId); + try { + const settledRequests: SettledBridgeRequest[] = []; + for (const entry of state.pending) { + settledRequests.push(entry.settled ?? (await entry.promise)); + } + const result = await runCodeModeWorker( + { + kind: "resume", + snapshotBytes: state.snapshotBytes, + config: state.config, + settledRequests, + }, + state.config.timeoutMs + 1000, + ); + const output = [...state.output, ...result.output]; + enforceOutputLimit(output, state.config); + if (result.status === "waiting") { + return snapshotState({ + pendingRequests: result.pendingRequests, + snapshotBytes: result.snapshotBytes, + parentToolCallId: params.toolCallId, + ctx: state.ctx, + config: state.config, + runtime: state.runtime, + output, + signal: params.signal, + onUpdate: params.onUpdate, + }); + } + enforceResultLimit({ + output, + value: result.status === "completed" ? 
result.value : undefined, + config: state.config, + }); + return { + ...result, + output, + telemetry: telemetry(state.runtime), + }; + } catch (error) { + /* The run was already removed from activeRuns above, so a failed resume cannot be retried with the same runId. */ return { + status: "failed" as const, + error: errorMessage(error), + code: error instanceof ToolInputError ? "invalid_input" : "internal_error", + output: state.output, + telemetry: telemetry(state.runtime), + }; + } +} + +/** Creates the exec and wait tools bound to ctx, registers both as code mode control tools, and returns them in the order [exec, wait]. Argument validation errors from readCode/readRunId are thrown from execute; run results are wrapped with jsonResult. */ export function createCodeModeTools(ctx: CodeModeToolContext): AnyAgentTool[] { + const execTool = markCodeModeControlTool({ + name: CODE_MODE_EXEC_TOOL_NAME, + label: "exec", + description: + "Run JavaScript or TypeScript in OpenClaw code mode. Use ALL_TOOLS and tools.search/describe/call inside the code to discover and call enabled tools.", + parameters: Type.Object({ + code: Type.String({ description: "JavaScript or TypeScript source to run." }), + language: optionalStringEnum(["javascript", "typescript"] as const, { + description: "Source language. Defaults to javascript.", + }), + }), + execute: async ( + toolCallId: string, + args: unknown, + signal?: AbortSignal, + onUpdate?: AgentToolUpdateCallback, + ) => { + const input = readCode(args); + return jsonResult( + await runExec({ + toolCallId, + ctx, + code: input.code, + language: input.language, + signal, + onUpdate, + }), + ); + }, + } as AnyAgentTool); + const waitTool = markCodeModeControlTool({ + name: CODE_MODE_WAIT_TOOL_NAME, + label: "wait", + description: "Resume a suspended OpenClaw code mode run returned by exec.", + parameters: Type.Object({ + runId: Type.String({ description: "Code mode run id returned by exec." 
}), + }), + execute: async ( + toolCallId: string, + args: unknown, + signal?: AbortSignal, + onUpdate?: AgentToolUpdateCallback, + ) => + jsonResult( + await runWait({ + toolCallId, + ctx, + runId: readRunId(args), + signal, + onUpdate, + }), + ), + } as AnyAgentTool); + return [execTool, waitTool]; +} + +export function applyCodeModeCatalog(params: { + tools: AnyAgentTool[]; + config?: OpenClawConfig; + sessionId?: string; + sessionKey?: string; + agentId?: string; + runId?: string; + catalogRef?: ToolSearchCatalogRef; + toolHookContext?: HookContext; +}) { + const config = resolveCodeModeConfig(params.config); + if (!config.enabled) { + return applyToolCatalogCompaction({ + ...params, + enabled: false, + isVisibleControlTool: isCodeModeControlTool, + }); + } + const tools = params.tools.filter( + (tool) => + isCodeModeControlTool(tool) || + (tool.name !== TOOL_SEARCH_CODE_MODE_TOOL_NAME && + tool.name !== TOOL_SEARCH_RAW_TOOL_NAME && + tool.name !== TOOL_DESCRIBE_RAW_TOOL_NAME && + tool.name !== TOOL_CALL_RAW_TOOL_NAME), + ); + return applyToolCatalogCompaction({ + ...params, + tools, + enabled: true, + isVisibleControlTool: isCodeModeControlTool, + shouldCatalogTool: (tool) => !isCodeModeControlTool(tool), + }); +} + +export function addClientToolsToCodeModeCatalog(params: { + tools: ToolDefinition[]; + config?: OpenClawConfig; + sessionId?: string; + sessionKey?: string; + agentId?: string; + runId?: string; + catalogRef?: ToolSearchCatalogRef; +}) { + return addClientToolsToToolCatalog({ + ...params, + enabled: resolveCodeModeConfig(params.config).enabled, + }); +} + +export const __testing = { + activeRuns, + codeModeWorkerUrl, + resolveCodeModeWorkerUrl, + resolveCodeModeConfig, + getTypescriptRuntimePromise: () => typescriptRuntimePromise, +}; diff --git a/src/agents/code-mode.worker.ts b/src/agents/code-mode.worker.ts new file mode 100644 index 00000000000..1b016a6a45c --- /dev/null +++ b/src/agents/code-mode.worker.ts @@ -0,0 +1,479 @@ +import { 
randomUUID } from "node:crypto";
import { parentPort, workerData } from "node:worker_threads";
import { EvalFlags, Intrinsics, JSException, QuickJS, type JSValueHandle } from "quickjs-wasi";

/** Bridge methods the host accepts from guest code; the host request handler rejects anything else. */
type CodeModeBridgeMethod = "search" | "describe" | "call" | "yield";

/** Limits applied to a single worker invocation. */
type CodeModeConfig = {
  timeoutMs: number;
  memoryLimitBytes: number;
  maxPendingToolCalls: number;
  maxSnapshotBytes: number;
};

/** A bridge request recorded by the host while guest code was running. */
type PendingBridgeRequest = {
  id: string;
  method: CodeModeBridgeMethod;
  args: unknown[];
};

/** Outcome of a bridge request, handed back to the guest on resume. */
type SettledBridgeRequest = {
  id: string;
  ok: boolean;
  value?: unknown;
  error?: string;
};

/** Payload read from `workerData`: either a fresh run or a resume from a snapshot. */
type CodeModeWorkerInput =
  | {
      kind: "exec";
      source: string;
      config: CodeModeConfig;
      catalog: unknown[];
    }
  | {
      kind: "resume";
      snapshotBytes: Uint8Array;
      config: CodeModeConfig;
      settledRequests: SettledBridgeRequest[];
    };

/** Message posted back to the parent thread. */
type CodeModeWorkerResult =
  | {
      status: "completed";
      value: unknown;
      output: unknown[];
    }
  | {
      status: "waiting";
      snapshotBytes: Uint8Array;
      pendingRequests: PendingBridgeRequest[];
      output: unknown[];
    }
  | {
      status: "failed";
      error: string;
      code: "invalid_input" | "internal_error";
      output: unknown[];
    };

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return Boolean(value && typeof value === "object" && !Array.isArray(value));
}

/**
 * Converts a value into something that survives JSON serialization.
 *
 * `undefined` becomes `null`. Values that JSON round-trip are returned as their
 * round-tripped copy. When the round trip throws, Errors are reduced to
 * `{ name, message }`, primitives are returned as-is or stringified, and any
 * other object is reduced to its `Object.prototype.toString` tag.
 */
function toJsonSafe(value: unknown): unknown {
  if (value === undefined) {
    return null;
  }
  try {
    return JSON.parse(JSON.stringify(value)) as unknown;
  } catch {
    if (value instanceof Error) {
      return { name: value.name, message: value.message };
    }
    if (value === null) {
      return null;
    }
    switch (typeof value) {
      case "string":
      case "number":
      case "boolean":
        return value;
      case "bigint":
      case "symbol":
      case "function":
        return String(value);
      default:
        return Object.prototype.toString.call(value);
    }
  }
}

/**
 * Produces a human-readable message for any thrown or dumped value.
 *
 * `JSException` prefers its stack, other `Error`s use their message. Plain
 * objects use a non-empty string `message` property or, failing that, their
 * JSON form, instead of collapsing to "[object Object]".
 */
function errorMessage(error: unknown): string {
  if (error instanceof JSException) {
    return error.stack || error.message || String(error);
  }
  if (error instanceof Error) {
    return error.message || String(error);
  }
  if (isRecord(error)) {
    // A rejection value dumped from the guest may be a plain object rather than an Error
    // (for example when user code throws an object literal).
    if (typeof error.message === "string" && error.message) {
      return error.message;
    }
    try {
      // JSON.stringify can return undefined at runtime despite its declared type.
      const encoded: string | undefined = JSON.stringify(error);
      if (encoded) {
        return encoded;
      }
    } catch {
      // Not serializable (e.g. circular); fall through to String().
    }
  }
  return String(error);
}

// Guest-side bootstrap evaluated once in a fresh VM. It installs the globals user code sees
// (ALL_TOOLS, tools, text, json, yield_control) and two host-facing hooks:
// __openclawSettleBridge (resolve/reject a pending bridge promise by id) and
// __openclawTakeOutput (drain the buffered text/json output). The template is runtime
// source text, so nothing inside it may be edited for documentation purposes.
const CONTROLLER_SOURCE = String.raw`
(() => {
  const output = [];
  const pending = new Map();
  const catalog = Array.isArray(globalThis.__openclawCatalog) ? globalThis.__openclawCatalog : [];

  function safe(value) {
    if (value === undefined) return null;
    try {
      return JSON.parse(JSON.stringify(value));
    } catch {
      if (value instanceof Error) {
        return { name: value.name, message: value.message };
      }
      if (value === null) return null;
      const type = typeof value;
      if (type === "string" || type === "number" || type === "boolean") return value;
      return String(value);
    }
  }

  function asText(value) {
    if (typeof value === "string") return value;
    const encoded = JSON.stringify(safe(value));
    return typeof encoded === "string" ? encoded : String(value);
  }

  function request(method, args) {
    const id = String(globalThis.__openclawHostRequest(String(method), JSON.stringify(safe(args ?? []))));
    return new Promise((resolve, reject) => {
      pending.set(id, { resolve, reject });
    });
  }

  function settle(id, ok, payload) {
    const entry = pending.get(String(id));
    if (!entry) return false;
    pending.delete(String(id));
    let parsed = null;
    try {
      parsed = JSON.parse(String(payload));
    } catch {
      parsed = String(payload);
    }
    if (ok) {
      entry.resolve(parsed);
    } else {
      const error = new Error(typeof parsed === "string" ? parsed : parsed?.message ?? "nested tool failed");
      entry.reject(error);
    }
    return true;
  }

  const baseTools = Object.create(null);
  Object.defineProperties(baseTools, {
    search: { value: (query, options) => request("search", [query, options]), enumerable: true },
    describe: { value: (id) => request("describe", [id]), enumerable: true },
    call: { value: (id, input) => request("call", [id, input]), enumerable: true },
  });

  const safeNameCounts = new Map();
  for (const tool of catalog) {
    const name = typeof tool?.name === "string" ? tool.name : "";
    if (!/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name)) continue;
    safeNameCounts.set(name, (safeNameCounts.get(name) ?? 0) + 1);
  }
  for (const tool of catalog) {
    const name = typeof tool?.name === "string" ? tool.name : "";
    const id = typeof tool?.id === "string" ? tool.id : "";
    if (!id || safeNameCounts.get(name) !== 1 || Object.prototype.hasOwnProperty.call(baseTools, name)) {
      continue;
    }
    Object.defineProperty(baseTools, name, {
      value: (input) => request("call", [id, input]),
      enumerable: true,
    });
  }

  Object.defineProperties(globalThis, {
    ALL_TOOLS: { value: Object.freeze(catalog.slice()), enumerable: true },
    tools: { value: Object.freeze(baseTools), enumerable: true },
    text: { value: (value) => output.push({ type: "text", text: asText(value) }), enumerable: true },
    json: { value: (value) => output.push({ type: "json", value: safe(value) }), enumerable: true },
    yield_control: { value: (reason) => request("yield", [reason]), enumerable: true },
    __openclawSettleBridge: { value: settle },
    __openclawTakeOutput: { value: () => output.splice(0) },
  });
})();
`;

/** Wraps user code in an async IIFE whose promise is stored on `globalThis.__openclawResult`. */
function buildUserSource(code: string): string {
  return `globalThis.__openclawResult = (async () => {\n${code}\n})()`;
}

/**
 * Builds the host function behind the guest's `__openclawHostRequest`.
 *
 * Each call records a pending bridge request and returns its generated id to
 * the guest as a string handle.
 *
 * @throws Error when `maxPendingToolCalls` requests are already recorded, or
 *   when the method is not one of the supported bridge methods.
 */
function createHostRequestHandler(params: {
  vm: QuickJS;
  pendingRequests: PendingBridgeRequest[];
  config: CodeModeConfig;
}): (this: JSValueHandle, method: JSValueHandle, argsJson: JSValueHandle) => JSValueHandle {
  return (methodHandle, argsHandle) => {
    if (params.pendingRequests.length >= params.config.maxPendingToolCalls) {
      throw new Error("too many pending code mode tool calls");
    }
    const method = methodHandle.toString();
    if (method !== "search" && method !== "describe" && method !== "call" && method !== "yield") {
      throw new Error("unsupported code mode bridge method");
    }
    // Best effort: malformed or non-array argument JSON is treated as "no arguments".
    let args: unknown = [];
    try {
      args = JSON.parse(argsHandle.toString()) as unknown;
    } catch {
      args = [];
    }
    const id = `bridge:${params.pendingRequests.length + 1}:${randomUUID()}`;
    params.pendingRequests.push({
      id,
      method,
      args: Array.isArray(args) ? args : [],
    });
    return params.vm.newString(id);
  };
}

/**
 * Creates a fresh VM with the catalog and host request function installed on
 * the guest global, then evaluates the controller bootstrap.
 *
 * The VM is disposed before rethrowing if any setup step fails, so a failed
 * setup does not leave an undisposed VM behind.
 */
async function createVm(params: {
  catalog: unknown[];
  config: CodeModeConfig;
  pendingRequests: PendingBridgeRequest[];
}) {
  const startedAt = Date.now();
  const vm = await QuickJS.create({
    memoryLimit: params.config.memoryLimitBytes,
    intrinsics: Intrinsics.ALL,
    timezoneOffset: 0,
    // Reports true once the wall-clock budget for this invocation is exceeded.
    interruptHandler: () => Date.now() - startedAt > params.config.timeoutMs,
  });
  try {
    const catalogHandle = vm.hostToHandle(params.catalog);
    try {
      vm.setProp(vm.global, "__openclawCatalog", catalogHandle);
    } finally {
      catalogHandle.dispose();
    }
    const hostRequest = vm.newFunction(
      "__openclawHostRequest",
      createHostRequestHandler({
        vm,
        pendingRequests: params.pendingRequests,
        config: params.config,
      }),
    );
    try {
      vm.setProp(vm.global, "__openclawHostRequest", hostRequest);
    } finally {
      hostRequest.dispose();
    }
    vm.evalCode(CONTROLLER_SOURCE, "openclaw-code-mode:controller.js").dispose();
    return vm;
  } catch (error) {
    vm.dispose();
    throw error;
  }
}

/**
 * Restores a VM from serialized snapshot bytes and re-registers the host
 * request callback against the new `pendingRequests` list.
 *
 * The timeout clock restarts at restore time. The VM is disposed before
 * rethrowing if callback registration fails.
 */
async function restoreVm(params: {
  snapshotBytes: Uint8Array;
  config: CodeModeConfig;
  pendingRequests: PendingBridgeRequest[];
}) {
  const startedAt = Date.now();
  const snapshot = QuickJS.deserializeSnapshot(params.snapshotBytes);
  const vm = await QuickJS.restore(snapshot, {
    memoryLimit: params.config.memoryLimitBytes,
    intrinsics: Intrinsics.ALL,
    timezoneOffset: 0,
    interruptHandler: () => Date.now() - startedAt > params.config.timeoutMs,
  });
  try {
    vm.registerHostCallback(
      "__openclawHostRequest",
      createHostRequestHandler({
        vm,
        pendingRequests: params.pendingRequests,
        config: params.config,
      }),
    );
    return vm;
  } catch (error) {
    vm.dispose();
    throw error;
  }
}

/** Drains the guest's buffered output via `__openclawTakeOutput`; returns `[]` if the dump is not an array. */
function takeOutput(vm: QuickJS): unknown[] {
  const take = vm.global.getProp("__openclawTakeOutput");
  try {
    const output = vm.callFunction(take, vm.undefined);
    try {
      const dumped = vm.dump(output);
      return Array.isArray(dumped) ? (dumped as unknown[]) : [];
    } finally {
      output.dispose();
    }
  } finally {
    take.dispose();
  }
}

/**
 * Runs pending guest jobs until `executePendingJobs()` reports 0.
 *
 * @throws Error if jobs are still being reported after 1000 rounds.
 */
function drainPendingJobs(vm: QuickJS): void {
  for (let index = 0; index < 1000; index += 1) {
    if (vm.executePendingJobs() === 0) {
      return;
    }
  }
  throw new Error("code mode pending job limit exceeded");
}

/** Reads `globalThis.__openclawResult` from the guest; the caller owns and must dispose the handle. */
function getResultHandle(vm: QuickJS): JSValueHandle {
  return vm.global.getProp("__openclawResult");
}

/**
 * Converts the run's result handle into a JSON-safe host value.
 *
 * Non-promise handles are dumped directly. Promise handles are resolved first;
 * a rejection is rethrown as a host `Error` carrying the rejection's message.
 */
async function readCompletedResult(vm: QuickJS, resultHandle: JSValueHandle): Promise<unknown> {
  if (!resultHandle.isPromise) {
    return toJsonSafe(vm.dump(resultHandle));
  }
  const settled = await vm.resolvePromise(resultHandle);
  if ("error" in settled) {
    try {
      throw new Error(errorMessage(vm.dump(settled.error)));
    } finally {
      settled.error.dispose();
    }
  }
  try {
    return toJsonSafe(vm.dump(settled.value));
  } finally {
    settled.value.dispose();
  }
}

/**
 * Snapshots the VM and builds the "waiting" result handed back to the parent.
 *
 * @throws Error when the serialized snapshot exceeds `maxSnapshotBytes`.
 */
function waitingResult(params: {
  vm: QuickJS;
  pendingRequests: PendingBridgeRequest[];
  output: unknown[];
  config: CodeModeConfig;
}): CodeModeWorkerResult {
  const snapshotBytes = QuickJS.serializeSnapshot(params.vm.snapshot());
  if (snapshotBytes.byteLength > params.config.maxSnapshotBytes) {
    throw new Error("code mode snapshot limit exceeded");
  }
  return {
    status: "waiting",
    snapshotBytes,
    pendingRequests: params.pendingRequests,
    output: params.output,
  };
}

/**
 * Shared tail of an exec or resume pass.
 *
 * Drains queued guest jobs, collects buffered output, then either snapshots the
 * VM (bridge requests were recorded, or the result promise has not settled) or
 * reads the final value. The result handle is always disposed; the VM itself
 * stays owned by the caller.
 */
async function finishRun(params: {
  vm: QuickJS;
  pendingRequests: PendingBridgeRequest[];
  config: CodeModeConfig;
}): Promise<CodeModeWorkerResult> {
  const { vm, pendingRequests, config } = params;
  drainPendingJobs(vm);
  const output = takeOutput(vm);
  const resultHandle = getResultHandle(vm);
  try {
    // NOTE(review): promiseState 0 is treated as "still pending" — confirm against quickjs-wasi.
    if (pendingRequests.length > 0 || (resultHandle.isPromise && resultHandle.promiseState === 0)) {
      return waitingResult({ vm, pendingRequests, output, config });
    }
    return {
      status: "completed",
      value: await readCompletedResult(vm, resultHandle),
      output,
    };
  } finally {
    resultHandle.dispose();
  }
}

/**
 * Runs user source in a fresh VM.
 *
 * @param input exec payload: source text, limits, and the tool catalog exposed to the guest.
 * @returns a "completed" result, or a "waiting" result carrying a snapshot and
 *   the bridge requests the host has to settle before resuming.
 */
async function runExec(
  input: Extract<CodeModeWorkerInput, { kind: "exec" }>,
): Promise<CodeModeWorkerResult> {
  const pendingRequests: PendingBridgeRequest[] = [];
  const vm = await createVm({ catalog: input.catalog, config: input.config, pendingRequests });
  try {
    vm.evalCode(
      buildUserSource(input.source),
      "openclaw-code-mode:user.js",
      EvalFlags.ASYNC,
    ).dispose();
    // `return await` keeps the VM alive until the result has been read.
    return await finishRun({ vm, pendingRequests, config: input.config });
  } finally {
    vm.dispose();
  }
}

/**
 * Restores a suspended run from its snapshot, feeds the settled bridge requests
 * back to the guest through `__openclawSettleBridge`, and continues execution.
 *
 * @param input resume payload: snapshot bytes, limits, and settled bridge requests.
 * @returns the same result shapes as `runExec`.
 */
async function runResume(
  input: Extract<CodeModeWorkerInput, { kind: "resume" }>,
): Promise<CodeModeWorkerResult> {
  const pendingRequests: PendingBridgeRequest[] = [];
  const vm = await restoreVm({
    snapshotBytes: input.snapshotBytes,
    config: input.config,
    pendingRequests,
  });
  try {
    const settle = vm.global.getProp("__openclawSettleBridge");
    try {
      for (const request of input.settledRequests) {
        // toJsonSafe maps a missing value/error to null and tolerates non-JSON values, so the
        // payload is always a valid JSON string. A bare JSON.stringify(undefined) is
        // `undefined`, which the guest would see as the text "undefined".
        const encoded = JSON.stringify(toJsonSafe(request.ok ? request.value : request.error));
        const id = vm.newString(request.id);
        try {
          // Nested so `id` is released even if creating the payload handle throws.
          const payload = vm.newString(encoded);
          try {
            vm.callFunction(
              settle,
              vm.undefined,
              id,
              request.ok ? vm.true : vm.false,
              payload,
            ).dispose();
          } finally {
            payload.dispose();
          }
        } finally {
          id.dispose();
        }
      }
    } finally {
      settle.dispose();
    }
    return await finishRun({ vm, pendingRequests, config: input.config });
  } finally {
    vm.dispose();
  }
}

/**
 * Worker entry point: checks the shape of `workerData`, dispatches to
 * `runExec` or `runResume`, and converts any thrown error into a "failed"
 * result so a result object is always returned to post to the parent.
 *
 * Only the top-level shape is checked; `config` is cast without validating
 * its individual fields.
 */
async function main(): Promise<CodeModeWorkerResult> {
  const invalidInput: CodeModeWorkerResult = {
    status: "failed",
    error: "invalid code mode worker input",
    code: "invalid_input",
    output: [],
  };
  const input = workerData as unknown;
  if (!isRecord(input) || !isRecord(input.config)) {
    return invalidInput;
  }
  try {
    if (input.kind === "exec" && typeof input.source === "string") {
      return await runExec({
        kind: "exec",
        source: input.source,
        config: input.config as CodeModeConfig,
        catalog: Array.isArray(input.catalog) ? input.catalog : [],
      });
    }
    if (input.kind === "resume" && input.snapshotBytes instanceof Uint8Array) {
      return await runResume({
        kind: "resume",
        snapshotBytes: input.snapshotBytes,
        config: input.config as CodeModeConfig,
        settledRequests: Array.isArray(input.settledRequests)
          ? (input.settledRequests as SettledBridgeRequest[])
          : [],
      });
    }
    return invalidInput;
  } catch (error) {
    return {
      status: "failed",
      error: errorMessage(error),
      code: "internal_error",
      output: [],
    };
  }
}

// oxlint-disable-next-line unicorn/require-post-message-target-origin -- Node worker_threads MessagePort, not window.postMessage.
+parentPort?.postMessage(await main()); diff --git a/src/agents/pi-embedded-runner/openai-stream-wrappers.test.ts b/src/agents/pi-embedded-runner/openai-stream-wrappers.test.ts index c9d71516b5c..b3d856e3d5f 100644 --- a/src/agents/pi-embedded-runner/openai-stream-wrappers.test.ts +++ b/src/agents/pi-embedded-runner/openai-stream-wrappers.test.ts @@ -7,6 +7,7 @@ import { createOpenAICompletionsStrictMessageKeysWrapper, createOpenAICompletionsToolsCompatWrapper, createOpenAIThinkingLevelWrapper, + createCodexNativeWebSearchWrapper, } from "./openai-stream-wrappers.js"; function createPayloadCapture(opts?: { initialReasoning?: unknown }) { @@ -96,6 +97,99 @@ describe("createOpenAICompletionsToolsCompatWrapper", () => { }); }); +describe("createCodexNativeWebSearchWrapper", () => { + it("does not inject native web_search when code mode owns the tool surface", () => { + const payloads: Array> = []; + const baseStreamFn: StreamFn = (model, _context, options) => { + const payload: Record = { + model: model.id, + tools: [ + { type: "function", name: "exec" }, + { type: "function", name: "wait" }, + { type: "function", name: "web_search" }, + { type: "web_search" }, + ], + }; + options?.onPayload?.(payload, model); + payloads.push(structuredClone(payload)); + return createAssistantMessageEventStream(); + }; + const wrapped = createCodexNativeWebSearchWrapper(baseStreamFn, { + config: { + tools: { + codeMode: { enabled: true }, + web: { + search: { + enabled: true, + openaiCodex: { enabled: true, mode: "cached" }, + }, + }, + }, + }, + }); + + void wrapped( + { + api: "openai-codex-responses", + provider: "gateway", + id: "gpt-5.5", + } as Model<"openai-codex-responses">, + { + messages: [], + tools: [ + { name: "exec", description: "", parameters: {} }, + { name: "wait", description: "", parameters: {} }, + ], + }, + { + onPayload: (payload) => { + const payloadObj = payload as { tools?: unknown } | undefined; + if (payloadObj && Array.isArray(payloadObj.tools)) { + 
payloadObj.tools.push({ type: "function", name: "web_search" }); + } + }, + }, + ); + + expect(payloads[0]?.tools).toEqual([ + { type: "function", name: "exec" }, + { type: "function", name: "wait" }, + ]); + }); + + it("does not enable code-mode transport enforcement when config is on but controls are inactive", () => { + const observedOptions: Array> = []; + const payloads: Array> = []; + const baseStreamFn: StreamFn = (model, _context, options) => { + observedOptions.push(options as Record); + const payload: Record = { model: model.id }; + options?.onPayload?.(payload, model); + payloads.push(structuredClone(payload)); + return createAssistantMessageEventStream(); + }; + const wrapped = createCodexNativeWebSearchWrapper(baseStreamFn, { + config: { + tools: { + codeMode: { enabled: true }, + }, + }, + }); + + void wrapped( + { + api: "openai-codex-responses", + provider: "gateway", + id: "gpt-5.5", + } as Model<"openai-codex-responses">, + { messages: [] }, + {}, + ); + + expect(observedOptions[0]?.openclawCodeModeToolSurface).toBeUndefined(); + expect(payloads[0]).toEqual({ model: "gpt-5.5" }); + }); +}); + describe("createOpenAICompletionsStrictMessageKeysWrapper", () => { it("strips message keys to role and content for strict OpenAI-compatible endpoints", () => { const payloads: Array> = []; diff --git a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts index d08ae3849f8..7fb4f40872d 100644 --- a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts @@ -8,6 +8,7 @@ import { patchCodexNativeWebSearchPayload, resolveCodexNativeSearchActivation, } from "../codex-native-web-search-core.js"; +import { emitModelTransportDebug } from "../model-transport-debug.js"; import { flattenCompletionMessagesToStringContent, stripCompletionMessagesToRoleContent, @@ -25,6 +26,9 @@ import { mapThinkingLevelToReasoningEffort } from 
"./reasoning-effort-utils.js"; import { streamWithPayloadPatch } from "./stream-payload-utils.js"; type OpenAIServiceTier = "auto" | "default" | "flex" | "priority"; +type OpenClawSimpleStreamOptions = SimpleStreamOptions & { + openclawCodeModeToolSurface?: boolean; +}; export { resolveOpenAITextVerbosity }; function resolveOpenAITextVerbosityForModel( @@ -79,6 +83,55 @@ function shouldApplyOpenAIServiceTier(model: { return resolveOpenAIResponsesPayloadPolicy(model, { storeMode: "disable" }).allowsServiceTier; } +function isCodeModeEnabled(config?: OpenClawConfig): boolean { + const tools = config?.tools; + if (!tools || typeof tools !== "object") { + return false; + } + const codeMode = (tools as { codeMode?: unknown }).codeMode; + if (codeMode === true) { + return true; + } + return Boolean( + codeMode && + typeof codeMode === "object" && + (codeMode as { enabled?: unknown }).enabled === true, + ); +} + +function readPayloadToolName(tool: unknown): string | undefined { + if (!tool || typeof tool !== "object") { + return undefined; + } + const record = tool as { name?: unknown; function?: { name?: unknown } }; + if (typeof record.name === "string") { + return record.name; + } + return typeof record.function?.name === "string" ? 
record.function.name : undefined; +} + +function filterCodeModePayloadTools(payload: unknown): void { + if (!payload || typeof payload !== "object") { + return; + } + const record = payload as { tools?: unknown }; + if (!Array.isArray(record.tools)) { + return; + } + record.tools = record.tools.filter((tool) => { + const name = readPayloadToolName(tool); + return name === "exec" || name === "wait"; + }); +} + +function hasCodeModeVisibleTools(context: { tools?: unknown }): boolean { + if (!Array.isArray(context.tools)) { + return false; + } + const names = new Set(context.tools.map(readPayloadToolName).filter(Boolean)); + return names.has("exec") && names.has("wait"); +} + function shouldApplyOpenAIReasoningCompatibility(model: { api?: unknown; provider?: unknown; @@ -500,6 +553,31 @@ export function createCodexNativeWebSearchWrapper( ): StreamFn { const underlying = baseStreamFn ?? streamSimple; return (model, context, options) => { + if (isCodeModeEnabled(params.config) && hasCodeModeVisibleTools(context)) { + emitModelTransportDebug( + log, + `skipping Codex native web search because code mode owns the model tool surface for ${ + model.provider ?? "unknown" + }/${model.id ?? 
"unknown"}`, + ); + const originalOnPayload = options?.onPayload; + const codeModeOptions: OpenClawSimpleStreamOptions = { + ...options, + openclawCodeModeToolSurface: true, + onPayload: (payload) => { + filterCodeModePayloadTools(payload); + const nextPayload = originalOnPayload?.(payload, model); + if (nextPayload !== undefined) { + filterCodeModePayloadTools(nextPayload); + return nextPayload; + } + filterCodeModePayloadTools(payload); + return undefined; + }, + }; + return underlying(model, context, codeModeOptions); + } + const activation = resolveCodexNativeSearchActivation({ config: params.config, modelProvider: readStringValue(model.provider), diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index f85710ae46d..28ac94b38f5 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -238,6 +238,23 @@ describe("buildToolSearchRunPlan", () => { expect(plan.emptyAllowlistCallableNames).toEqual(["tool-search-client:client_pick_file"]); }); + it("keeps code-mode control tools in replay-safe names", () => { + const plan = buildToolSearchRunPlan({ + visibleTools: [{ name: "exec" }, { name: "wait" }] as never, + uncompactedTools: [{ name: "fake_plugin_tool" }] as never, + clientTools: [], + catalogRegistered: true, + catalogToolCount: 1, + controlsEnabled: true, + controlNames: ["exec", "wait"], + explicitAllowlistSources: [{ entries: ["missing_tool"] }], + }); + + expect([...plan.visibleAllowedToolNames]).toEqual(["exec", "wait"]); + expect([...plan.replayAllowedToolNames]).toEqual(["fake_plugin_tool", "exec", "wait"]); + expect(plan.emptyAllowlistCallableNames).toEqual(["tool-search:0"]); + }); + it("does not let unrelated client tools mask a bad explicit allowlist", () => { const plan = buildToolSearchRunPlan({ visibleTools: [{ name: "tool_search_code" }] as never, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts 
b/src/agents/pi-embedded-runner/run/attempt.ts index 5f8d495709c..b4495688039 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -80,6 +80,14 @@ import { resolveChannelMessageToolHints, resolveChannelReactionGuidance, } from "../../channel-tools.js"; +import { + addClientToolsToCodeModeCatalog, + applyCodeModeCatalog, + CODE_MODE_EXEC_TOOL_NAME, + CODE_MODE_WAIT_TOOL_NAME, + createCodeModeTools, + resolveCodeModeConfig, +} from "../../code-mode.js"; import { DEFAULT_CONTEXT_TOKENS } from "../../defaults.js"; import { resolveOpenClawReferencePaths } from "../../docs-path.js"; import { isTimeoutError } from "../../failover-error.js"; @@ -478,6 +486,7 @@ export function buildToolSearchRunPlan(params: { catalogRegistered: boolean; catalogToolCount: number; controlsEnabled: boolean; + controlNames?: readonly string[]; explicitAllowlistSources: Array<{ entries: string[] }>; }): ToolSearchRunPlan { const visibleAllowedToolNames = collectAllowedToolNames({ @@ -488,9 +497,17 @@ export function buildToolSearchRunPlan(params: { tools: params.uncompactedTools, clientTools: params.clientTools, }); + if (params.controlsEnabled) { + for (const controlName of params.controlNames ?? TOOL_SEARCH_CONTROL_ALLOWLIST_NAMES) { + if (visibleAllowedToolNames.has(controlName)) { + replayAllowedToolNames.add(controlName); + } + } + } const autoAddedControlNames = buildAutoAddedToolSearchControlNamesForAllowlistCheck({ toolSearchControlsEnabled: params.controlsEnabled, explicitAllowlistSources: params.explicitAllowlistSources, + controlNames: params.controlNames, }); const clientCatalogCallableNames = params.catalogRegistered ? 
collectExplicitlyAllowedClientToolNames({ @@ -978,22 +995,33 @@ export async function runEmbeddedAttempt( toolsAllow: params.toolsAllow, }); const toolsEnabled = supportsModelTools(params.model); + const codeModeConfig = resolveCodeModeConfig(params.config); + const codeModeControlsEnabledForRun = + toolsEnabled && + params.disableTools !== true && + !isRawModelRun && + params.toolsAllow?.length !== 0 && + codeModeConfig.enabled; const toolSearchControlsEnabledForRun = toolsEnabled && params.disableTools !== true && !isRawModelRun && params.toolsAllow?.length !== 0 && + !codeModeControlsEnabledForRun && resolveToolSearchConfig(params.config).enabled; const effectiveToolsAllow = toolSearchControlsEnabledForRun && params.toolsAllow ? [...new Set([...params.toolsAllow, ...TOOL_SEARCH_CONTROL_ALLOWLIST_NAMES])] : params.toolsAllow; const shouldConstructTools = - toolConstructionPlan.constructTools || toolSearchControlsEnabledForRun; + toolConstructionPlan.constructTools || + toolSearchControlsEnabledForRun || + codeModeControlsEnabledForRun; let toolSearchCatalogExecutor: ToolSearchCatalogToolExecutor | undefined; - const toolSearchCatalogRef: ToolSearchCatalogRef | undefined = toolSearchControlsEnabledForRun - ? createToolSearchCatalogRef() - : undefined; + const toolSearchCatalogRef: ToolSearchCatalogRef | undefined = + toolSearchControlsEnabledForRun || codeModeControlsEnabledForRun + ? createToolSearchCatalogRef() + : undefined; const toolSearchTargetTranscriptProjections: ToolSearchTargetTranscriptProjection[] = []; const toolsRaw = !shouldConstructTools ? 
[] @@ -1313,35 +1341,67 @@ export async function runEmbeddedAttempt( }); const uncompactedEffectiveTools = [...tools, ...filteredBundledTools]; let effectiveTools = uncompactedEffectiveTools; - const toolSearch = applyToolSearchCatalog({ - tools: effectiveTools, - config: params.config, - sessionId: params.sessionId, - sessionKey: sandboxSessionKey, + const catalogToolHookContext = { agentId: sessionAgentId, + config: params.config, + cwd: effectiveWorkspace, + sessionKey: sandboxSessionKey, + sessionId: params.sessionId, runId: params.runId, - catalogRef: toolSearchCatalogRef, - toolHookContext: { + channelId: params.currentChannelId, + trace: runTrace, + loopDetection: resolveToolLoopDetectionConfig({ + cfg: params.config, agentId: sessionAgentId, - config: params.config, - cwd: effectiveWorkspace, - sessionKey: sandboxSessionKey, - sessionId: params.sessionId, - runId: params.runId, - channelId: params.currentChannelId, - trace: runTrace, - loopDetection: resolveToolLoopDetectionConfig({ - cfg: params.config, + }), + onToolOutcome: params.onToolOutcome, + }; + const codeModeTools = codeModeControlsEnabledForRun + ? createCodeModeTools({ + config: params.config, + runtimeConfig: params.config, agentId: sessionAgentId, - }), - onToolOutcome: params.onToolOutcome, - }, - }); + sessionKey: sandboxSessionKey, + sessionId: params.sessionId, + runId: params.runId, + catalogRef: toolSearchCatalogRef, + abortSignal: runAbortController.signal, + executeTool: (toolParams) => { + if (!toolSearchCatalogExecutor) { + throw new Error("Code Mode catalog executor is unavailable for this run."); + } + return toolSearchCatalogExecutor(toolParams); + }, + }) + : []; + const toolSearch = codeModeControlsEnabledForRun + ? 
applyCodeModeCatalog({ + tools: [...codeModeTools, ...effectiveTools], + config: params.config, + sessionId: params.sessionId, + sessionKey: sandboxSessionKey, + agentId: sessionAgentId, + runId: params.runId, + catalogRef: toolSearchCatalogRef, + toolHookContext: catalogToolHookContext, + }) + : applyToolSearchCatalog({ + tools: effectiveTools, + config: params.config, + sessionId: params.sessionId, + sessionKey: sandboxSessionKey, + agentId: sessionAgentId, + runId: params.runId, + catalogRef: toolSearchCatalogRef, + toolHookContext: catalogToolHookContext, + }); effectiveTools = toolSearch.tools; if (toolSearch.compacted) { - prepStages.mark("tool-search"); + prepStages.mark(codeModeControlsEnabledForRun ? "code-mode" : "tool-search"); log.info( - `tool-search: cataloged ${toolSearch.catalogToolCount} tools behind compact prompt surface`, + codeModeControlsEnabledForRun + ? `code-mode: cataloged ${toolSearch.catalogToolCount} tools behind exec/wait` + : `tool-search: cataloged ${toolSearch.catalogToolCount} tools behind compact prompt surface`, ); } prepStages.mark("bundle-tools"); @@ -1371,7 +1431,10 @@ export async function runEmbeddedAttempt( clientTools, catalogRegistered: toolSearch.catalogRegistered, catalogToolCount: toolSearch.catalogToolCount, - controlsEnabled: toolSearchControlsEnabledForRun, + controlsEnabled: toolSearchControlsEnabledForRun || codeModeControlsEnabledForRun, + controlNames: codeModeControlsEnabledForRun + ? 
[CODE_MODE_EXEC_TOOL_NAME, CODE_MODE_WAIT_TOOL_NAME] + : undefined, explicitAllowlistSources: explicitToolAllowlistSources, }); const allowedToolNames = toolSearchRunPlan.visibleAllowedToolNames; @@ -1872,19 +1935,31 @@ export async function runEmbeddedAttempt( }, ) : []; - const clientToolSearch = addClientToolsToToolSearchCatalog({ - tools: clientToolDefs, - config: params.config, - sessionId: params.sessionId, - sessionKey: sandboxSessionKey, - agentId: sessionAgentId, - runId: params.runId, - catalogRef: toolSearchCatalogRef, - }); + const clientToolSearch = codeModeControlsEnabledForRun + ? addClientToolsToCodeModeCatalog({ + tools: clientToolDefs, + config: params.config, + sessionId: params.sessionId, + sessionKey: sandboxSessionKey, + agentId: sessionAgentId, + runId: params.runId, + catalogRef: toolSearchCatalogRef, + }) + : addClientToolsToToolSearchCatalog({ + tools: clientToolDefs, + config: params.config, + sessionId: params.sessionId, + sessionKey: sandboxSessionKey, + agentId: sessionAgentId, + runId: params.runId, + catalogRef: toolSearchCatalogRef, + }); clientToolDefs = clientToolSearch.tools; if (clientToolSearch.compacted) { log.info( - `tool-search: cataloged ${clientToolSearch.catalogToolCount} client tools behind compact prompt surface`, + codeModeControlsEnabledForRun + ? 
`code-mode: cataloged ${clientToolSearch.catalogToolCount} client tools behind exec/wait` + : `tool-search: cataloged ${clientToolSearch.catalogToolCount} client tools behind compact prompt surface`, ); } diff --git a/src/agents/tool-search.ts b/src/agents/tool-search.ts index 2bb64c13bb6..39c1b0ccc8e 100644 --- a/src/agents/tool-search.ts +++ b/src/agents/tool-search.ts @@ -991,7 +991,7 @@ function sanitizeToolCallIdPart(value: string): string { return safe || "call"; } -class ToolSearchRuntime { +export class ToolSearchRuntime { private callSequence = 0; constructor( @@ -1012,6 +1012,11 @@ class ToolSearchRuntime { .map((hit) => compactEntry(hit.entry)); }; + all = () => { + const catalog = resolveCatalog(this.ctx); + return catalog.entries.map((entry) => compactEntry(entry)); + }; + describe = async (id: string) => { const catalog = resolveCatalog(this.ctx); catalog.describeCount += 1; @@ -1062,6 +1067,97 @@ class ToolSearchRuntime { } } +export function applyToolCatalogCompaction(params: { + tools: AnyAgentTool[]; + enabled: boolean; + sessionId?: string; + sessionKey?: string; + agentId?: string; + runId?: string; + catalogRef?: ToolSearchCatalogRef; + toolHookContext?: HookContext; + isVisibleControlTool: (tool: AnyAgentTool) => boolean; + shouldCatalogTool?: (tool: AnyAgentTool) => boolean; +}): { + tools: AnyAgentTool[]; + compacted: boolean; + catalogToolCount: number; + catalogRegistered: boolean; +} { + if (!params.enabled) { + return { tools: params.tools, compacted: false, catalogToolCount: 0, catalogRegistered: false }; + } + const hasControlTool = params.tools.some((tool) => params.isVisibleControlTool(tool)); + const key = sessionCatalogKey(params); + if (!hasControlTool || (!key && !params.catalogRef)) { + return { + tools: params.tools.filter((tool) => !TOOL_SEARCH_CONTROL_TOOL_NAMES.has(tool.name)), + compacted: false, + catalogToolCount: 0, + catalogRegistered: false, + }; + } + + const visible: AnyAgentTool[] = []; + const catalog: 
ToolSearchCatalogEntry[] = []; + const shouldCatalog = params.shouldCatalogTool ?? shouldCatalogTool; + for (const tool of params.tools) { + if (params.isVisibleControlTool(tool)) { + visible.push(tool); + continue; + } + if (shouldCatalog(tool)) { + catalog.push(toCatalogEntry(tool, undefined, params.toolHookContext)); + continue; + } + visible.push(tool); + } + registerToolSearchCatalog({ + sessionId: params.sessionId, + sessionKey: params.sessionKey, + agentId: params.agentId, + runId: params.runId, + catalogRef: params.catalogRef, + entries: catalog, + append: false, + }); + return { + tools: visible, + compacted: catalog.length > 0, + catalogToolCount: catalog.length, + catalogRegistered: true, + }; +} + +export function addClientToolsToToolCatalog(params: { + tools: ToolDefinition[]; + enabled: boolean; + sessionId?: string; + sessionKey?: string; + agentId?: string; + runId?: string; + catalogRef?: ToolSearchCatalogRef; +}): { tools: ToolDefinition[]; compacted: boolean; catalogToolCount: number } { + const key = sessionCatalogKey(params); + if (!params.enabled || (!key && !params.catalogRef)) { + return { tools: params.tools, compacted: false, catalogToolCount: 0 }; + } + const existing = params.catalogRef?.current ?? (key ? 
sessionCatalogs.get(key) : undefined); + if (!existing) { + return { tools: params.tools, compacted: false, catalogToolCount: 0 }; + } + registerToolSearchCatalog({ + sessionId: params.sessionId, + sessionKey: params.sessionKey, + agentId: params.agentId, + runId: params.runId, + catalogRef: params.catalogRef, + entries: params.tools.map((tool) => toCatalogEntry(tool, "client")), + append: true, + }); + return { tools: [], compacted: params.tools.length > 0, catalogToolCount: params.tools.length }; +} + function toJsonSafe(value: unknown): unknown { if (value === undefined) { return null; diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 02192ea9b76..bc15238863f 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -423,6 +423,28 @@ export const FIELD_HELP: Record = { "Default number of Tool Search results returned when the model omits a limit. Runtime clamps this to `maxSearchLimit`.", "tools.toolSearch.maxSearchLimit": "Maximum number of Tool Search results a model can request. Runtime clamps values to the supported 1..50 range.", + "tools.codeMode": + "Generic OpenClaw code mode. When enabled, agent runs expose only `exec` and `wait` to the model and hide normal tools behind a QuickJS-WASI catalog bridge.", + "tools.codeMode.enabled": + "Enables generic code mode. Default is off. When explicitly enabled, OpenClaw fails closed if the runtime is unavailable instead of exposing the full tool list.", + "tools.codeMode.runtime": 'Guest JavaScript runtime. Only "quickjs-wasi" is supported.', + "tools.codeMode.mode": + 'Model-facing surface. Only "only" is supported: expose code-mode `exec` and `wait` and hide normal tools.', + "tools.codeMode.languages": + 'Accepted source languages for `exec`. 
Supported values are "javascript" and "typescript".', + "tools.codeMode.timeoutMs": "Maximum milliseconds for one code-mode `exec` or `wait` call.", + "tools.codeMode.memoryLimitBytes": "QuickJS heap limit for one code-mode VM.", + "tools.codeMode.maxOutputBytes": "Maximum serialized bytes returned through code-mode output.", + "tools.codeMode.maxSnapshotBytes": + "Maximum serialized bytes retained for one suspended QuickJS snapshot.", + "tools.codeMode.maxPendingToolCalls": + "Maximum concurrent nested tool calls a code-mode VM can start before it must resume later.", + "tools.codeMode.snapshotTtlSeconds": + "How long suspended code-mode snapshots can be resumed with `wait` before they expire.", + "tools.codeMode.searchDefaultLimit": + "Default number of hidden catalog search results returned by `tools.search` inside code mode.", + "tools.codeMode.maxSearchLimit": + "Maximum number of hidden catalog search results a code-mode program can request.", "tools.elevated": "Elevated tool access controls for privileged command surfaces that should only be reachable from trusted senders. 
Keep disabled unless operator workflows explicitly require elevated actions.", "tools.elevated.enabled": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 88c9dab5f8f..58638cb6841 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -249,6 +249,19 @@ export const FIELD_LABELS: Record = { "tools.toolSearch.codeTimeoutMs": "Tool Search Code Timeout", "tools.toolSearch.searchDefaultLimit": "Tool Search Default Results", "tools.toolSearch.maxSearchLimit": "Tool Search Max Results", + "tools.codeMode": "Code Mode", + "tools.codeMode.enabled": "Enable Code Mode", + "tools.codeMode.runtime": "Code Mode Runtime", + "tools.codeMode.mode": "Code Mode Surface", + "tools.codeMode.languages": "Code Mode Languages", + "tools.codeMode.timeoutMs": "Code Mode Timeout", + "tools.codeMode.memoryLimitBytes": "Code Mode Memory Limit", + "tools.codeMode.maxOutputBytes": "Code Mode Output Limit", + "tools.codeMode.maxSnapshotBytes": "Code Mode Snapshot Limit", + "tools.codeMode.maxPendingToolCalls": "Code Mode Pending Tool Limit", + "tools.codeMode.snapshotTtlSeconds": "Code Mode Snapshot TTL", + "tools.codeMode.searchDefaultLimit": "Code Mode Default Search Results", + "tools.codeMode.maxSearchLimit": "Code Mode Max Search Results", "tools.elevated": "Elevated Tool Access", "tools.elevated.enabled": "Enable Elevated Tool Access", "tools.elevated.allowFrom": "Elevated Tool Allow Rules", diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index 16f319aca38..b4bdf0ad612 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -396,6 +396,49 @@ describe("config schema", () => { ).toBe(false); }); + it("accepts Code Mode config in the runtime zod schema", () => { + expect(ToolsSchema.parse({ codeMode: true })?.codeMode).toBe(true); + expect( + ToolsSchema.parse({ + codeMode: { + enabled: true, + runtime: "quickjs-wasi", + mode: "only", + languages: ["javascript", "typescript"], + timeoutMs: 5000, + 
memoryLimitBytes: 67_108_864, + maxOutputBytes: 65_536, + maxSnapshotBytes: 10_485_760, + maxPendingToolCalls: 8, + snapshotTtlSeconds: 900, + searchDefaultLimit: 4, + maxSearchLimit: 12, + }, + })?.codeMode, + ).toEqual({ + enabled: true, + runtime: "quickjs-wasi", + mode: "only", + languages: ["javascript", "typescript"], + timeoutMs: 5000, + memoryLimitBytes: 67_108_864, + maxOutputBytes: 65_536, + maxSnapshotBytes: 10_485_760, + maxPendingToolCalls: 8, + snapshotTtlSeconds: 900, + searchDefaultLimit: 4, + maxSearchLimit: 12, + }); + expect( + ToolsSchema.safeParse({ + codeMode: { + enabled: true, + runtime: "node", + }, + }).success, + ).toBe(false); + }); + it("accepts web fetch maxResponseBytes in the runtime zod schema", () => { const parsed = ToolsSchema.parse({ web: { diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 8003b6e7501..1f89f6fda9b 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -205,6 +205,35 @@ export type ToolSearchConfig = maxSearchLimit?: number; }; +export type CodeModeConfig = + | boolean + | { + /** Enable generic OpenClaw code mode. Default: false. */ + enabled?: boolean; + /** Guest runtime. Only quickjs-wasi is supported. */ + runtime?: "quickjs-wasi"; + /** Model-facing mode. Only "only" is supported: expose exec/wait and hide normal tools. */ + mode?: "only"; + /** Accepted source languages. */ + languages?: Array<"javascript" | "typescript">; + /** Wall-clock limit in milliseconds for one exec or wait call. */ + timeoutMs?: number; + /** QuickJS heap limit in bytes. */ + memoryLimitBytes?: number; + /** Maximum serialized output bytes. */ + maxOutputBytes?: number; + /** Maximum serialized snapshot bytes. */ + maxSnapshotBytes?: number; + /** Maximum concurrent nested tool calls. */ + maxPendingToolCalls?: number; + /** Retention for suspended snapshots. */ + snapshotTtlSeconds?: number; + /** Default search result count for tools.search. 
*/ + searchDefaultLimit?: number; + /** Maximum search result count for tools.search. */ + maxSearchLimit?: number; + }; + export type SessionsToolsVisibility = "self" | "tree" | "agent" | "all"; export type ToolPolicyConfig = { @@ -657,6 +686,8 @@ export type ToolsConfig = { loopDetection?: ToolLoopDetectionConfig; /** Compact large OpenClaw, MCP, and client tool catalogs behind search/call tools. */ toolSearch?: ToolSearchConfig; + /** Generic code mode: expose exec/wait and hide normal tools behind a QuickJS catalog bridge. */ + codeMode?: CodeModeConfig; /** Sub-agent tool policy defaults (deny wins). */ subagents?: { /** Default model selection for spawned sub-agents (string or {primary,fallbacks}). */ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index b2e73df4698..1103d1f810e 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -587,6 +587,28 @@ const ToolSearchSchema = z ]) .optional(); +const CodeModeSchema = z + .union([ + z.boolean(), + z + .object({ + enabled: z.boolean().optional(), + runtime: z.literal("quickjs-wasi").optional(), + mode: z.literal("only").optional(), + languages: z.array(z.enum(["javascript", "typescript"])).optional(), + timeoutMs: z.number().int().positive().optional(), + memoryLimitBytes: z.number().int().positive().optional(), + maxOutputBytes: z.number().int().positive().optional(), + maxSnapshotBytes: z.number().int().positive().optional(), + maxPendingToolCalls: z.number().int().positive().optional(), + snapshotTtlSeconds: z.number().int().positive().optional(), + searchDefaultLimit: z.number().int().positive().optional(), + maxSearchLimit: z.number().int().positive().optional(), + }) + .strict(), + ]) + .optional(); + const SandboxSshSchema = z .object({ target: z.string().min(1).optional(), @@ -997,6 +1019,7 @@ export const ToolsSchema = z .optional(), loopDetection: ToolLoopDetectionSchema, toolSearch: ToolSearchSchema, + codeMode: 
CodeModeSchema, message: MessageToolConfigSchema, agentToAgent: z .object({ diff --git a/tsdown.config.ts b/tsdown.config.ts index 7bab52afacd..51a42c48979 100644 --- a/tsdown.config.ts +++ b/tsdown.config.ts @@ -216,6 +216,7 @@ function buildCoreDistEntries(): Record { "agents/auth-profiles.runtime": "src/agents/auth-profiles.runtime.ts", "agents/model-catalog.runtime": "src/agents/model-catalog.runtime.ts", "agents/models-config.runtime": "src/agents/models-config.runtime.ts", + "agents/code-mode.worker": "src/agents/code-mode.worker.ts", "acp/control-plane/manager": "src/acp/control-plane/manager.ts", "cli/gateway-lifecycle.runtime": "src/cli/gateway-cli/lifecycle.runtime.ts", "provider-dispatcher.runtime": "src/auto-reply/reply/provider-dispatcher.runtime.ts",