From 0db09793656809bc1c17c11946ded9da99b29acd Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 14 May 2026 23:13:37 +0100 Subject: [PATCH] fix: harden code mode runtime --- docs/.generated/config-baseline.sha256 | 4 +- package.json | 2 +- pnpm-lock.yaml | 7 +- src/agents/code-mode.test.ts | 113 ++++++++++++++++++++++++- src/agents/code-mode.ts | 103 +++++++++++++++++++--- src/agents/code-mode.worker.ts | 59 +++++++++---- test/scripts/lint-suppressions.test.ts | 1 + 7 files changed, 253 insertions(+), 36 deletions(-) diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index ed4c385d28e..76f40bced2e 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -41fe35b1d577bdc2495842effde4158657b7ebfbfe5bd20aed4e507f52a6ac20 config-baseline.json -d2307659e5496b97096b8e5af6b6f8fefeee861f7d6a3b29d250887be5c937cf config-baseline.core.json +932d0df0d277aded125d4843a55f3acc2b90d39a5867d09d76bdf1a59d469e5f config-baseline.json +fe5e2eecee8c354eac3e10b801c27c20a16f9432377a19fcb2849221e62c62bf config-baseline.core.json 2aa997d48549bd321a478485126a4bd5065ba47333a80e7eb07a0ef6ad75b0a6 config-baseline.channel.json 1ab5b65a94d84f59bae5e6bbe310057fae0a645f2538ab00f1f37b7f8b371e6f config-baseline.plugin.json diff --git a/package.json b/package.json index 0bef158ff5b..ee05493f80b 100644 --- a/package.json +++ b/package.json @@ -1786,7 +1786,7 @@ "pdfjs-dist": "5.7.284", "playwright-core": "1.60.0", "qrcode": "1.5.4", - "quickjs-wasi": "^2.2.0", + "quickjs-wasi": "2.2.0", "tar": "7.5.15", "tokenjuice": "0.7.0", "tree-sitter-bash": "0.25.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 076f7585a6e..7cc9d35c371 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -156,7 +156,7 @@ importers: specifier: 1.5.4 version: 1.5.4 quickjs-wasi: - specifier: ^2.2.0 + specifier: 2.2.0 version: 2.2.0 tar: specifier: 7.5.15 @@ -6898,6 +6898,9 @@ packages: quick-format-unescaped@4.0.4: resolution: {integrity: sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==} + quickjs-wasi@2.2.0: + resolution: {integrity: sha512-zQxXmQMrEoD3S+jQdYsloq4qAuaxKFHZj6hHqOYGwB2iQZH+q9e/lf5zQPXCKOk0WJuAjzRFbO4KwHIp2D05Iw==} + range-parser@1.2.1: resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==} engines: {node: '>= 0.6'} @@ -13913,6 +13916,8 @@ snapshots: quick-format-unescaped@4.0.4: {} + quickjs-wasi@2.2.0: {} + range-parser@1.2.1: {} raw-body@3.0.2: diff --git a/src/agents/code-mode.test.ts b/src/agents/code-mode.test.ts index fbdd9b7a00f..03d075d4fab 100644 --- a/src/agents/code-mode.test.ts +++ b/src/agents/code-mode.test.ts @@ -95,6 +95,7 @@ async function runUntilCompleted(params: { describe("Code Mode", () => { afterEach(() => { __testing.activeRuns.clear(); + __testing.resumingRunIds.clear(); }); it("resolves object config defaults", () => { @@ -299,6 +300,60 @@ describe("Code Mode", () => { ).rejects.toThrow("different session"); }); + it("rejects concurrent waits for the same suspended run", async () => { + const catalogRef = createToolSearchCatalogRef(); + const config = { + tools: { + codeMode: { + enabled: true, + timeoutMs: 100, + }, + }, + } as never; + const ctx = { + config, + runtimeConfig: config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }; + const codeModeTools = createCodeModeTools(ctx); + applyCodeModeCatalog({ + tools: [ + ...codeModeTools, + pluginToolWithExecute( + "fake_slow", + "Slow helper", + async () => await new Promise(() => undefined), + ), + ], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const first = resultDetails( + await codeModeTools[0].execute("code-call-concurrent-wait", { + code: "await tools.fake_slow({}); return 'done';", + }), + ); + expect(first.status).toBe("waiting"); + + const firstWait = codeModeTools[1].execute("code-wait-concurrent-a", { + runId: first.runId, + }); + await expect( + codeModeTools[1].execute("code-wait-concurrent-b", { runId: first.runId }), + ).rejects.toThrow("already being resumed"); + const stillWaiting = resultDetails(await firstWait); + + expect(stillWaiting.status).toBe("waiting"); + expect(stillWaiting.runId).toBe(first.runId); + }); + it("reports only unsettled pending tool calls when wait times out", async () => { const catalogRef = createToolSearchCatalogRef(); const config = { @@ -379,6 +434,54 @@ describe("Code Mode", () => { expect(__testing.getTypescriptRuntimePromise()).toBeNull(); }); + it("allows identifiers and strings that contain import without module access", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: ` + const important = 41; + const message = "import docs later"; + return important + (message.includes("import") ? 1 : 0); + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toBe(42); + }); + + it("fails pending promises that have no host bridge work", async () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const beforeRunCount = __testing.activeRuns.size; + const details = resultDetails( + await codeModeTools[0].execute("code-call-empty-wait", { + code: "await new Promise(() => undefined); return 'never';", + }), + ); + + expect(details.status).toBe("failed"); + expect(String(details.error)).toContain("pending without host work"); + expect(__testing.activeRuns.size).toBe(beforeRunCount); + }); + it("clamps omitted code-mode catalog search limits to maxSearchLimit", async () => { const catalogRef = createToolSearchCatalogRef(); const config = { @@ -449,7 +552,12 @@ describe("Code Mode", () => { expect(details.value).toEqual({ value: 42 }); }); - it("rejects module access", async () => { + it.each([ + "const fs = require('node:fs'); return fs;", + "return import('node:fs');", + "return import.meta.url;", + "return `${import('node:fs')}`;", + ])("rejects module access: %s", async (code) => { const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); applyCodeModeCatalog({ tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], @@ -462,7 +570,7 @@ describe("Code Mode", () => { const details = resultDetails( await codeModeTools[0].execute("code-call-import", { - code: "const fs = require('node:fs'); return fs;", + code, }), ); @@ -585,5 +693,6 @@ describe("Code Mode", () => { await expect(heartbeat).resolves.toBe("main-event-loop-alive"); expect(details.status).toBe("failed"); + expect(String(details.error)).toContain("timeout exceeded"); }); }); diff --git a/src/agents/code-mode.ts b/src/agents/code-mode.ts index 610e6f2530d..f92459c4100 100644 --- a/src/agents/code-mode.ts +++ b/src/agents/code-mode.ts @@ -40,6 +40,7 @@ const DEFAULT_MAX_PENDING_TOOL_CALLS = 16; const DEFAULT_SNAPSHOT_TTL_SECONDS = 900; const DEFAULT_SEARCH_LIMIT = 8; const DEFAULT_MAX_SEARCH_LIMIT = 50; +const MAX_ACTIVE_CODE_MODE_RUNS = 64; type CodeModeLanguage = "javascript" | "typescript"; @@ -113,6 +114,7 @@ type CodeModeWorkerResult = }; const activeRuns = new Map(); +const resumingRunIds = new Set(); let typescriptRuntimePromise: Promise | null = null; function isRecord(value: unknown): value is Record { @@ -223,10 +225,18 @@ function removeExpiredRuns(now = Date.now()): void { for (const [runId, state] of activeRuns) { if (state.expiresAt <= now) { activeRuns.delete(runId); + resumingRunIds.delete(runId); } } } +function enforceActiveRunLimit(): void { + removeExpiredRuns(); + if (activeRuns.size >= MAX_ACTIVE_CODE_MODE_RUNS) { + throw new ToolInputError("too many suspended code mode runs."); + } +} + function toJsonSafe(value: unknown): unknown { if (value === undefined) { return null; @@ -298,8 +308,65 @@ function readRunId(args: unknown): string { return runId.trim(); } +function maskCodeLiteralsAndComments(code: string): string { + let masked = ""; + let index = 0; + while (index < code.length) { + const char = code[index]; + const next = code[index + 1]; + if (char === "/" && next === "/") { + masked += " "; + index += 2; + while (index < code.length && code[index] !== "\n") { + masked += " "; + index += 1; + } + continue; + } + if (char === "/" && next === "*") { + masked += " "; + index += 2; + while (index < code.length) { + if (code[index] === "*" && code[index + 1] === "/") { + masked += " "; + index += 2; + break; + } + masked += code[index] === "\n" ? "\n" : " "; + index += 1; + } + continue; + } + if (char === "'" || char === '"') { + const quote = char; + masked += " "; + index += 1; + while (index < code.length) { + const current = code[index]; + masked += current === "\n" ? "\n" : " "; + index += 1; + if (current === "\\") { + if (index < code.length) { + masked += code[index] === "\n" ? "\n" : " "; + index += 1; + } + continue; + } + if (current === quote) { + break; + } + } + continue; + } + masked += char; + index += 1; + } + return masked; +} + function rejectsModuleAccess(code: string): boolean { - return /(^|[^\w$])import\s*(?:\(|[\s{*]|\w)|(^|[^\w$])require\s*\(/u.test(code); + const source = maskCodeLiteralsAndComments(code); + return /\bimport\b\s*(?:\.|\(|["'`{*]|\w)|\brequire\b\s*\(/u.test(source); } async function loadTypeScriptRuntime(): Promise { @@ -498,6 +565,7 @@ function snapshotState(params: { signal?: AbortSignal; onUpdate?: AgentToolUpdateCallback; }) { + enforceActiveRunLimit(); if (params.snapshotBytes.byteLength > params.config.maxSnapshotBytes) { throw new ToolInputError("code mode snapshot limit exceeded"); } @@ -670,21 +738,25 @@ async function runWait(params: { ) { throw new ToolInputError("code mode run belongs to a different session."); } - const ready = await waitForPending(state.pending, state.config.timeoutMs); - if (!ready) { - const pending = state.pending.filter((entry) => !entry.settled); - return { - status: "waiting" as const, - runId: state.runId, - reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending), - pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending), - output: state.output, - telemetry: telemetry(state.runtime), - }; + if (resumingRunIds.has(state.runId)) { + throw new ToolInputError("code mode run is already being resumed."); } - - activeRuns.delete(state.runId); + resumingRunIds.add(state.runId); try { + const ready = await waitForPending(state.pending, state.config.timeoutMs); + if (!ready) { + const pending = state.pending.filter((entry) => !entry.settled); + return { + status: "waiting" as const, + runId: state.runId, + reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending), + pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending), + output: state.output, + telemetry: telemetry(state.runtime), + }; + } + + activeRuns.delete(state.runId); const settledRequests: SettledBridgeRequest[] = []; for (const entry of state.pending) { settledRequests.push(entry.settled ?? (await entry.promise)); @@ -731,6 +803,8 @@ async function runWait(params: { output: state.output, telemetry: telemetry(state.runtime), }; + } finally { + resumingRunIds.delete(state.runId); } } @@ -843,6 +917,7 @@ export function addClientToolsToCodeModeCatalog(params: { export const __testing = { activeRuns, + resumingRunIds, codeModeWorkerUrl, resolveCodeModeWorkerUrl, resolveCodeModeConfig, diff --git a/src/agents/code-mode.worker.ts b/src/agents/code-mode.worker.ts index 1b016a6a45c..a3139db688f 100644 --- a/src/agents/code-mode.worker.ts +++ b/src/agents/code-mode.worker.ts @@ -57,6 +57,11 @@ type CodeModeWorkerResult = output: unknown[]; }; +type VmRun = { + vm: QuickJS; + didTimeout: () => boolean; +}; + function isRecord(value: unknown): value is Record { return Boolean(value && typeof value === "object" && !Array.isArray(value)); } @@ -226,13 +231,17 @@ async function createVm(params: { catalog: unknown[]; config: CodeModeConfig; pendingRequests: PendingBridgeRequest[]; -}) { +}): Promise { const startedAt = Date.now(); + let timedOut = false; const vm = await QuickJS.create({ memoryLimit: params.config.memoryLimitBytes, intrinsics: Intrinsics.ALL, timezoneOffset: 0, - interruptHandler: () => Date.now() - startedAt > params.config.timeoutMs, + interruptHandler: () => { + timedOut = Date.now() - startedAt > params.config.timeoutMs; + return timedOut; + }, }); const catalogHandle = vm.hostToHandle(params.catalog); try { @@ -254,21 +263,25 @@ async function createVm(params: { hostRequest.dispose(); } vm.evalCode(CONTROLLER_SOURCE, "openclaw-code-mode:controller.js").dispose(); - return vm; + return { vm, didTimeout: () => timedOut }; } async function restoreVm(params: { snapshotBytes: Uint8Array; config: CodeModeConfig; pendingRequests: PendingBridgeRequest[]; -}) { +}): Promise { const startedAt = Date.now(); + let timedOut = false; const snapshot = QuickJS.deserializeSnapshot(params.snapshotBytes); const vm = await QuickJS.restore(snapshot, { memoryLimit: params.config.memoryLimitBytes, intrinsics: Intrinsics.ALL, timezoneOffset: 0, - interruptHandler: () => Date.now() - startedAt > params.config.timeoutMs, + interruptHandler: () => { + timedOut = Date.now() - startedAt > params.config.timeoutMs; + return timedOut; + }, }); vm.registerHostCallback( "__openclawHostRequest", @@ -278,7 +291,7 @@ async function restoreVm(params: { config: params.config, }), ); - return vm; + return { vm, didTimeout: () => timedOut }; } function takeOutput(vm: QuickJS): unknown[] { @@ -348,7 +361,11 @@ function waitingResult(params: { async function runExec(input: Extract) { const pendingRequests: PendingBridgeRequest[] = []; - const vm = await createVm({ catalog: input.catalog, config: input.config, pendingRequests }); + const { vm, didTimeout } = await createVm({ + catalog: input.catalog, + config: input.config, + pendingRequests, + }); try { vm.evalCode( buildUserSource(input.source), @@ -359,12 +376,12 @@ async function runExec(input: Extract) { const output = takeOutput(vm); const resultHandle = getResultHandle(vm); try { - if ( - pendingRequests.length > 0 || - (resultHandle.isPromise && resultHandle.promiseState === 0) - ) { + if (pendingRequests.length > 0) { return waitingResult({ vm, pendingRequests, output, config: input.config }); } + if (resultHandle.isPromise && resultHandle.promiseState === 0) { + throw new Error("code mode promise is pending without host work"); + } return { status: "completed" as const, value: await readCompletedResult(vm, resultHandle), @@ -373,6 +390,11 @@ async function runExec(input: Extract) { } finally { resultHandle.dispose(); } + } catch (error) { + if (didTimeout()) { + throw new Error("code mode timeout exceeded", { cause: error }); + } + throw error; } finally { vm.dispose(); } @@ -380,7 +402,7 @@ async function runExec(input: Extract) { async function runResume(input: Extract) { const pendingRequests: PendingBridgeRequest[] = []; - const vm = await restoreVm({ + const { vm, didTimeout } = await restoreVm({ snapshotBytes: input.snapshotBytes, config: input.config, pendingRequests, @@ -411,12 +433,12 @@ async function runResume(input: Extract const output = takeOutput(vm); const resultHandle = getResultHandle(vm); try { - if ( - pendingRequests.length > 0 || - (resultHandle.isPromise && resultHandle.promiseState === 0) - ) { + if (pendingRequests.length > 0) { return waitingResult({ vm, pendingRequests, output, config: input.config }); } + if (resultHandle.isPromise && resultHandle.promiseState === 0) { + throw new Error("code mode promise is pending without host work"); + } return { status: "completed" as const, value: await readCompletedResult(vm, resultHandle), @@ -425,6 +447,11 @@ async function runResume(input: Extract } finally { resultHandle.dispose(); } + } catch (error) { + if (didTimeout()) { + throw new Error("code mode timeout exceeded", { cause: error }); + } + throw error; } finally { vm.dispose(); } diff --git a/test/scripts/lint-suppressions.test.ts b/test/scripts/lint-suppressions.test.ts index 183caaf48e2..998e9ed7d33 100644 --- a/test/scripts/lint-suppressions.test.ts +++ b/test/scripts/lint-suppressions.test.ts @@ -92,6 +92,7 @@ describe("production lint suppressions", () => { "scripts/lib/extension-package-boundary.ts|typescript/no-unnecessary-type-parameters|1", "scripts/lib/plugin-npm-release.ts|typescript/no-unnecessary-type-parameters|1", "src/agents/agent-scope.ts|no-control-regex|1", + "src/agents/code-mode.worker.ts|unicorn/require-post-message-target-origin|1", "src/agents/pi-embedded-runner/run/images.ts|no-control-regex|1", "src/agents/subagent-attachments.ts|no-control-regex|1", "src/agents/subagent-spawn.ts|no-control-regex|1",