fix: harden code mode runtime

This commit is contained in:
Peter Steinberger
2026-05-14 23:13:37 +01:00
parent 204941c03e
commit 0db0979365
7 changed files with 253 additions and 36 deletions

View File

@@ -1,4 +1,4 @@
41fe35b1d577bdc2495842effde4158657b7ebfbfe5bd20aed4e507f52a6ac20 config-baseline.json
d2307659e5496b97096b8e5af6b6f8fefeee861f7d6a3b29d250887be5c937cf config-baseline.core.json
932d0df0d277aded125d4843a55f3acc2b90d39a5867d09d76bdf1a59d469e5f config-baseline.json
fe5e2eecee8c354eac3e10b801c27c20a16f9432377a19fcb2849221e62c62bf config-baseline.core.json
2aa997d48549bd321a478485126a4bd5065ba47333a80e7eb07a0ef6ad75b0a6 config-baseline.channel.json
1ab5b65a94d84f59bae5e6bbe310057fae0a645f2538ab00f1f37b7f8b371e6f config-baseline.plugin.json

View File

@@ -1786,7 +1786,7 @@
"pdfjs-dist": "5.7.284",
"playwright-core": "1.60.0",
"qrcode": "1.5.4",
"quickjs-wasi": "^2.2.0",
"quickjs-wasi": "2.2.0",
"tar": "7.5.15",
"tokenjuice": "0.7.0",
"tree-sitter-bash": "0.25.1",

7
pnpm-lock.yaml generated
View File

@@ -156,7 +156,7 @@ importers:
specifier: 1.5.4
version: 1.5.4
quickjs-wasi:
specifier: ^2.2.0
specifier: 2.2.0
version: 2.2.0
tar:
specifier: 7.5.15
@@ -6898,6 +6898,9 @@ packages:
quick-format-unescaped@4.0.4:
resolution: {integrity: sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==}
quickjs-wasi@2.2.0:
resolution: {integrity: sha512-zQxXmQMrEoD3S+jQdYsloq4qAuaxKFHZj6hHqOYGwB2iQZH+q9e/lf5zQPXCKOk0WJuAjzRFbO4KwHIp2D05Iw==}
range-parser@1.2.1:
resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==}
engines: {node: '>= 0.6'}
@@ -13913,6 +13916,8 @@ snapshots:
quick-format-unescaped@4.0.4: {}
quickjs-wasi@2.2.0: {}
range-parser@1.2.1: {}
raw-body@3.0.2:

View File

@@ -95,6 +95,7 @@ async function runUntilCompleted(params: {
describe("Code Mode", () => {
afterEach(() => {
__testing.activeRuns.clear();
__testing.resumingRunIds.clear();
});
it("resolves object config defaults", () => {
@@ -299,6 +300,60 @@ describe("Code Mode", () => {
).rejects.toThrow("different session");
});
it("rejects concurrent waits for the same suspended run", async () => {
const catalogRef = createToolSearchCatalogRef();
const config = {
tools: {
codeMode: {
enabled: true,
timeoutMs: 100,
},
},
} as never;
const ctx = {
config,
runtimeConfig: config,
sessionId: "session-code-mode",
sessionKey: "agent:main:main",
runId: "run-code-mode",
catalogRef,
};
const codeModeTools = createCodeModeTools(ctx);
applyCodeModeCatalog({
tools: [
...codeModeTools,
pluginToolWithExecute(
"fake_slow",
"Slow helper",
async () => await new Promise<never>(() => undefined),
),
],
config,
sessionId: "session-code-mode",
sessionKey: "agent:main:main",
runId: "run-code-mode",
catalogRef,
});
const first = resultDetails(
await codeModeTools[0].execute("code-call-concurrent-wait", {
code: "await tools.fake_slow({}); return 'done';",
}),
);
expect(first.status).toBe("waiting");
const firstWait = codeModeTools[1].execute("code-wait-concurrent-a", {
runId: first.runId,
});
await expect(
codeModeTools[1].execute("code-wait-concurrent-b", { runId: first.runId }),
).rejects.toThrow("already being resumed");
const stillWaiting = resultDetails(await firstWait);
expect(stillWaiting.status).toBe("waiting");
expect(stillWaiting.runId).toBe(first.runId);
});
it("reports only unsettled pending tool calls when wait times out", async () => {
const catalogRef = createToolSearchCatalogRef();
const config = {
@@ -379,6 +434,54 @@ describe("Code Mode", () => {
expect(__testing.getTypescriptRuntimePromise()).toBeNull();
});
it("allows identifiers and strings that contain import without module access", async () => {
const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
applyCodeModeCatalog({
tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
config,
sessionId: "session-code-mode",
sessionKey: "agent:main:main",
runId: "run-code-mode",
catalogRef,
});
const details = await runUntilCompleted({
execTool: codeModeTools[0],
waitTool: codeModeTools[1],
code: `
const important = 41;
const message = "import docs later";
return important + (message.includes("import") ? 1 : 0);
`,
});
expect(details.status).toBe("completed");
expect(details.value).toBe(42);
});
it("fails pending promises that have no host bridge work", async () => {
const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
applyCodeModeCatalog({
tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
config,
sessionId: "session-code-mode",
sessionKey: "agent:main:main",
runId: "run-code-mode",
catalogRef,
});
const beforeRunCount = __testing.activeRuns.size;
const details = resultDetails(
await codeModeTools[0].execute("code-call-empty-wait", {
code: "await new Promise(() => undefined); return 'never';",
}),
);
expect(details.status).toBe("failed");
expect(String(details.error)).toContain("pending without host work");
expect(__testing.activeRuns.size).toBe(beforeRunCount);
});
it("clamps omitted code-mode catalog search limits to maxSearchLimit", async () => {
const catalogRef = createToolSearchCatalogRef();
const config = {
@@ -449,7 +552,12 @@ describe("Code Mode", () => {
expect(details.value).toEqual({ value: 42 });
});
it("rejects module access", async () => {
it.each([
"const fs = require('node:fs'); return fs;",
"return import('node:fs');",
"return import.meta.url;",
"return `${import('node:fs')}`;",
])("rejects module access: %s", async (code) => {
const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
applyCodeModeCatalog({
tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
@@ -462,7 +570,7 @@ describe("Code Mode", () => {
const details = resultDetails(
await codeModeTools[0].execute("code-call-import", {
code: "const fs = require('node:fs'); return fs;",
code,
}),
);
@@ -585,5 +693,6 @@ describe("Code Mode", () => {
await expect(heartbeat).resolves.toBe("main-event-loop-alive");
expect(details.status).toBe("failed");
expect(String(details.error)).toContain("timeout exceeded");
});
});

View File

@@ -40,6 +40,7 @@ const DEFAULT_MAX_PENDING_TOOL_CALLS = 16;
const DEFAULT_SNAPSHOT_TTL_SECONDS = 900;
const DEFAULT_SEARCH_LIMIT = 8;
const DEFAULT_MAX_SEARCH_LIMIT = 50;
const MAX_ACTIVE_CODE_MODE_RUNS = 64;
type CodeModeLanguage = "javascript" | "typescript";
@@ -113,6 +114,7 @@ type CodeModeWorkerResult =
};
const activeRuns = new Map<string, CodeModeRunState>();
const resumingRunIds = new Set<string>();
let typescriptRuntimePromise: Promise<typeof import("typescript")> | null = null;
function isRecord(value: unknown): value is Record<string, unknown> {
@@ -223,10 +225,18 @@ function removeExpiredRuns(now = Date.now()): void {
for (const [runId, state] of activeRuns) {
if (state.expiresAt <= now) {
activeRuns.delete(runId);
resumingRunIds.delete(runId);
}
}
}
function enforceActiveRunLimit(): void {
removeExpiredRuns();
if (activeRuns.size >= MAX_ACTIVE_CODE_MODE_RUNS) {
throw new ToolInputError("too many suspended code mode runs.");
}
}
function toJsonSafe(value: unknown): unknown {
if (value === undefined) {
return null;
@@ -298,8 +308,65 @@ function readRunId(args: unknown): string {
return runId.trim();
}
function maskCodeLiteralsAndComments(code: string): string {
let masked = "";
let index = 0;
while (index < code.length) {
const char = code[index];
const next = code[index + 1];
if (char === "/" && next === "/") {
masked += " ";
index += 2;
while (index < code.length && code[index] !== "\n") {
masked += " ";
index += 1;
}
continue;
}
if (char === "/" && next === "*") {
masked += " ";
index += 2;
while (index < code.length) {
if (code[index] === "*" && code[index + 1] === "/") {
masked += " ";
index += 2;
break;
}
masked += code[index] === "\n" ? "\n" : " ";
index += 1;
}
continue;
}
if (char === "'" || char === '"') {
const quote = char;
masked += " ";
index += 1;
while (index < code.length) {
const current = code[index];
masked += current === "\n" ? "\n" : " ";
index += 1;
if (current === "\\") {
if (index < code.length) {
masked += code[index] === "\n" ? "\n" : " ";
index += 1;
}
continue;
}
if (current === quote) {
break;
}
}
continue;
}
masked += char;
index += 1;
}
return masked;
}
function rejectsModuleAccess(code: string): boolean {
return /(^|[^\w$])import\s*(?:\(|[\s{*]|\w)|(^|[^\w$])require\s*\(/u.test(code);
const source = maskCodeLiteralsAndComments(code);
return /\bimport\b\s*(?:\.|\(|["'`{*]|\w)|\brequire\b\s*\(/u.test(source);
}
async function loadTypeScriptRuntime(): Promise<typeof import("typescript")> {
@@ -498,6 +565,7 @@ function snapshotState(params: {
signal?: AbortSignal;
onUpdate?: AgentToolUpdateCallback<unknown>;
}) {
enforceActiveRunLimit();
if (params.snapshotBytes.byteLength > params.config.maxSnapshotBytes) {
throw new ToolInputError("code mode snapshot limit exceeded");
}
@@ -670,21 +738,25 @@ async function runWait(params: {
) {
throw new ToolInputError("code mode run belongs to a different session.");
}
const ready = await waitForPending(state.pending, state.config.timeoutMs);
if (!ready) {
const pending = state.pending.filter((entry) => !entry.settled);
return {
status: "waiting" as const,
runId: state.runId,
reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending),
pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending),
output: state.output,
telemetry: telemetry(state.runtime),
};
if (resumingRunIds.has(state.runId)) {
throw new ToolInputError("code mode run is already being resumed.");
}
activeRuns.delete(state.runId);
resumingRunIds.add(state.runId);
try {
const ready = await waitForPending(state.pending, state.config.timeoutMs);
if (!ready) {
const pending = state.pending.filter((entry) => !entry.settled);
return {
status: "waiting" as const,
runId: state.runId,
reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending),
pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending),
output: state.output,
telemetry: telemetry(state.runtime),
};
}
activeRuns.delete(state.runId);
const settledRequests: SettledBridgeRequest[] = [];
for (const entry of state.pending) {
settledRequests.push(entry.settled ?? (await entry.promise));
@@ -731,6 +803,8 @@ async function runWait(params: {
output: state.output,
telemetry: telemetry(state.runtime),
};
} finally {
resumingRunIds.delete(state.runId);
}
}
@@ -843,6 +917,7 @@ export function addClientToolsToCodeModeCatalog(params: {
export const __testing = {
activeRuns,
resumingRunIds,
codeModeWorkerUrl,
resolveCodeModeWorkerUrl,
resolveCodeModeConfig,

View File

@@ -57,6 +57,11 @@ type CodeModeWorkerResult =
output: unknown[];
};
type VmRun = {
vm: QuickJS;
didTimeout: () => boolean;
};
function isRecord(value: unknown): value is Record<string, unknown> {
return Boolean(value && typeof value === "object" && !Array.isArray(value));
}
@@ -226,13 +231,17 @@ async function createVm(params: {
catalog: unknown[];
config: CodeModeConfig;
pendingRequests: PendingBridgeRequest[];
}) {
}): Promise<VmRun> {
const startedAt = Date.now();
let timedOut = false;
const vm = await QuickJS.create({
memoryLimit: params.config.memoryLimitBytes,
intrinsics: Intrinsics.ALL,
timezoneOffset: 0,
interruptHandler: () => Date.now() - startedAt > params.config.timeoutMs,
interruptHandler: () => {
timedOut = Date.now() - startedAt > params.config.timeoutMs;
return timedOut;
},
});
const catalogHandle = vm.hostToHandle(params.catalog);
try {
@@ -254,21 +263,25 @@ async function createVm(params: {
hostRequest.dispose();
}
vm.evalCode(CONTROLLER_SOURCE, "openclaw-code-mode:controller.js").dispose();
return vm;
return { vm, didTimeout: () => timedOut };
}
async function restoreVm(params: {
snapshotBytes: Uint8Array;
config: CodeModeConfig;
pendingRequests: PendingBridgeRequest[];
}) {
}): Promise<VmRun> {
const startedAt = Date.now();
let timedOut = false;
const snapshot = QuickJS.deserializeSnapshot(params.snapshotBytes);
const vm = await QuickJS.restore(snapshot, {
memoryLimit: params.config.memoryLimitBytes,
intrinsics: Intrinsics.ALL,
timezoneOffset: 0,
interruptHandler: () => Date.now() - startedAt > params.config.timeoutMs,
interruptHandler: () => {
timedOut = Date.now() - startedAt > params.config.timeoutMs;
return timedOut;
},
});
vm.registerHostCallback(
"__openclawHostRequest",
@@ -278,7 +291,7 @@ async function restoreVm(params: {
config: params.config,
}),
);
return vm;
return { vm, didTimeout: () => timedOut };
}
function takeOutput(vm: QuickJS): unknown[] {
@@ -348,7 +361,11 @@ function waitingResult(params: {
async function runExec(input: Extract<CodeModeWorkerInput, { kind: "exec" }>) {
const pendingRequests: PendingBridgeRequest[] = [];
const vm = await createVm({ catalog: input.catalog, config: input.config, pendingRequests });
const { vm, didTimeout } = await createVm({
catalog: input.catalog,
config: input.config,
pendingRequests,
});
try {
vm.evalCode(
buildUserSource(input.source),
@@ -359,12 +376,12 @@ async function runExec(input: Extract<CodeModeWorkerInput, { kind: "exec" }>) {
const output = takeOutput(vm);
const resultHandle = getResultHandle(vm);
try {
if (
pendingRequests.length > 0 ||
(resultHandle.isPromise && resultHandle.promiseState === 0)
) {
if (pendingRequests.length > 0) {
return waitingResult({ vm, pendingRequests, output, config: input.config });
}
if (resultHandle.isPromise && resultHandle.promiseState === 0) {
throw new Error("code mode promise is pending without host work");
}
return {
status: "completed" as const,
value: await readCompletedResult(vm, resultHandle),
@@ -373,6 +390,11 @@ async function runExec(input: Extract<CodeModeWorkerInput, { kind: "exec" }>) {
} finally {
resultHandle.dispose();
}
} catch (error) {
if (didTimeout()) {
throw new Error("code mode timeout exceeded", { cause: error });
}
throw error;
} finally {
vm.dispose();
}
@@ -380,7 +402,7 @@ async function runExec(input: Extract<CodeModeWorkerInput, { kind: "exec" }>) {
async function runResume(input: Extract<CodeModeWorkerInput, { kind: "resume" }>) {
const pendingRequests: PendingBridgeRequest[] = [];
const vm = await restoreVm({
const { vm, didTimeout } = await restoreVm({
snapshotBytes: input.snapshotBytes,
config: input.config,
pendingRequests,
@@ -411,12 +433,12 @@ async function runResume(input: Extract<CodeModeWorkerInput, { kind: "resume" }>
const output = takeOutput(vm);
const resultHandle = getResultHandle(vm);
try {
if (
pendingRequests.length > 0 ||
(resultHandle.isPromise && resultHandle.promiseState === 0)
) {
if (pendingRequests.length > 0) {
return waitingResult({ vm, pendingRequests, output, config: input.config });
}
if (resultHandle.isPromise && resultHandle.promiseState === 0) {
throw new Error("code mode promise is pending without host work");
}
return {
status: "completed" as const,
value: await readCompletedResult(vm, resultHandle),
@@ -425,6 +447,11 @@ async function runResume(input: Extract<CodeModeWorkerInput, { kind: "resume" }>
} finally {
resultHandle.dispose();
}
} catch (error) {
if (didTimeout()) {
throw new Error("code mode timeout exceeded", { cause: error });
}
throw error;
} finally {
vm.dispose();
}

View File

@@ -92,6 +92,7 @@ describe("production lint suppressions", () => {
"scripts/lib/extension-package-boundary.ts|typescript/no-unnecessary-type-parameters|1",
"scripts/lib/plugin-npm-release.ts|typescript/no-unnecessary-type-parameters|1",
"src/agents/agent-scope.ts|no-control-regex|1",
"src/agents/code-mode.worker.ts|unicorn/require-post-message-target-origin|1",
"src/agents/pi-embedded-runner/run/images.ts|no-control-regex|1",
"src/agents/subagent-attachments.ts|no-control-regex|1",
"src/agents/subagent-spawn.ts|no-control-regex|1",