mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-03 23:44:07 +00:00
* feat: add internal code mode namespaces * test: add code mode namespace live proof * test: add live code mode Docker repro * chore: keep code mode docker repro out of package scripts * fix: break code mode namespace type cycle * fix: clean code mode namespace ci drift * fix: route code mode namespaces through tools * fix: preserve explicit agent global sessions * docs: explain code mode namespace registry * test: cap realtime websocket payload * fix: normalize code mode timeout results * fix: satisfy code mode timeout lint * chore: rerun code mode CI * ci: extend node shard silence watchdog * test: avoid child process mock deadlocks * test: fix code mode repro shebang * fix: scope explicit agent sentinel sessions * test: preserve child process mock actual loader * fix: dispatch namespace tools by exact id * test: satisfy restart execFile mock type
645 lines
22 KiB
JavaScript
Executable File
645 lines
22 KiB
JavaScript
Executable File
#!/usr/bin/env -S node --import tsx
|
|
import { performance } from "node:perf_hooks";
|
|
import { Type } from "typebox";
|
|
import type { Model } from "../../packages/agent-core/src/llm.js";
|
|
import type { AgentEvent, AgentTool } from "../../packages/agent-core/src/types.js";
|
|
import {
|
|
clearCodeModeNamespacesForPlugin,
|
|
createCodeModeNamespaceTool,
|
|
registerCodeModeNamespaceForPlugin,
|
|
} from "../../src/agents/code-mode-namespaces.js";
|
|
import { applyCodeModeCatalog, createCodeModeTools } from "../../src/agents/code-mode.js";
|
|
import { Agent } from "../../src/agents/runtime/index.js";
|
|
import { createToolSearchCatalogRef } from "../../src/agents/tool-search.js";
|
|
import { jsonResult, type AnyAgentTool } from "../../src/agents/tools/common.js";
|
|
import { setPluginToolMeta } from "../../src/plugins/tools.js";
|
|
|
|
type Mode = "regular" | "code-catalog" | "code-namespace";
|
|
|
|
type FictionTitle = {
|
|
id: string;
|
|
title: string;
|
|
lead: string;
|
|
status: string;
|
|
riskScore: number;
|
|
dependencies: Array<{ id: string; cleared: boolean }>;
|
|
};
|
|
|
|
type FictionScene = {
|
|
id: string;
|
|
titleId: string;
|
|
pages: number;
|
|
blocked: boolean;
|
|
};
|
|
|
|
type FictionDefect = {
|
|
id: string;
|
|
titleId: string;
|
|
sceneId: string;
|
|
state: "open" | "closed";
|
|
};
|
|
|
|
type FictionInvoice = {
|
|
id: string;
|
|
titleId: string;
|
|
author: string;
|
|
amount: number;
|
|
paid: boolean;
|
|
};
|
|
|
|
type FictionServiceState = {
|
|
titles: FictionTitle[];
|
|
scenes: FictionScene[];
|
|
defects: FictionDefect[];
|
|
invoices: FictionInvoice[];
|
|
};
|
|
|
|
type FictionService = ReturnType<typeof createFictionService>;
|
|
|
|
type Task = {
|
|
id: string;
|
|
prompt: string;
|
|
validate(answer: unknown, service: FictionService): { ok: boolean; reason?: string };
|
|
};
|
|
|
|
type RunMetrics = {
|
|
mode: Mode;
|
|
task: string;
|
|
ok: boolean;
|
|
reason?: string;
|
|
latencyMs: number;
|
|
modelTurns: number;
|
|
assistantMessages: number;
|
|
topLevelToolCalls: number;
|
|
serviceCalls: number;
|
|
finalText: string;
|
|
stopReason?: string;
|
|
errorMessage?: string;
|
|
toolResults?: unknown[];
|
|
};
|
|
|
|
const PLUGIN_ID = "fictions-live";
|
|
|
|
function cloneState(): FictionServiceState {
|
|
return {
|
|
titles: [
|
|
{
|
|
id: "PX-73",
|
|
title: "The Glass Orchard",
|
|
lead: "Mira Vale",
|
|
status: "draft",
|
|
riskScore: 77,
|
|
dependencies: [
|
|
{ id: "outline", cleared: true },
|
|
{ id: "rights", cleared: true },
|
|
],
|
|
},
|
|
{
|
|
id: "NM-12",
|
|
title: "Night Market of Moons",
|
|
lead: "Oren Quill",
|
|
status: "revision",
|
|
riskScore: 91,
|
|
dependencies: [
|
|
{ id: "continuity", cleared: true },
|
|
{ id: "copyedit", cleared: false },
|
|
],
|
|
},
|
|
{
|
|
id: "RS-40",
|
|
title: "River Static",
|
|
lead: "Nia Rowan",
|
|
status: "locked",
|
|
riskScore: 54,
|
|
dependencies: [{ id: "legal", cleared: true }],
|
|
},
|
|
],
|
|
scenes: [
|
|
{ id: "PX-73-S1", titleId: "PX-73", pages: 32, blocked: false },
|
|
{ id: "PX-73-S2", titleId: "PX-73", pages: 28, blocked: false },
|
|
{ id: "PX-73-S3", titleId: "PX-73", pages: 36, blocked: false },
|
|
{ id: "NM-12-S1", titleId: "NM-12", pages: 44, blocked: false },
|
|
{ id: "NM-12-S2", titleId: "NM-12", pages: 39, blocked: true },
|
|
{ id: "RS-40-S1", titleId: "RS-40", pages: 51, blocked: false },
|
|
],
|
|
defects: [
|
|
{ id: "D-101", titleId: "NM-12", sceneId: "NM-12-S1", state: "open" },
|
|
{ id: "D-102", titleId: "NM-12", sceneId: "NM-12-S2", state: "open" },
|
|
{ id: "D-103", titleId: "NM-12", sceneId: "NM-12-S2", state: "open" },
|
|
{ id: "D-104", titleId: "PX-73", sceneId: "PX-73-S3", state: "closed" },
|
|
{ id: "D-105", titleId: "RS-40", sceneId: "RS-40-S1", state: "open" },
|
|
],
|
|
invoices: [
|
|
{ id: "I-200", titleId: "PX-73", author: "Mira Vale", amount: 4200, paid: false },
|
|
{ id: "I-201", titleId: "NM-12", author: "Oren Quill", amount: 6100, paid: false },
|
|
{ id: "I-202", titleId: "RS-40", author: "Nia Rowan", amount: 3700, paid: true },
|
|
],
|
|
};
|
|
}
|
|
|
|
function createFictionService() {
|
|
const state = cloneState();
|
|
let calls = 0;
|
|
const note = () => {
|
|
calls += 1;
|
|
};
|
|
const title = (id: string) => state.titles.find((entry) => entry.id === id);
|
|
return {
|
|
get calls() {
|
|
return calls;
|
|
},
|
|
snapshot() {
|
|
note();
|
|
return structuredClone(state);
|
|
},
|
|
listTitles() {
|
|
note();
|
|
return structuredClone(state.titles);
|
|
},
|
|
getTitle(id: string) {
|
|
note();
|
|
return title(id) ?? null;
|
|
},
|
|
listScenes(titleId?: string) {
|
|
note();
|
|
return structuredClone(state.scenes.filter((entry) => !titleId || entry.titleId === titleId));
|
|
},
|
|
listDefects(titleId?: string) {
|
|
note();
|
|
return state.defects
|
|
.filter((entry) => !titleId || entry.titleId === titleId)
|
|
.map((entry) => structuredClone(entry));
|
|
},
|
|
listInvoices(author?: string) {
|
|
note();
|
|
return structuredClone(state.invoices.filter((entry) => !author || entry.author === author));
|
|
},
|
|
updateStatus(id: string, status: string) {
|
|
note();
|
|
const entry = title(id);
|
|
if (!entry) {
|
|
return { ok: false, error: "unknown title", id };
|
|
}
|
|
entry.status = status;
|
|
return { ok: true, id, status };
|
|
},
|
|
currentStatus(id: string) {
|
|
return title(id)?.status;
|
|
},
|
|
};
|
|
}
|
|
|
|
function stringParam(params: Record<string, unknown>, key: string): string {
|
|
const value = params[key];
|
|
return typeof value === "string" ? value : "";
|
|
}
|
|
|
|
function makeTool(
|
|
name: string,
|
|
description: string,
|
|
properties: Record<string, unknown>,
|
|
execute: (params: Record<string, unknown>) => unknown,
|
|
): AnyAgentTool {
|
|
const tool = {
|
|
name,
|
|
label: name,
|
|
description,
|
|
parameters: Type.Object(properties),
|
|
execute: async (_toolCallId: string, params: unknown) =>
|
|
jsonResult(
|
|
execute((params && typeof params === "object" ? params : {}) as Record<string, unknown>),
|
|
),
|
|
} satisfies AnyAgentTool;
|
|
setPluginToolMeta(tool, { pluginId: PLUGIN_ID, optional: true });
|
|
return tool;
|
|
}
|
|
|
|
function createFictionTools(service: FictionService): AnyAgentTool[] {
|
|
return [
|
|
makeTool("fictions_list_titles", "List fiction titles with status and risk.", {}, () =>
|
|
service.listTitles(),
|
|
),
|
|
makeTool(
|
|
"fictions_get_title",
|
|
"Get one fiction title by id.",
|
|
{ id: Type.String() },
|
|
(params) => service.getTitle(stringParam(params, "id")),
|
|
),
|
|
makeTool(
|
|
"fictions_list_scenes",
|
|
"List scenes, optionally filtered by title id.",
|
|
{ titleId: Type.Optional(Type.String()) },
|
|
(params) =>
|
|
service.listScenes(typeof params.titleId === "string" ? params.titleId : undefined),
|
|
),
|
|
makeTool(
|
|
"fictions_list_defects",
|
|
"List defects, optionally filtered by title id.",
|
|
{ titleId: Type.Optional(Type.String()) },
|
|
(params) =>
|
|
service.listDefects(typeof params.titleId === "string" ? params.titleId : undefined),
|
|
),
|
|
makeTool(
|
|
"fictions_list_invoices",
|
|
"List invoices, optionally filtered by author.",
|
|
{ author: Type.Optional(Type.String()) },
|
|
(params) =>
|
|
service.listInvoices(typeof params.author === "string" ? params.author : undefined),
|
|
),
|
|
makeTool(
|
|
"fictions_update_status",
|
|
"Update a fiction title status.",
|
|
{ id: Type.String(), status: Type.String() },
|
|
(params) => service.updateStatus(stringParam(params, "id"), stringParam(params, "status")),
|
|
),
|
|
];
|
|
}
|
|
|
|
function createFictionNamespaceTools(service: FictionService): AnyAgentTool[] {
|
|
return [
|
|
makeTool("fictions_snapshot", "Return the complete fiction production snapshot.", {}, () =>
|
|
service.snapshot(),
|
|
),
|
|
makeTool("fictions_risk_audit", "Return highest-risk title audit.", {}, () => {
|
|
const data = service.snapshot();
|
|
const highest = data.titles.toSorted((a, b) => b.riskScore - a.riskScore)[0];
|
|
if (!highest) {
|
|
return null;
|
|
}
|
|
return {
|
|
task: "risk-audit",
|
|
id: highest.id,
|
|
lead: highest.lead,
|
|
status: highest.status,
|
|
unresolvedDefects: data.defects.filter(
|
|
(defect) => defect.titleId === highest.id && defect.state === "open",
|
|
).length,
|
|
blockedScenes: data.scenes
|
|
.filter((scene) => scene.titleId === highest.id && scene.blocked)
|
|
.map((scene) => scene.id),
|
|
};
|
|
}),
|
|
makeTool(
|
|
"fictions_promote_if_ready",
|
|
"Promote a title if dependencies and page count allow it.",
|
|
{ id: Type.String(), status: Type.String() },
|
|
(params) => {
|
|
const id = stringParam(params, "id");
|
|
const status = stringParam(params, "status");
|
|
const data = service.snapshot();
|
|
const title = data.titles.find((entry) => entry.id === id);
|
|
const scenes = data.scenes.filter((scene) => scene.titleId === id);
|
|
const totalPages = scenes.reduce((sum, scene) => sum + scene.pages, 0);
|
|
const dependenciesCleared =
|
|
title?.dependencies.every((dependency) => dependency.cleared) ?? false;
|
|
if (!title || totalPages >= 110 || !dependenciesCleared) {
|
|
return {
|
|
task: "promote",
|
|
id,
|
|
action: "blocked",
|
|
totalPages,
|
|
finalStatus: title?.status ?? null,
|
|
};
|
|
}
|
|
const updated = service.updateStatus(id, status);
|
|
return {
|
|
task: "promote",
|
|
id,
|
|
action: updated.ok ? "updated" : "blocked",
|
|
totalPages,
|
|
finalStatus: service.currentStatus(id) ?? null,
|
|
};
|
|
},
|
|
),
|
|
makeTool(
|
|
"fictions_unpaid_over",
|
|
"Return unpaid invoices over a numeric threshold.",
|
|
{ amount: Type.Number() },
|
|
(params) => {
|
|
const amount = typeof params.amount === "number" ? params.amount : 0;
|
|
const data = service.snapshot();
|
|
const invoices = data.invoices.filter(
|
|
(invoice) => !invoice.paid && invoice.amount > amount,
|
|
);
|
|
return {
|
|
task: "invoice",
|
|
invoiceIds: invoices.map((invoice) => invoice.id),
|
|
totalUnpaidOver5000: invoices.reduce((sum, invoice) => sum + invoice.amount, 0),
|
|
};
|
|
},
|
|
),
|
|
];
|
|
}
|
|
|
|
function registerFictionNamespace(): void {
|
|
clearCodeModeNamespacesForPlugin(PLUGIN_ID);
|
|
registerCodeModeNamespaceForPlugin(PLUGIN_ID, {
|
|
id: "fictions",
|
|
globalName: "Fictions",
|
|
description: "Fiction production service helpers.",
|
|
requiredToolNames: [
|
|
"fictions_promote_if_ready",
|
|
"fictions_risk_audit",
|
|
"fictions_snapshot",
|
|
"fictions_unpaid_over",
|
|
],
|
|
prompt:
|
|
"Use Fictions.riskAudit(), Fictions.promoteIfReady(id, status), Fictions.unpaidOver(amount), and Fictions.snapshot().",
|
|
createScope: () => ({
|
|
snapshot: createCodeModeNamespaceTool("fictions_snapshot"),
|
|
riskAudit: createCodeModeNamespaceTool("fictions_risk_audit"),
|
|
promoteIfReady: createCodeModeNamespaceTool("fictions_promote_if_ready", ([id, status]) => ({
|
|
id: typeof id === "string" ? id : "",
|
|
status: typeof status === "string" ? status : "",
|
|
})),
|
|
unpaidOver: createCodeModeNamespaceTool("fictions_unpaid_over", ([amount]) => ({
|
|
amount: typeof amount === "number" ? amount : 0,
|
|
})),
|
|
}),
|
|
});
|
|
}
|
|
|
|
function createModel(modelId: string): Model<"openai-responses"> {
|
|
const baseUrl = process.env.OPENAI_BASE_URL?.trim() || "https://api.openai.com/v1";
|
|
return {
|
|
id: modelId,
|
|
name: modelId,
|
|
api: "openai-responses",
|
|
provider: "openai",
|
|
baseUrl,
|
|
reasoning: modelId.startsWith("gpt-5"),
|
|
input: ["text"],
|
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
contextWindow: 400_000,
|
|
maxTokens: 128_000,
|
|
};
|
|
}
|
|
|
|
function systemPromptForMode(mode: Mode): string {
|
|
const base =
|
|
"You are testing a fiction production service. Use the available tools, never invent data, and return only one minified JSON object. No markdown.";
|
|
if (mode === "regular") {
|
|
return `${base} Use the Fictions tools directly.`;
|
|
}
|
|
if (mode === "code-catalog") {
|
|
return `${base} Use code_mode_exec with JavaScript and always return the final JSON object from the code. In code, call direct tool helpers such as await tools.fictions_list_titles({}), await tools.fictions_list_scenes({titleId:"PX-73"}), await tools.fictions_list_defects({titleId:"NM-12"}), await tools.fictions_list_invoices({}), and await tools.fictions_update_status({id:"PX-73",status:"preproduction"}). Call code_mode_wait until the code result is completed, then return that completed value as your final answer.`;
|
|
}
|
|
return `${base} Use code_mode_exec with JavaScript and always return the final JSON object from the code. In code, prefer the namespace helpers: return await Fictions.riskAudit(); return await Fictions.promoteIfReady("PX-73","preproduction"); return await Fictions.unpaidOver(5000). Call code_mode_wait until the code result is completed, then return that completed value as your final answer.`;
|
|
}
|
|
|
|
function toolsForMode(mode: Mode, service: FictionService): AgentTool[] {
|
|
const fictionTools = createFictionTools(service);
|
|
if (mode === "regular") {
|
|
return fictionTools as AgentTool[];
|
|
}
|
|
if (mode === "code-namespace") {
|
|
registerFictionNamespace();
|
|
} else {
|
|
clearCodeModeNamespacesForPlugin(PLUGIN_ID);
|
|
}
|
|
const config = {
|
|
tools: {
|
|
codeMode: {
|
|
enabled: true,
|
|
timeoutMs: 20_000,
|
|
maxPendingToolCalls: 32,
|
|
},
|
|
},
|
|
};
|
|
const catalogRef = createToolSearchCatalogRef();
|
|
const codeModeTools = createCodeModeTools({
|
|
config,
|
|
runtimeConfig: config,
|
|
sessionId: `live-${mode}`,
|
|
sessionKey: `agent:live-${mode}:main`,
|
|
agentId: "live",
|
|
runId: `run-${mode}`,
|
|
catalogRef,
|
|
});
|
|
const catalogTools =
|
|
mode === "code-namespace" ? createFictionNamespaceTools(service) : fictionTools;
|
|
return applyCodeModeCatalog({
|
|
tools: [...codeModeTools, ...catalogTools],
|
|
config,
|
|
sessionId: `live-${mode}`,
|
|
sessionKey: `agent:live-${mode}:main`,
|
|
agentId: "live",
|
|
runId: `run-${mode}`,
|
|
catalogRef,
|
|
}).tools as AgentTool[];
|
|
}
|
|
|
|
function textFromMessageContent(content: unknown): string {
|
|
if (typeof content === "string") {
|
|
return content;
|
|
}
|
|
if (!Array.isArray(content)) {
|
|
return "";
|
|
}
|
|
return content
|
|
.filter(
|
|
(entry) => entry && typeof entry === "object" && (entry as { type?: string }).type === "text",
|
|
)
|
|
.map((entry) => (entry as { text?: string }).text ?? "")
|
|
.join("");
|
|
}
|
|
|
|
function parseFirstJson(text: string): unknown {
|
|
const trimmed = text.trim();
|
|
try {
|
|
return JSON.parse(trimmed) as unknown;
|
|
} catch {
|
|
const start = trimmed.indexOf("{");
|
|
const end = trimmed.lastIndexOf("}");
|
|
if (start >= 0 && end > start) {
|
|
return JSON.parse(trimmed.slice(start, end + 1)) as unknown;
|
|
}
|
|
throw new Error("assistant did not return JSON");
|
|
}
|
|
}
|
|
|
|
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
}
|
|
|
|
const tasks: Task[] = [
|
|
{
|
|
id: "risk-audit",
|
|
prompt:
|
|
'Find the fiction title with the highest riskScore. Return JSON with keys task, id, lead, status, unresolvedDefects, blockedScenes. blockedScenes must be an array of blocked scene ids, not a count. task must be "risk-audit".',
|
|
validate(answer) {
|
|
if (!isRecord(answer)) {
|
|
return { ok: false, reason: "answer is not an object" };
|
|
}
|
|
const blockedScenes = Array.isArray(answer.blockedScenes)
|
|
? answer.blockedScenes.map(String).toSorted()
|
|
: [];
|
|
const unresolvedDefects = Array.isArray(answer.unresolvedDefects)
|
|
? answer.unresolvedDefects.length
|
|
: answer.unresolvedDefects;
|
|
const ok =
|
|
answer.task === "risk-audit" &&
|
|
answer.id === "NM-12" &&
|
|
answer.lead === "Oren Quill" &&
|
|
answer.status === "revision" &&
|
|
unresolvedDefects === 3 &&
|
|
JSON.stringify(blockedScenes) === JSON.stringify(["NM-12-S2"]);
|
|
return ok ? { ok } : { ok, reason: `unexpected risk audit: ${JSON.stringify(answer)}` };
|
|
},
|
|
},
|
|
{
|
|
id: "promote",
|
|
prompt:
|
|
'For PX-73, if total scene pages are below 110 and every dependency is cleared, update its status to "preproduction". If a Fictions.promoteIfReady helper exists, use it. Return JSON with keys task, id, action, totalPages, finalStatus. action must be exactly "updated" when the command succeeds. task must be "promote".',
|
|
validate(answer, service) {
|
|
if (!isRecord(answer)) {
|
|
return { ok: false, reason: "answer is not an object" };
|
|
}
|
|
const ok =
|
|
answer.task === "promote" &&
|
|
answer.id === "PX-73" &&
|
|
typeof answer.action === "string" &&
|
|
answer.action.includes("updated") &&
|
|
answer.totalPages === 96 &&
|
|
answer.finalStatus === "preproduction" &&
|
|
service.currentStatus("PX-73") === "preproduction";
|
|
return ok ? { ok } : { ok, reason: `unexpected promote result: ${JSON.stringify(answer)}` };
|
|
},
|
|
},
|
|
{
|
|
id: "invoice",
|
|
prompt:
|
|
'For unpaid invoices over 5000, return JSON with keys task, invoiceIds, totalUnpaidOver5000. task must be "invoice".',
|
|
validate(answer) {
|
|
if (!isRecord(answer)) {
|
|
return { ok: false, reason: "answer is not an object" };
|
|
}
|
|
const invoiceIds = Array.isArray(answer.invoiceIds)
|
|
? answer.invoiceIds.map(String).toSorted()
|
|
: [];
|
|
const ok =
|
|
answer.task === "invoice" &&
|
|
JSON.stringify(invoiceIds) === JSON.stringify(["I-201"]) &&
|
|
answer.totalUnpaidOver5000 === 6100;
|
|
return ok ? { ok } : { ok, reason: `unexpected invoice result: ${JSON.stringify(answer)}` };
|
|
},
|
|
},
|
|
];
|
|
|
|
async function runOne(mode: Mode, task: Task, model: string, apiKey: string): Promise<RunMetrics> {
|
|
const service = createFictionService();
|
|
const counts = {
|
|
modelTurns: 0,
|
|
assistantMessages: 0,
|
|
topLevelToolCalls: 0,
|
|
};
|
|
const toolResults: unknown[] = [];
|
|
const agent = new Agent({
|
|
sessionId: `code-mode-live-${mode}-${task.id}`,
|
|
initialState: {
|
|
model: createModel(model),
|
|
systemPrompt: systemPromptForMode(mode),
|
|
tools: toolsForMode(mode, service),
|
|
thinkingLevel: "off",
|
|
},
|
|
getApiKey: (provider) => (provider === "openai" ? apiKey : undefined),
|
|
toolExecution: "parallel",
|
|
maxRetryDelayMs: 10_000,
|
|
});
|
|
agent.subscribe((event: AgentEvent) => {
|
|
if (event.type === "turn_start") {
|
|
counts.modelTurns += 1;
|
|
} else if (event.type === "message_end" && event.message.role === "assistant") {
|
|
counts.assistantMessages += 1;
|
|
} else if (event.type === "tool_execution_start") {
|
|
counts.topLevelToolCalls += 1;
|
|
} else if (event.type === "tool_execution_end") {
|
|
toolResults.push(event.result);
|
|
}
|
|
});
|
|
|
|
const started = performance.now();
|
|
await agent.prompt(task.prompt);
|
|
const latencyMs = Math.round(performance.now() - started);
|
|
const lastAssistant = agent.state.messages
|
|
.toReversed()
|
|
.find((message) => message.role === "assistant");
|
|
const finalText = textFromMessageContent(lastAssistant?.content).trim();
|
|
let validation: { ok: boolean; reason?: string };
|
|
try {
|
|
validation = task.validate(parseFirstJson(finalText), service);
|
|
} catch (error) {
|
|
validation = {
|
|
ok: false,
|
|
reason: error instanceof Error ? error.message : String(error),
|
|
};
|
|
}
|
|
return {
|
|
mode,
|
|
task: task.id,
|
|
ok: validation.ok,
|
|
...(validation.reason ? { reason: validation.reason } : {}),
|
|
latencyMs,
|
|
modelTurns: counts.modelTurns,
|
|
assistantMessages: counts.assistantMessages,
|
|
topLevelToolCalls: counts.topLevelToolCalls,
|
|
serviceCalls: service.calls,
|
|
finalText,
|
|
...(lastAssistant?.stopReason ? { stopReason: lastAssistant.stopReason } : {}),
|
|
...(lastAssistant?.errorMessage ? { errorMessage: lastAssistant.errorMessage } : {}),
|
|
...(process.env.OPENCLAW_CODE_MODE_LIVE_DEBUG === "1" ? { toolResults } : {}),
|
|
};
|
|
}
|
|
|
|
function readArg(name: string): string | undefined {
|
|
const prefix = `--${name}=`;
|
|
const match = process.argv.find((arg) => arg.startsWith(prefix));
|
|
return match?.slice(prefix.length);
|
|
}
|
|
|
|
async function main() {
|
|
const apiKey = process.env.OPENAI_API_KEY?.trim();
|
|
if (!apiKey) {
|
|
throw new Error("OPENAI_API_KEY is required");
|
|
}
|
|
const model = readArg("model") ?? process.env.OPENCLAW_CODE_MODE_LIVE_MODEL ?? "gpt-5.4-mini";
|
|
const modeArg = readArg("modes");
|
|
const modes = (modeArg ? modeArg.split(",") : ["regular", "code-namespace"]) as Mode[];
|
|
const taskLimit = Number(readArg("tasks") ?? process.env.OPENCLAW_CODE_MODE_LIVE_TASKS ?? "3");
|
|
const selectedTasks = tasks.slice(
|
|
0,
|
|
Number.isFinite(taskLimit) && taskLimit > 0 ? taskLimit : tasks.length,
|
|
);
|
|
const results: RunMetrics[] = [];
|
|
for (const task of selectedTasks) {
|
|
for (const mode of modes) {
|
|
results.push(await runOne(mode, task, model, apiKey));
|
|
}
|
|
}
|
|
const summary = {
|
|
model,
|
|
tasks: selectedTasks.map((task) => task.id),
|
|
results,
|
|
aggregate: modes.map((mode) => {
|
|
const entries = results.filter((entry) => entry.mode === mode);
|
|
return {
|
|
mode,
|
|
ok: entries.filter((entry) => entry.ok).length,
|
|
total: entries.length,
|
|
latencyMs: entries.reduce((sum, entry) => sum + entry.latencyMs, 0),
|
|
modelTurns: entries.reduce((sum, entry) => sum + entry.modelTurns, 0),
|
|
topLevelToolCalls: entries.reduce((sum, entry) => sum + entry.topLevelToolCalls, 0),
|
|
serviceCalls: entries.reduce((sum, entry) => sum + entry.serviceCalls, 0),
|
|
};
|
|
}),
|
|
};
|
|
console.log(JSON.stringify(summary, null, 2));
|
|
if (results.some((entry) => !entry.ok)) {
|
|
process.exitCode = 1;
|
|
}
|
|
}
|
|
|
|
await main().finally(() => {
|
|
clearCodeModeNamespacesForPlugin(PLUGIN_ID);
|
|
});
|