ci: add source performance probes

This commit is contained in:
Peter Steinberger
2026-05-02 17:45:55 +01:00
parent d92a634fae
commit 25ca5cc8df
6 changed files with 385 additions and 3 deletions

View File

@@ -89,6 +89,7 @@ jobs:
REPORT_DIR: ${{ github.workspace }}/.artifacts/kova/reports/${{ matrix.lane }}
BUNDLE_DIR: ${{ github.workspace }}/.artifacts/kova/bundles/${{ matrix.lane }}
SUMMARY_DIR: ${{ github.workspace }}/.artifacts/kova/summaries
SOURCE_PERF_DIR: ${{ github.workspace }}/.artifacts/openclaw-performance/source/${{ matrix.lane }}
LANE_ID: ${{ matrix.lane }}
PROFILE: ${{ inputs.profile || 'diagnostic' }}
REQUESTED_REPEAT: ${{ inputs.repeat || '3' }}
@@ -297,6 +298,105 @@ jobs:
exit "$status"
fi
# Runs OpenClaw-native performance probes after the Kova pass. Only the
# mock-provider lane executes this step.
- name: Run OpenClaw source performance probes
if: ${{ steps.lane.outputs.run == 'true' && matrix.lane == 'mock-provider' }}
shell: bash
run: |
set -euo pipefail
# Repeat count comes from the workflow input; fall back to 3 runs when the
# value is missing, non-numeric, or less than 1.
source_runs="$REQUESTED_REPEAT"
if ! [[ "$source_runs" =~ ^[0-9]+$ ]] || [[ "$source_runs" -lt 1 ]]; then
source_runs=3
fi
mkdir -p "$SOURCE_PERF_DIR/mock-hello"
pnpm build
# Probe 1: gateway boot timing and memory across startup cases (default,
# hook variants, 50-plugin pressure). Output lands in gateway-cpu/.
pnpm test:gateway:cpu-scenarios \
--output-dir "$SOURCE_PERF_DIR/gateway-cpu" \
--runs "$source_runs" \
--warmup 1 \
--skip-qa \
--startup-case default \
--startup-case skipChannels \
--startup-case oneInternalHook \
--startup-case allInternalHooks \
--startup-case fiftyPlugins \
--startup-case fiftyStartupLazyPlugins
# Probe 2: repeated mock-OpenAI hello loops; each run writes its summary
# JSON under mock-hello/run-NNN/.
for run_index in $(seq 1 "$source_runs"); do
run_dir="$SOURCE_PERF_DIR/mock-hello/run-$(printf '%03d' "$run_index")"
pnpm openclaw qa suite \
--provider-mode mock-openai \
--concurrency 1 \
--output-dir "$(realpath --relative-to="$GITHUB_WORKSPACE" "$run_dir")" \
--scenario channel-chat-baseline
done
# Probe 3: CLI startup timing against a throwaway local gateway.
# Scratch home dir plus a free loopback port picked via node's net module.
gateway_home="$(mktemp -d)"
gateway_port="$(node -e "const net=require('node:net'); const s=net.createServer(); s.listen(0,'127.0.0.1',()=>{ console.log(s.address().port); s.close(); });")"
gateway_state="$gateway_home/.openclaw"
gateway_config="$gateway_state/openclaw.json"
gateway_log="$SOURCE_PERF_DIR/cli-gateway.log"
gateway_pid=""
mkdir -p "$gateway_state"
# Minimal gateway config: loopback bind, no auth, browser/control UI and
# tailscale disabled so boot stays lean and deterministic.
cat > "$gateway_config" <<EOF
{
"browser": { "enabled": false },
"gateway": {
"mode": "local",
"port": ${gateway_port},
"bind": "loopback",
"auth": { "mode": "none" },
"controlUi": { "enabled": false },
"tailscale": { "mode": "off" }
},
"plugins": {
"enabled": true,
"entries": { "browser": { "enabled": false } }
}
}
EOF
# Stop the background gateway (if still alive) and remove its scratch home.
cleanup_gateway() {
if [[ -n "${gateway_pid:-}" ]] && kill -0 "$gateway_pid" 2>/dev/null; then
kill "$gateway_pid" 2>/dev/null || true
wait "$gateway_pid" 2>/dev/null || true
fi
rm -rf "$gateway_home"
}
# Ensure cleanup happens even when a later command fails under `set -e`.
trap cleanup_gateway EXIT
# Boot the gateway in the background, logging to cli-gateway.log.
OPENCLAW_HOME="$gateway_home" OPENCLAW_STATE_DIR="$gateway_state" OPENCLAW_CONFIG_PATH="$gateway_config" OPENCLAW_GATEWAY_PORT="$gateway_port" OPENCLAW_SKIP_CHANNELS=1 \
node dist/entry.js gateway run --bind loopback --port "$gateway_port" --auth none --allow-unconfigured --force \
>"$gateway_log" 2>&1 &
gateway_pid="$!"
# Wait up to ~120s for /healthz; if the gateway process dies first, dump
# its log and fail the step immediately.
for _ in $(seq 1 120); do
if curl -fsS "http://127.0.0.1:${gateway_port}/healthz" >/dev/null; then
break
fi
if ! kill -0 "$gateway_pid" 2>/dev/null; then
cat "$gateway_log" >&2
exit 1
fi
sleep 1
done
# Final health check: fails the step if the wait loop timed out above.
curl -fsS "http://127.0.0.1:${gateway_port}/healthz" >/dev/null
# Benchmark CLI commands against the booted gateway.
OPENCLAW_HOME="$gateway_home" OPENCLAW_STATE_DIR="$gateway_state" OPENCLAW_CONFIG_PATH="$gateway_config" OPENCLAW_GATEWAY_PORT="$gateway_port" \
node --import tsx scripts/bench-cli-startup.ts \
--case gatewayHealthJson \
--case configGetGatewayPort \
--runs "$source_runs" \
--warmup 1 \
--output "$SOURCE_PERF_DIR/cli-startup.json"
cleanup_gateway
trap - EXIT
# Roll all probe outputs into a Markdown summary and surface it in the
# GitHub step summary.
pnpm perf:source:summary \
--source-dir "$SOURCE_PERF_DIR" \
--output "$SOURCE_PERF_DIR/index.md"
cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY"
- name: Upload Kova artifacts
if: ${{ always() && steps.lane.outputs.run == 'true' }}
uses: actions/upload-artifact@v5
@@ -306,6 +406,7 @@ jobs:
.artifacts/kova/reports/${{ matrix.lane }}
.artifacts/kova/bundles/${{ matrix.lane }}
.artifacts/kova/summaries/${{ matrix.lane }}.md
.artifacts/openclaw-performance/source/${{ matrix.lane }}
if-no-files-found: ignore
retention-days: ${{ matrix.deep_profile == 'true' && 14 || 30 }}
@@ -348,6 +449,18 @@ jobs:
mkdir -p "$dest/bundles"
cp -R "$BUNDLE_DIR"/. "$dest/bundles/"
fi
if [[ -d "$SOURCE_PERF_DIR" ]]; then
mkdir -p "$dest/source"
cp -R "$SOURCE_PERF_DIR"/. "$dest/source/"
if [[ -f "$SOURCE_PERF_DIR/index.md" ]]; then
cat >> "$dest/index.md" <<'EOF'
## Source probes
Additional gateway boot, memory, plugin pressure, mock hello-loop, and CLI startup numbers are in [source/index.md](source/index.md).
EOF
fi
fi
cat > "${reports_root}/openclaw-performance/${ref_slug}/latest-${LANE_ID}.json" <<EOF
{
"repository": "${GITHUB_REPOSITORY}",

View File

@@ -143,7 +143,9 @@ The workflow installs OCM from a pinned release and Kova from the pinned `kova_r
- `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
- `live-gpt54`: a real OpenAI `openai/gpt-5.4` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, and `index.md` into `openclaw/clawgrit-reports` under `openclaw-performance/<ref>/<run-id>-<attempt>/<lane>/`. The current branch pointer is written as `openclaw-performance/<ref>/latest-<lane>.json`.
The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it.
Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, `index.md`, and source-probe artifacts into `openclaw/clawgrit-reports` under `openclaw-performance/<ref>/<run-id>-<attempt>/<lane>/`. The current branch pointer is written as `openclaw-performance/<ref>/latest-<lane>.json`.
## Full Release Validation

View File

@@ -50,7 +50,9 @@ When debugging real providers/models (requires real creds):
`live_gpt54=true` for a real `openai/gpt-5.4` agent turn or
`deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs
publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to
`openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured.
`openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. The
mock-provider report also includes source-level gateway boot, memory,
plugin-pressure, repeated fake-model hello-loop, and CLI startup numbers.
- Docker live model sweep: `pnpm test:docker:live-models`
- Each selected model now runs a text turn plus a small file-read-style probe.
Models whose metadata advertises `image` input also run a tiny image turn.

View File

@@ -1429,6 +1429,7 @@
"openclaw": "node scripts/run-node.mjs",
"openclaw:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
"perf:kova:summary": "node scripts/kova-ci-summary.mjs",
"perf:source:summary": "node scripts/openclaw-performance-source-summary.mjs",
"plugin-sdk:api:check": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --check",
"plugin-sdk:api:gen": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --write",
"plugin-sdk:check-exports": "node scripts/sync-plugin-sdk-exports.mjs --check",

View File

@@ -15,6 +15,8 @@ type Sample = {
maxRssMb: number | null;
exitCode: number | null;
signal: string | null;
stdoutTail?: string;
stderrTail?: string;
};
type SummaryStats = {
@@ -328,7 +330,7 @@ function runCase(params: {
...process.env,
OPENCLAW_HIDE_BANNER: "1",
},
stdio: ["ignore", "ignore", "pipe"],
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf8",
timeout: params.timeoutMs,
maxBuffer: 32 * 1024 * 1024,
@@ -342,11 +344,21 @@ function runCase(params: {
maxRssMb: parseMaxRssMb(proc.stderr ?? ""),
exitCode: proc.status,
signal: proc.signal,
...(proc.status === 0
? {}
: {
stdoutTail: tailLines(proc.stdout ?? "", 20),
stderrTail: tailLines(proc.stderr ?? "", 20),
}),
});
}
return samples;
}
// Returns the last `maxLines` non-empty lines of `value`, joined with "\n".
// Blank lines are dropped first, so the tail contains only meaningful output
// (used to attach stdout/stderr tails to failed samples).
function tailLines(value: string, maxLines: number): string {
return value.split(/\r?\n/).filter(Boolean).slice(-maxLines).join("\n");
}
function printSuite(result: SuiteResult): void {
console.log(`Entry: ${result.entry}`);
for (const commandCase of result.cases) {

View File

@@ -0,0 +1,252 @@
#!/usr/bin/env node
import fs from "node:fs";
import path from "node:path";
import process from "node:process";
/**
 * Parses CLI flags into resolved option paths.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{ sourceDir: string, output: string | null }} Absolute paths.
 * @throws {Error} On an unknown flag, a flag missing its value, or when the
 *   required --source-dir is absent.
 */
function parseArgs(argv) {
  const options = { sourceDir: null, output: null };
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    const readValue = () => {
      const value = argv[index + 1];
      // Compare against undefined: an explicit empty-string value must not be
      // misreported as a missing value (the old `!value` check conflated them).
      if (value === undefined) {
        throw new Error(`Missing value for ${arg}`);
      }
      index += 1;
      return value;
    };
    switch (arg) {
      case "--source-dir":
        options.sourceDir = path.resolve(readValue());
        break;
      case "--output":
        options.output = path.resolve(readValue());
        break;
      case "--help":
        printHelp();
        process.exit(0);
        break;
      default:
        throw new Error(`Unknown argument: ${arg}`);
    }
  }
  if (!options.sourceDir) {
    throw new Error("--source-dir is required");
  }
  return options;
}
/** Prints CLI usage to stdout. */
function printHelp() {
  const usage = [
    "Usage: node scripts/openclaw-performance-source-summary.mjs --source-dir <dir> [--output <summary.md>]",
    "Summarizes OpenClaw-native performance probe artifacts for CI reports.",
  ].join("\n");
  console.log(usage);
}
/**
 * Reads and parses a JSON file, returning null when the file does not exist.
 *
 * Uses a single read with an ENOENT catch instead of existsSync + readFileSync,
 * avoiding the stat/read race window and a redundant filesystem hit.
 *
 * @param {string} filePath - Path to the JSON artifact.
 * @returns {unknown | null} Parsed JSON, or null when the file is absent.
 * @throws {SyntaxError} When the file exists but contains invalid JSON.
 */
function readJsonIfExists(filePath) {
  let raw;
  try {
    raw = fs.readFileSync(filePath, "utf8");
  } catch (error) {
    if (error?.code === "ENOENT") {
      return null;
    }
    throw error;
  }
  return JSON.parse(raw);
}
/** Formats a finite number as "12.3ms"; anything else becomes "n/a". */
function formatMs(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return "n/a";
  }
  return `${value.toFixed(1)}ms`;
}
/** Formats a finite number as "12.3MB"; anything else becomes "n/a". */
function formatMb(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return "n/a";
  }
  return `${value.toFixed(1)}MB`;
}
/** Converts a finite byte count to mebibytes and formats it via formatMb. */
function formatBytesAsMb(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return "n/a";
  }
  return formatMb(value / 1024 / 1024);
}
/** Formats a finite number with three decimals; anything else is "n/a". */
function formatRatio(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return "n/a";
  }
  return value.toFixed(3);
}
/** Returns stats[key] when it is a number, else null (tolerates nullish stats). */
function metric(stats, key = "p50") {
  if (!stats) {
    return null;
  }
  const candidate = stats[key];
  return typeof candidate === "number" ? candidate : null;
}
/**
 * Sanitizes a value for use inside a Markdown table cell.
 *
 * Escapes pipe characters and flattens line breaks to spaces — a raw newline
 * in a cell would terminate the table row and corrupt the layout.
 *
 * @param {unknown} value - Cell content; stringified before escaping.
 * @returns {string} A single-line, pipe-safe cell string.
 */
function escapeCell(value) {
  return String(value)
    .replaceAll("|", "\\|")
    .replace(/\r\n|\r|\n/g, " ");
}
/**
 * Renders a Markdown table (as an array of lines ending with "") from headers
 * and row arrays; returns a "No data." placeholder when rows is empty.
 */
function table(headers, rows) {
  if (rows.length === 0) {
    return ["No data.", ""];
  }
  const headerLine = `| ${headers.join(" | ")} |`;
  const dividerLine = `| ${headers.map(() => "---").join(" | ")} |`;
  const bodyLines = rows.map((row) => {
    const cells = row.map((cell) => escapeCell(cell));
    return `| ${cells.join(" | ")} |`;
  });
  return [headerLine, dividerLine, ...bodyLines, ""];
}
/**
 * Collects per-run QA suite summaries from <sourceDir>/mock-hello/<run>/,
 * skipping runs without a qa-suite-summary.json, sorted by run directory name.
 */
function loadMockHelloSummaries(sourceDir) {
  const root = path.join(sourceDir, "mock-hello");
  if (!fs.existsSync(root)) {
    return [];
  }
  const runs = [];
  for (const entry of fs.readdirSync(root, { withFileTypes: true })) {
    if (!entry.isDirectory()) {
      continue;
    }
    const summary = readJsonIfExists(path.join(root, entry.name, "qa-suite-summary.json"));
    if (summary != null) {
      runs.push({ id: entry.name, summary });
    }
  }
  return runs.toSorted((a, b) => a.id.localeCompare(b.id));
}
/** Maps gateway startup bench results to Markdown table rows. */
function buildStartupRows(startup) {
  const results = startup?.results ?? [];
  return results.map((result) => {
    const stats = result.summary;
    const caseId = result.id ?? "unknown";
    return [
      caseId,
      result.name ?? caseId,
      formatMs(metric(stats?.readyzMs)),
      formatMs(metric(stats?.readyzMs, "p95")),
      formatMs(metric(stats?.healthzMs)),
      formatMs(metric(stats?.readyLogMs)),
      formatMs(metric(stats?.firstOutputMs)),
      formatMb(metric(stats?.maxRssMb, "p95")),
      formatRatio(metric(stats?.cpuCoreRatio, "p95")),
    ];
  });
}
/**
 * For each startup case, emits rows for its five slowest startup-trace phases
 * (ranked by p50, descending).
 */
function buildTraceRows(startup) {
  const rows = [];
  for (const result of startup?.results ?? []) {
    const trace = result.summary?.startupTrace ?? {};
    const ranked = Object.entries(trace)
      .filter(([, stats]) => typeof stats?.p50 === "number")
      .toSorted((left, right) => (right[1].p50 ?? 0) - (left[1].p50 ?? 0))
      .slice(0, 5);
    const caseId = result.id ?? "unknown";
    for (const [phase, stats] of ranked) {
      rows.push([caseId, phase, formatMs(stats.p50), formatMs(stats.p95)]);
    }
  }
  return rows;
}
/** Maps mock-hello run summaries to Markdown table rows (status, counts, metrics). */
function buildMockHelloRows(summaries) {
  const rows = [];
  for (const { id, summary } of summaries) {
    const failed = summary?.counts?.failed;
    const status = typeof failed === "number" && failed > 0 ? "fail" : "pass";
    let counts = "n/a";
    if (summary?.counts) {
      counts = `${summary.counts.passed ?? 0}/${summary.counts.total ?? 0}`;
    }
    const metrics = summary?.metrics;
    rows.push([
      id,
      status,
      counts,
      formatMs(metrics?.wallMs),
      formatRatio(metrics?.gatewayCpuCoreRatio),
      formatBytesAsMb(metrics?.gatewayProcessRssStartBytes),
      formatBytesAsMb(metrics?.gatewayProcessRssEndBytes),
      formatBytesAsMb(metrics?.gatewayProcessRssDeltaBytes),
      summary?.run?.primaryModel ?? "n/a",
    ]);
  }
  return rows;
}
/** Maps CLI startup bench cases to Markdown table rows. */
function buildCliRows(cli) {
  const cases = cli?.primary?.cases ?? [];
  return cases.map((entry) => {
    const caseId = entry.id ?? "unknown";
    const duration = entry.summary?.durationMs;
    return [
      caseId,
      entry.name ?? caseId,
      formatMs(duration?.p50),
      formatMs(duration?.p95),
      formatMb(entry.summary?.maxRssMb?.p95),
      entry.summary?.exitSummary ?? "n/a",
    ];
  });
}
/** Maps gateway summary observations to Markdown table rows. */
function buildObservationRows(summary) {
  const observations = summary?.observations ?? [];
  return observations.map((observation) => {
    const cpu = observation.cpuCoreRatio ?? observation.cpuCoreRatioMax;
    const wall = observation.wallMs ?? observation.wallMsMax;
    return [
      observation.kind ?? "unknown",
      observation.id ?? "unknown",
      formatRatio(cpu),
      formatMs(wall),
    ];
  });
}
/**
 * Assembles the full Markdown report from the probe artifacts under
 * `sourceDir`. Missing artifact files simply render as "No data." tables.
 */
function buildMarkdown(sourceDir) {
  const gatewaySummary = readJsonIfExists(path.join(sourceDir, "gateway-cpu", "summary.json"));
  const startup = readJsonIfExists(
    path.join(sourceDir, "gateway-cpu", "gateway-startup-bench.json"),
  );
  const cli = readJsonIfExists(path.join(sourceDir, "cli-startup.json"));
  const mockHelloSummaries = loadMockHelloSummaries(sourceDir);

  const lines = [];
  lines.push("# OpenClaw Source Performance", "");
  lines.push(`Generated: ${new Date().toISOString()}`, "");

  lines.push("## Gateway Boot", "");
  lines.push(
    ...table(
      [
        "case",
        "name",
        "readyz p50",
        "readyz p95",
        "healthz p50",
        "ready log p50",
        "first output p50",
        "RSS p95",
        "CPU core p95",
      ],
      buildStartupRows(startup),
    ),
  );

  lines.push("## Startup Hotspots", "");
  lines.push(...table(["case", "phase", "p50", "p95"], buildTraceRows(startup)));

  lines.push("## Fake Model Hello Loops", "");
  lines.push(
    ...table(
      [
        "run",
        "status",
        "pass",
        "wall",
        "gateway CPU core",
        "RSS start",
        "RSS end",
        "RSS delta",
        "model",
      ],
      buildMockHelloRows(mockHelloSummaries),
    ),
  );

  lines.push("## CLI Against Booted Gateway", "");
  lines.push(
    ...table(
      ["case", "command", "duration p50", "duration p95", "RSS p95", "exits"],
      buildCliRows(cli),
    ),
  );

  lines.push("## Observations", "");
  lines.push(...table(["kind", "id", "CPU core", "wall"], buildObservationRows(gatewaySummary)));

  return `${lines.join("\n")}\n`;
}
/**
 * Entry point: parse flags, build the Markdown summary, and either write it
 * to --output (creating parent directories) or stream it to stdout.
 */
async function main() {
  const options = parseArgs(process.argv.slice(2));
  const markdown = buildMarkdown(options.sourceDir);
  if (!options.output) {
    process.stdout.write(markdown);
    return;
  }
  fs.mkdirSync(path.dirname(options.output), { recursive: true });
  fs.writeFileSync(options.output, markdown, "utf8");
}

main().catch((error) => {
  console.error(error instanceof Error ? error.stack : String(error));
  process.exitCode = 1;
});