diff --git a/scripts/bench-cli-startup.ts b/scripts/bench-cli-startup.ts
index 8bf6c11b1ac..bce59171f88 100644
--- a/scripts/bench-cli-startup.ts
+++ b/scripts/bench-cli-startup.ts
@@ -10,6 +10,8 @@ type CommandCase = {
   name: string;
   args: string[];
   presets: readonly string[];
+  expectedExitCodes?: readonly number[];
+  expectedNonzeroOutputIncludes?: readonly string[];
   firstOutputBudgetMs?: number;
   exitBudgetMs?: number;
 };
@@ -46,6 +48,8 @@ type SuiteResult = {
     id: string;
     name: string;
     args: string[];
+    expectedExitCodes?: number[];
+    expectedNonzeroOutputIncludes?: string[];
     contract: {
       firstOutputBudgetMs: number | null;
       exitBudgetMs: number | null;
@@ -307,8 +311,22 @@ const COMMAND_CASES: readonly CommandCase[] = [
     firstOutputBudgetMs: 2_500,
     exitBudgetMs: 6_000,
   },
-  { id: "health", name: "health", args: ["health"], presets: ["startup", "real"] },
-  { id: "healthJson", name: "health --json", args: ["health", "--json"], presets: ["startup"] },
+  {
+    id: "health",
+    name: "health",
+    args: ["health"],
+    presets: ["startup", "real"],
+    expectedExitCodes: [0, 1],
+    expectedNonzeroOutputIncludes: ["Gateway target:"],
+  },
+  {
+    id: "healthJson",
+    name: "health --json",
+    args: ["health", "--json"],
+    presets: ["startup"],
+    expectedExitCodes: [0, 1],
+    expectedNonzeroOutputIncludes: ['"ok"', '"gateway_transport_error"'],
+  },
   {
     id: "statusJson",
     name: "status --json",
@@ -364,12 +382,16 @@ const COMMAND_CASES: readonly CommandCase[] = [
     name: "gateway health --json",
     args: ["gateway", "health", "--json"],
     presets: ["real"],
+    expectedExitCodes: [0, 1],
+    expectedNonzeroOutputIncludes: ['"ok"', '"gateway_transport_error"'],
   },
   {
     id: "configGetGatewayPort",
     name: "config get gateway.port",
     args: ["config", "get", "gateway.port"],
     presets: ["real"],
+    expectedExitCodes: [0, 1],
+    expectedNonzeroOutputIncludes: ["Config path not found: gateway.port"],
   },
 ] as const;
 
@@ -759,10 +781,23 @@ export function collectFailedSamples(result: SuiteResult): string[] {
     }
     for (const [sampleIndex, sample] of commandCase.samples.entries()) {
       const label = `${result.entry} ${commandCase.id} sample ${sampleIndex + 1}`;
+      const expectedExitCodes = new Set(commandCase.expectedExitCodes ?? [0]);
       if (sample.signal !== null) {
         failures.push(`${label}: exited via signal ${sample.signal}`);
-      } else if (sample.exitCode !== 0) {
+      } else if (!expectedExitCodes.has(sample.exitCode ?? -1)) {
         failures.push(`${label}: exited with code ${String(sample.exitCode)}`);
+      } else if (sample.exitCode !== 0) {
+        const output = `${sample.stdoutTail ?? ""}\n${sample.stderrTail ?? ""}`;
+        const missing = (commandCase.expectedNonzeroOutputIncludes ?? []).filter(
+          (snippet) => !output.includes(snippet),
+        );
+        if (missing.length > 0) {
+          failures.push(
+            `${label}: exited with expected code ${String(
+              sample.exitCode,
+            )} but output did not match expected clean-state markers (${missing.join(", ")})`,
+          );
+        }
       }
     }
   }
@@ -790,6 +825,12 @@ async function buildSuiteResult(params: {
       id: commandCase.id,
       name: commandCase.name,
       args: commandCase.args,
+      ...(commandCase.expectedExitCodes && commandCase.expectedExitCodes.some((code) => code !== 0)
+        ? { expectedExitCodes: [...commandCase.expectedExitCodes] }
+        : {}),
+      ...(commandCase.expectedNonzeroOutputIncludes
+        ? { expectedNonzeroOutputIncludes: [...commandCase.expectedNonzeroOutputIncludes] }
+        : {}),
       contract:
         commandCase.firstOutputBudgetMs != null || commandCase.exitBudgetMs != null
           ? {
@@ -819,11 +860,7 @@ function parseOptions(): CliOptions {
     entrySecondary: parseFlagValue("--entry-secondary"),
     runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS, "--runs"),
     warmup: parseNonNegativeInt(parseFlagValue("--warmup"), DEFAULT_WARMUP, "--warmup"),
-    timeoutMs: parsePositiveInt(
-      parseFlagValue("--timeout-ms"),
-      DEFAULT_TIMEOUT_MS,
-      "--timeout-ms",
-    ),
+    timeoutMs: parsePositiveInt(parseFlagValue("--timeout-ms"), DEFAULT_TIMEOUT_MS, "--timeout-ms"),
     json: hasFlag("--json"),
     output: parseFlagValue("--output"),
     cpuProfDir: parseFlagValue("--cpu-prof-dir"),
diff --git a/scripts/test-cli-startup-bench-budget.mjs b/scripts/test-cli-startup-bench-budget.mjs
index 9f56d48249f..86beceae28a 100644
--- a/scripts/test-cli-startup-bench-budget.mjs
+++ b/scripts/test-cli-startup-bench-budget.mjs
@@ -86,6 +86,14 @@ function resolveCurrentReportPath() {
   if (opts.report) {
     return opts.report;
   }
+  const build = spawnSync(process.execPath, ["scripts/ensure-cli-startup-build.mjs"], {
+    cwd: process.cwd(),
+    stdio: "inherit",
+    env: process.env,
+  });
+  if (build.status !== 0) {
+    process.exit(build.status ?? 1);
+  }
   const reportPath = `.artifacts/cli-startup-bench.current.json`;
   fs.mkdirSync(".artifacts", { recursive: true });
   const args = [
diff --git a/test/scripts/bench-cli-startup.test.ts b/test/scripts/bench-cli-startup.test.ts
index 7815e94fa36..53c6465c7a7 100644
--- a/test/scripts/bench-cli-startup.test.ts
+++ b/test/scripts/bench-cli-startup.test.ts
@@ -65,6 +65,78 @@ describe("bench-cli-startup", () => {
     ]);
   });
 
+  it("allows declared nonzero exit codes for clean-state probes", () => { // accepted case: declared code + marker present
+    const sample = {
+      ms: 10,
+      firstOutputMs: 5,
+      maxRssMb: 50,
+      exitCode: 1, // nonzero, but listed in expectedExitCodes of the case below
+      signal: null, // no signal, so the exit-code checks are the ones exercised
+      stderrTail: "Health check failed: gateway closed\n  Gateway target: ws://127.0.0.1:18789",
+    }; // stderrTail above contains the "Gateway target:" marker the case requires
+
+    expect(
+      testing.collectFailedSamples({
+        entry: "openclaw.mjs",
+        cases: [
+          {
+            id: "health",
+            name: "health",
+            args: ["health"],
+            expectedExitCodes: [0, 1], // exit code 1 is explicitly allowed for this case
+            expectedNonzeroOutputIncludes: ["Gateway target:"], // snippet that must appear in the output of a nonzero exit
+            contract: null,
+            samples: [sample],
+            summary: {
+              sampleCount: 1,
+              durationMs: { avg: 10, p50: 10, p95: 10, min: 10, max: 10 },
+              firstOutputMs: { avg: 5, p50: 5, p95: 5, min: 5, max: 5 },
+              maxRssMb: { avg: 50, p50: 50, p95: 50, min: 50, max: 50 },
+              exitSummary: "code:1x1",
+            },
+          },
+        ],
+      }),
+    ).toEqual([]); // empty list: the sample is not reported as a failure
+  });
+
+  it("rejects allowed nonzero exits without their expected clean-state output", () => { // rejected case: declared code, marker missing
+    const sample = {
+      ms: 10,
+      firstOutputMs: 5,
+      maxRssMb: 50,
+      exitCode: 1, // allowed by expectedExitCodes below, so only the output check can fail
+      signal: null,
+      stderrTail: "TypeError: crashed before output", // does not contain "Gateway target:"
+    };
+
+    expect(
+      testing.collectFailedSamples({
+        entry: "openclaw.mjs",
+        cases: [
+          {
+            id: "health",
+            name: "health",
+            args: ["health"],
+            expectedExitCodes: [0, 1],
+            expectedNonzeroOutputIncludes: ["Gateway target:"], // required snippet that the sample output lacks
+            contract: null,
+            samples: [sample],
+            summary: {
+              sampleCount: 1,
+              durationMs: { avg: 10, p50: 10, p95: 10, min: 10, max: 10 },
+              firstOutputMs: { avg: 5, p50: 5, p95: 5, min: 5, max: 5 },
+              maxRssMb: { avg: 50, p50: 50, p95: 50, min: 50, max: 50 },
+              exitSummary: "code:1x1",
+            },
+          },
+        ],
+      }),
+    ).toEqual([ // exactly one failure, naming the entry, case id, sample number and missing snippet
+      "openclaw.mjs health sample 1: exited with expected code 1 but output did not match expected clean-state markers (Gateway target:)",
+    ]);
+  });
+
   it("rejects invalid measured run counts", () => {
     expect(() => testing.parsePositiveInt("0", 5, "--runs")).toThrow(
       "--runs must be an integer >= 1",
diff --git a/test/scripts/cli-startup-bench-spawner.test.ts b/test/scripts/cli-startup-bench-spawner.test.ts
index 1a692749145..ccf32d6aabf 100644
--- a/test/scripts/cli-startup-bench-spawner.test.ts
+++ b/test/scripts/cli-startup-bench-spawner.test.ts
@@ -19,6 +19,20 @@ describe("CLI startup benchmark script spawners", () => {
     }
   });
 
+  it("builds the source CLI before generating a startup budget report", () => { // text-level check; the script is read, not executed
+    const source = fs.readFileSync(
+      path.resolve(process.cwd(), "scripts/test-cli-startup-bench-budget.mjs"), // resolved against the current working directory
+      "utf8",
+    );
+
+    expect(source).toContain( // the script text must include this exact spawnSync call prefix
+      'spawnSync(process.execPath, ["scripts/ensure-cli-startup-build.mjs"]',
+    );
+    expect(source.indexOf("scripts/ensure-cli-startup-build.mjs")).toBeLessThan( // first mention of the build script...
+      source.indexOf("scripts/bench-cli-startup.ts"), // ...must come before the first mention of the benchmark script
+    );
+  });
+
   it("does not require unrelated fixture cases for a narrowed preset", () => {
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-bench-budget-test-"));
     try {