Harden Codex harness control surfaces (#77459)

* fix(scripts): find codex protocol source from worktrees

* fix(test): keep codex harness docker caches writable

* fix(test): relax live codex cache mount permissions

* test(codex): add live docker harness debug output

* fix(test): detect numeric ci env in codex docker harness

* fix(codex): skip duplicate agent-command telemetry

* fix(tooling): skip sparse-missing oxlint tsconfig

* fix(tooling): route changed checks through testbox

* fix(qa): keep coverage json source-clean

* fix(test): preflight codex docker auth

* fix(codex): validate bind option values

* fix(codex): parse quoted command arguments

* fix(codex): reject extra control args

* fix(codex): use content for blank bound prompts

* fix(codex): decode local image file urls

* fix(codex): treat local media urls as images

* fix(codex): keep windows media paths local

* fix(codex): reject malformed diagnostics confirmations

* fix(codex): reject malformed resume commands

* fix(codex): reject malformed thread actions

* fix(codex): reject malformed turn controls

* fix(codex): reject malformed model controls

* fix(codex): resolve empty user input prompts

* fix(codex): enforce user input options

* fix(codex): reject ambiguous computer-use actions

* fix(codex): ignore stale bound turn notifications

* test(gateway): close task registries in gateway harness

* test(gateway): route cleanup through task seams

* fix(codex): describe current permission approvals

* fix(codex): disclose command approval amendments

* fix(codex): preserve approval detail under truncation

* fix(codex): propagate dynamic tool failures

* test(codex): align dynamic tool block contract

* fix(codex): reject extra read-only command operands

* fix(codex): escape command readout fields

* fix(codex): escape status probe errors

* fix(codex): narrow formatted thread details

* fix(codex): escape successful status summaries

* fix(codex): escape bound control replies

* fix(codex): escape user input prompts

* fix(codex): escape control failure replies

* fix(codex): escape approval prompt text

* test(codex): narrow escaped reply assertions

* test(codex): complete strict reply fixtures

* test(codex): preserve account fixture literals

* test(codex): align status probe fixtures

* fix(codex): satisfy sanitizer regex lint

* fix(codex): harden command readouts

* fix(codex): harden bound image inputs

* fix(codex): sanitize command failure replies

* test(codex): complete rate limit fixture

* test(tooling): isolate postinstall compile cache fixture

* fix(codex): keep app-server event ownership explicit

---------

Co-authored-by: pashpashpash <nik@vault77.ai>
This commit is contained in:
Vincent Koc
2026-05-04 15:23:41 -07:00
committed by GitHub
parent b3e42bf327
commit ac3cd1a0ca
42 changed files with 2672 additions and 245 deletions

View File

@@ -1031,14 +1031,6 @@ async function agentCommandInternal(
currentTurnUserMessagePersisted = true;
},
onAgentEvent: (evt) => {
if (evt.stream.startsWith("codex_app_server.")) {
emitAgentEvent({
runId,
stream: evt.stream,
data: evt.data ?? {},
...(evt.sessionKey ? { sessionKey: evt.sessionKey } : {}),
});
}
if (
evt.stream === "lifecycle" &&
typeof evt.data?.phase === "string" &&

View File

@@ -1176,7 +1176,7 @@ describe("runAgentTurnWithFallback", () => {
});
});
it("publishes Codex app-server telemetry to agent event subscribers", async () => {
it("leaves Codex app-server telemetry publication to the harness", async () => {
const agentEvents = await import("../../infra/agent-events.js");
const emitAgentEvent = vi.mocked(agentEvents.emitAgentEvent);
state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
@@ -1217,15 +1217,12 @@ describe("runAgentTurnWithFallback", () => {
});
expect(result.kind).toBe("success");
expect(emitAgentEvent).toHaveBeenCalledWith({
runId: "run-codex",
stream: "codex_app_server.guardian",
sessionKey: "agent:main:subagent:codex-child",
data: {
phase: "blocked",
message: "command requires approval",
},
});
expect(emitAgentEvent).not.toHaveBeenCalledWith(
expect.objectContaining({
runId: "run-codex",
stream: "codex_app_server.guardian",
}),
);
});
it("emits an embedded lifecycle terminal backstop when the runner returns without one", async () => {

View File

@@ -1509,14 +1509,6 @@ export async function runAgentTurnWithFallback(params: {
onReasoningEnd: params.opts?.onReasoningEnd,
onAgentEvent: async (evt) => {
lifecycleBackstop.note(evt);
if (evt.stream.startsWith("codex_app_server.")) {
emitAgentEvent({
runId,
stream: evt.stream,
data: evt.data,
...(evt.sessionKey ? { sessionKey: evt.sessionKey } : {}),
});
}
// Signal run start only after the embedded agent emits real activity.
const hasLifecyclePhase =
evt.stream === "lifecycle" && typeof evt.data.phase === "string";

View File

@@ -15,7 +15,7 @@ import {
import type { SessionEntry } from "../../config/sessions.js";
import type { TypingMode } from "../../config/types.js";
import { logVerbose } from "../../globals.js";
import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js";
import { registerAgentRunContext } from "../../infra/agent-events.js";
import { formatErrorMessage } from "../../infra/errors.js";
import { defaultRuntime } from "../../runtime.js";
import { isInternalMessageChannel } from "../../utils/message-channel.js";
@@ -332,14 +332,6 @@ export function createFollowupRunner(params: {
bootstrapPromptWarningSignaturesSeen.length - 1
],
onAgentEvent: (evt) => {
if (evt.stream.startsWith("codex_app_server.")) {
emitAgentEvent({
runId,
stream: evt.stream,
data: evt.data,
...(evt.sessionKey ? { sessionKey: evt.sessionKey } : {}),
});
}
if (evt.stream !== "compaction") {
return;
}

View File

@@ -588,6 +588,44 @@ describe("agentCommand", () => {
});
});
// Regression guard: a `codex_app_server.*` event delivered to the core
// command's `onAgentEvent` callback must not reach listeners registered via
// `onAgentEvent(...)` in this test.
it("does not publish Codex app-server events from the core command callback", async () => {
  await withTempHome(async (home) => {
    const store = path.join(home, "sessions.json");
    mockConfig(home, store);

    // Collect every `codex_app_server.lifecycle` event the listener observes.
    const codexEvents: Array<{ runId: string; phase?: string }> = [];
    const stop = onAgentEvent((evt) => {
      if (evt.stream !== "codex_app_server.lifecycle") {
        return;
      }
      codexEvents.push({
        runId: evt.runId,
        phase: typeof evt.data?.phase === "string" ? evt.data.phase : undefined,
      });
    });

    try {
      // The mocked runner invokes the callback it was handed with a Codex
      // app-server lifecycle event, then returns a minimal successful result.
      vi.mocked(runEmbeddedPiAgent).mockImplementationOnce(async (params) => {
        (
          params as {
            onAgentEvent?: (evt: { stream: string; data: Record<string, unknown> }) => void;
          }
        ).onAgentEvent?.({
          stream: "codex_app_server.lifecycle",
          data: { phase: "startup" },
        });
        return {
          payloads: [{ text: "hello" }],
          meta: { agentMeta: { provider: "p", model: "m" } },
        } as never;
      });

      await agentCommand({ message: "hi", to: "+1555", thinking: "low" }, runtime);
    } finally {
      // Release the listener even when the command rejects, so a failure here
      // cannot leave it registered for the tests that run afterwards.
      stop();
    }

    expect(codexEvents).toHaveLength(0);
  });
});
it("uses default fallback list for auto session model overrides", async () => {
await withTempHome(async (home) => {
const store = path.join(home, "sessions.json");

View File

@@ -28,6 +28,8 @@ import {
} from "../routing/session-key.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { normalizeOptionalString } from "../shared/string-coerce.js";
import { resetTaskRegistryForTests } from "../tasks/runtime-internal.js";
import { resetTaskFlowRegistryForTests } from "../tasks/task-flow-runtime-internal.js";
import { captureEnv } from "../test-utils/env.js";
import { getDeterministicFreePortBlock } from "../test-utils/ports.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
@@ -254,6 +256,8 @@ async function resetGatewayTestState(options: { uniqueConfigRoot: boolean }) {
}
applyGatewaySkipEnv();
delete process.env.OPENCLAW_GATEWAY_TOKEN;
resetTaskRegistryForTests({ persist: false });
resetTaskFlowRegistryForTests({ persist: false });
const stateDir = process.env.OPENCLAW_STATE_DIR;
if (stateDir) {
await fs.rm(stateDir, {
@@ -365,6 +369,8 @@ async function cleanupGatewayTestHome(options: { restoreEnv: boolean }) {
vi.useRealTimers();
clearGatewaySubagentRuntime();
resetLogger();
resetTaskRegistryForTests({ persist: false });
resetTaskFlowRegistryForTests({ persist: false });
if (options.restoreEnv) {
gatewayEnvSnapshot?.restore();
gatewayEnvSnapshot = undefined;

View File

@@ -834,6 +834,46 @@ describe("run-node script", () => {
});
});
// `qa coverage --json` is expected to spawn exactly one process: the tsx-loaded
// coverage report script from the project's `scripts/` directory.
it("runs QA coverage report from source without rebuilding private QA dist", async () => {
  await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
    await setupTrackedProject(tmp, {
      files: { "extensions/qa-lab/src/cli.runtime.ts": "export {};\n" },
      buildPaths: [DIST_ENTRY, BUILD_STAMP],
    });

    // Spawn stub: records each invocation as [command, ...args] and reports
    // an already-exited process with exit code 0.
    const recordedInvocations: string[][] = [];
    const recordingSpawn = (cmd: string, args: string[]) => {
      recordedInvocations.push([cmd, ...args]);
      return createExitedProcess(0);
    };

    const exitCode = await runNodeMain({
      cwd: tmp,
      args: ["qa", "coverage", "--json"],
      env: { ...process.env, OPENCLAW_RUNNER_LOG: "0" },
      spawn: recordingSpawn,
      execPath: process.execPath,
      platform: process.platform,
    });

    const reportScript = path.join(tmp, "scripts", "qa-coverage-report.ts");
    const expectedInvocation = [process.execPath, "--import", "tsx", reportScript, "--json"];

    expect(exitCode).toBe(0);
    expect(recordedInvocations).toEqual([expectedInvocation]);
  });
});
it("skips runtime postbuild restaging when the runtime stamp is current", async () => {
await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
await setupTrackedProject(tmp, {