feat(agents): support Codex app-server runs

This commit is contained in:
Peter Steinberger
2026-04-24 04:17:07 +01:00
parent 5d0887574b
commit 553162c998
11 changed files with 284 additions and 17 deletions

View File

@@ -906,6 +906,13 @@ async function agentCommandInternal(
sessionHasHistory:
!isNewSession || (await attemptExecutionRuntime.sessionFileHasContent(sessionFile)),
onAgentEvent: (evt) => {
if (evt.stream.startsWith("codex_app_server.")) {
emitAgentEvent({
runId,
stream: evt.stream,
data: evt.data ?? {},
});
}
if (
evt.stream === "lifecycle" &&
typeof evt.data?.phase === "string" &&

View File

@@ -439,6 +439,55 @@ describe("embedded attempt harness pinning", () => {
);
});
// Regression case: a session entry that carries no `agentHarnessId` but is
// reported as having history must be run with the harness named in config
// (`embeddedHarness.runtime: "codex"`) rather than being pinned to "pi".
it("pins sessions with history to the configured Codex harness instead of PI", async () => {
// The entry deliberately omits `agentHarnessId`, so nothing is pinned on it.
const sessionEntry: SessionEntry = {
sessionId: "codex-history-session",
updatedAt: Date.now(),
};
// Stub the embedded runner for this single call with a minimal result.
runEmbeddedPiAgentMock.mockResolvedValueOnce({
meta: { durationMs: 1 },
} satisfies EmbeddedPiRunResult);
await runAgentAttempt({
providerOverride: "codex",
modelOverride: "gpt-5.4",
// Config selects the "codex" harness explicitly with no fallback.
cfg: {
agents: {
defaults: {
embeddedHarness: { runtime: "codex", fallback: "none" },
},
},
} as OpenClawConfig,
sessionEntry,
// Same id as the entry, so the entry is treated as belonging to this session.
sessionId: sessionEntry.sessionId,
sessionKey: "agent:main:main",
sessionAgentId: "main",
sessionFile: path.join(tmpDir, "session.jsonl"),
workspaceDir: tmpDir,
body: "continue",
isFallbackRetry: false,
resolvedThinkLevel: "medium",
timeoutMs: 1_000,
runId: "run-codex-no-pi-pin",
opts: { senderIsOwner: false } as Parameters<typeof runAgentAttempt>[0]["opts"],
runContext: {} as Parameters<typeof runAgentAttempt>[0]["runContext"],
spawnedBy: undefined,
messageChannel: undefined,
skillsSnapshot: undefined,
resolvedVerboseLevel: undefined,
agentDir: tmpDir,
onAgentEvent: vi.fn(),
authProfileProvider: "codex",
// History is present; this is the input that would otherwise select "pi".
sessionHasHistory: true,
});
// The harness id forwarded to the embedded runner must be the configured one.
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
expect.objectContaining({
agentHarnessId: "codex",
}),
);
});
it("leaves a fresh unpinned session on config-selected harness resolution", async () => {
const sessionEntry: SessionEntry = {
sessionId: "fresh-session",

View File

@@ -14,6 +14,7 @@ import { resolveBootstrapWarningSignaturesSeen } from "../bootstrap-budget.js";
import { runCliAgent } from "../cli-runner.js";
import { getCliSessionBinding, setCliSessionBinding } from "../cli-session.js";
import { FailoverError } from "../failover-error.js";
import { resolveAgentHarnessPolicy } from "../harness/selection.js";
import { isCliProvider } from "../model-selection.js";
import { prepareSessionManagerForRun } from "../pi-embedded-runner/session-manager-init.js";
import { runEmbeddedPiAgent, type EmbeddedPiRunResult } from "../pi-embedded.js";
@@ -262,10 +263,14 @@ export function runAgentAttempt(params: {
);
const bootstrapPromptWarningSignature =
bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1];
const sessionPinnedAgentHarnessId =
params.sessionEntry?.sessionId === params.sessionId
? (params.sessionEntry.agentHarnessId ?? (params.sessionHasHistory ? "pi" : undefined))
: undefined;
const sessionPinnedAgentHarnessId = resolveSessionPinnedAgentHarnessId({
cfg: params.cfg,
sessionAgentId: params.sessionAgentId,
sessionEntry: params.sessionEntry,
sessionHasHistory: params.sessionHasHistory,
sessionId: params.sessionId,
sessionKey: params.sessionKey ?? params.sessionId,
});
const authProfileId =
params.providerOverride === params.authProfileProvider
? params.sessionEntry?.authProfileOverride
@@ -442,6 +447,43 @@ export function runAgentAttempt(params: {
});
}
/**
 * Picks the agent harness id that a run should be pinned to.
 *
 * Precedence:
 * 1. `sessionEntry.agentHarnessId`, when the entry belongs to `sessionId` and
 *    the value is set.
 * 2. The harness runtime explicitly configured for this agent/session key.
 * 3. `"pi"`, when the entry belongs to `sessionId` and the session already has
 *    history.
 *
 * When the entry is missing or belongs to another session, only step 2 applies.
 *
 * @param params.cfg - Configuration forwarded to the harness policy lookup.
 * @param params.sessionAgentId - Agent id forwarded to the harness policy lookup.
 * @param params.sessionEntry - Stored session entry, if any.
 * @param params.sessionHasHistory - Whether the session already has history.
 * @param params.sessionId - Id of the session being run.
 * @param params.sessionKey - Session key forwarded to the harness policy lookup.
 * @returns The harness id to pin, or `undefined` when nothing applies.
 */
function resolveSessionPinnedAgentHarnessId(params: {
  cfg: OpenClawConfig;
  sessionAgentId: string;
  sessionEntry?: SessionEntry;
  sessionHasHistory?: boolean;
  sessionId: string;
  sessionKey: string;
}): string | undefined {
  const { sessionEntry, sessionHasHistory, sessionId } = params;
  const entryMatchesSession = sessionEntry?.sessionId === sessionId;

  // A harness id stored on a matching entry always wins.
  const pinnedOnEntry = entryMatchesSession ? sessionEntry?.agentHarnessId : undefined;
  if (pinnedOnEntry) {
    return pinnedOnEntry;
  }

  // Without a matching entry the configured value is the final answer, even
  // when that answer is "nothing configured".
  const configured = resolveConfiguredAgentHarnessId(params);
  if (configured || !entryMatchesSession) {
    return configured;
  }

  // Matching entry, nothing pinned, nothing configured: only sessions that
  // already have history are pinned to "pi".
  return sessionHasHistory ? "pi" : undefined;
}
/**
 * Looks up the harness policy for the given agent/session key and returns its
 * runtime only when it names a specific harness.
 *
 * @param params.cfg - Configuration passed to the policy lookup as `config`.
 * @param params.sessionAgentId - Passed to the policy lookup as `agentId`.
 * @param params.sessionKey - Passed to the policy lookup as `sessionKey`.
 * @returns The policy runtime, or `undefined` when the runtime is `"auto"`.
 */
function resolveConfiguredAgentHarnessId(params: {
  cfg: OpenClawConfig;
  sessionAgentId: string;
  sessionKey: string;
}): string | undefined {
  const { runtime } = resolveAgentHarnessPolicy({
    config: params.cfg,
    agentId: params.sessionAgentId,
    sessionKey: params.sessionKey,
  });
  // "auto" means no explicit harness was chosen, so report nothing.
  if (runtime === "auto") {
    return undefined;
  }
  return runtime;
}
export function buildAcpResult(params: {
payloadText: string;
startedAt: number;

View File

@@ -1,5 +1,10 @@
import type { Api, Model } from "@mariozechner/pi-ai";
import type { AuthStorage, ModelRegistry } from "@mariozechner/pi-coding-agent";
import {
AuthStorage as PiAuthStorageClass,
ModelRegistry as PiModelRegistryClass,
type AuthStorage,
type ModelRegistry,
} from "@mariozechner/pi-coding-agent";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
import {
@@ -87,6 +92,21 @@ const STATIC_PROVIDER_RUNTIME_HOOKS: ProviderRuntimeHooks = {
normalizeProviderTransportWithPlugin: () => undefined,
};
/**
 * Builds a fresh auth storage and a model registry bound to it.
 *
 * Each store is created through the class's static `inMemory` factory when
 * that factory exists, and through `create` otherwise.
 * NOTE(review): `inMemory` is presumably absent on some versions of the PI
 * package, hence the runtime feature check — confirm.
 *
 * @returns The newly created `authStorage` and `modelRegistry`.
 */
function createEmptyPiDiscoveryStores(): {
  authStorage: AuthStorage;
  modelRegistry: ModelRegistry;
} {
  let authStorage: AuthStorage;
  if (typeof PiAuthStorageClass.inMemory === "function") {
    authStorage = PiAuthStorageClass.inMemory({});
  } else {
    authStorage = PiAuthStorageClass.create();
  }

  // The registry is always constructed on top of the storage created above.
  let modelRegistry: ModelRegistry;
  if (typeof PiModelRegistryClass.inMemory === "function") {
    modelRegistry = PiModelRegistryClass.inMemory(authStorage);
  } else {
    modelRegistry = PiModelRegistryClass.create(authStorage);
  }

  return { authStorage, modelRegistry };
}
function resolveRuntimeHooks(params?: {
runtimeHooks?: ProviderRuntimeHooks;
skipProviderRuntimeHooks?: boolean;
@@ -739,6 +759,7 @@ export async function resolveModelAsync(
retryTransientProviderRuntimeMiss?: boolean;
runtimeHooks?: ProviderRuntimeHooks;
skipProviderRuntimeHooks?: boolean;
skipPiDiscovery?: boolean;
},
): Promise<{
model?: Model<Api>;
@@ -751,8 +772,18 @@ export async function resolveModelAsync(
model: normalizeStaticProviderModelId(normalizeProviderId(provider), modelId),
};
const resolvedAgentDir = agentDir ?? resolveOpenClawAgentDir();
const authStorage = options?.authStorage ?? discoverAuthStorage(resolvedAgentDir);
const modelRegistry = options?.modelRegistry ?? discoverModels(authStorage, resolvedAgentDir);
const emptyDiscoveryStores =
options?.skipPiDiscovery && (!options.authStorage || !options.modelRegistry)
? createEmptyPiDiscoveryStores()
: undefined;
const authStorage =
options?.authStorage ??
emptyDiscoveryStores?.authStorage ??
discoverAuthStorage(resolvedAgentDir);
const modelRegistry =
options?.modelRegistry ??
emptyDiscoveryStores?.modelRegistry ??
discoverModels(authStorage, resolvedAgentDir);
const runtimeHooks = resolveRuntimeHooks(options);
const explicitModel = resolveExplicitModelWithRegistry({
provider: normalizedRef.provider,

View File

@@ -1,8 +1,9 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { AgentInternalEvent } from "../internal-events.js";
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
mockedGetApiKeyForModel,
mockedRunEmbeddedAttempt,
overflowBaseRunParams,
resetRunOverflowCompactionHarnessMocks,
@@ -61,4 +62,39 @@ describe("runEmbeddedPiAgent forwards optional params to runEmbeddedAttempt", ()
expect.objectContaining(forwardingCase.expected),
);
});
// Verifies that when a non-PI harness is selected, the generic API-key lookup
// is never invoked and the run is delegated to the plugin's `runAttempt`.
it("lets plugin harnesses own auth before the attempt runs", async () => {
const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js");
// Fake plugin attempt that resolves with a canned result.
const pluginRunAttempt = vi.fn(async () => makeAttemptResult({ assistantTexts: ["ok"] }));
// Start from an empty registry, then register a "codex" harness that only
// claims support for the "codex" provider.
clearAgentHarnesses();
registerAgentHarness({
id: "codex",
label: "Codex",
supports: (ctx) =>
ctx.provider === "codex" ? { supported: true, priority: 100 } : { supported: false },
runAttempt: pluginRunAttempt,
});
// Arm the generic auth mock to reject on its next call; the final assertion
// checks it was never called at all.
mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped"));
try {
await runEmbeddedPiAgent({
...overflowBaseRunParams,
provider: "codex",
model: "gpt-5.4",
// Config pins the runtime to "codex" with no fallback.
config: {
agents: {
defaults: {
embeddedHarness: { runtime: "codex", fallback: "none" },
},
},
},
runId: "plugin-harness-skips-generic-auth",
});
} finally {
// Always empty the harness registry again, even when the run throws.
clearAgentHarnesses();
}
expect(mockedGetApiKeyForModel).not.toHaveBeenCalled();
expect(pluginRunAttempt).toHaveBeenCalledWith(expect.objectContaining({ provider: "codex" }));
});
});

View File

@@ -21,6 +21,7 @@ import {
} from "../agent-scope.js";
import {
type AuthProfileFailureReason,
type AuthProfileStore,
markAuthProfileFailure,
resolveAuthProfileEligibility,
markAuthProfileGood,
@@ -38,6 +39,7 @@ import {
FailoverError,
resolveFailoverStatus,
} from "../failover-error.js";
import { selectAgentHarness } from "../harness/selection.js";
import { LiveSessionModelSwitchError } from "../live-model-switch-error.js";
import { shouldSwitchToLiveModel, clearLiveModelSwitchPending } from "../live-model-switch.js";
import {
@@ -140,6 +142,13 @@ type ApiKeyInfo = ResolvedProviderAuth;
const MAX_SAME_MODEL_IDLE_TIMEOUT_RETRIES = 1;
/**
 * Creates an auth profile store that contains no profiles.
 *
 * @returns A new store object with `version` 1 and an empty `profiles` map.
 */
function createEmptyAuthProfileStore(): AuthProfileStore {
  const emptyStore: AuthProfileStore = { version: 1, profiles: {} };
  return emptyStore;
}
function buildTraceToolSummary(params: {
toolMetas: Array<{ toolName: string; meta?: string }>;
hadFailure: boolean;
@@ -291,7 +300,6 @@ export async function runEmbeddedPiAgent(
agentId: params.agentId,
sessionKey: normalizedSessionKey,
});
await ensureOpenClawModelsJson(params.config, agentDir);
const resolvedSessionKey = normalizedSessionKey;
const hookRunner = getGlobalHookRunner();
const hookCtx = {
@@ -318,12 +326,28 @@ export async function runEmbeddedPiAgent(
provider = hookSelection.provider;
modelId = hookSelection.modelId;
const legacyBeforeAgentStartResult = hookSelection.legacyBeforeAgentStartResult;
const agentHarness = selectAgentHarness({
provider,
modelId,
config: params.config,
agentId: params.agentId,
sessionKey: params.sessionKey,
agentHarnessId: params.agentHarnessId,
});
const pluginHarnessOwnsTransport = agentHarness.id !== "pi";
if (!pluginHarnessOwnsTransport) {
await ensureOpenClawModelsJson(params.config, agentDir);
}
const { model, error, authStorage, modelRegistry } = await resolveModelAsync(
provider,
modelId,
agentDir,
params.config,
// Plugin harnesses may expose synthetic providers that PI cannot
// discover safely; resolve their model metadata without touching PI
// auth/model stores.
{ skipPiDiscovery: pluginHarnessOwnsTransport },
);
if (!model) {
throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
@@ -343,9 +367,11 @@ export async function runEmbeddedPiAgent(
const ctxInfo = resolvedRuntimeModel.ctxInfo;
let effectiveModel = resolvedRuntimeModel.effectiveModel;
const authStore = ensureAuthProfileStore(agentDir, {
allowKeychainPrompt: false,
});
const authStore = pluginHarnessOwnsTransport
? createEmptyAuthProfileStore()
: ensureAuthProfileStore(agentDir, {
allowKeychainPrompt: false,
});
const preferredProfileId = params.authProfileId?.trim();
let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
if (lockedProfileId) {
@@ -444,7 +470,12 @@ export async function runEmbeddedPiAgent(
log,
});
await initializeAuthProfile();
// Plugin harnesses own their model transport/auth. Running PI's generic
// auth bootstrap here can turn synthetic provider markers into real
// vendor-token refresh attempts before the plugin gets control.
if (!pluginHarnessOwnsTransport) {
await initializeAuthProfile();
}
const { sessionAgentId } = resolveSessionAgentIds({
sessionKey: params.sessionKey,
config: params.config,
@@ -731,7 +762,10 @@ export async function runEmbeddedPiAgent(
disableTools: params.disableTools,
provider,
modelId,
agentHarnessId: params.agentHarnessId,
// Use the harness selected before model/auth setup for the actual
// attempt too. Otherwise plugin-owned transports can skip PI auth
// bootstrap but drift back to PI when the attempt is created.
agentHarnessId: agentHarness.id,
model: applyAuthHeaderOverride(
applyLocalNoAuthHeaderOverride(effectiveModel, apiKeyInfo),
// When runtime auth exchange produced a different credential

View File

@@ -320,7 +320,10 @@ describe("buildAgentSystemPrompt", () => {
});
expect(prompt).toContain(
'For requests like "do this in codex/claude code/cursor/gemini" or similar ACP harnesses, treat it as ACP harness intent',
'For requests like "do this in claude code/cursor/gemini" or similar ACP harnesses, treat it as ACP harness intent',
);
expect(prompt).toContain(
"For Codex conversation binding/control, prefer the native Codex app-server plugin path",
);
expect(prompt).toContain(
'On Discord, default ACP harness requests to thread-bound persistent sessions (`thread: true`, `mode: "session"`)',

View File

@@ -705,7 +705,8 @@ export function buildAgentSystemPrompt(params: {
'Sub-agents start isolated by default. Use `sessions_spawn` with `context:"fork"` only when the child needs the current transcript context; otherwise omit `context` or use `context:"isolated"`.',
...(acpHarnessSpawnAllowed
? [
'For requests like "do this in codex/claude code/cursor/gemini" or similar ACP harnesses, treat it as ACP harness intent and call `sessions_spawn` with `runtime: "acp"`.',
'For requests like "do this in claude code/cursor/gemini" or similar ACP harnesses, treat it as ACP harness intent and call `sessions_spawn` with `runtime: "acp"`.',
"For Codex conversation binding/control, prefer the native Codex app-server plugin path (`/codex bind`, `/codex threads`, `/codex resume`). Use ACP for Codex only when the user explicitly asks for ACP/`/acp`, or for background child sessions where native Codex runtime spawn is not exposed.",
'On Discord, default ACP harness requests to thread-bound persistent sessions (`thread: true`, `mode: "session"`) unless the user asks otherwise.',
"Set `agentId` explicitly unless `acp.defaultAgent` is configured, and do not route ACP harness requests through `subagents`/`agents_list` or local PTY exec flows.",
'For ACP harness thread spawns, do not call `message` with `action=thread-create`; use `sessions_spawn` (`runtime: "acp"`, `thread: true`) as the single thread creation path.',

View File

@@ -707,6 +707,56 @@ describe("runAgentTurnWithFallback", () => {
});
});
// Verifies that an event whose stream starts with "codex_app_server." and is
// raised through `onAgentEvent` is re-published via `emitAgentEvent`.
it("publishes Codex app-server telemetry to agent event subscribers", async () => {
const agentEvents = await import("../../infra/agent-events.js");
const emitAgentEvent = vi.mocked(agentEvents.emitAgentEvent);
// The mocked embedded run fires one Codex app-server event through the
// caller-supplied callback before returning its final payload.
state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
await params.onAgentEvent?.({
stream: "codex_app_server.guardian",
data: {
phase: "blocked",
message: "command requires approval",
},
});
return { payloads: [{ text: "final" }], meta: {} };
});
const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
const result = await runAgentTurnWithFallback({
commandBody: "hello",
followupRun: createFollowupRun(),
sessionCtx: {
Provider: "whatsapp",
MessageSid: "msg",
} as unknown as TemplateContext,
// The run id given here is the one expected on the emitted event below.
opts: { runId: "run-codex" } as GetReplyOptions,
typingSignals: createMockTypingSignaler(),
blockReplyPipeline: null,
blockStreamingEnabled: false,
resolvedBlockStreamingBreak: "message_end",
applyReplyToMode: (payload) => payload,
shouldEmitToolResult: () => true,
shouldEmitToolOutput: () => false,
pendingToolTasks: new Set(),
resetSessionAfterCompactionFailure: async () => false,
resetSessionAfterRoleOrderingConflict: async () => false,
isHeartbeat: false,
sessionKey: "main",
getActiveSessionEntry: () => undefined,
resolvedVerboseLevel: "off",
});
expect(result.kind).toBe("success");
// Stream name and data must be forwarded unchanged, tagged with the run id.
expect(emitAgentEvent).toHaveBeenCalledWith({
runId: "run-codex",
stream: "codex_app_server.guardian",
data: {
phase: "blocked",
message: "command requires approval",
},
});
});
it("trims chatty GPT ack-turn final prose", async () => {
state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
result: await params.run("openai", "gpt-5.4"),

View File

@@ -1084,6 +1084,13 @@ export async function runAgentTurnWithFallback(params: {
: undefined,
onReasoningEnd: params.opts?.onReasoningEnd,
onAgentEvent: async (evt) => {
if (evt.stream.startsWith("codex_app_server.")) {
emitAgentEvent({
runId,
stream: evt.stream,
data: evt.data,
});
}
// Signal run start only after the embedded agent emits real activity.
const hasLifecyclePhase =
evt.stream === "lifecycle" && typeof evt.data.phase === "string";

View File

@@ -13,7 +13,7 @@ import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
import type { SessionEntry } from "../../config/sessions.js";
import type { TypingMode } from "../../config/types.js";
import { logVerbose } from "../../globals.js";
import { registerAgentRunContext } from "../../infra/agent-events.js";
import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js";
import { formatErrorMessage } from "../../infra/errors.js";
import { defaultRuntime } from "../../runtime.js";
import { isInternalMessageChannel } from "../../utils/message-channel.js";
@@ -265,6 +265,13 @@ export function createFollowupRunner(params: {
bootstrapPromptWarningSignaturesSeen.length - 1
],
onAgentEvent: (evt) => {
if (evt.stream.startsWith("codex_app_server.")) {
emitAgentEvent({
runId,
stream: evt.stream,
data: evt.data,
});
}
if (evt.stream !== "compaction") {
return;
}