[codex] Add contract-first Pi/Codex runtime plan suite (#71096)

* test: add pi codex runtime contract coverage

* test: expand pi codex tool runtime contracts

* test: tighten tool runtime contracts

* test: reset tool contract param cache

* test: document codex tool middleware fixture

* test: type pi tool contract events

* test: satisfy pi tool contract test types

* test: cover tool media telemetry contracts

* test: reset plugin runtime after tool contracts

* test: add auth profile runtime contracts

* test: strengthen auth profile runtime contracts

* test: clarify auth profile contract fixtures

* test: expand auth profile contract matrix

* test: assert unrelated cli auth isolation

* test: expand auth profile contract matrix

* test: tighten auth profile contract expectations

* test: add outcome fallback runtime contracts

* test: strengthen outcome fallback contracts

* test: isolate outcome fallback contracts

* test: cover codex terminal outcome signals

* test: expand terminal fallback contracts

* test: add delivery no reply runtime contracts

* test: document json no-reply delivery gap

* test: align delivery contract fixtures

* test: add transcript repair runtime contracts

* test: tighten transcript repair contracts

* test: add prompt overlay runtime contracts

* test: tighten prompt overlay contract scope

* test: type prompt overlay contracts

* test: add schema normalization runtime contracts

* test: clarify schema normalization contract gaps

* test: simplify schema normalization contracts

* test: tighten schema normalization contract gaps

* test: cover compaction schema contract

* test: satisfy schema contract lint

* test: add transport params runtime contracts

* test: tighten transport params contract scope

* test: isolate transport params contracts

* test: lock exact transport defaults

* feat: add agent runtime plan foundation

* fix: preserve codex harness auth profiles

* fix: route followup delivery through runtime plan

* fix: normalize parameter-free openai tool schemas

* fix: satisfy runtime plan type checks

* fix: narrow followup delivery runtime planning

* fix: apply codex app-server auth profiles

* fix: classify codex terminal outcomes

* fix: prevent harness auth leakage into unrelated cli providers

* feat: expand agent runtime plan policy contract

* fix: route pi runtime policy through runtime plan

* fix: route codex runtime policy through runtime plan

* fix: route fallback outcome classification through runtime plan

* refactor: make runtime plan contracts topology-safe

* fix: restore runtime plan test type coverage

* fix: align runtime plan schema contract assertions

* fix: stabilize incomplete turn runtime tests

* fix: stabilize codex native web search test

* fix: preserve codex auth profile secret refs

* fix: keep runtime resolved refs canonical

* fix: preserve permissive nested openai schemas

* fix: accept Codex auth provider aliases

* test: update media-only groups mock

* fix: resolve runtime plan rebase checks

* fix: resolve runtime plan rebase checks

---------

Co-authored-by: Eva <eva@100yen.org>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
EVA
2026-04-25 00:34:01 +07:00
committed by GitHub
parent ec3dbd22a4
commit 860dad268d
61 changed files with 5087 additions and 195 deletions

View File

@@ -0,0 +1,45 @@
import { describe, expect, it } from "vitest";
import {
codexPromptOverlayContext,
GPT5_CONTRACT_MODEL_ID,
NON_GPT5_CONTRACT_MODEL_ID,
sharedGpt5PersonalityConfig,
} from "../../test/helpers/agents/prompt-overlay-runtime-contract.js";
import { buildCodexProvider } from "./provider.js";
// Contract suite: the Codex provider's system prompt contribution for GPT-5 and non-GPT-5 models.
describe("Codex prompt overlay runtime contract", () => {
  it("adds the shared GPT-5 behavior contract to Codex GPT-5 provider runs", () => {
    const codexProvider = buildCodexProvider();
    const overlayContext = codexPromptOverlayContext({ modelId: GPT5_CONTRACT_MODEL_ID });

    const contribution = codexProvider.resolveSystemPromptContribution?.(overlayContext);

    expect(contribution?.stablePrefix).toContain("<persona_latch>");
    expect(contribution?.sectionOverrides?.interaction_style).toContain(
      "This is a live chat, not a memo.",
    );
  });

  it("respects shared GPT-5 prompt overlay config for Codex runs", () => {
    const codexProvider = buildCodexProvider();
    const overlayContext = codexPromptOverlayContext({
      modelId: GPT5_CONTRACT_MODEL_ID,
      config: sharedGpt5PersonalityConfig("off"),
    });

    const contribution = codexProvider.resolveSystemPromptContribution?.(overlayContext);

    // With the personality switched "off" the latch prefix is still expected,
    // but no section overrides may be contributed.
    expect(contribution?.stablePrefix).toContain("<persona_latch>");
    expect(contribution?.sectionOverrides).toEqual({});
  });

  it("does not add the shared GPT-5 overlay to non-GPT-5 Codex provider runs", () => {
    const codexProvider = buildCodexProvider();
    const overlayContext = codexPromptOverlayContext({ modelId: NON_GPT5_CONTRACT_MODEL_ID });

    const contribution = codexProvider.resolveSystemPromptContribution?.(overlayContext);

    expect(contribution).toBeUndefined();
  });
});

View File

@@ -1,8 +1,55 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { bridgeCodexAppServerStartOptions } from "./auth-bridge.js";
import { upsertAuthProfile } from "openclaw/plugin-sdk/provider-auth";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
applyCodexAppServerAuthProfile,
bridgeCodexAppServerStartOptions,
refreshCodexAppServerAuthTokens,
} from "./auth-bridge.js";
// Created through vi.hoisted so the handle can be referenced from the vi.mock factories below.
const oauthMocks = vi.hoisted(() => ({
refreshOpenAICodexToken: vi.fn(),
}));
// Stand-ins for the provider runtime module that is mocked further down.
const providerRuntimeMocks = vi.hoisted(() => ({
formatProviderAuthProfileApiKeyWithPlugin: vi.fn(),
// Delegates to the mocked token refresh using the credential's refresh token. A truthy result is
// merged over the incoming credential context and tagged as an "openai-codex" OAuth credential;
// a falsy result yields undefined.
refreshProviderOAuthCredentialWithPlugin: vi.fn(
async (params: { context: { refresh: string } }) => {
const refreshed = await oauthMocks.refreshOpenAICodexToken(params.context.refresh);
return refreshed
? {
...params.context,
...refreshed,
type: "oauth",
provider: "openai-codex",
}
: undefined;
},
),
}));
// Replace the OAuth helper module; only refreshOpenAICodexToken is wired to a controllable mock.
vi.mock("@mariozechner/pi-ai/oauth", () => ({
getOAuthApiKey: vi.fn(),
getOAuthProviders: () => [],
loginOpenAICodex: vi.fn(),
refreshOpenAICodexToken: oauthMocks.refreshOpenAICodexToken,
}));
// Replace the provider runtime module with the hoisted mocks defined above.
vi.mock("../../../../src/plugins/provider-runtime.runtime.js", () => ({
formatProviderAuthProfileApiKeyWithPlugin:
providerRuntimeMocks.formatProviderAuthProfileApiKeyWithPlugin,
refreshProviderOAuthCredentialWithPlugin:
providerRuntimeMocks.refreshProviderOAuthCredentialWithPlugin,
}));
// Undo env stubs and reset mock state after every test.
// NOTE(review): the refresh delegate uses mockClear instead of mockReset, presumably so its
// default implementation survives between tests; confirm against the vitest version in use.
afterEach(() => {
vi.unstubAllEnvs();
oauthMocks.refreshOpenAICodexToken.mockReset();
providerRuntimeMocks.formatProviderAuthProfileApiKeyWithPlugin.mockReset();
providerRuntimeMocks.refreshProviderOAuthCredentialWithPlugin.mockClear();
});
describe("bridgeCodexAppServerStartOptions", () => {
it("leaves Codex app-server start options unchanged", async () => {
@@ -30,4 +77,290 @@ describe("bridgeCodexAppServerStartOptions", () => {
await fs.rm(agentDir, { recursive: true, force: true });
}
});
// A still-valid OAuth credential is forwarded as ChatGPT auth tokens without any refresh.
it("applies an OpenAI Codex OAuth profile through app-server login", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  const loginRequest = vi.fn(async () => ({ type: "chatgptAuthTokens" }));
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "oauth",
        provider: "openai-codex",
        access: "access-token",
        refresh: "refresh-token",
        // Expires one day from now, so no refresh should be needed.
        expires: Date.now() + 24 * 60 * 60_000,
        accountId: "account-123",
        email: "codex@example.test",
      },
    });

    await applyCodexAppServerAuthProfile({
      client: { request: loginRequest } as never,
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    const expectedLogin = {
      type: "chatgptAuthTokens",
      accessToken: "access-token",
      chatgptAccountId: "account-123",
      chatgptPlanType: null,
    };
    expect(loginRequest).toHaveBeenCalledWith("account/login/start", expectedLogin);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// An already-expired OAuth credential must be refreshed before the login request is sent.
it("refreshes an expired OpenAI Codex OAuth profile before app-server login", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  const loginRequest = vi.fn(async () => ({ type: "chatgptAuthTokens" }));
  oauthMocks.refreshOpenAICodexToken.mockResolvedValueOnce({
    access: "fresh-access-token",
    refresh: "fresh-refresh-token",
    expires: Date.now() + 60_000,
    accountId: "account-456",
  });
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "oauth",
        provider: "openai-codex",
        access: "expired-access-token",
        refresh: "refresh-token",
        // One minute in the past.
        expires: Date.now() - 60_000,
        accountId: "account-123",
        email: "codex@example.test",
      },
    });

    await applyCodexAppServerAuthProfile({
      client: { request: loginRequest } as never,
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    expect(oauthMocks.refreshOpenAICodexToken).toHaveBeenCalledWith("refresh-token");
    const expectedLogin = {
      type: "chatgptAuthTokens",
      accessToken: "fresh-access-token",
      chatgptAccountId: "account-456",
      chatgptPlanType: null,
    };
    expect(loginRequest).toHaveBeenCalledWith("account/login/start", expectedLogin);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// An api_key profile whose key lives behind an env secret ref is logged in with the resolved key.
it("applies an OpenAI Codex api-key profile backed by a secret ref", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  const loginRequest = vi.fn(async () => ({ type: "apiKey" }));
  vi.stubEnv("OPENAI_CODEX_API_KEY", "ref-backed-api-key");
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "api_key",
        provider: "openai-codex",
        keyRef: { source: "env", provider: "default", id: "OPENAI_CODEX_API_KEY" },
      },
    });

    await applyCodexAppServerAuthProfile({
      client: { request: loginRequest } as never,
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    const expectedLogin = { type: "apiKey", apiKey: "ref-backed-api-key" };
    expect(loginRequest).toHaveBeenCalledWith("account/login/start", expectedLogin);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// A token profile behind an env secret ref logs in with ChatGPT tokens; with no account id
// stored, the profile email is expected as the account id.
it("applies an OpenAI Codex token profile backed by a secret ref", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  const loginRequest = vi.fn(async () => ({ type: "chatgptAuthTokens" }));
  vi.stubEnv("OPENAI_CODEX_TOKEN", "ref-backed-access-token");
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "token",
        provider: "openai-codex",
        tokenRef: { source: "env", provider: "default", id: "OPENAI_CODEX_TOKEN" },
        email: "codex@example.test",
      },
    });

    await applyCodexAppServerAuthProfile({
      client: { request: loginRequest } as never,
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    const expectedLogin = {
      type: "chatgptAuthTokens",
      accessToken: "ref-backed-access-token",
      chatgptAccountId: "codex@example.test",
      chatgptPlanType: null,
    };
    expect(loginRequest).toHaveBeenCalledWith("account/login/start", expectedLogin);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// A credential stored under the legacy "codex-cli" provider id must still be accepted.
it("accepts a legacy Codex auth-provider alias for app-server login", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  const loginRequest = vi.fn(async () => ({ type: "chatgptAuthTokens" }));
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "token",
        provider: "codex-cli",
        token: "legacy-access-token",
        email: "legacy-codex@example.test",
      },
    });

    await applyCodexAppServerAuthProfile({
      client: { request: loginRequest } as never,
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    const expectedLogin = {
      type: "chatgptAuthTokens",
      accessToken: "legacy-access-token",
      chatgptAccountId: "legacy-codex@example.test",
      chatgptPlanType: null,
    };
    expect(loginRequest).toHaveBeenCalledWith("account/login/start", expectedLogin);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// A refresh request must go through the OAuth refresh even though the stored credential has
// not expired yet, and must answer with the refreshed token and account id.
it("answers app-server ChatGPT token refresh requests from the bound profile", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  oauthMocks.refreshOpenAICodexToken.mockResolvedValueOnce({
    access: "refreshed-access-token",
    refresh: "refreshed-refresh-token",
    expires: Date.now() + 60_000,
    accountId: "account-789",
  });
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "oauth",
        provider: "openai-codex",
        access: "stale-access-token",
        refresh: "refresh-token",
        expires: Date.now() + 60_000,
        accountId: "account-123",
        email: "codex@example.test",
      },
    });

    const refreshResponse = refreshCodexAppServerAuthTokens({
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    await expect(refreshResponse).resolves.toEqual({
      accessToken: "refreshed-access-token",
      chatgptAccountId: "account-789",
      chatgptPlanType: null,
    });
    expect(oauthMocks.refreshOpenAICodexToken).toHaveBeenCalledWith("refresh-token");
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// The stored credential uses the legacy "codex-cli" provider id while the mocked refresh
// delegate tags its result as "openai-codex"; the refreshed credential must still be used.
it("accepts a refreshed Codex OAuth credential when the stored provider is a legacy alias", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  oauthMocks.refreshOpenAICodexToken.mockResolvedValueOnce({
    access: "refreshed-alias-access-token",
    refresh: "refreshed-alias-refresh-token",
    expires: Date.now() + 60_000,
    accountId: "account-alias",
  });
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      credential: {
        type: "oauth",
        provider: "codex-cli",
        access: "stale-alias-access-token",
        refresh: "alias-refresh-token",
        expires: Date.now() + 60_000,
        accountId: "account-legacy",
        email: "legacy-codex@example.test",
      },
    });

    const refreshResponse = refreshCodexAppServerAuthTokens({
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    await expect(refreshResponse).resolves.toEqual({
      accessToken: "refreshed-alias-access-token",
      chatgptAccountId: "account-alias",
      chatgptPlanType: null,
    });
    expect(oauthMocks.refreshOpenAICodexToken).toHaveBeenCalledWith("alias-refresh-token");
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// A plan type stored on the credential must be carried into the login params.
it("preserves a stored ChatGPT plan type when building token login params", async () => {
  const profileId = "openai-codex:work";
  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-app-server-"));
  const loginRequest = vi.fn(async () => ({ type: "chatgptAuthTokens" }));
  try {
    upsertAuthProfile({
      agentDir: tempAgentDir,
      profileId,
      // The credential literal carries chatgptPlanType and is cast to `never` to get past the
      // declared credential type.
      credential: {
        type: "oauth",
        provider: "openai-codex",
        access: "access-token",
        refresh: "refresh-token",
        expires: Date.now() + 24 * 60 * 60_000,
        accountId: "account-123",
        email: "codex@example.test",
        chatgptPlanType: "pro",
      } as never,
    });

    await applyCodexAppServerAuthProfile({
      client: { request: loginRequest } as never,
      agentDir: tempAgentDir,
      authProfileId: profileId,
    });

    const expectedLogin = {
      type: "chatgptAuthTokens",
      accessToken: "access-token",
      chatgptAccountId: "account-123",
      chatgptPlanType: "pro",
    };
    expect(loginRequest).toHaveBeenCalledWith("account/login/start", expectedLogin);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
});

View File

@@ -1,4 +1,18 @@
import {
ensureAuthProfileStore,
loadAuthProfileStoreForSecretsRuntime,
resolveProviderIdForAuth,
resolveApiKeyForProfile,
saveAuthProfileStore,
type AuthProfileCredential,
type OAuthCredential,
} from "openclaw/plugin-sdk/agent-runtime";
import type { CodexAppServerClient } from "./client.js";
import type { CodexAppServerStartOptions } from "./config.js";
import type { ChatgptAuthTokensRefreshResponse } from "./protocol-generated/typescript/v2/ChatgptAuthTokensRefreshResponse.js";
import type { LoginAccountParams } from "./protocol-generated/typescript/v2/LoginAccountParams.js";
// Canonical auth provider id; a profile's provider must resolve to this value (via
// resolveProviderIdForAuth) to be usable for Codex app-server login.
const CODEX_APP_SERVER_AUTH_PROVIDER = "openai-codex";
export async function bridgeCodexAppServerStartOptions(params: {
startOptions: CodexAppServerStartOptions;
@@ -9,3 +23,170 @@ export async function bridgeCodexAppServerStartOptions(params: {
void params.authProfileId;
return params.startOptions;
}
/**
 * Logs the Codex app-server client in with the credentials of the given auth profile.
 *
 * Sends an `account/login/start` request built from the profile. When no login parameters are
 * resolved (for example no `authProfileId` was supplied), nothing is sent.
 *
 * @param params.client Codex app-server client used to send the login request.
 * @param params.agentDir Agent directory whose auth profile store is consulted.
 * @param params.authProfileId Profile to apply; optional.
 */
export async function applyCodexAppServerAuthProfile(params: {
  client: CodexAppServerClient;
  agentDir: string;
  authProfileId?: string;
}): Promise<void> {
  const { agentDir, authProfileId, client } = params;
  const loginParams = await resolveCodexAppServerAuthProfileLoginParams({
    agentDir,
    authProfileId,
  });
  if (loginParams) {
    await client.request("account/login/start", loginParams);
  }
}
/**
 * Resolves the `account/login/start` parameters for an auth profile.
 *
 * Thin public wrapper around the internal resolver; it does not request a forced OAuth refresh.
 *
 * @param params.agentDir Agent directory whose auth profile store is consulted.
 * @param params.authProfileId Profile to resolve; optional.
 * @returns Whatever the internal resolver yields for these parameters (login params or `undefined`).
 */
export function resolveCodexAppServerAuthProfileLoginParams(params: {
agentDir: string;
authProfileId?: string;
}): Promise<LoginAccountParams | undefined> {
return resolveCodexAppServerAuthProfileLoginParamsInternal(params);
}
/**
 * Produces a ChatGPT token refresh response from the given auth profile.
 *
 * Resolves the profile's login parameters with a forced OAuth refresh and maps them onto the
 * refresh response shape.
 *
 * @param params.agentDir Agent directory whose auth profile store is consulted.
 * @param params.authProfileId Profile to refresh; optional.
 * @returns Access token, account id and plan type (`null` when none is known).
 * @throws Error when no login parameters are resolved or they are not `chatgptAuthTokens`.
 */
export async function refreshCodexAppServerAuthTokens(params: {
  agentDir: string;
  authProfileId?: string;
}): Promise<ChatgptAuthTokensRefreshResponse> {
  const resolved = await resolveCodexAppServerAuthProfileLoginParamsInternal({
    ...params,
    forceOAuthRefresh: true,
  });
  if (resolved?.type === "chatgptAuthTokens") {
    const { accessToken, chatgptAccountId, chatgptPlanType } = resolved;
    return {
      accessToken,
      chatgptAccountId,
      chatgptPlanType: chatgptPlanType ?? null,
    };
  }
  throw new Error("Codex app-server ChatGPT token refresh requires an OAuth auth profile.");
}
/**
 * Looks up an auth profile and converts it into app-server login parameters.
 *
 * @param params.agentDir Agent directory whose auth profile store is read (keychain prompts disabled).
 * @param params.authProfileId Profile id; surrounding whitespace is ignored.
 * @param params.forceOAuthRefresh When exactly `true`, OAuth credentials are force-refreshed.
 * @returns Login parameters, or `undefined` when no profile id was supplied.
 * @throws Error when the profile is missing, belongs to an unsupported provider, or yields no
 *   usable credentials.
 */
async function resolveCodexAppServerAuthProfileLoginParamsInternal(params: {
  agentDir: string;
  authProfileId?: string;
  forceOAuthRefresh?: boolean;
}): Promise<LoginAccountParams | undefined> {
  const { agentDir, authProfileId, forceOAuthRefresh } = params;
  const profileId = authProfileId?.trim();
  if (!profileId) {
    // No profile requested: nothing to apply.
    return undefined;
  }

  const { profiles } = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
  const credential = profiles[profileId];
  if (!credential) {
    throw new Error(`Codex app-server auth profile "${profileId}" was not found.`);
  }
  if (!isCodexAppServerAuthProvider(credential.provider)) {
    throw new Error(
      `Codex app-server auth profile "${profileId}" must belong to provider "openai-codex" or a supported alias.`,
    );
  }

  const loginParams = await resolveLoginParamsForCredential(profileId, credential, {
    agentDir,
    forceOAuthRefresh: forceOAuthRefresh === true,
  });
  if (loginParams) {
    return loginParams;
  }
  throw new Error(
    `Codex app-server auth profile "${profileId}" does not contain usable credentials.`,
  );
}
/**
 * Converts a stored credential into app-server login parameters.
 *
 * - `api_key` credentials become an `apiKey` login.
 * - `token` credentials become a `chatgptAuthTokens` login using the resolved token.
 * - Any other credential is handled as OAuth and resolved (optionally force-refreshed) first.
 *
 * @returns Login parameters, or `undefined` when the resolved secret is missing or blank.
 */
async function resolveLoginParamsForCredential(
  profileId: string,
  credential: AuthProfileCredential,
  params: { agentDir: string; forceOAuthRefresh: boolean },
): Promise<LoginAccountParams | undefined> {
  const { agentDir, forceOAuthRefresh } = params;

  if (credential.type === "api_key" || credential.type === "token") {
    // Both kinds are resolved through the same profile lookup.
    const resolved = await resolveApiKeyForProfile({
      store: ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false }),
      profileId,
      agentDir,
    });
    const secret = resolved?.apiKey?.trim();
    if (!secret) {
      return undefined;
    }
    return credential.type === "api_key"
      ? { type: "apiKey", apiKey: secret }
      : buildChatgptAuthTokensParams(profileId, credential, secret);
  }

  const oauthCredential = await resolveOAuthCredentialForCodexAppServer(profileId, credential, {
    agentDir,
    forceRefresh: forceOAuthRefresh,
  });
  const oauthAccessToken = oauthCredential.access?.trim();
  if (!oauthAccessToken) {
    return undefined;
  }
  return buildChatgptAuthTokensParams(profileId, oauthCredential, oauthAccessToken);
}
/**
 * Resolves the OAuth credential to use for app-server login, optionally forcing a refresh.
 *
 * @param profileId Id of the profile in the auth profile store.
 * @param credential OAuth credential as read by the caller; used as the last-resort fallback.
 * @param params.agentDir Agent directory whose auth profile store is read and written.
 * @param params.forceRefresh When true, the stored credential is marked expired before resolving.
 * @returns The chosen credential, with `access` replaced by the resolved key when one is returned.
 */
async function resolveOAuthCredentialForCodexAppServer(
profileId: string,
credential: OAuthCredential,
params: { agentDir: string; forceRefresh: boolean },
): Promise<OAuthCredential> {
const store = ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false });
if (params.forceRefresh) {
// Persist the credential with expires: 0, presumably so the resolver below treats it as
// expired and refreshes it.
// NOTE(review): nothing here restores the original expiry if that refresh fails; confirm intended.
store.profiles[profileId] = { ...credential, expires: 0 };
saveAuthProfileStore(store, params.agentDir);
}
const resolved = await resolveApiKeyForProfile({
store,
profileId,
agentDir: params.agentDir,
});
// Re-read the store after resolving; presumably a refresh may have written a new credential.
const refreshed = loadAuthProfileStoreForSecretsRuntime(params.agentDir).profiles[profileId];
const storedCredential = store.profiles[profileId];
// Preference order: re-read credential, then the in-memory store entry, then the caller's
// credential. The first two are only used when they are OAuth and belong to a Codex provider.
const candidate =
refreshed?.type === "oauth" && isCodexAppServerAuthProvider(refreshed.provider)
? refreshed
: storedCredential?.type === "oauth" &&
isCodexAppServerAuthProvider(storedCredential.provider)
? storedCredential
: credential;
return resolved?.apiKey ? { ...candidate, access: resolved.apiKey } : candidate;
}
/**
 * Reports whether a provider id resolves (through `resolveProviderIdForAuth`) to the Codex
 * app-server auth provider.
 */
function isCodexAppServerAuthProvider(provider: string): boolean {
  const canonicalProviderId = resolveProviderIdForAuth(provider);
  return canonicalProviderId === CODEX_APP_SERVER_AUTH_PROVIDER;
}
/**
 * Assembles `chatgptAuthTokens` login parameters for a credential.
 *
 * @param profileId Profile id, used as the account id of last resort.
 * @param credential Credential supplying account id / email and plan type.
 * @param accessToken Access token to send.
 */
function buildChatgptAuthTokensParams(
  profileId: string,
  credential: AuthProfileCredential,
  accessToken: string,
): LoginAccountParams {
  const chatgptAccountId = resolveChatgptAccountId(profileId, credential);
  const chatgptPlanType = resolveChatgptPlanType(credential);
  return { type: "chatgptAuthTokens", accessToken, chatgptAccountId, chatgptPlanType };
}
/**
 * Reads the ChatGPT plan type stored on a credential, if any.
 *
 * Checks `chatgptPlanType` first and `planType` second, returning the first value that is a
 * non-blank string (trimmed). A blank or non-string `chatgptPlanType` does not hide a usable
 * `planType`; the previous `??` fallback only covered `null`/`undefined`.
 *
 * @param credential Credential record to inspect.
 * @returns The trimmed plan type, or `null` when neither field holds a usable string.
 */
function resolveChatgptPlanType(credential: AuthProfileCredential): string | null {
  const record = credential as Record<string, unknown>;
  for (const candidate of [record.chatgptPlanType, record.planType]) {
    if (typeof candidate !== "string") {
      continue;
    }
    const planType = candidate.trim();
    if (planType) {
      return planType;
    }
  }
  return null;
}
/**
 * Picks the ChatGPT account id to report for a credential.
 *
 * Preference order: a non-blank string `accountId`, then a non-blank `email`, then the profile id.
 * Values are trimmed before use.
 */
function resolveChatgptAccountId(profileId: string, credential: AuthProfileCredential): string {
  const trimmedAccountId =
    "accountId" in credential && typeof credential.accountId === "string"
      ? credential.accountId.trim()
      : "";
  if (trimmedAccountId) {
    return trimmedAccountId;
  }
  const trimmedEmail = credential.email?.trim();
  return trimmedEmail ? trimmedEmail : profileId;
}

View File

@@ -0,0 +1,210 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import {
abortAgentHarnessRun,
type EmbeddedRunAttemptParams,
} from "openclaw/plugin-sdk/agent-harness";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { AUTH_PROFILE_RUNTIME_CONTRACT } from "../../../../test/helpers/agents/auth-profile-runtime-contract.js";
import { runCodexAppServerAttempt, __testing } from "./run-attempt.js";
import { readCodexAppServerBinding, writeCodexAppServerBinding } from "./session-binding.js";
import { createCodexTestModel } from "./test-support.js";
/**
 * Builds run-attempt params for the auth profile contract tests.
 *
 * Identity fields come from the shared contract fixture; tools are disabled and the run times
 * out after five seconds. Auth storage and model registry are empty placeholders.
 */
function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAttemptParams {
  const contract = AUTH_PROFILE_RUNTIME_CONTRACT;
  const harnessProvider = contract.codexHarnessProvider;
  return {
    prompt: contract.workspacePrompt,
    sessionId: contract.sessionId,
    sessionKey: contract.sessionKey,
    sessionFile,
    workspaceDir,
    runId: contract.runId,
    provider: harnessProvider,
    modelId: "gpt-5.4-codex",
    model: createCodexTestModel(harnessProvider),
    thinkLevel: "medium",
    disableTools: true,
    timeoutMs: 5_000,
    authStorage: {} as never,
    modelRegistry: {} as never,
  } as EmbeddedRunAttemptParams;
}
/**
 * Builds a canned thread start/resume response for the fake app-server client.
 *
 * @param threadId Id placed on the returned thread; defaults to the contract thread id.
 */
function threadStartResult(threadId = "thread-auth-contract") {
  const thread = {
    id: threadId,
    forkedFromId: null,
    preview: "",
    ephemeral: false,
    modelProvider: "openai",
    createdAt: 1,
    updatedAt: 1,
    status: { type: "idle" },
    path: null,
    cwd: "",
    cliVersion: "0.118.0",
    source: "unknown",
    agentNickname: null,
    agentRole: null,
    gitInfo: null,
    name: null,
    turns: [],
  };
  const response = {
    thread,
    model: "gpt-5.4-codex",
    modelProvider: "openai",
    serviceTier: null,
    cwd: "",
    instructionSources: [],
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: { type: "dangerFullAccess" },
    permissionProfile: null,
    reasoningEffort: null,
  };
  return response;
}
/**
 * Builds a canned `turn/start` response describing an in-progress turn with no items.
 *
 * @param turnId Id placed on the returned turn; defaults to the contract turn id.
 */
function turnStartResult(turnId = "turn-auth-contract") {
  const turn = {
    id: turnId,
    status: "inProgress",
    items: [],
    error: null,
    startedAt: null,
    completedAt: null,
    durationMs: null,
  };
  return { turn };
}
/**
 * Installs a fake Codex app-server client factory and returns handles for driving a run.
 *
 * The fake records every auth profile id handed to the factory and every request method it
 * receives. It answers the configured start method and `turn/start` with canned results and
 * rejects any other method.
 *
 * @param params.startMethod Which thread request (`thread/start` or `thread/resume`) to answer.
 * @returns `seenAuthProfileIds`, plus `waitForMethod` and `completeTurn` helpers.
 */
function createCodexAuthProfileHarness(params: { startMethod: "thread/start" | "thread/resume" }) {
  type NotificationHandler = (notification: unknown) => Promise<void>;

  const seenAuthProfileIds: Array<string | undefined> = [];
  const recordedRequests: Array<{ method: string; params: unknown }> = [];
  // Replaced with the run's notification handler once it registers one.
  let deliverNotification: NotificationHandler = async () => undefined;

  __testing.setCodexAppServerClientFactoryForTests(async (_startOptions, authProfileId) => {
    seenAuthProfileIds.push(authProfileId);
    const request = vi.fn(async (method: string, requestParams?: unknown) => {
      recordedRequests.push({ method, params: requestParams });
      switch (method) {
        case params.startMethod:
          return threadStartResult();
        case "turn/start":
          return turnStartResult();
        default:
          throw new Error(`unexpected method: ${method}`);
      }
    });
    return {
      request,
      addNotificationHandler: (handler: NotificationHandler) => {
        deliverNotification = handler;
        return () => undefined;
      },
      addRequestHandler: () => () => undefined,
    } as never;
  });

  const sawMethod = (method: string): boolean =>
    recordedRequests.some((entry) => entry.method === method);

  return {
    seenAuthProfileIds,
    /** Polls until a request with the given method has been recorded. */
    async waitForMethod(method: string) {
      await vi.waitFor(() => expect(sawMethod(method)).toBe(true), {
        interval: 1,
      });
    },
    /** Delivers a `turn/completed` notification for the contract thread and turn. */
    async completeTurn() {
      const completedTurn = { id: "turn-auth-contract", status: "completed" };
      await deliverNotification({
        method: "turn/completed",
        params: {
          threadId: "thread-auth-contract",
          turnId: "turn-auth-contract",
          turn: completedTurn,
        },
      });
    },
  };
}
// Contract suite: which auth profile id the Codex app-server adapter hands to client startup.
describe("Auth profile runtime contract - Codex app-server adapter", () => {
  type Harness = ReturnType<typeof createCodexAuthProfileHarness>;

  let tmpDir: string;

  beforeEach(async () => {
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-auth-contract-"));
  });

  afterEach(async () => {
    abortAgentHarnessRun(AUTH_PROFILE_RUNTIME_CONTRACT.sessionId);
    __testing.resetCodexAppServerClientFactoryForTests();
    await fs.rm(tmpDir, { recursive: true, force: true });
  });

  /**
   * Waits until the client factory has seen exactly the contract profile id, then lets the
   * turn start, completes it and awaits the run.
   */
  async function expectContractProfileAndFinish(
    harness: Harness,
    run: Promise<unknown>,
  ): Promise<void> {
    await vi.waitFor(
      () =>
        expect(harness.seenAuthProfileIds).toEqual([
          AUTH_PROFILE_RUNTIME_CONTRACT.openAiCodexProfileId,
        ]),
      { interval: 1 },
    );
    await harness.waitForMethod("turn/start");
    await harness.completeTurn();
    await run;
  }

  it("passes the exact OpenAI Codex auth profile into app-server startup", async () => {
    const harness = createCodexAuthProfileHarness({ startMethod: "thread/start" });
    const attemptParams = createParams(path.join(tmpDir, "session.jsonl"), tmpDir);
    attemptParams.authProfileId = AUTH_PROFILE_RUNTIME_CONTRACT.openAiCodexProfileId;

    const run = runCodexAppServerAttempt(attemptParams);

    await expectContractProfileAndFinish(harness, run);
  });

  it("reuses a bound OpenAI Codex auth profile when resume params omit authProfileId", async () => {
    const harness = createCodexAuthProfileHarness({ startMethod: "thread/resume" });
    const sessionFile = path.join(tmpDir, "session.jsonl");
    await writeCodexAppServerBinding(sessionFile, {
      threadId: "thread-auth-contract",
      cwd: tmpDir,
      authProfileId: AUTH_PROFILE_RUNTIME_CONTRACT.openAiCodexProfileId,
      dynamicToolsFingerprint: "[]",
    });
    // authProfileId is deliberately left unset so the profile must come from the binding.
    const attemptParams = createParams(sessionFile, tmpDir);

    const run = runCodexAppServerAttempt(attemptParams);

    await expectContractProfileAndFinish(harness, run);
  });

  it("prefers an explicit runtime auth profile over a stale persisted binding", async () => {
    const harness = createCodexAuthProfileHarness({ startMethod: "thread/resume" });
    const sessionFile = path.join(tmpDir, "session.jsonl");
    await writeCodexAppServerBinding(sessionFile, {
      threadId: "thread-auth-contract",
      cwd: tmpDir,
      authProfileId: "openai-codex:stale",
      dynamicToolsFingerprint: "[]",
    });
    const attemptParams = createParams(sessionFile, tmpDir);
    attemptParams.authProfileId = AUTH_PROFILE_RUNTIME_CONTRACT.openAiCodexProfileId;

    const run = runCodexAppServerAttempt(attemptParams);

    await expectContractProfileAndFinish(harness, run);
    // The persisted binding must now carry the explicit profile id.
    await expect(readCodexAppServerBinding(sessionFile)).resolves.toMatchObject({
      authProfileId: AUTH_PROFILE_RUNTIME_CONTRACT.openAiCodexProfileId,
    });
  });
});

View File

@@ -167,7 +167,10 @@ export function resolveCodexAppServerRuntimeOptions(
};
}
export function codexAppServerStartOptionsKey(options: CodexAppServerStartOptions): string {
export function codexAppServerStartOptionsKey(
options: CodexAppServerStartOptions,
params: { authProfileId?: string } = {},
): string {
return JSON.stringify({
transport: options.transport,
command: options.command,
@@ -179,6 +182,7 @@ export function codexAppServerStartOptionsKey(options: CodexAppServerStartOption
),
env: Object.entries(options.env ?? {}).toSorted(([left], [right]) => left.localeCompare(right)),
clearEnv: [...(options.clearEnv ?? [])].toSorted(),
authProfileId: params.authProfileId ?? null,
});
}

View File

@@ -0,0 +1,80 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import type { EmbeddedRunAttemptParams } from "openclaw/plugin-sdk/agent-harness";
import { afterEach, describe, expect, it } from "vitest";
import { isSilentReplyPayloadText } from "../../../../src/auto-reply/tokens.js";
import { DELIVERY_NO_REPLY_RUNTIME_CONTRACT } from "../../../../test/helpers/agents/delivery-no-reply-runtime-contract.js";
import { CodexAppServerEventProjector } from "./event-projector.js";
import { createCodexTestModel } from "./test-support.js";
// Thread and turn ids shared by the projector under test and the notifications sent to it.
const THREAD_ID = "thread-delivery-contract";
const TURN_ID = "turn-delivery-contract";
// Temp directories created by createParams; removed again in afterEach.
const tempDirs = new Set<string>();
// Notification type accepted by the projector's handleNotification method.
type ProjectorNotification = Parameters<CodexAppServerEventProjector["handleNotification"]>[0];
/**
 * Creates run-attempt params backed by a fresh temp workspace.
 *
 * The temp directory is registered in `tempDirs` for later cleanup, and a session file is opened
 * inside it before the params are returned.
 */
async function createParams(): Promise<EmbeddedRunAttemptParams> {
  const workspaceDir = await fs.mkdtemp(
    path.join(os.tmpdir(), "openclaw-codex-delivery-contract-"),
  );
  tempDirs.add(workspaceDir);

  const sessionFile = path.join(workspaceDir, "session.jsonl");
  SessionManager.open(sessionFile);

  const contract = DELIVERY_NO_REPLY_RUNTIME_CONTRACT;
  return {
    prompt: contract.prompt,
    sessionId: contract.sessionId,
    sessionKey: contract.sessionKey,
    sessionFile,
    workspaceDir,
    runId: contract.runId,
    provider: "codex",
    modelId: "gpt-5.4-codex",
    model: createCodexTestModel("codex"),
    thinkLevel: "medium",
  } as EmbeddedRunAttemptParams;
}
/**
 * Wraps notification params with the contract thread and turn ids.
 *
 * Caller-supplied `params` are spread last, so they override the default ids when they carry
 * `threadId` or `turnId` themselves.
 */
function forCurrentTurn(
  method: ProjectorNotification["method"],
  params: Record<string, unknown>,
): ProjectorNotification {
  const scopedParams = { threadId: THREAD_ID, turnId: TURN_ID, ...params };
  return { method, params: scopedParams } as ProjectorNotification;
}
// Remove every temp workspace created during the test, one at a time, then forget them.
afterEach(async () => {
  const removeOptions = { recursive: true, force: true } as const;
  for (const workspaceDir of tempDirs.values()) {
    await fs.rm(workspaceDir, removeOptions);
  }
  tempDirs.clear();
});
// Contract suite: silent-reply texts must come out of the projector trimmed and still be
// recognised as silent replies.
describe("Delivery/NO_REPLY runtime contract - Codex app-server adapter", () => {
  const silentTexts = [
    DELIVERY_NO_REPLY_RUNTIME_CONTRACT.silentText,
    ` ${DELIVERY_NO_REPLY_RUNTIME_CONTRACT.silentText} `,
    DELIVERY_NO_REPLY_RUNTIME_CONTRACT.jsonSilentText,
  ];

  it.each(silentTexts)(
    "preserves silent terminal text %s for shared delivery suppression",
    async (text) => {
      const attemptParams = await createParams();
      const projector = new CodexAppServerEventProjector(attemptParams, THREAD_ID, TURN_ID);

      // Feed the whole text as a single assistant message delta.
      const delta = forCurrentTurn("item/agentMessage/delta", {
        itemId: "msg-1",
        delta: text,
      });
      await projector.handleNotification(delta);

      const result = projector.buildResult({
        didSendViaMessagingTool: false,
        messagingToolSentTexts: [],
        messagingToolSentMediaUrls: [],
        messagingToolSentTargets: [],
        toolMediaUrls: [],
        toolAudioAsVoice: false,
      });

      expect(result.assistantTexts).toEqual([text.trim()]);
      expect(isSilentReplyPayloadText(result.assistantTexts[0])).toBe(true);
    },
  );
});

View File

@@ -34,6 +34,10 @@ export type CodexAppServerToolTelemetry = {
successfulCronAdds?: number;
};
// Value type of EmbeddedRunAttemptResult's `agentHarnessResultClassification` field with
// null and undefined excluded.
type AgentHarnessResultClassification = NonNullable<
EmbeddedRunAttemptResult["agentHarnessResultClassification"]
>;
const ZERO_USAGE: Usage = {
input: 0,
output: 0,
@@ -60,6 +64,25 @@ const CURRENT_TOKEN_USAGE_KEYS = [
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
/**
 * Classifies a turn that ended without any assistant text.
 *
 * @returns `undefined` when the turn did not complete, a prompt error is present, or at least one
 *   assistant text exists. Otherwise `"planning-only"` when plan text is non-blank,
 *   `"reasoning-only"` when reasoning text is non-blank, and `"empty"` when both are blank.
 *   Plan text takes precedence over reasoning text.
 */
function classifyTerminalResult(params: {
  assistantTexts: string[];
  reasoningText: string;
  planText: string;
  promptError: unknown;
  turnCompleted: boolean;
}): AgentHarnessResultClassification | undefined {
  const { assistantTexts, planText, promptError, reasoningText, turnCompleted } = params;

  const hasAssistantText = assistantTexts.length > 0;
  if (!turnCompleted || promptError || hasAssistantText) {
    return undefined;
  }

  if (planText.trim().length > 0) {
    return "planning-only";
  }
  return reasoningText.trim().length > 0 ? "reasoning-only" : "empty";
}
export class CodexAppServerEventProjector {
private readonly assistantTextByItem = new Map<string, string>();
private readonly assistantItemOrder: string[] = [];
@@ -192,6 +215,13 @@ export class CodexAppServerEventProjector {
const promptError =
this.promptError ??
(turnFailed ? (this.completedTurn?.error?.message ?? "codex app-server turn failed") : null);
const agentHarnessResultClassification = classifyTerminalResult({
assistantTexts,
reasoningText,
planText,
promptError,
turnCompleted: Boolean(this.completedTurn),
});
return {
aborted: this.aborted || turnInterrupted,
externalAbort: false,
@@ -201,6 +231,7 @@ export class CodexAppServerEventProjector {
promptError,
promptErrorSource: promptError ? this.promptErrorSource || "prompt" : null,
sessionIdUsed: this.params.sessionId,
...(agentHarnessResultClassification ? { agentHarnessResultClassification } : {}),
bootstrapPromptWarningSignaturesSeen: this.params.bootstrapPromptWarningSignaturesSeen,
bootstrapPromptWarningSignature: this.params.bootstrapPromptWarningSignature,
messagesSnapshot,

View File

@@ -4,6 +4,7 @@ import { createClientHarness } from "./test-support.js";
const mocks = vi.hoisted(() => {
const authBridge = {
applyAuthProfile: vi.fn(async () => undefined),
startOptions: vi.fn(async ({ startOptions }) => startOptions),
};
const providerAuth = {
@@ -13,6 +14,7 @@ const mocks = vi.hoisted(() => {
});
vi.mock("./auth-bridge.js", () => ({
applyCodexAppServerAuthProfile: mocks.authBridge.applyAuthProfile,
bridgeCodexAppServerStartOptions: mocks.authBridge.startOptions,
}));
@@ -34,6 +36,7 @@ describe("listCodexAppServerModels", () => {
afterEach(() => {
resetSharedCodexAppServerClientForTests();
vi.restoreAllMocks();
mocks.authBridge.applyAuthProfile.mockClear();
mocks.authBridge.startOptions.mockClear();
mocks.providerAuth.agentDir.mockClear();
});

View File

@@ -0,0 +1,408 @@
import type { AnyAgentTool } from "openclaw/plugin-sdk/agent-harness";
import { afterEach, describe, expect, it, vi } from "vitest";
import { wrapToolWithBeforeToolCallHook } from "../../../../src/agents/pi-tools.before-tool-call.js";
import {
installCodexToolResultMiddleware,
installOpenClawOwnedToolHooks,
mediaToolResult,
resetOpenClawOwnedToolHooks,
textToolResult,
} from "../../../../test/helpers/agents/openclaw-owned-tool-runtime-contract.js";
import { createCodexDynamicToolBridge } from "./dynamic-tools.js";
/**
 * Builds a tool fixture for the contract tests.
 *
 * Starts from an "exec" tool with an empty object schema and a fresh mock
 * `execute`, then applies `overrides` on top.
 */
function createContractTool(overrides: Partial<AnyAgentTool>): AnyAgentTool {
  const defaults = {
    name: "exec",
    description: "Run a command.",
    parameters: { type: "object", properties: {} },
    execute: vi.fn(),
  };
  // The fixture is intentionally partial, hence the double assertion.
  return { ...defaults, ...overrides } as unknown as AnyAgentTool;
}
describe("OpenClaw-owned tool runtime contract — Codex app-server adapter", () => {
afterEach(() => {
resetOpenClawOwnedToolHooks();
});
it("wraps unwrapped dynamic tools with before/after tool hooks", async () => {
const adjustedParams = { mode: "safe" };
const mergedParams = { command: "pwd", mode: "safe" };
const hooks = installOpenClawOwnedToolHooks({ adjustedParams });
const execute = vi.fn(async () => textToolResult("done", { ok: true }));
const bridge = createCodexDynamicToolBridge({
tools: [createContractTool({ name: "exec", execute })],
signal: new AbortController().signal,
hookContext: {
agentId: "agent-1",
sessionId: "session-1",
sessionKey: "agent:agent-1:session-1",
runId: "run-contract",
},
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-contract",
namespace: null,
tool: "exec",
arguments: { command: "pwd" },
});
expect(result).toEqual({
success: true,
contentItems: [{ type: "inputText", text: "done" }],
});
expect(hooks.beforeToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "exec",
toolCallId: "call-contract",
runId: "run-contract",
params: { command: "pwd" },
}),
expect.objectContaining({
agentId: "agent-1",
sessionId: "session-1",
sessionKey: "agent:agent-1:session-1",
runId: "run-contract",
toolCallId: "call-contract",
}),
);
expect(execute).toHaveBeenCalledWith(
"call-contract",
mergedParams,
expect.any(AbortSignal),
undefined,
);
await vi.waitFor(() => {
expect(hooks.afterToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "exec",
toolCallId: "call-contract",
params: mergedParams,
result: expect.objectContaining({
content: [{ type: "text", text: "done" }],
details: { ok: true },
}),
}),
expect.objectContaining({
agentId: "agent-1",
sessionId: "session-1",
sessionKey: "agent:agent-1:session-1",
runId: "run-contract",
toolCallId: "call-contract",
}),
);
});
});
it("runs tool_result middleware before after_tool_call observes the result", async () => {
const adjustedParams = { mode: "safe" };
const mergedParams = { command: "status", mode: "safe" };
const hooks = installOpenClawOwnedToolHooks({ adjustedParams });
const middleware = installCodexToolResultMiddleware((event) => {
expect(event).toMatchObject({
toolName: "exec",
toolCallId: "call-middleware",
args: { command: "status" },
result: {
content: [{ type: "text", text: "raw output" }],
details: { stage: "execute" },
},
});
return textToolResult("compacted output", { stage: "middleware" });
});
const execute = vi.fn(async () => textToolResult("raw output", { stage: "execute" }));
const bridge = createCodexDynamicToolBridge({
tools: [createContractTool({ name: "exec", execute })],
signal: new AbortController().signal,
hookContext: {
agentId: "agent-1",
sessionId: "session-1",
sessionKey: "agent:agent-1:session-1",
runId: "run-middleware",
},
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-middleware",
namespace: null,
tool: "exec",
arguments: { command: "status" },
});
expect(result).toEqual({
success: true,
contentItems: [{ type: "inputText", text: "compacted output" }],
});
expect(execute).toHaveBeenCalledWith(
"call-middleware",
mergedParams,
expect.any(AbortSignal),
undefined,
);
expect(middleware.middleware).toHaveBeenCalledTimes(1);
await vi.waitFor(() => {
expect(hooks.afterToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "exec",
toolCallId: "call-middleware",
params: mergedParams,
result: expect.objectContaining({
content: [{ type: "text", text: "compacted output" }],
details: { stage: "middleware" },
}),
}),
expect.objectContaining({
runId: "run-middleware",
toolCallId: "call-middleware",
}),
);
});
});
it("fails closed when before_tool_call blocks a dynamic tool", async () => {
const hooks = installOpenClawOwnedToolHooks({ blockReason: "blocked by policy" });
const execute = vi.fn(async () => textToolResult("should not run"));
const bridge = createCodexDynamicToolBridge({
tools: [createContractTool({ name: "message", execute })],
signal: new AbortController().signal,
hookContext: { runId: "run-blocked" },
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-blocked",
namespace: null,
tool: "message",
arguments: {
action: "send",
text: "blocked",
provider: "telegram",
to: "chat-1",
},
});
expect(result).toEqual({
success: false,
contentItems: [{ type: "inputText", text: "blocked by policy" }],
});
expect(execute).not.toHaveBeenCalled();
expect(bridge.telemetry.didSendViaMessagingTool).toBe(false);
await vi.waitFor(() => {
expect(hooks.afterToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "message",
toolCallId: "call-blocked",
params: {
action: "send",
text: "blocked",
provider: "telegram",
to: "chat-1",
},
error: "blocked by policy",
}),
expect.objectContaining({
runId: "run-blocked",
toolCallId: "call-blocked",
}),
);
});
});
it("reports dynamic tool execution errors through after_tool_call", async () => {
const adjustedParams = { timeoutSec: 1 };
const mergedParams = { command: "false", timeoutSec: 1 };
const hooks = installOpenClawOwnedToolHooks({ adjustedParams });
const execute = vi.fn(async () => {
throw new Error("tool failed");
});
const bridge = createCodexDynamicToolBridge({
tools: [createContractTool({ name: "exec", execute })],
signal: new AbortController().signal,
hookContext: { runId: "run-error" },
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-error",
namespace: null,
tool: "exec",
arguments: { command: "false" },
});
expect(result).toEqual({
success: false,
contentItems: [{ type: "inputText", text: "tool failed" }],
});
expect(execute).toHaveBeenCalledWith(
"call-error",
mergedParams,
expect.any(AbortSignal),
undefined,
);
await vi.waitFor(() => {
expect(hooks.afterToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "exec",
toolCallId: "call-error",
params: mergedParams,
error: "tool failed",
}),
expect.objectContaining({
runId: "run-error",
toolCallId: "call-error",
}),
);
});
});
it("records successful Codex messaging text, media, and target telemetry", async () => {
const hooks = installOpenClawOwnedToolHooks();
const execute = vi.fn(async () => textToolResult("Sent."));
const bridge = createCodexDynamicToolBridge({
tools: [createContractTool({ name: "message", execute })],
signal: new AbortController().signal,
hookContext: { runId: "run-message" },
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-message",
namespace: null,
tool: "message",
arguments: {
action: "send",
text: "hello from Codex",
mediaUrl: "/tmp/codex-reply.png",
provider: "telegram",
to: "chat-1",
threadId: "thread-ts-1",
},
});
expect(result).toEqual({
success: true,
contentItems: [{ type: "inputText", text: "Sent." }],
});
expect(bridge.telemetry).toMatchObject({
didSendViaMessagingTool: true,
messagingToolSentTexts: ["hello from Codex"],
messagingToolSentMediaUrls: ["/tmp/codex-reply.png"],
messagingToolSentTargets: [
{
tool: "message",
provider: "telegram",
to: "chat-1",
threadId: "thread-ts-1",
},
],
});
await vi.waitFor(() => {
expect(hooks.afterToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "message",
toolCallId: "call-message",
params: expect.objectContaining({
text: "hello from Codex",
mediaUrl: "/tmp/codex-reply.png",
}),
}),
expect.objectContaining({
runId: "run-message",
toolCallId: "call-message",
}),
);
});
});
it("records successful Codex media artifacts from tool results", async () => {
const hooks = installOpenClawOwnedToolHooks();
const execute = vi.fn(async () =>
mediaToolResult("Generated media reply.", "/tmp/reply.opus", true),
);
const bridge = createCodexDynamicToolBridge({
tools: [createContractTool({ name: "tts", execute })],
signal: new AbortController().signal,
hookContext: { runId: "run-media" },
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-media",
namespace: null,
tool: "tts",
arguments: { text: "hello" },
});
expect(result).toEqual({
success: true,
contentItems: [{ type: "inputText", text: "Generated media reply." }],
});
expect(bridge.telemetry.toolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(bridge.telemetry.toolAudioAsVoice).toBe(true);
await vi.waitFor(() => {
expect(hooks.afterToolCall).toHaveBeenCalledWith(
expect.objectContaining({
toolName: "tts",
toolCallId: "call-media",
result: expect.objectContaining({
details: {
media: {
mediaUrl: "/tmp/reply.opus",
audioAsVoice: true,
},
},
}),
}),
expect.objectContaining({
runId: "run-media",
toolCallId: "call-media",
}),
);
});
});
// The tool handed to the bridge is already wrapped with
// wrapToolWithBeforeToolCallHook. The test passes only if the before-tool-call
// hook fires exactly once and execute still receives the hook-adjusted params.
it("does not double-wrap dynamic tools that already have before_tool_call", async () => {
const adjustedParams = { mode: "safe" };
const mergedParams = { command: "pwd", mode: "safe" };
const hooks = installOpenClawOwnedToolHooks({ adjustedParams });
const execute = vi.fn(async () => textToolResult("done"));
// Pre-wrap the tool before it reaches the bridge.
const tool = wrapToolWithBeforeToolCallHook(createContractTool({ name: "exec", execute }), {
runId: "run-wrapped",
});
const bridge = createCodexDynamicToolBridge({
tools: [tool],
signal: new AbortController().signal,
hookContext: { runId: "run-wrapped" },
});
const result = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-wrapped",
namespace: null,
tool: "exec",
arguments: { command: "pwd" },
});
expect(result).toEqual({
success: true,
contentItems: [{ type: "inputText", text: "done" }],
});
// A second wrapper added by the bridge would make this count 2.
expect(hooks.beforeToolCall).toHaveBeenCalledTimes(1);
expect(execute).toHaveBeenCalledWith(
"call-wrapped",
mergedParams,
expect.any(AbortSignal),
undefined,
);
});
});

View File

@@ -0,0 +1,352 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import type { EmbeddedRunAttemptParams } from "openclaw/plugin-sdk/agent-harness";
import { afterEach, describe, expect, it } from "vitest";
import { classifyEmbeddedPiRunResultForModelFallback } from "../../../../src/agents/pi-embedded-runner/result-fallback-classifier.js";
import {
createContractRunResult,
OUTCOME_FALLBACK_RUNTIME_CONTRACT,
} from "../../../../test/helpers/agents/outcome-fallback-runtime-contract.js";
import {
CodexAppServerEventProjector,
type CodexAppServerToolTelemetry,
} from "./event-projector.js";
import { createCodexTestModel } from "./test-support.js";
const THREAD_ID = "thread-outcome-contract";
const TURN_ID = "turn-outcome-contract";
const tempDirs = new Set<string>();
type ProjectorNotification = Parameters<CodexAppServerEventProjector["handleNotification"]>[0];
type ProjectedAttemptResult = ReturnType<CodexAppServerEventProjector["buildResult"]>;
/**
 * Creates attempt params backed by a fresh temp directory.
 *
 * The directory doubles as the workspace and is recorded in `tempDirs` so the
 * suite-level cleanup can remove it. Identity fields come from
 * OUTCOME_FALLBACK_RUNTIME_CONTRACT.
 */
async function createParams(): Promise<EmbeddedRunAttemptParams> {
  const workspaceDir = await fs.mkdtemp(
    path.join(os.tmpdir(), "openclaw-codex-outcome-contract-"),
  );
  tempDirs.add(workspaceDir);
  const sessionFile = path.join(workspaceDir, "session.jsonl");
  // NOTE(review): presumably initializes the session file on disk — confirm.
  SessionManager.open(sessionFile);
  const { prompt, sessionId, sessionKey, runId, primaryModel } = OUTCOME_FALLBACK_RUNTIME_CONTRACT;
  return {
    prompt,
    sessionId,
    sessionKey,
    sessionFile,
    workspaceDir,
    runId,
    provider: "codex",
    modelId: primaryModel,
    model: createCodexTestModel("codex"),
    thinkLevel: "medium",
  } as EmbeddedRunAttemptParams;
}
/** Creates a projector bound to the suite's fixed thread and turn ids. */
async function createProjector(): Promise<CodexAppServerEventProjector> {
  const attemptParams = await createParams();
  return new CodexAppServerEventProjector(attemptParams, THREAD_ID, TURN_ID);
}
/**
 * Returns tool telemetry describing "no tool side effects", with any supplied
 * `overrides` applied on top. Each call produces fresh arrays.
 */
function buildToolTelemetry(
  overrides: Partial<CodexAppServerToolTelemetry> = {},
): CodexAppServerToolTelemetry {
  const noSideEffects: CodexAppServerToolTelemetry = {
    didSendViaMessagingTool: false,
    messagingToolSentTexts: [],
    messagingToolSentMediaUrls: [],
    messagingToolSentTargets: [],
    toolMediaUrls: [],
    toolAudioAsVoice: false,
  };
  return { ...noSideEffects, ...overrides };
}
/**
 * Wraps `params` in a notification addressed to the suite's thread and turn.
 * Keys in `params` override the default `threadId`/`turnId` when they collide.
 */
function forCurrentTurn(
  method: ProjectorNotification["method"],
  params: Record<string, unknown>,
): ProjectorNotification {
  const scopedParams = { threadId: THREAD_ID, turnId: TURN_ID, ...params };
  return { method, params: scopedParams } as ProjectorNotification;
}
/**
 * Feeds a projected attempt result through the model-fallback classifier.
 *
 * The assistant texts are joined with blank lines and trimmed; an empty join
 * is passed as `undefined` for both the raw and visible final text.
 */
function classifyProjectedAttemptResult(result: ProjectedAttemptResult) {
  const joinedText = result.assistantTexts.join("\n\n").trim();
  const finalText = joinedText || undefined;
  const runResult = createContractRunResult({
    ...result,
    meta: {
      durationMs: 1,
      aborted: result.aborted,
      agentHarnessResultClassification: result.agentHarnessResultClassification,
      finalAssistantRawText: finalText,
      finalAssistantVisibleText: finalText,
    },
  });
  return classifyEmbeddedPiRunResultForModelFallback({
    provider: "codex",
    model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
    result: runResult,
  });
}
// Remove every temp directory registered by createParams() during the test,
// one at a time, then empty the registry so the next test starts clean.
afterEach(async () => {
for (const tempDir of tempDirs) {
// force: true keeps cleanup from failing if the directory is already gone.
await fs.rm(tempDir, { recursive: true, force: true });
}
tempDirs.clear();
});
describe("Outcome/fallback runtime contract - Codex app-server adapter", () => {
it("preserves an empty terminal turn for OpenClaw-owned fallback classification", async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: { id: TURN_ID, status: "completed", items: [] },
}),
);
const result = projector.buildResult(buildToolTelemetry());
expect(result.assistantTexts).toEqual([]);
expect(result.lastAssistant).toBeUndefined();
expect(result.promptError).toBeNull();
});
it("preserves exact NO_REPLY as assistant text instead of classifying in the adapter", async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("item/agentMessage/delta", {
itemId: "msg-1",
delta: "NO_REPLY",
}),
);
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: {
id: TURN_ID,
status: "completed",
items: [{ type: "agentMessage", id: "msg-1", text: "NO_REPLY" }],
},
}),
);
const result = projector.buildResult(buildToolTelemetry());
expect(result.assistantTexts).toEqual(["NO_REPLY"]);
expect(result.lastAssistant?.content).toEqual([{ type: "text", text: "NO_REPLY" }]);
expect(result.promptError).toBeNull();
});
it("preserves reasoning-only terminal turns for OpenClaw-owned fallback classification", async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("item/reasoning/textDelta", {
itemId: "reasoning-1",
delta: OUTCOME_FALLBACK_RUNTIME_CONTRACT.reasoningOnlyText,
}),
);
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: {
id: TURN_ID,
status: "completed",
items: [{ type: "reasoning", id: "reasoning-1" }],
},
}),
);
const result = projector.buildResult(buildToolTelemetry());
expect(result.assistantTexts).toEqual([]);
expect(result.lastAssistant).toBeUndefined();
expect(result.promptError).toBeNull();
expect(result.messagesSnapshot).toEqual(
expect.arrayContaining([
expect.objectContaining({
role: "assistant",
content: [
{
type: "text",
text: `Codex reasoning:\n${OUTCOME_FALLBACK_RUNTIME_CONTRACT.reasoningOnlyText}`,
},
],
}),
]),
);
});
it("preserves planning-only terminal turns for OpenClaw-owned fallback classification", async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("item/plan/delta", {
itemId: "plan-1",
delta: OUTCOME_FALLBACK_RUNTIME_CONTRACT.planningOnlyText,
}),
);
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: {
id: TURN_ID,
status: "completed",
items: [
{
type: "plan",
id: "plan-1",
text: OUTCOME_FALLBACK_RUNTIME_CONTRACT.planningOnlyText,
},
],
},
}),
);
const result = projector.buildResult(buildToolTelemetry());
expect(result.assistantTexts).toEqual([]);
expect(result.lastAssistant).toBeUndefined();
expect(result.promptError).toBeNull();
expect(result.messagesSnapshot).toEqual(
expect.arrayContaining([
expect.objectContaining({
role: "assistant",
content: [
{
type: "text",
text: `Codex plan:\n${OUTCOME_FALLBACK_RUNTIME_CONTRACT.planningOnlyText}`,
},
],
}),
]),
);
});
it("preserves tool side-effect telemetry so fallback can stay disabled", async () => {
const projector = await createProjector();
const result = projector.buildResult(
buildToolTelemetry({
didSendViaMessagingTool: true,
messagingToolSentTexts: ["sent out of band"],
}),
);
expect(result.assistantTexts).toEqual([]);
expect(result.didSendViaMessagingTool).toBe(true);
expect(result.messagingToolSentTexts).toEqual(["sent out of band"]);
});
it.each([
{
name: "empty",
classification: "empty",
expectedCode: "empty_result",
build: async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: { id: TURN_ID, status: "completed", items: [] },
}),
);
return projector.buildResult(buildToolTelemetry());
},
},
{
name: "reasoning-only",
classification: "reasoning-only",
expectedCode: "reasoning_only_result",
build: async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("item/reasoning/textDelta", {
itemId: "reasoning-1",
delta: OUTCOME_FALLBACK_RUNTIME_CONTRACT.reasoningOnlyText,
}),
);
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: {
id: TURN_ID,
status: "completed",
items: [{ type: "reasoning", id: "reasoning-1" }],
},
}),
);
return projector.buildResult(buildToolTelemetry());
},
},
{
name: "planning-only",
classification: "planning-only",
expectedCode: "planning_only_result",
build: async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("item/plan/delta", {
itemId: "plan-1",
delta: OUTCOME_FALLBACK_RUNTIME_CONTRACT.planningOnlyText,
}),
);
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: {
id: TURN_ID,
status: "completed",
items: [
{
type: "plan",
id: "plan-1",
text: OUTCOME_FALLBACK_RUNTIME_CONTRACT.planningOnlyText,
},
],
},
}),
);
return projector.buildResult(buildToolTelemetry());
},
},
] as const)(
"keeps $name terminal turns fallback-ready with adapter-produced classification",
async ({ build, classification, expectedCode }) => {
const result = await build();
expect(result.agentHarnessResultClassification).toBe(classification);
expect(classifyProjectedAttemptResult(result)).toMatchObject({
reason: "format",
code: expectedCode,
});
},
);
// Streams "NO_REPLY" as the only assistant message, completes the turn, and
// expects the fallback classifier to return null for the projected result.
it("keeps exact NO_REPLY classified as an intentional silent terminal reply", async () => {
const projector = await createProjector();
await projector.handleNotification(
forCurrentTurn("item/agentMessage/delta", {
itemId: "msg-1",
delta: "NO_REPLY",
}),
);
await projector.handleNotification(
forCurrentTurn("turn/completed", {
turn: {
id: TURN_ID,
status: "completed",
items: [{ type: "agentMessage", id: "msg-1", text: "NO_REPLY" }],
},
}),
);
const result = projector.buildResult(buildToolTelemetry());
expect(classifyProjectedAttemptResult(result)).toBeNull();
});
// No notifications are delivered here: the result is built straight from
// telemetry reporting a messaging-tool send. The adapter must not attach a
// classification and the fallback classifier must return null.
it("keeps tool side effects classified as non-fallback terminal outcomes", async () => {
const projector = await createProjector();
const result = projector.buildResult(
buildToolTelemetry({
didSendViaMessagingTool: true,
messagingToolSentTexts: ["sent out of band"],
}),
);
expect(result.agentHarnessResultClassification).toBeUndefined();
expect(classifyProjectedAttemptResult(result)).toBeNull();
});
});

View File

@@ -34,6 +34,7 @@ import {
type NativeHookRelayRegistrationHandle,
} from "openclaw/plugin-sdk/agent-harness-runtime";
import { handleCodexAppServerApprovalRequest } from "./approval-bridge.js";
import { refreshCodexAppServerAuthTokens } from "./auth-bridge.js";
import {
createCodexAppServerClientFactoryTestHooks,
defaultCodexAppServerClientFactory,
@@ -149,7 +150,10 @@ export async function runCodexAppServerAttempt(
: undefined;
let yieldDetected = false;
const startupBinding = await readCodexAppServerBinding(params.sessionFile);
const startupAuthProfileId = params.authProfileId ?? startupBinding?.authProfileId;
const startupAuthProfileId =
params.runtimePlan?.auth.forwardedAuthProfileId ??
params.authProfileId ??
startupBinding?.authProfileId;
const tools = await buildDynamicTools({
params,
resolvedWorkspace,
@@ -373,6 +377,12 @@ export async function runCodexAppServerAttempt(
const notificationCleanup = client.addNotificationHandler(enqueueNotification);
const requestCleanup = client.addRequestHandler(async (request) => {
if (request.method === "account/chatgptAuthTokens/refresh") {
return refreshCodexAppServerAuthTokens({
agentDir,
authProfileId: startupAuthProfileId,
});
}
if (!turnId) {
return undefined;
}
@@ -486,7 +496,11 @@ export async function runCodexAppServerAttempt(
sessionId: params.sessionId,
provider: params.provider,
model: params.modelId,
resolvedRef: `${params.provider}/${params.modelId}`,
resolvedRef:
params.runtimePlan?.observability.resolvedRef ?? `${params.provider}/${params.modelId}`,
...(params.runtimePlan?.observability.harnessId
? { harnessId: params.runtimePlan.observability.harnessId }
: {}),
assistantTexts: [],
},
ctx: hookContext,
@@ -642,7 +656,11 @@ export async function runCodexAppServerAttempt(
sessionId: params.sessionId,
provider: params.provider,
model: params.modelId,
resolvedRef: `${params.provider}/${params.modelId}`,
resolvedRef:
params.runtimePlan?.observability.resolvedRef ?? `${params.provider}/${params.modelId}`,
...(params.runtimePlan?.observability.harnessId
? { harnessId: params.runtimePlan.observability.harnessId }
: {}),
assistantTexts: result.assistantTexts,
...(result.lastAssistant ? { lastAssistant: result.lastAssistant } : {}),
...(result.attemptUsage ? { usage: result.attemptUsage } : {}),
@@ -821,16 +839,23 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
params.toolsAllow && params.toolsAllow.length > 0
? visionFilteredTools.filter((tool) => params.toolsAllow?.includes(tool.name))
: visionFilteredTools;
return normalizeProviderToolSchemas({
tools: filteredTools,
provider: params.provider,
config: params.config,
workspaceDir: input.effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
});
return (
params.runtimePlan?.tools.normalize(filteredTools, {
workspaceDir: input.effectiveWorkspace,
modelApi: params.model.api,
model: params.model,
}) ??
normalizeProviderToolSchemas({
tools: filteredTools,
provider: params.provider,
config: params.config,
workspaceDir: input.effectiveWorkspace,
env: process.env,
modelId: params.modelId,
modelApi: params.model.api,
model: params.model,
})
);
}
async function withCodexStartupTimeout<T>(params: {

View File

@@ -0,0 +1,168 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import type { EmbeddedRunAttemptParams } from "openclaw/plugin-sdk/agent-harness";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
createParameterFreeTool,
createPermissiveTool,
normalizedParameterFreeSchema,
} from "../../../../test/helpers/agents/schema-normalization-runtime-contract.js";
import { createCodexTestModel } from "./test-support.js";
import { startOrResumeThread } from "./thread-lifecycle.js";
let tempDir: string;
/**
 * Builds fixed attempt params for the schema contract tests.
 *
 * Only the session file and workspace directory vary per test. Tools are
 * disabled, and the auth storage and model registry are empty placeholders.
 */
function createParams(sessionFile: string, workspaceDir: string): EmbeddedRunAttemptParams {
  const model = createCodexTestModel("codex");
  return {
    prompt: "hello",
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile,
    workspaceDir,
    runId: "run-1",
    provider: "codex",
    modelId: "gpt-5.4",
    model,
    thinkLevel: "medium",
    disableTools: true,
    timeoutMs: 5_000,
    authStorage: {} as never,
    modelRegistry: {} as never,
  } as EmbeddedRunAttemptParams;
}
/**
 * Returns a fixed app-server options fixture: stdio transport running
 * `codex app-server`, a 60s request timeout, approval policy "never", and a
 * workspace-write sandbox.
 *
 * The return type is derived from startOrResumeThread's first parameter, so
 * the fixture is checked against that signature.
 */
function createAppServerOptions(): Parameters<typeof startOrResumeThread>[0]["appServer"] {
return {
start: {
transport: "stdio",
command: "codex",
args: ["app-server"],
headers: {},
},
requestTimeoutMs: 60_000,
approvalPolicy: "never",
approvalsReviewer: "user",
sandbox: "workspace-write",
};
}
/**
 * Builds a canned `thread/start` response for the mocked client.
 *
 * @param threadId Id reported for the started thread (defaults to "thread-1").
 * @returns A response whose thread and top-level `cwd` both point at the
 *   current test's temp directory.
 */
function threadStartResult(threadId = "thread-1") {
  const cwd = tempDir;
  const thread = {
    id: threadId,
    forkedFromId: null,
    preview: "",
    ephemeral: false,
    modelProvider: "openai",
    createdAt: 1,
    updatedAt: 1,
    status: { type: "idle" },
    path: null,
    cwd,
    cliVersion: "0.118.0",
    source: "unknown",
    agentNickname: null,
    agentRole: null,
    gitInfo: null,
    name: null,
    turns: [],
  };
  return {
    thread,
    model: "gpt-5.4",
    modelProvider: "openai",
    serviceTier: null,
    cwd,
    instructionSources: [],
    approvalPolicy: "never",
    approvalsReviewer: "user",
    sandbox: { type: "dangerFullAccess" },
    permissionProfile: null,
    reasoningEffort: null,
  };
}
describe("Codex app-server dynamic tool schema boundary contract", () => {
beforeEach(async () => {
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-codex-schema-contract-"));
});
afterEach(async () => {
await fs.rm(tempDir, { recursive: true, force: true });
vi.restoreAllMocks();
});
// An already-normalized dynamic tool must reach the `thread/start` request
// exactly as given; the mocked client rejects every other method.
it("passes prepared executable dynamic tool schemas through thread start unchanged", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const parameterFreeTool = createParameterFreeTool("message");
// Wire-format tool entry built from the parameter-free fixture and the
// normalized schema helper.
const dynamicTool = {
name: parameterFreeTool.name,
description: parameterFreeTool.description,
inputSchema: normalizedParameterFreeSchema(),
};
const request = vi.fn(async (method: string) => {
if (method === "thread/start") {
return threadStartResult();
}
throw new Error(`unexpected method: ${method}`);
});
await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools: [dynamicTool],
appServer: createAppServerOptions(),
});
expect(request).toHaveBeenCalledWith(
"thread/start",
expect.objectContaining({
dynamicTools: [dynamicTool],
}),
);
});
// Calls startOrResumeThread twice against the same session file, changing only
// the dynamic tool definition between calls. Both calls must issue
// `thread/start`; the mocked client throws on any other method, so an attempt
// to resume the first thread would fail the test.
it("treats dynamic tool schema changes as thread-fingerprint changes", async () => {
const sessionFile = path.join(tempDir, "session.jsonl");
const workspaceDir = path.join(tempDir, "workspace");
const appServer = createAppServerOptions();
let nextThreadId = 1;
const request = vi.fn(async (method: string) => {
if (method === "thread/start") {
// Hand out a distinct thread id per start.
return threadStartResult(`thread-${nextThreadId++}`);
}
throw new Error(`unexpected method: ${method}`);
});
// First start: bare `{ type: "object" }` input schema.
await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools: [
{
name: "message",
description: "Permissive test tool",
inputSchema: { type: "object" },
},
],
appServer,
});
// Second start: same tool name, definition taken from the permissive fixture.
// NOTE(review): presumably its parameters differ from `{ type: "object" }` — confirm in the helper.
const permissiveTool = createPermissiveTool("message");
await startOrResumeThread({
client: { request } as never,
params: createParams(sessionFile, workspaceDir),
cwd: workspaceDir,
dynamicTools: [
{
name: permissiveTool.name,
description: permissiveTool.description,
inputSchema: permissiveTool.parameters,
},
],
appServer,
});
expect(request.mock.calls.map(([method]) => method)).toEqual(["thread/start", "thread/start"]);
});
});

View File

@@ -5,10 +5,12 @@ import { createClientHarness } from "./test-support.js";
const mocks = vi.hoisted(() => ({
bridgeCodexAppServerStartOptions: vi.fn(async ({ startOptions }) => startOptions),
applyCodexAppServerAuthProfile: vi.fn(async () => undefined),
resolveOpenClawAgentDir: vi.fn(() => "/tmp/openclaw-agent"),
}));
vi.mock("./auth-bridge.js", () => ({
applyCodexAppServerAuthProfile: mocks.applyCodexAppServerAuthProfile,
bridgeCodexAppServerStartOptions: mocks.bridgeCodexAppServerStartOptions,
}));
@@ -51,6 +53,7 @@ describe("shared Codex app-server client", () => {
vi.useRealTimers();
vi.restoreAllMocks();
mocks.bridgeCodexAppServerStartOptions.mockClear();
mocks.applyCodexAppServerAuthProfile.mockClear();
mocks.resolveOpenClawAgentDir.mockClear();
});
@@ -118,6 +121,11 @@ describe("shared Codex app-server client", () => {
authProfileId: "openai-codex:work",
}),
);
expect(mocks.applyCodexAppServerAuthProfile).toHaveBeenCalledWith(
expect.objectContaining({
authProfileId: "openai-codex:work",
}),
);
});
it("restarts the shared client when the bridged auth token changes", async () => {

View File

@@ -1,5 +1,5 @@
import { resolveOpenClawAgentDir } from "openclaw/plugin-sdk/provider-auth";
import { bridgeCodexAppServerStartOptions } from "./auth-bridge.js";
import { applyCodexAppServerAuthProfile, bridgeCodexAppServerStartOptions } from "./auth-bridge.js";
import { CodexAppServerClient } from "./client.js";
import {
codexAppServerStartOptionsKey,
@@ -35,7 +35,9 @@ export async function getSharedCodexAppServerClient(options?: {
agentDir: resolveOpenClawAgentDir(),
authProfileId: options?.authProfileId,
});
const key = codexAppServerStartOptionsKey(startOptions);
const key = codexAppServerStartOptionsKey(startOptions, {
authProfileId: options?.authProfileId,
});
if (state.key && state.key !== key) {
clearSharedCodexAppServerClient();
}
@@ -48,6 +50,11 @@ export async function getSharedCodexAppServerClient(options?: {
client.addCloseHandler(clearSharedClientIfCurrent);
try {
await client.initialize();
await applyCodexAppServerAuthProfile({
client,
agentDir: resolveOpenClawAgentDir(),
authProfileId: options?.authProfileId,
});
return client;
} catch (error) {
// Startup failures happen before callers own the shared client, so close
@@ -84,6 +91,11 @@ export async function createIsolatedCodexAppServerClient(options?: {
const initialize = client.initialize();
try {
await withTimeout(initialize, options?.timeoutMs ?? 0, "codex app-server initialize timed out");
await applyCodexAppServerAuthProfile({
client,
agentDir: resolveOpenClawAgentDir(),
authProfileId: options?.authProfileId,
});
return client;
} catch (error) {
client.close();

View File

@@ -219,16 +219,45 @@ function stabilizeJsonValue(value: JsonValue): JsonValue {
}
/**
 * Assembles the developer-instruction text for an embedded Codex run.
 *
 * The result is the fixed OpenClaw preamble followed by the prompt overlay,
 * the caller-supplied extra system prompt, and the skills snapshot prompt.
 * Sections that are not strings, or that are blank after trimming, are
 * dropped; the remaining sections are joined with a blank line.
 *
 * @param params - Attempt parameters; this function reads `extraSystemPrompt`
 *   and `skillsSnapshot?.prompt` directly and hands the whole object to
 *   `renderCodexRuntimePromptOverlay`.
 * @returns The joined instruction text.
 */
export function buildDeveloperInstructions(params: EmbeddedRunAttemptParams): string {
  // The overlay is rendered exactly once. `renderCodexRuntimePromptOverlay`
  // already falls back to `renderCodexPromptOverlay` when no runtime-plan
  // contribution is available, so listing the legacy overlay here as well
  // would emit the overlay text twice.
  const promptOverlay = renderCodexRuntimePromptOverlay(params);
  const sections = [
    "You are running inside OpenClaw. Use OpenClaw dynamic tools for messaging, cron, sessions, and host actions when available.",
    "Preserve the user's existing channel/session context. If sending a channel reply, use the OpenClaw messaging tool instead of describing that you would reply.",
    promptOverlay,
    params.extraSystemPrompt,
    params.skillsSnapshot?.prompt,
  ];
  // Skip undefined / non-string / whitespace-only sections before joining.
  return sections.filter((section) => typeof section === "string" && section.trim()).join("\n\n");
}
/**
 * Renders the prompt overlay for a Codex run.
 *
 * Asks the runtime plan (when one is attached to `params`) for its system
 * prompt contribution in "full" prompt mode. If no contribution comes back,
 * the result of `renderCodexPromptOverlay` is returned instead. Otherwise the
 * contribution's stable prefix, every section override value, and the dynamic
 * suffix are concatenated in that order, skipping entries that are not
 * non-blank strings.
 *
 * @param params - Attempt parameters supplying config, directories, provider,
 *   model and agent identifiers, and the optional runtime plan.
 * @returns The overlay text; may be an empty string when a contribution exists
 *   but carries no usable sections.
 */
function renderCodexRuntimePromptOverlay(params: EmbeddedRunAttemptParams): string | undefined {
  const { config, provider, modelId } = params;

  const contribution = params.runtimePlan?.prompt.resolveSystemPromptContribution({
    config,
    agentDir: params.agentDir,
    workspaceDir: params.workspaceDir,
    provider,
    modelId,
    promptMode: "full",
    agentId: params.agentId,
  });

  // No runtime plan, or the plan had nothing to contribute: use the legacy overlay.
  if (!contribution) {
    return renderCodexPromptOverlay({ config, providerId: provider, modelId });
  }

  const candidates: unknown[] = [
    contribution.stablePrefix,
    ...Object.values(contribution.sectionOverrides ?? {}),
    contribution.dynamicSuffix,
  ];

  const kept: string[] = [];
  for (const candidate of candidates) {
    if (typeof candidate !== "string") {
      continue;
    }
    if (candidate.trim().length > 0) {
      kept.push(candidate);
    }
  }
  return kept.join("\n\n");
}
function buildUserInput(
params: EmbeddedRunAttemptParams,
promptText: string = params.prompt,

View File

@@ -0,0 +1,44 @@
import { describe, expect, it } from "vitest";
import {
assistantHistoryMessage,
currentPromptHistoryMessage,
mediaOnlyHistoryMessage,
structuredHistoryMessage,
} from "../../../../test/helpers/agents/transcript-repair-runtime-contract.js";
import { projectContextEngineAssemblyForCodex } from "./context-engine-projection.js";
// Contract tests for how assembled context-engine history is projected into
// the Codex prompt text.
describe("Codex transcript projection runtime contract", () => {
  it("drops only the duplicate trailing current prompt while preserving prior structured context", () => {
    const prompt = "newest inbound message";
    // The assembled history ends with a copy of the current prompt.
    const assembledMessages = [
      structuredHistoryMessage(),
      assistantHistoryMessage(),
      currentPromptHistoryMessage(prompt),
    ];

    const { promptText } = projectContextEngineAssemblyForCodex({
      prompt,
      originalHistoryMessages: [structuredHistoryMessage()],
      assembledMessages,
    });

    const expectedFragments = [
      "Current user request:\nnewest inbound message",
      "[user]\nolder structured context\n[image omitted]",
      "[assistant]\nack",
    ];
    for (const fragment of expectedFragments) {
      expect(promptText).toContain(fragment);
    }
    // The trailing duplicate must not show up as a history turn.
    expect(promptText).not.toContain("[user]\nnewest inbound message");
  });

  it("keeps media-only user history visible as omitted media instead of dropping the turn", () => {
    const { promptText } = projectContextEngineAssemblyForCodex({
      prompt: "newest inbound message",
      originalHistoryMessages: [mediaOnlyHistoryMessage()],
      assembledMessages: [
        mediaOnlyHistoryMessage(),
        currentPromptHistoryMessage("newest inbound message"),
      ],
    });

    // The media-only turn is kept as a placeholder line.
    expect(promptText).toContain("[user]\n[image omitted]");
    // Neither of these strings may appear anywhere in the projected text.
    const forbiddenFragments = ["data:image/png", "bbbb"];
    for (const fragment of forbiddenFragments) {
      expect(promptText).not.toContain(fragment);
    }
  });
});