From 40be5ad58187fd20b8defe5fc30e649190c131a8 Mon Sep 17 00:00:00 2001 From: EVA Date: Fri, 24 Apr 2026 14:55:52 +0700 Subject: [PATCH] fix: harden GPT-5 runtime paths Co-authored-by: EVA <100yenadmin@users.noreply.github.com> --- CHANGELOG.md | 1 + .../codex/src/app-server/run-attempt.ts | 9 +- extensions/matrix/src/actions.ts | 1 + extensions/msteams/src/actions.ts | 1 + extensions/msteams/src/channel.ts | 1 + extensions/openai/speech-provider.test.ts | 1 + extensions/openai/tts.test.ts | 1 + extensions/openai/tts.ts | 122 ++++---- .../agent-command.live-model-switch.test.ts | 73 ++++- src/agents/agent-command.ts | 10 +- src/agents/auth-profiles/order.test.ts | 152 +++++++++ src/agents/auth-profiles/order.ts | 14 +- .../auth-profiles/session-override.test.ts | 42 +++ src/agents/auth-profiles/session-override.ts | 8 +- .../command/attempt-execution.cli.test.ts | 54 +++- src/agents/command/attempt-execution.ts | 11 +- src/agents/gpt5-prompt-overlay.ts | 22 +- .../model-fallback.run-embedded.e2e.test.ts | 46 +++ src/agents/model-fallback.test.ts | 145 +++++++++ src/agents/model-fallback.ts | 75 ++++- ...-github-copilot-profile-env-tokens.test.ts | 1 + src/agents/openai-responses-payload-policy.ts | 6 +- src/agents/openai-tool-schema.ts | 94 ++++++ src/agents/openai-transport-stream.test.ts | 74 +++++ src/agents/openai-transport-stream.ts | 69 +++-- ...mbedded-runner-extraparams-resolve.test.ts | 4 +- .../pi-embedded-runner-extraparams.test.ts | 49 ++- ...pi-agent.auth-profile-rotation.e2e.test.ts | 78 ++++- src/agents/pi-embedded-runner/compact.ts | 3 +- src/agents/pi-embedded-runner/extra-params.ts | 3 +- .../openai-stream-wrappers.ts | 6 +- .../result-fallback-classifier.ts | 111 +++++++ src/agents/pi-embedded-runner/run.ts | 30 +- .../run/attempt.prompt-helpers.ts | 195 ++++++++++-- .../pi-embedded-runner/run/attempt.test.ts | 123 +++++++- src/agents/pi-embedded-runner/run/attempt.ts | 29 +- .../pi-model-discovery.synthetic-auth.test.ts | 2 + src/agents/provider-auth-aliases.test.ts | 35 +++ src/agents/provider-auth-aliases.ts | 53 +++- src/agents/tools-effective-inventory.ts | 3 +- src/agents/tools/message-tool.test.ts | 51 +++ src/agents/tools/message-tool.ts | 5 +- .../reply/agent-runner-auth-profile.ts | 20 +- .../reply/agent-runner-execution.test.ts | 292 +++++++++++++++++- .../reply/agent-runner-execution.ts | 64 +++- src/auto-reply/reply/followup-runner.test.ts | 61 +++- src/auto-reply/reply/followup-runner.ts | 54 +++- .../plugins/message-action-discovery.ts | 42 +++ src/channels/plugins/message-actions.test.ts | 112 +++++++ src/channels/plugins/types.core.ts | 6 + src/plugins/provider-runtime.test.ts | 65 ++++ src/plugins/provider-runtime.ts | 11 +- 52 files changed, 2335 insertions(+), 205 deletions(-) create mode 100644 src/agents/pi-embedded-runner/result-fallback-classifier.ts create mode 100644 src/agents/provider-auth-aliases.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index f1d22fe3a94..5e71f7e8657 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - Plugins/onboarding: record local plugin install source metadata without duplicating raw absolute local paths in persisted `plugins.installs`, while preserving linked load-path cleanup. (#70970) Thanks @vincentkoc. - Browser/tool: tell agents not to pass per-call `timeoutMs` on existing-session type, evaluate, and other Chrome MCP actions that reject timeout overrides. +- Codex/GPT-5.4: harden fallback, auth-profile, tool-schema, and replay edge cases across native and embedded runtime paths. (#70743) Thanks @100yenadmin. - Voice-call/Telnyx: preserve inbound/outbound callback metadata and read transcription text from Telnyx's current `transcription_data` payload. - Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior. (#70966) Thanks @jalehman. - Codex models: fetch paginated Codex app-server model catalogs, mark truncated `/codex models` output, and keep ChatGPT OAuth defaults on the `openai-codex/gpt-5.5` route instead of the OpenAI API-key route. diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts index 8164d17d0de..29e822fc234 100644 --- a/extensions/codex/src/app-server/run-attempt.ts +++ b/extensions/codex/src/app-server/run-attempt.ts @@ -70,6 +70,10 @@ import { mirrorCodexAppServerTranscript } from "./transcript-mirror.js"; import { createCodexUserInputBridge } from "./user-input-bridge.js"; import { filterToolsForVisionInputs } from "./vision-tools.js"; +type OpenClawCodingToolsOptions = NonNullable< + Parameters<(typeof import("openclaw/plugin-sdk/agent-harness"))["createOpenClawCodingTools"]>[0] +>; + let clientFactory = defaultCodexAppServerClientFactory; function emitCodexAppServerEvent( @@ -709,7 +713,10 @@ async function buildDynamicTools(input: DynamicToolBuildParams) { abortSignal: input.runAbortController.signal, modelProvider: params.model.provider, modelId: params.modelId, - modelCompat: params.model.compat, + modelCompat: + params.model.compat && typeof params.model.compat === "object" + ? (params.model.compat as OpenClawCodingToolsOptions["modelCompat"]) + : undefined, modelApi: params.model.api, modelContextWindowTokens: params.model.contextWindow, modelAuthMode: resolveModelAuthMode(params.model.provider, params.config), diff --git a/extensions/matrix/src/actions.ts b/extensions/matrix/src/actions.ts index a63d213dbf6..5f10f4e561b 100644 --- a/extensions/matrix/src/actions.ts +++ b/extensions/matrix/src/actions.ts @@ -99,6 +99,7 @@ function createMatrixExposedActions(params: { function buildMatrixProfileToolSchema(): NonNullable { return { + actions: ["set-profile"], properties: { displayName: Type.Optional( Type.String({ diff --git a/extensions/msteams/src/actions.ts b/extensions/msteams/src/actions.ts index b10ceaae24e..08f5f365266 100644 --- a/extensions/msteams/src/actions.ts +++ b/extensions/msteams/src/actions.ts @@ -274,6 +274,7 @@ export function describeMSTeamsMessageTool({ capabilities: enabled ? ["presentation"] : [], schema: enabled ? { + actions: ["unpin"], properties: { pinnedMessageId: Type.Optional( Type.String({ diff --git a/extensions/msteams/src/channel.ts b/extensions/msteams/src/channel.ts index 09292974bdc..d3361504e33 100644 --- a/extensions/msteams/src/channel.ts +++ b/extensions/msteams/src/channel.ts @@ -388,6 +388,7 @@ function describeMSTeamsMessageTool({ capabilities: enabled ? ["presentation"] : [], schema: enabled ? { + actions: ["unpin"], properties: { pinnedMessageId: Type.Optional( Type.String({ diff --git a/extensions/openai/speech-provider.test.ts b/extensions/openai/speech-provider.test.ts index ab4cead5bc5..65c09736d86 100644 --- a/extensions/openai/speech-provider.test.ts +++ b/extensions/openai/speech-provider.test.ts @@ -12,6 +12,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({ response: await globalThis.fetch(url, init), release: vi.fn(async () => {}), }), + ssrfPolicyFromHttpBaseUrlAllowedHostname: () => undefined, })); function isSpeechRequestBody(value: unknown): value is { response_format?: string } { diff --git a/extensions/openai/tts.test.ts b/extensions/openai/tts.test.ts index 7b590064aa0..9e48c8fc6ca 100644 --- a/extensions/openai/tts.test.ts +++ b/extensions/openai/tts.test.ts @@ -24,6 +24,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({ response: await globalThis.fetch(url, init), release: vi.fn(async () => {}), }), + ssrfPolicyFromHttpBaseUrlAllowedHostname: () => undefined, })); describe("openai tts", () => { diff --git a/extensions/openai/tts.ts b/extensions/openai/tts.ts index 53f4262b909..f7d4df417bb 100644 --- a/extensions/openai/tts.ts +++ b/extensions/openai/tts.ts @@ -3,7 +3,10 @@ import { isDebugProxyGlobalFetchPatchInstalled, } from "openclaw/plugin-sdk/proxy-capture"; import { extractProviderErrorDetail, trimToUndefined } from "openclaw/plugin-sdk/speech"; -import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime"; +import { + fetchWithSsrFGuard, + ssrfPolicyFromHttpBaseUrlAllowedHostname, +} from "openclaw/plugin-sdk/ssrf-runtime"; export const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"; @@ -91,72 +94,63 @@ export async function openaiTTS(params: { throw new Error(`Invalid voice: ${voice}`); } - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); - + const requestHeaders = { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }; + const requestBody = JSON.stringify({ + model, + input: text, + voice, + response_format: responseFormat, + ...(speed != null && { speed }), + ...(effectiveInstructions != null && { instructions: effectiveInstructions }), + }); + const requestUrl = `${baseUrl}/audio/speech`; + const debugProxyFetchPatchInstalled = isDebugProxyGlobalFetchPatchInstalled(); + const { response, release } = await fetchWithSsrFGuard({ + url: requestUrl, + init: { + method: "POST", + headers: requestHeaders, + body: requestBody, + }, + timeoutMs, + policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(baseUrl), + capture: false, + pinDns: debugProxyFetchPatchInstalled ? false : undefined, + auditContext: "openai-tts", + }); try { - const requestHeaders = { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }; - const requestBody = JSON.stringify({ - model, - input: text, - voice, - response_format: responseFormat, - ...(speed != null && { speed }), - ...(effectiveInstructions != null && { instructions: effectiveInstructions }), - }); - const requestUrl = `${baseUrl}/audio/speech`; - const isGlobalFetchPatchInstalled = isDebugProxyGlobalFetchPatchInstalled(); - const guardedFetchImpl = isGlobalFetchPatchInstalled - ? globalThis.fetch.bind(globalThis) - : undefined; - const { response, release } = await fetchWithSsrFGuard({ - url: requestUrl, - init: { + if (!debugProxyFetchPatchInstalled) { + captureHttpExchange({ + url: requestUrl, method: "POST", - headers: requestHeaders, - body: requestBody, - signal: controller.signal, - }, - ...(guardedFetchImpl ? { fetchImpl: guardedFetchImpl } : {}), - capture: false, - auditContext: "openai-tts", - }); - try { - if (!isGlobalFetchPatchInstalled) { - captureHttpExchange({ - url: requestUrl, - method: "POST", - requestHeaders, - requestBody, - response, - transport: "http", - meta: { - provider: "openai", - capability: "tts", - }, - }); - } - - if (!response.ok) { - const detail = await extractOpenAiErrorDetail(response); - const requestId = - trimToUndefined(response.headers.get("x-request-id")) ?? - trimToUndefined(response.headers.get("request-id")); - throw new Error( - `OpenAI TTS API error (${response.status})` + - (detail ? `: ${detail}` : "") + - (requestId ? ` [request_id=${requestId}]` : ""), - ); - } - - return Buffer.from(await response.arrayBuffer()); - } finally { - await release(); + requestHeaders, + requestBody, + response, + transport: "http", + meta: { + provider: "openai", + capability: "tts", + }, + }); } + + if (!response.ok) { + const detail = await extractOpenAiErrorDetail(response); + const requestId = + trimToUndefined(response.headers.get("x-request-id")) ?? + trimToUndefined(response.headers.get("request-id")); + throw new Error( + `OpenAI TTS API error (${response.status})` + + (detail ? `: ${detail}` : "") + + (requestId ? ` [request_id=${requestId}]` : ""), + ); + } + + return Buffer.from(await response.arrayBuffer()); } finally { - clearTimeout(timeout); + await release(); } } diff --git a/src/agents/agent-command.live-model-switch.test.ts b/src/agents/agent-command.live-model-switch.test.ts index 77b02eab97b..37435405017 100644 --- a/src/agents/agent-command.live-model-switch.test.ts +++ b/src/agents/agent-command.live-model-switch.test.ts @@ -10,6 +10,10 @@ const state = vi.hoisted(() => ({ clearAgentRunContextMock: vi.fn(), updateSessionStoreAfterAgentRunMock: vi.fn(), deliverAgentCommandResultMock: vi.fn(), + clearSessionAuthProfileOverrideMock: vi.fn(), + authProfileStoreMock: { profiles: {} } as { profiles: Record }, + sessionEntryMock: undefined as unknown, + sessionStoreMock: undefined as unknown, })); vi.mock("./model-fallback.js", () => ({ @@ -57,12 +61,12 @@ vi.mock("./command/session.js", () => ({ resolveSession: () => ({ sessionId: "session-1", sessionKey: "agent:main", - sessionEntry: { + sessionEntry: state.sessionEntryMock ?? { sessionId: "session-1", updatedAt: Date.now(), skillsSnapshot: { prompt: "", skills: [], version: 0 }, }, - sessionStore: undefined, + sessionStore: state.sessionStoreMock, storePath: undefined, isNewSession: false, persistedThinking: undefined, @@ -250,8 +254,13 @@ vi.mock("./auth-profiles.js", () => ({ ensureAuthProfileStore: () => ({ profiles: {} }), })); +vi.mock("./auth-profiles/store.js", () => ({ + ensureAuthProfileStore: () => state.authProfileStoreMock, +})); + vi.mock("./auth-profiles/session-override.js", () => ({ - clearSessionAuthProfileOverride: vi.fn(), + clearSessionAuthProfileOverride: (...args: unknown[]) => + state.clearSessionAuthProfileOverrideMock(...args), })); vi.mock("./defaults.js", () => ({ @@ -269,7 +278,12 @@ vi.mock("./model-catalog.js", () => ({ vi.mock("./model-selection.js", () => ({ buildAllowedModelSet: () => ({ - allowedKeys: new Set(["anthropic/claude", "openai/claude", "openai/gpt-5.4"]), + allowedKeys: new Set([ + "anthropic/claude", + "codex-cli/gpt-5.4", + "openai/claude", + "openai/gpt-5.4", + ]), allowedCatalog: [], allowAny: false, }), @@ -281,6 +295,12 @@ vi.mock("./model-selection.js", () => ({ resolveThinkingDefault: () => "low", })); +vi.mock("./provider-auth-aliases.js", () => ({ + resolveProviderAuthAliasMap: () => ({}), + resolveProviderIdForAuth: (provider: string) => + provider.trim().toLowerCase() === "codex-cli" ? "openai-codex" : provider.trim().toLowerCase(), +})); + vi.mock("./skills.js", () => ({ buildWorkspaceSkillSnapshot: () => ({}), })); @@ -376,6 +396,9 @@ function expectFallbackOverrideCalls(first: boolean, second: boolean) { describe("agentCommand – LiveSessionModelSwitchError retry", () => { beforeEach(() => { vi.clearAllMocks(); + state.authProfileStoreMock = { profiles: {} }; + state.sessionEntryMock = undefined; + state.sessionStoreMock = undefined; state.deliverAgentCommandResultMock.mockResolvedValue(undefined); state.updateSessionStoreAfterAgentRunMock.mockResolvedValue(undefined); }); @@ -450,6 +473,48 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { expect(state.runWithModelFallbackMock).toHaveBeenCalledTimes(2); }); + it("keeps aliased session auth profiles for codex-cli runs", async () => { + let capturedAuthProfileProvider: string | undefined; + const sessionEntry = { + sessionId: "session-1", + updatedAt: Date.now(), + providerOverride: "codex-cli", + modelOverride: "gpt-5.4", + authProfileOverride: "openai-codex:work", + authProfileOverrideSource: "user", + skillsSnapshot: { prompt: "", skills: [], version: 0 }, + }; + state.sessionEntryMock = sessionEntry; + state.authProfileStoreMock = { + profiles: { + "openai-codex:work": { + type: "api_key", + provider: "openai-codex", + key: "sk-test", + }, + }, + }; + state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => { + const result = await params.run(params.provider, params.model); + return { + result, + provider: params.provider, + model: params.model, + attempts: [], + }; + }); + state.runAgentAttemptMock.mockImplementation(async (...args: unknown[]) => { + const attemptParams = args[0] as { authProfileProvider?: string } | undefined; + capturedAuthProfileProvider = attemptParams?.authProfileProvider; + return makeSuccessResult("codex-cli", "gpt-5.4"); + }); + + await runBasicAgentCommand(); + + expect(capturedAuthProfileProvider).toBe("codex-cli"); + expect(state.clearSessionAuthProfileOverrideMock).not.toHaveBeenCalled(); + }); + it("updates hasSessionModelOverride for fallback resolution after switch", async () => { setupModelSwitchRetry({ provider: "openai", diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 3be69de51a0..7ddf6000c79 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -58,6 +58,7 @@ import { resolveDefaultModelForAgent, resolveThinkingDefault, } from "./model-selection.js"; +import { resolveProviderIdForAuth } from "./provider-auth-aliases.js"; import { normalizeSpawnedRunMetadata } from "./spawned-context.js"; import { resolveAgentTimeoutMs } from "./timeout.js"; import { ensureAgentWorkspace } from "./workspace.js"; @@ -756,7 +757,14 @@ async function agentCommandInternal( const entry = sessionEntry; const store = ensureAuthProfileStore(); const profile = store.profiles[authProfileId]; - if (!profile || profile.provider !== providerForAuthProfileValidation) { + const profileAuthProvider = profile + ? resolveProviderIdForAuth(profile.provider, { config: cfg, workspaceDir }) + : undefined; + const validationAuthProvider = resolveProviderIdForAuth(providerForAuthProfileValidation, { + config: cfg, + workspaceDir, + }); + if (!profile || profileAuthProvider !== validationAuthProvider) { if (sessionStore && sessionKey) { await clearSessionAuthProfileOverride({ sessionEntry: entry, diff --git a/src/agents/auth-profiles/order.test.ts b/src/agents/auth-profiles/order.test.ts index 29251c86746..32acc8e2800 100644 --- a/src/agents/auth-profiles/order.test.ts +++ b/src/agents/auth-profiles/order.test.ts @@ -69,6 +69,158 @@ describe("resolveAuthProfileOrder", () => { expect(order).toEqual(["fixture-provider:default"]); }); + it("uses canonical provider auth order for alias providers", async () => { + const { resolveAuthProfileOrder } = await importAuthProfileModulesWithAliasRegistry(); + const store: AuthProfileStore = { + version: 1, + profiles: { + "fixture-provider:primary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-primary", + }, + "fixture-provider:secondary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-secondary", + }, + }, + order: { + "fixture-provider": ["fixture-provider:secondary", "fixture-provider:primary"], + }, + }; + + const order = resolveAuthProfileOrder({ + store, + provider: "fixture-provider-plan", + }); + + expect(order).toEqual(["fixture-provider:secondary", "fixture-provider:primary"]); + }); + + it("falls back to legacy stored auth order when alias order is empty", async () => { + const { resolveAuthProfileOrder } = await importAuthProfileModulesWithAliasRegistry(); + const store: AuthProfileStore = { + version: 1, + profiles: { + "fixture-provider:primary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-primary", + }, + "fixture-provider:secondary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-secondary", + }, + }, + order: { + "fixture-provider-plan": [], + "fixture-provider": ["fixture-provider:secondary", "fixture-provider:primary"], + }, + }; + + const order = resolveAuthProfileOrder({ + store, + provider: "fixture-provider-plan", + }); + + expect(order).toEqual(["fixture-provider:secondary", "fixture-provider:primary"]); + }); + + it("falls back to legacy configured auth order when alias order is empty", async () => { + const { resolveAuthProfileOrder } = await importAuthProfileModulesWithAliasRegistry(); + const store: AuthProfileStore = { + version: 1, + profiles: { + "fixture-provider:primary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-primary", + }, + "fixture-provider:secondary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-secondary", + }, + }, + }; + + const order = resolveAuthProfileOrder({ + cfg: { + auth: { + order: { + "fixture-provider-plan": [], + "fixture-provider": ["fixture-provider:secondary", "fixture-provider:primary"], + }, + }, + }, + store, + provider: "fixture-provider-plan", + }); + + expect(order).toEqual(["fixture-provider:secondary", "fixture-provider:primary"]); + }); + + it("keeps explicit empty configured auth order as a provider disable", async () => { + const { resolveAuthProfileOrder } = await importAuthProfileModulesWithAliasRegistry(); + const store: AuthProfileStore = { + version: 1, + profiles: { + "fixture-provider:primary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-primary", + }, + }, + }; + + const order = resolveAuthProfileOrder({ + cfg: { + auth: { + order: { + "fixture-provider": [], + }, + }, + }, + store, + provider: "fixture-provider", + }); + + expect(order).toEqual([]); + }); + + it("keeps explicit empty stored auth order as a provider disable", async () => { + const { resolveAuthProfileOrder } = await importAuthProfileModulesWithAliasRegistry(); + const store: AuthProfileStore = { + version: 1, + profiles: { + "fixture-provider:primary": { + type: "api_key", + provider: "fixture-provider", + key: "sk-primary", + }, + }, + order: { + "fixture-provider": [], + }, + }; + + const order = resolveAuthProfileOrder({ + cfg: { + auth: { + order: { + "fixture-provider": ["fixture-provider:primary"], + }, + }, + }, + store, + provider: "fixture-provider", + }); + + expect(order).toEqual([]); + }); + it("marks aliased provider profiles good under the canonical auth provider", async () => { const { markAuthProfileGood } = await importAuthProfileModulesWithAliasRegistry(); const agentDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-auth-profile-alias-")); diff --git a/src/agents/auth-profiles/order.ts b/src/agents/auth-profiles/order.ts index 7d8f4bb18e0..1b59b4f8964 100644 --- a/src/agents/auth-profiles/order.ts +++ b/src/agents/auth-profiles/order.ts @@ -78,8 +78,11 @@ export function resolveAuthProfileOrder(params: { // get a fresh error count and are not immediately re-penalized on the // next transient failure. See #3604. clearExpiredCooldowns(store, now); - const storedOrder = findNormalizedProviderValue(store.order, providerKey); - const configuredOrder = findNormalizedProviderValue(cfg?.auth?.order, providerKey); + const storedOrder = + resolveAuthOrder(store.order, providerAuthKey) ?? resolveAuthOrder(store.order, providerKey); + const configuredOrder = + resolveAuthOrder(cfg?.auth?.order, providerAuthKey) ?? + resolveAuthOrder(cfg?.auth?.order, providerKey); const explicitOrder = storedOrder ?? configuredOrder; const explicitProfiles = cfg?.auth?.profiles ? Object.entries(cfg.auth.profiles) @@ -161,6 +164,13 @@ export function resolveAuthProfileOrder(params: { return sorted; } +function resolveAuthOrder( + order: Record | undefined, + provider: string, +): string[] | undefined { + return findNormalizedProviderValue(order, provider); +} + function orderProfilesByMode(order: string[], store: AuthProfileStore): string[] { const now = Date.now(); diff --git a/src/agents/auth-profiles/session-override.test.ts b/src/agents/auth-profiles/session-override.test.ts index 35f97f259a1..e5e64f0d021 100644 --- a/src/agents/auth-profiles/session-override.test.ts +++ b/src/agents/auth-profiles/session-override.test.ts @@ -210,4 +210,46 @@ describe("resolveSessionAuthProfileOverride", () => { expect(sessionEntry.authProfileOverrideSource).toBe("user"); }); }); + + it("keeps session override when CLI provider aliases the stored profile provider", async () => { + await withAuthStateDir(async ({ stateDir }) => { + const agentDir = path.join(stateDir, "agent"); + await fs.mkdir(agentDir, { recursive: true }); + authStoreMocks.state.hasSource = true; + authStoreMocks.state.store = createAuthStoreWithProfiles({ + profiles: { + [TEST_PRIMARY_PROFILE_ID]: { + type: "api_key", + provider: "openai-codex", + key: "sk-codex", + }, + }, + order: { + "codex-cli": [TEST_PRIMARY_PROFILE_ID], + }, + }); + + const sessionEntry: SessionEntry = { + sessionId: "s1", + updatedAt: Date.now(), + authProfileOverride: TEST_PRIMARY_PROFILE_ID, + authProfileOverrideSource: "auto", + }; + const sessionStore = { "agent:main:main": sessionEntry }; + + const resolved = await resolveSessionAuthProfileOverride({ + cfg: {} as OpenClawConfig, + provider: "codex-cli", + agentDir, + sessionEntry, + sessionStore, + sessionKey: "agent:main:main", + storePath: undefined, + isNewSession: false, + }); + + expect(resolved).toBe(TEST_PRIMARY_PROFILE_ID); + expect(sessionEntry.authProfileOverride).toBe(TEST_PRIMARY_PROFILE_ID); + }); + }); }); diff --git a/src/agents/auth-profiles/session-override.ts b/src/agents/auth-profiles/session-override.ts index 28c0b26bdb1..4e0490e57cb 100644 --- a/src/agents/auth-profiles/session-override.ts +++ b/src/agents/auth-profiles/session-override.ts @@ -3,7 +3,7 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { resolveAuthProfileOrder } from "../auth-profiles/order.js"; import { ensureAuthProfileStore, hasAnyAuthProfileStoreSource } from "../auth-profiles/store.js"; import { isProfileInCooldown } from "../auth-profiles/usage.js"; -import { normalizeProviderId } from "../model-selection.js"; +import { resolveProviderIdForAuth } from "../provider-auth-aliases.js"; let sessionStoreRuntimePromise: | Promise @@ -15,6 +15,7 @@ function loadSessionStoreRuntime() { } function isProfileForProvider(params: { + cfg: OpenClawConfig; provider: string; profileId: string; store: ReturnType; @@ -23,7 +24,8 @@ function isProfileForProvider(params: { if (!entry?.provider) { return false; } - return normalizeProviderId(entry.provider) === normalizeProviderId(params.provider); + const providerKey = resolveProviderIdForAuth(params.provider, { config: params.cfg }); + return resolveProviderIdForAuth(entry.provider, { config: params.cfg }) === providerKey; } export async function clearSessionAuthProfileOverride(params: { @@ -98,7 +100,7 @@ export async function resolveSessionAuthProfileOverride(params: { current = undefined; } - if (current && !isProfileForProvider({ provider, profileId: current, store })) { + if (current && !isProfileForProvider({ cfg, provider, profileId: current, store })) { await clearSessionAuthProfileOverride({ sessionEntry, sessionStore, sessionKey, storePath }); current = undefined; } diff --git a/src/agents/command/attempt-execution.cli.test.ts b/src/agents/command/attempt-execution.cli.test.ts index 165545eec89..04c6fc09809 100644 --- a/src/agents/command/attempt-execution.cli.test.ts +++ b/src/agents/command/attempt-execution.cli.test.ts @@ -17,10 +17,17 @@ vi.mock("../cli-runner.js", () => ({ })); vi.mock("../model-selection.js", () => ({ - isCliProvider: (provider: string) => provider.trim().toLowerCase() === "claude-cli", + isCliProvider: (provider: string) => + provider.trim().toLowerCase() === "claude-cli" || provider.trim().toLowerCase() === "codex-cli", normalizeProviderId: (provider: string) => provider.trim().toLowerCase(), })); +vi.mock("../provider-auth-aliases.js", () => ({ + resolveProviderAuthAliasMap: () => ({}), + resolveProviderIdForAuth: (provider: string) => + provider.trim().toLowerCase() === "codex-cli" ? "openai-codex" : provider.trim().toLowerCase(), +})); + vi.mock("../pi-embedded.js", () => ({ runEmbeddedPiAgent: runEmbeddedPiAgentMock, })); @@ -296,6 +303,51 @@ describe("CLI attempt execution", () => { expect(sessionStore[sessionKey]?.claudeCliSessionId).toBe(cliSessionId); }); + it("passes session-bound OpenAI Codex auth profile to codex-cli aliases", async () => { + const sessionKey = "agent:main:direct:codex-cli-auth-alias"; + const sessionEntry: SessionEntry = { + sessionId: "openclaw-session-codex", + updatedAt: Date.now(), + authProfileOverride: "openai-codex:work", + authProfileOverrideSource: "user", + }; + const sessionStore: Record = { [sessionKey]: sessionEntry }; + await fs.writeFile(storePath, JSON.stringify(sessionStore, null, 2), "utf-8"); + runCliAgentMock.mockResolvedValueOnce(makeCliResult("codex cli response")); + + await runAgentAttempt({ + providerOverride: "codex-cli", + modelOverride: "gpt-5.4", + cfg: {} as OpenClawConfig, + sessionEntry, + sessionId: sessionEntry.sessionId, + sessionKey, + sessionAgentId: "main", + sessionFile: path.join(tmpDir, "session.jsonl"), + workspaceDir: tmpDir, + body: "continue", + isFallbackRetry: false, + resolvedThinkLevel: "medium", + timeoutMs: 1_000, + runId: "run-codex-cli-auth-alias", + opts: { senderIsOwner: false } as Parameters[0]["opts"], + runContext: {} as Parameters[0]["runContext"], + spawnedBy: undefined, + messageChannel: undefined, + skillsSnapshot: undefined, + resolvedVerboseLevel: undefined, + agentDir: tmpDir, + onAgentEvent: vi.fn(), + authProfileProvider: "openai-codex", + sessionStore, + storePath, + sessionHasHistory: false, + }); + + expect(runCliAgentMock).toHaveBeenCalledTimes(1); + expect(runCliAgentMock.mock.calls[0]?.[0]?.authProfileId).toBe("openai-codex:work"); + }); + it("persists CLI replies into the session transcript", async () => { const sessionKey = "agent:main:subagent:cli-transcript"; const sessionEntry: SessionEntry = { diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts index f7af54994bc..94202d8e6f7 100644 --- a/src/agents/command/attempt-execution.ts +++ b/src/agents/command/attempt-execution.ts @@ -18,6 +18,7 @@ import { resolveAgentHarnessPolicy } from "../harness/selection.js"; import { isCliProvider } from "../model-selection.js"; import { prepareSessionManagerForRun } from "../pi-embedded-runner/session-manager-init.js"; import { runEmbeddedPiAgent, type EmbeddedPiRunResult } from "../pi-embedded.js"; +import { resolveProviderIdForAuth } from "../provider-auth-aliases.js"; import { buildWorkspaceSkillSnapshot } from "../skills.js"; import { buildUsageWithNoCost } from "../stream-message-shared.js"; import { @@ -271,8 +272,16 @@ export function runAgentAttempt(params: { sessionId: params.sessionId, sessionKey: params.sessionKey ?? params.sessionId, }); + const providerAuthKey = resolveProviderIdForAuth(params.providerOverride, { + config: params.cfg, + workspaceDir: params.workspaceDir, + }); + const authProfileProviderKey = resolveProviderIdForAuth(params.authProfileProvider, { + config: params.cfg, + workspaceDir: params.workspaceDir, + }); const authProfileId = - params.providerOverride === params.authProfileProvider + providerAuthKey === authProfileProviderKey ? params.sessionEntry?.authProfileOverride : undefined; if (isCliProvider(params.providerOverride, params.cfg)) { diff --git a/src/agents/gpt5-prompt-overlay.ts b/src/agents/gpt5-prompt-overlay.ts index c2dd91d0cc3..6477d6f6d75 100644 --- a/src/agents/gpt5-prompt-overlay.ts +++ b/src/agents/gpt5-prompt-overlay.ts @@ -3,6 +3,14 @@ import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js"; import type { ProviderSystemPromptContribution } from "./system-prompt-contribution.js"; const GPT5_MODEL_ID_PATTERN = /(?:^|[/:])gpt-5(?:[.-]|$)/i; +const OPENAI_FAMILY_GPT5_PROMPT_OVERLAY_PROVIDERS = new Set([ + "codex", + "codex-cli", + "openai", + "azure-openai", + "azure-openai-responses", + "openai-codex", +]); export const GPT5_FRIENDLY_PROMPT_OVERLAY = `## Interaction Style @@ -100,10 +108,16 @@ export function normalizeGpt5PromptOverlayMode(value: unknown): Gpt5PromptOverla export function resolveGpt5PromptOverlayMode( config?: OpenClawConfig, legacyPluginConfig?: Record, + params?: { providerId?: string }, ): Gpt5PromptOverlayMode { + const providerId = normalizeOptionalLowercaseString(params?.providerId); + const canUseOpenAiPluginFallback = + !providerId || OPENAI_FAMILY_GPT5_PROMPT_OVERLAY_PROVIDERS.has(providerId); return ( normalizeGpt5PromptOverlayMode(config?.agents?.defaults?.promptOverlays?.gpt5?.personality) ?? - normalizeGpt5PromptOverlayMode(config?.plugins?.entries?.openai?.config?.personality) ?? + (canUseOpenAiPluginFallback + ? normalizeGpt5PromptOverlayMode(config?.plugins?.entries?.openai?.config?.personality) + : undefined) ?? normalizeGpt5PromptOverlayMode(legacyPluginConfig?.personality) ?? "friendly" ); @@ -116,6 +130,7 @@ export function isGpt5ModelId(modelId?: string): boolean { export function resolveGpt5SystemPromptContribution(params: { config?: OpenClawConfig; + providerId?: string; modelId?: string; legacyPluginConfig?: Record; enabled?: boolean; @@ -123,7 +138,9 @@ export function resolveGpt5SystemPromptContribution(params: { if (params.enabled === false || !isGpt5ModelId(params.modelId)) { return undefined; } - const mode = resolveGpt5PromptOverlayMode(params.config, params.legacyPluginConfig); + const mode = resolveGpt5PromptOverlayMode(params.config, params.legacyPluginConfig, { + providerId: params.providerId, + }); return { stablePrefix: GPT5_BEHAVIOR_CONTRACT, sectionOverrides: @@ -133,6 +150,7 @@ export function resolveGpt5SystemPromptContribution(params: { export function renderGpt5PromptOverlay(params: { config?: OpenClawConfig; + providerId?: string; modelId?: string; legacyPluginConfig?: Record; enabled?: boolean; diff --git a/src/agents/model-fallback.run-embedded.e2e.test.ts b/src/agents/model-fallback.run-embedded.e2e.test.ts index d55bd07352c..b88fb516ec5 100644 --- a/src/agents/model-fallback.run-embedded.e2e.test.ts +++ b/src/agents/model-fallback.run-embedded.e2e.test.ts @@ -5,6 +5,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; import type { AuthProfileFailureReason } from "./auth-profiles.js"; import { runWithModelFallback } from "./model-fallback.js"; +import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js"; import type { EmbeddedRunAttemptResult } from "./pi-embedded-runner/run/types.js"; import { buildEmbeddedRunnerAssistant, @@ -381,6 +382,51 @@ function expectProviderAttemptCounts(expected: { openai: number; groq: number }) } describe("runWithModelFallback + runEmbeddedPiAgent failover behavior", () => { + it("keeps tool summary on incomplete side-effect terminal results", async () => { + await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { + await writeAuthStore(agentDir); + runEmbeddedAttemptMock.mockResolvedValueOnce( + makeEmbeddedRunnerAttempt({ + toolMetas: [{ toolName: "write", meta: "path=out.txt" }], + lastAssistant: buildEmbeddedRunnerAssistant({ + provider: "openai", + model: "mock-1", + stopReason: "stop", + content: [], + }), + }), + ); + + const result = await runEmbeddedPiAgent({ + sessionId: "session:tool-side-effect-terminal", + sessionKey: "agent:test:tool-side-effect-terminal", + sessionFile: path.join(workspaceDir, "tool-side-effect-terminal.jsonl"), + workspaceDir, + agentDir, + config: makeConfig(), + prompt: "write the file", + provider: "openai", + model: "mock-1", + authProfileIdSource: "auto", + timeoutMs: 5_000, + runId: "run:tool-side-effect-terminal", + enqueue: async (task) => await task(), + }); + + expect(result.meta.toolSummary).toMatchObject({ + calls: 1, + tools: ["write"], + }); + expect( + classifyEmbeddedPiRunResultForModelFallback({ + provider: "openai-codex", + model: "gpt-5.4", + result, + }), + ).toBeNull(); + }); + }); + it("falls back on OpenRouter-style no-endpoints assistant errors", async () => { await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { await writeAuthStore(agentDir); diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 8cc290ab158..c71df3ca859 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -9,6 +9,8 @@ import type { AuthProfileStore } from "./auth-profiles/types.js"; import { FailoverError } from "./failover-error.js"; import { LiveSessionModelSwitchError } from "./live-model-switch-error.js"; import { runWithImageModelFallback, runWithModelFallback } from "./model-fallback.js"; +import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js"; +import type { EmbeddedPiRunResult } from "./pi-embedded-runner/types.js"; import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js"; vi.mock("../infra/file-lock.js", () => ({ @@ -416,6 +418,149 @@ describe("runWithModelFallback", () => { }); }); + it("uses optional result classification to continue to configured fallbacks", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai-codex/gpt-5.4", + fallbacks: ["anthropic/claude-haiku-3-5"], + }, + }, + }, + }); + const run = vi + .fn() + .mockResolvedValueOnce({ payloads: [] }) + .mockResolvedValueOnce({ + payloads: [{ text: "fallback ok" }], + }); + const classifyResult = vi.fn(({ result }) => + Array.isArray(result.payloads) && result.payloads.length === 0 + ? { + message: "terminal result contained no visible assistant reply", + reason: "format" as const, + code: "empty_result", + } + : null, + ); + + const result = await runWithModelFallback({ + cfg, + provider: "openai-codex", + model: "gpt-5.4", + run, + classifyResult, + }); + + expect(result.result).toEqual({ payloads: [{ text: "fallback ok" }] }); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]).toEqual(["anthropic", "claude-haiku-3-5"]); + expect(result.attempts[0]).toMatchObject({ + provider: "openai-codex", + model: "gpt-5.4", + reason: "format", + code: "empty_result", + }); + }); + + it("surfaces classified terminal results when no fallback remains", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai-codex/gpt-5.4", + fallbacks: [], + }, + }, + }, + }); + const run = vi.fn().mockResolvedValueOnce({ payloads: [] }); + + await expect( + runWithModelFallback({ + cfg, + provider: "openai-codex", + model: "gpt-5.4", + run, + classifyResult: ({ result }) => { + const payloads = (result as { payloads?: unknown[] }).payloads; + return Array.isArray(payloads) && payloads.length === 0 + ? { + message: "terminal result contained no visible assistant reply", + reason: "format", + code: "empty_result", + } + : null; + }, + }), + ).rejects.toMatchObject({ + name: "FailoverError", + reason: "format", + provider: "openai-codex", + model: "gpt-5.4", + code: "empty_result", + }); + expect(run).toHaveBeenCalledTimes(1); + }); + + it("does not classify successful results when the optional classifier returns null", async () => { + const cfg = makeProviderFallbackCfg("openai-codex"); + const run = vi.fn().mockResolvedValueOnce({ payloads: [{ text: "ok" }] }); + const classifyResult = vi.fn(() => null); + + const result = await runWithModelFallback({ + cfg, + provider: "openai-codex", + model: "m1", + run, + classifyResult, + }); + + expect(result.result).toEqual({ payloads: [{ text: "ok" }] }); + expect(run).toHaveBeenCalledTimes(1); + expect(result.attempts).toEqual([]); + }); + + it("keeps tool-executing empty GPT-5 runs out of fallback", () => { + const runResult: EmbeddedPiRunResult = { + payloads: [], + meta: { + durationMs: 1, + toolSummary: { + calls: 1, + tools: ["mcp_write"], + }, + }, + }; + + expect( + classifyEmbeddedPiRunResultForModelFallback({ + provider: "openai-codex", + model: "gpt-5.4", + result: runResult, + }), + ).toBeNull(); + }); + + it("keeps normalized silent GPT-5 terminal replies out of fallback", () => { + const runResult: EmbeddedPiRunResult = { + payloads: [], + meta: { + durationMs: 1, + finalAssistantRawText: "NO_REPLY", + }, + }; + + expect( + classifyEmbeddedPiRunResultForModelFallback({ + provider: "openai-codex", + model: "gpt-5.4", + result: runResult, + }), + ).toBeNull(); + }); + it("passes original unknown errors to onError during fallback", async () => { const cfg = makeCfg(); const unknownError = new Error("provider misbehaved"); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index f6e14135fa5..a0e489327ac 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -133,6 +133,28 @@ type ModelFallbackErrorHandler = (attempt: { total: number; }) => void | Promise; +export type ModelFallbackResultClassification = + | { + message: string; + reason?: FailoverReason; + status?: number; + code?: string; + rawError?: string; + } + | { + error: unknown; + } + | null + | undefined; + +type ModelFallbackResultClassifier = (attempt: { + result: T; + provider: string; + model: string; + attempt: number; + total: number; +}) => ModelFallbackResultClassification | Promise; + type ModelFallbackRunResult = { result: T; provider: string; @@ -197,6 +219,9 @@ async function runFallbackAttempt(params: { model: string; attempts: FallbackAttempt[]; options?: ModelFallbackRunOptions; + classifyResult?: ModelFallbackResultClassifier; + attempt: number; + total: number; }): Promise<{ success: ModelFallbackRunResult } | { error: unknown }> { const runResult = await runFallbackCandidate({ run: params.run, @@ -205,6 +230,20 @@ async function runFallbackAttempt(params: { options: params.options, }); if (runResult.ok) { + const classification = await params.classifyResult?.({ + result: runResult.result, + provider: params.provider, + model: params.model, + attempt: params.attempt, + total: params.total, + }); + const classifiedError = resolveResultClassificationError(classification, { + provider: params.provider, + model: params.model, + }); + if (classifiedError) { + return { error: classifiedError }; + } return { success: buildFallbackSuccess({ result: runResult.result, @@ -217,6 +256,30 @@ async function runFallbackAttempt(params: { return { error: runResult.error }; } +function resolveResultClassificationError( + classification: ModelFallbackResultClassification, + params: { provider: string; model: string }, +) { + if (!classification) { + return null; + } + if ("error" in classification) { + return classification.error; + } + const message = normalizeOptionalString(classification.message); + if (!message) { + return null; + } + return new FailoverError(message, { + reason: classification.reason ?? "unknown", + provider: params.provider, + model: params.model, + status: classification.status, + code: classification.code, + rawError: classification.rawError, + }); +} + function sameModelCandidate(a: ModelCandidate, b: ModelCandidate): boolean { return a.provider === b.provider && a.model === b.model; } @@ -659,6 +722,7 @@ export async function runWithModelFallback(params: { fallbacksOverride?: string[]; run: ModelFallbackRunFn; onError?: ModelFallbackErrorHandler; + classifyResult?: ModelFallbackResultClassifier; }): Promise> { const candidates = resolveFallbackCandidates({ cfg: params.cfg, @@ -803,6 +867,9 @@ export async function runWithModelFallback(params: { ...candidate, attempts, options: runOptions, + classifyResult: params.classifyResult, + attempt: i + 1, + total: candidates.length, }); if ("success" in attemptRun) { if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) { @@ -955,7 +1022,13 @@ export async function runWithImageModelFallback(params: { for (let i = 0; i < candidates.length; i += 1) { const candidate = candidates[i]; - const attemptRun = await runFallbackAttempt({ run: params.run, ...candidate, attempts }); + const attemptRun = await runFallbackAttempt({ + run: params.run, + ...candidate, + attempts, + attempt: i + 1, + total: candidates.length, + }); if ("success" in attemptRun) { return attemptRun.success; } diff --git a/src/agents/models-config.uses-first-github-copilot-profile-env-tokens.test.ts b/src/agents/models-config.uses-first-github-copilot-profile-env-tokens.test.ts index be5e6b2b990..0514fca6137 100644 --- a/src/agents/models-config.uses-first-github-copilot-profile-env-tokens.test.ts +++ b/src/agents/models-config.uses-first-github-copilot-profile-env-tokens.test.ts @@ -12,6 +12,7 @@ vi.mock("./model-auth-env.js", () => ({ })); vi.mock("./provider-auth-aliases.js", () => ({ + resolveProviderAuthAliasMap: () => ({}), resolveProviderIdForAuth: (provider: string) => provider.trim().toLowerCase(), })); diff --git a/src/agents/openai-responses-payload-policy.ts b/src/agents/openai-responses-payload-policy.ts index 4727238fe73..07d69a01eba 100644 --- a/src/agents/openai-responses-payload-policy.ts +++ b/src/agents/openai-responses-payload-policy.ts @@ -98,11 +98,15 @@ export function resolveOpenAIResponsesPayloadPolicy( model: OpenAIResponsesPayloadModel, options: OpenAIResponsesPayloadPolicyOptions = {}, ): OpenAIResponsesPayloadPolicy { + const compat = + model.compat && typeof model.compat === "object" + ? (model.compat as { supportsStore?: boolean }) + : undefined; const capabilities = resolveProviderRequestPolicyConfig({ provider: readStringValue(model.provider), api: readStringValue(model.api), baseUrl: readStringValue(model.baseUrl), - compat: model.compat, + compat, capability: "llm", transport: "stream", }).capabilities; diff --git a/src/agents/openai-tool-schema.ts b/src/agents/openai-tool-schema.ts index 4c05886b4ea..660d2f8b059 100644 --- a/src/agents/openai-tool-schema.ts +++ b/src/agents/openai-tool-schema.ts @@ -5,6 +5,7 @@ export { } from "./openai-strict-tool-setting.js"; type ToolWithParameters = { + name?: unknown; parameters: unknown; }; @@ -62,6 +63,33 @@ export function isStrictOpenAIJsonSchemaCompatible(schema: unknown): boolean { return isStrictOpenAIJsonSchemaCompatibleRecursive(normalizeStrictOpenAIJsonSchema(schema)); } +export type OpenAIStrictToolSchemaDiagnostic = { + toolIndex: number; + toolName?: string; + violations: string[]; +}; + +export function findOpenAIStrictToolSchemaDiagnostics( + tools: readonly ToolWithParameters[], +): OpenAIStrictToolSchemaDiagnostic[] { + return tools.flatMap((tool, toolIndex) => { + const violations = findStrictOpenAIJsonSchemaViolations( + normalizeStrictOpenAIJsonSchema(tool.parameters), + `${typeof tool.name === "string" && tool.name ? tool.name : `tool[${toolIndex}]`}.parameters`, + ); + if (violations.length === 0) { + return []; + } + return [ + { + toolIndex, + ...(typeof tool.name === "string" && tool.name ? { toolName: tool.name } : {}), + violations, + }, + ]; + }); +} + function isStrictOpenAIJsonSchemaCompatibleRecursive(schema: unknown): boolean { if (Array.isArray(schema)) { return schema.every((entry) => isStrictOpenAIJsonSchemaCompatibleRecursive(entry)); @@ -109,6 +137,72 @@ function isStrictOpenAIJsonSchemaCompatibleRecursive(schema: unknown): boolean { }); } +function findStrictOpenAIJsonSchemaViolations(schema: unknown, path: string): string[] { + if (Array.isArray(schema)) { + return schema.flatMap((entry, index) => + findStrictOpenAIJsonSchemaViolations(entry, `${path}[${index}]`), + ); + } + if (!schema || typeof schema !== "object") { + return []; + } + + const record = schema as Record; + const violations: string[] = []; + for (const key of ["anyOf", "oneOf", "allOf"] as const) { + if (key in record) { + violations.push(`${path}.${key}`); + } + } + if (Array.isArray(record.type)) { + violations.push(`${path}.type`); + } + if (record.type === "object") { + if (record.additionalProperties !== false) { + violations.push(`${path}.additionalProperties`); + } + const properties = + record.properties && + typeof record.properties === "object" && + !Array.isArray(record.properties) + ? (record.properties as Record) + : {}; + const required = Array.isArray(record.required) + ? record.required.filter((entry): entry is string => typeof entry === "string") + : undefined; + if (!required) { + violations.push(`${path}.required`); + } else { + const requiredSet = new Set(required); + for (const key of Object.keys(properties)) { + if (!requiredSet.has(key)) { + violations.push(`${path}.required.${key}`); + } + } + } + } + + if ( + record.properties && + typeof record.properties === "object" && + !Array.isArray(record.properties) + ) { + for (const [key, value] of Object.entries(record.properties)) { + violations.push(...findStrictOpenAIJsonSchemaViolations(value, `${path}.properties.${key}`)); + } + } + for (const [key, value] of Object.entries(record)) { + if (key === "properties") { + continue; + } + if (value && typeof value === "object") { + violations.push(...findStrictOpenAIJsonSchemaViolations(value, `${path}.${key}`)); + } + } + + return violations; +} + export function resolveOpenAIStrictToolFlagForInventory( tools: readonly ToolWithParameters[], strict: boolean | null | undefined, diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index fe994c7e9db..4e9471cc300 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1053,6 +1053,80 @@ describe("openai transport stream", () => { expect(params.tools?.[0]).not.toHaveProperty("strict"); }); + it("still normalizes responses tool parameters when strict is omitted", () => { + const params = buildOpenAIResponsesParams( + { + id: "custom-model", + name: "Custom Model", + api: "openai-responses", + provider: "openai", + baseUrl: "https://proxy.example.com/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-responses">, + { + systemPrompt: "system", + messages: [], + tools: [ + { + name: "lookup_weather", + description: "Get forecast", + parameters: {}, + }, + ], + } as never, + undefined, + ) as { tools?: Array<{ strict?: boolean; parameters?: Record }> }; + + expect(params.tools?.[0]).not.toHaveProperty("strict"); + expect(params.tools?.[0]?.parameters).toMatchObject({ + type: "object", + properties: {}, + }); + }); + + it("normalizes responses tool parameters while downgrading native strict:false", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-responses">, + { + systemPrompt: "system", + messages: [], + tools: [ + { + name: "read", + description: "Read file", + parameters: { + properties: { path: { type: "string" } }, + required: [], + }, + }, + ], + } as never, + undefined, + ) as { tools?: Array<{ strict?: boolean; parameters?: Record }> }; + + expect(params.tools?.[0]?.strict).toBe(false); + expect(params.tools?.[0]?.parameters).toMatchObject({ + type: "object", + properties: { path: { type: "string" } }, + required: [], + }); + }); + it("adds native OpenAI turn metadata on direct Responses routes", () => { const params = buildOpenAIResponsesParams( { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 0c0e943f265..d249ca71216 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -20,6 +20,7 @@ import type { ResponseInputMessageContentList, } from "openai/resources/responses/responses.js"; import type { ModelCompatConfig } from "../config/types.models.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js"; import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider-runtime.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js"; @@ -37,6 +38,7 @@ import { resolveOpenAIResponsesPayloadPolicy, } from "./openai-responses-payload-policy.js"; import { + findOpenAIStrictToolSchemaDiagnostics, normalizeOpenAIStrictToolParameters, resolveOpenAIStrictToolFlagForInventory, resolveOpenAIStrictToolSetting, @@ -47,6 +49,7 @@ import { transformTransportMessages } from "./transport-message-transform.js"; import { mergeTransportMetadata, sanitizeTransportPayloadText } from "./transport-stream-shared.js"; const DEFAULT_AZURE_OPENAI_API_VERSION = "2024-12-01-preview"; +const log = createSubsystemLogger("openai-transport"); type BaseStreamOptions = { temperature?: number; @@ -348,27 +351,53 @@ function convertResponsesMessages( function convertResponsesTools( tools: NonNullable, + model: OpenAIModeModel, options?: { strict?: boolean | null }, ): FunctionTool[] { - const strict = resolveOpenAIStrictToolFlagForInventory(tools, options?.strict); - if (strict === undefined) { - return tools.map((tool) => ({ - type: "function", + const strict = resolveOpenAIStrictToolFlagWithDiagnostics(tools, options?.strict, { + transport: "responses", + model, + }); + return tools.map((tool): FunctionTool => { + const base = { + type: "function" as const, name: tool.name, description: tool.description, - parameters: tool.parameters, - })) as unknown as FunctionTool[]; + parameters: normalizeOpenAIStrictToolParameters(tool.parameters, strict === true) as Record< + string, + unknown + >, + }; + return strict === undefined ? (base as FunctionTool) : { ...base, strict }; + }); +} + +function resolveOpenAIStrictToolFlagWithDiagnostics( + tools: NonNullable, + strictSetting: boolean | null | undefined, + context: { transport: "responses" | "completions"; model: OpenAIModeModel }, +): boolean | undefined { + const strict = resolveOpenAIStrictToolFlagForInventory(tools, strictSetting); + if (strictSetting === true && strict === false && log.isEnabled("debug", "any")) { + const diagnostics = findOpenAIStrictToolSchemaDiagnostics(tools); + const sample = diagnostics.slice(0, 5).map((entry) => ({ + tool: entry.toolName ?? `tool[${entry.toolIndex}]`, + violations: entry.violations.slice(0, 8), + })); + log.debug( + `OpenAI ${context.transport} tool schema strict mode downgraded to strict=false for ` + + `${context.model.provider ?? "unknown"}/${context.model.id ?? "unknown"} ` + + `because ${diagnostics.length} tool schema(s) are not strict-compatible`, + { + transport: context.transport, + provider: context.model.provider, + model: context.model.id, + incompatibleToolCount: diagnostics.length, + sample, + }, + ); } - return tools.map((tool) => ({ - type: "function", - name: tool.name, - description: tool.description, - parameters: normalizeOpenAIStrictToolParameters(tool.parameters, strict) as Record< - string, - unknown - >, - strict, - })); + return strict; } async function processResponsesStream( @@ -836,7 +865,7 @@ export function buildOpenAIResponsesParams( params.service_tier = options.serviceTier; } if (context.tools) { - params.tools = convertResponsesTools(context.tools, { + params.tools = convertResponsesTools(context.tools, model as OpenAIModeModel, { strict: resolveOpenAIStrictToolSetting(model as OpenAIModeModel, { transport: "stream", }), @@ -1557,12 +1586,16 @@ function convertTools( compat: ReturnType, model: OpenAIModeModel, ) { - const strict = resolveOpenAIStrictToolFlagForInventory( + const strict = resolveOpenAIStrictToolFlagWithDiagnostics( tools, resolveOpenAIStrictToolSetting(model, { transport: "stream", supportsStrictMode: compat?.supportsStrictMode, }), + { + transport: "completions", + model, + }, ); return tools.map((tool) => ({ type: "function", diff --git a/src/agents/pi-embedded-runner-extraparams-resolve.test.ts b/src/agents/pi-embedded-runner-extraparams-resolve.test.ts index b911d3caa64..f33bf6597ac 100644 --- a/src/agents/pi-embedded-runner-extraparams-resolve.test.ts +++ b/src/agents/pi-embedded-runner-extraparams-resolve.test.ts @@ -22,7 +22,7 @@ describe("resolveExtraParams", () => { expect(result).toEqual({ parallel_tool_calls: true, text_verbosity: "low", - openaiWsWarmup: true, + openaiWsWarmup: false, }); }); @@ -189,7 +189,7 @@ describe("resolveExtraParams", () => { }); expect(result).toEqual({ - openaiWsWarmup: true, + openaiWsWarmup: false, parallel_tool_calls: true, text_verbosity: "low", }); diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index a7e1fb43e5c..3a286cb6035 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -538,6 +538,7 @@ describe("applyExtraParamsToAgent", () => { model: | Model<"openai-completions"> | Model<"openai-responses"> + | Model<"openai-codex-responses"> | Model<"azure-openai-responses"> | Model<"anthropic-messages">; cfg?: Record; @@ -823,6 +824,34 @@ describe("applyExtraParamsToAgent", () => { expect(payload.parallel_tool_calls).toBe(true); }); + it("injects parallel_tool_calls for openai-codex-responses payloads when configured", () => { + const payload = runParallelToolCallsPayloadMutationCase({ + applyProvider: "openai-codex", + applyModelId: "gpt-5.4", + cfg: { + agents: { + defaults: { + models: { + "openai-codex/gpt-5.4": { + params: { + parallelToolCalls: true, + }, + }, + }, + }, + }, + }, + model: { + api: "openai-codex-responses", + provider: "openai-codex", + id: "gpt-5.4", + baseUrl: "https://chatgpt.com/backend-api/codex", + } as unknown as Model<"openai-codex-responses">, + }); + + expect(payload.parallel_tool_calls).toBe(true); + }); + it("strips function.strict for xai providers", () => { const payload = runToolPayloadMutationCase({ applyProvider: "xai", @@ -1551,7 +1580,7 @@ describe("applyExtraParamsToAgent", () => { expect(calls[0]?.transport).toBe("auto"); }); - it("defaults OpenAI transport to auto with websocket warm-up", () => { + it("defaults OpenAI transport to auto without websocket warm-up", () => { const { calls, agent } = createOptionsCaptureAgent(); applyExtraParamsToAgent(agent, undefined, "openai", "gpt-5"); @@ -1566,7 +1595,7 @@ describe("applyExtraParamsToAgent", () => { expect(calls).toHaveLength(1); expect(calls[0]?.transport).toBe("auto"); - expect(calls[0]?.openaiWsWarmup).toBe(true); + expect(calls[0]?.openaiWsWarmup).toBe(false); }); it("injects GPT-5 default parallel tool calls and low verbosity for OpenAI Responses payloads", () => { @@ -1585,6 +1614,22 @@ describe("applyExtraParamsToAgent", () => { expect(payload.text).toEqual({ verbosity: "low" }); }); + it("injects GPT-5 default parallel tool calls for Codex Responses payloads", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai-codex", + applyModelId: "gpt-5.4", + model: { + api: "openai-codex-responses", + provider: "openai-codex", + id: "gpt-5.4", + } as Model<"openai-codex-responses">, + payload: {}, + }); + + expect(payload.parallel_tool_calls).toBe(true); + expect(payload.text).toEqual({ verbosity: "low" }); + }); + it("injects native Codex web_search for direct openai-codex Responses models", () => { const payload = runResponsesPayloadMutationCase({ applyProvider: "openai-codex", diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 270a5b0b78c..b9b5b24ccb1 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -381,6 +381,21 @@ const writeCopilotAuthStore = async (agentDir: string, token = "gh-token") => { await fs.writeFile(authPath, JSON.stringify(payload)); }; +const writeOpenAiCodexAuthStore = async (agentDir: string) => { + const authPath = path.join(agentDir, "auth-profiles.json"); + const payload = { + version: 1, + profiles: { + "openai-codex:work": { + type: "api_key", + provider: "openai-codex", + key: "sk-codex", + }, + }, + }; + await fs.writeFile(authPath, JSON.stringify(payload)); +}; + const buildCopilotAssistant = (overrides: Partial = {}) => buildAssistant({ provider: "github-copilot", model: copilotModelId, ...overrides }); @@ -1026,26 +1041,33 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { }); }); - it("does not rotate for user-pinned profiles", async () => { + it("surfaces rate limits without rotating for user-pinned profiles", async () => { await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { await writeAuthStore(agentDir); mockSingleErrorAttempt({ errorMessage: "rate limit" }); - await runEmbeddedPiAgentInline({ - sessionId: "session:test", - sessionKey: "agent:test:user", - sessionFile: path.join(workspaceDir, "session.jsonl"), - workspaceDir, - agentDir, - config: makeConfig(), - prompt: "hello", + await expect( + runEmbeddedPiAgentInline({ + sessionId: "session:test", + sessionKey: "agent:test:user", + sessionFile: path.join(workspaceDir, "session.jsonl"), + workspaceDir, + agentDir, + config: makeConfig(), + prompt: "hello", + provider: "openai", + model: "mock-1", + authProfileId: "openai:p1", + authProfileIdSource: "user", + timeoutMs: 5_000, + runId: "run:user", + }), + ).rejects.toMatchObject({ + name: "FailoverError", + reason: "rate_limit", provider: "openai", model: "mock-1", - authProfileId: "openai:p1", - authProfileIdSource: "user", - timeoutMs: 5_000, - runId: "run:user", }); expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(1); @@ -1099,6 +1121,36 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { }); }); + it("preserves user-pinned auth profiles across provider aliases", async () => { + await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { + await writeOpenAiCodexAuthStore(agentDir); + mockSingleSuccessfulAttempt(); + + await runEmbeddedPiAgentInline({ + sessionId: "session:test", + sessionKey: "agent:test:user-auth-alias", + sessionFile: path.join(workspaceDir, "session.jsonl"), + workspaceDir, + agentDir, + config: makeConfig(), + prompt: "hello", + provider: "codex-cli", + model: "gpt-5.4", + authProfileId: "openai-codex:work", + authProfileIdSource: "user", + timeoutMs: 5_000, + runId: "run:user-auth-alias", + }); + + expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(1); + expect(runEmbeddedAttemptMock.mock.calls[0]?.[0]).toMatchObject({ + authProfileId: "openai-codex:work", + authProfileIdSource: "user", + provider: "codex-cli", + }); + }); + }); + it("ignores user-locked profile when provider mismatches", async () => { await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { await writeAuthStore(agentDir, { includeAnthropic: true }); diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 701139f89a6..fa7f1212899 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -22,6 +22,7 @@ import { resolveHeartbeatSummaryForAgent } from "../../infra/heartbeat-summary.j import { getMachineDisplayName } from "../../infra/machine-name.js"; import { generateSecureToken } from "../../infra/secure-random.js"; import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; +import { extractModelCompat } from "../../plugins/provider-model-compat.js"; import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js"; import { prepareProviderRuntimeAuth, @@ -528,7 +529,7 @@ export async function compactEmbeddedPiSessionDirect( abortSignal: runAbortController.signal, modelProvider: model.provider, modelId, - modelCompat: effectiveModel.compat, + modelCompat: extractModelCompat(effectiveModel), modelApi: model.api, modelContextWindowTokens: ctxInfo.tokens, modelAuthMode: resolveModelAuthMode(model.provider, params.config), diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 8c5afa6e6bc..cb0a0efe506 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -214,7 +214,7 @@ function applyDefaultOpenAIGptRuntimeParams( merged.text_verbosity = "low"; } if (!Object.hasOwn(merged, "openaiWsWarmup")) { - merged.openaiWsWarmup = true; + merged.openaiWsWarmup = false; } } @@ -334,6 +334,7 @@ function createParallelToolCallsWrapper( if ( model.api !== "openai-completions" && model.api !== "openai-responses" && + model.api !== "openai-codex-responses" && model.api !== "azure-openai-responses" ) { return underlying(model, context, options); diff --git a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts index ebdb33a3455..679211f8b5c 100644 --- a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts @@ -28,11 +28,15 @@ function resolveOpenAIRequestCapabilities(model: { baseUrl?: unknown; compat?: unknown; }) { + const compat = + model.compat && typeof model.compat === "object" + ? (model.compat as { supportsStore?: boolean }) + : undefined; return resolveProviderRequestPolicyConfig({ provider: readStringValue(model.provider), api: readStringValue(model.api), baseUrl: readStringValue(model.baseUrl), - compat: model.compat, + compat, capability: "llm", transport: "stream", }).capabilities; diff --git a/src/agents/pi-embedded-runner/result-fallback-classifier.ts b/src/agents/pi-embedded-runner/result-fallback-classifier.ts new file mode 100644 index 00000000000..67f12df6aac --- /dev/null +++ b/src/agents/pi-embedded-runner/result-fallback-classifier.ts @@ -0,0 +1,111 @@ +import { isSilentReplyPayloadText } from "../../auto-reply/tokens.js"; +import { isGpt5ModelId } from "../gpt5-prompt-overlay.js"; +import type { ModelFallbackResultClassification } from "../model-fallback.js"; +import type { EmbeddedPiRunResult } from "./types.js"; + +const EMPTY_TERMINAL_REPLY_RE = /Agent couldn't generate a response/i; +const PLAN_ONLY_TERMINAL_REPLY_RE = /Agent stopped after repeated plan-only turns/i; + +function isEmbeddedPiRunResult(value: unknown): value is EmbeddedPiRunResult { + return Boolean( + value && + typeof value === "object" && + "meta" in value && + (value as { meta?: unknown }).meta && + typeof (value as { meta?: unknown }).meta === "object", + ); +} + +function hasVisibleNonErrorPayload(result: EmbeddedPiRunResult): boolean { + return (result.payloads ?? []).some((payload) => { + if (!payload || payload.isError === true || payload.isReasoning === true) { + return false; + } + const text = typeof payload.text === "string" ? payload.text.trim() : ""; + return ( + text.length > 0 || + Boolean(payload.mediaUrl) || + (Array.isArray(payload.mediaUrls) && payload.mediaUrls.length > 0) + ); + }); +} + +function hasOutboundSideEffects(result: EmbeddedPiRunResult): boolean { + return ( + result.didSendViaMessagingTool === true || + (result.messagingToolSentTexts?.length ?? 0) > 0 || + (result.messagingToolSentMediaUrls?.length ?? 0) > 0 || + (result.messagingToolSentTargets?.length ?? 0) > 0 || + (result.successfulCronAdds ?? 0) > 0 || + (result.meta.toolSummary?.calls ?? 0) > 0 + ); +} + +function hasDeliberateSilentTerminalReply(result: EmbeddedPiRunResult): boolean { + return [result.meta.finalAssistantRawText, result.meta.finalAssistantVisibleText].some( + (text) => typeof text === "string" && isSilentReplyPayloadText(text), + ); +} + +export function classifyEmbeddedPiRunResultForModelFallback(params: { + provider: string; + model: string; + result: unknown; + hasDirectlySentBlockReply?: boolean; + hasBlockReplyPipelineOutput?: boolean; +}): ModelFallbackResultClassification { + if (!isGpt5ModelId(params.model) || !isEmbeddedPiRunResult(params.result)) { + return null; + } + if ( + params.result.meta.aborted || + params.hasDirectlySentBlockReply === true || + params.hasBlockReplyPipelineOutput === true || + hasVisibleNonErrorPayload(params.result) + ) { + return null; + } + if (hasOutboundSideEffects(params.result)) { + return null; + } + + const payloads = params.result.payloads ?? []; + if (payloads.length === 0 && hasDeliberateSilentTerminalReply(params.result)) { + return null; + } + if (payloads.length === 0) { + return { + message: `${params.provider}/${params.model} ended without a visible assistant reply`, + reason: "format", + code: "empty_result", + }; + } + if (payloads.every((payload) => payload.isReasoning === true)) { + return { + message: `${params.provider}/${params.model} ended with reasoning only`, + reason: "format", + code: "reasoning_only_result", + }; + } + + const errorText = payloads + .filter((payload) => payload?.isError === true) + .map((payload) => (typeof payload.text === "string" ? payload.text : "")) + .join("\n"); + if (PLAN_ONLY_TERMINAL_REPLY_RE.test(errorText)) { + return { + message: `${params.provider}/${params.model} exhausted plan-only retries without taking action`, + reason: "format", + code: "planning_only_result", + }; + } + if (!EMPTY_TERMINAL_REPLY_RE.test(errorText)) { + return null; + } + + return { + message: `${params.provider}/${params.model} ended with an incomplete terminal response`, + reason: "format", + code: "incomplete_result", + }; +} diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 8ba5f3f2231..f9a925f017a 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -53,7 +53,6 @@ import { resolveAuthProfileOrder, shouldPreferExplicitConfigApiKeyAuth, } from "../model-auth.js"; -import { normalizeProviderId } from "../model-selection.js"; import { ensureOpenClawModelsJson } from "../models-config.js"; import { retireSessionMcpRuntime, @@ -75,6 +74,7 @@ import { parseImageSizeError, pickFallbackThinkingLevel, } from "../pi-embedded-helpers.js"; +import { resolveProviderIdForAuth } from "../provider-auth-aliases.js"; import { ensureRuntimePluginsLoaded } from "../runtime-plugins.js"; import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js"; import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js"; @@ -415,10 +415,17 @@ export async function runEmbeddedPiAgent( let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined; if (lockedProfileId) { const lockedProfile = authStore.profiles[lockedProfileId]; - if ( - !lockedProfile || - normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider) - ) { + const lockedProfileProvider = lockedProfile + ? resolveProviderIdForAuth(lockedProfile.provider, { + config: params.config, + workspaceDir: resolvedWorkspace, + }) + : undefined; + const runProvider = resolveProviderIdForAuth(provider, { + config: params.config, + workspaceDir: resolvedWorkspace, + }); + if (!lockedProfile || !lockedProfileProvider || lockedProfileProvider !== runProvider) { lockedProfileId = undefined; } } @@ -1735,6 +1742,10 @@ export async function runEmbeddedPiAgent( toolMediaUrls: attempt.toolMediaUrls, toolAudioAsVoice: attempt.toolAudioAsVoice, }); + const attemptToolSummary = buildTraceToolSummary({ + toolMetas: attempt.toolMetas, + hadFailure: Boolean(attempt.lastToolError), + }); // Timeout aborts can leave the run without any assistant payloads. // Emit an explicit timeout error instead of silently completing, so @@ -1774,6 +1785,7 @@ export async function runEmbeddedPiAgent( finalAssistantRawText, replayInvalid, livenessState, + toolSummary: attemptToolSummary, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt, @@ -1934,6 +1946,7 @@ export async function runEmbeddedPiAgent( finalAssistantRawText, replayInvalid, livenessState, + toolSummary: attemptToolSummary, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt, @@ -1981,6 +1994,7 @@ export async function runEmbeddedPiAgent( finalAssistantRawText, replayInvalid, livenessState, + toolSummary: attemptToolSummary, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt, @@ -2090,6 +2104,7 @@ export async function runEmbeddedPiAgent( finalAssistantRawText, replayInvalid, livenessState, + toolSummary: attemptToolSummary, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt, @@ -2188,10 +2203,7 @@ export async function runEmbeddedPiAgent( ...(params.verboseLevel ? { verbose: params.verboseLevel } : {}), ...(params.blockReplyBreak ? { blockStreaming: params.blockReplyBreak } : {}), }, - toolSummary: buildTraceToolSummary({ - toolMetas: attempt.toolMetas, - hadFailure: Boolean(attempt.lastToolError), - }), + toolSummary: attemptToolSummary, completion: { ...(stopReason ? { stopReason } : {}), ...(stopReason ? { finishReason: stopReason } : {}), diff --git a/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts b/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts index 2cd45b0c975..200153f1ed0 100644 --- a/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts +++ b/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts @@ -122,7 +122,155 @@ export function shouldWarnOnOrphanedUserRepair( return trigger === "user" || trigger === "manual"; } -function extractUserMessagePlainText(content: unknown): string | undefined { +const QUEUED_USER_MESSAGE_MARKER = + "[Queued user message that arrived while the previous turn was still active]"; +const MAX_STRUCTURED_MEDIA_REF_CHARS = 300; +const MAX_STRUCTURED_JSON_STRING_CHARS = 300; +const MAX_STRUCTURED_JSON_DEPTH = 4; +const MAX_STRUCTURED_JSON_ARRAY_ITEMS = 16; +const MAX_STRUCTURED_JSON_OBJECT_KEYS = 32; + +function summarizeStructuredMediaRef(label: string, value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + if (!trimmed) { + return undefined; + } + const dataUriMatch = trimmed.match(/^data:([^;,]+)?(?:;[^,]*)?,/i); + if (dataUriMatch) { + const mimeType = dataUriMatch[1]?.trim() || "unknown"; + return `[${label}] inline data URI (${mimeType}, ${trimmed.length} chars)`; + } + if (trimmed.length > MAX_STRUCTURED_MEDIA_REF_CHARS) { + return `[${label}] ${trimmed.slice(0, MAX_STRUCTURED_MEDIA_REF_CHARS)}... (${trimmed.length} chars)`; + } + return `[${label}] ${trimmed}`; +} + +function summarizeStructuredJsonString(value: string): string { + const mediaSummary = summarizeStructuredMediaRef("value", value); + if (mediaSummary?.includes("inline data URI")) { + return mediaSummary; + } + const trimmed = value.trim(); + if (trimmed.length > MAX_STRUCTURED_JSON_STRING_CHARS) { + return `${trimmed.slice(0, MAX_STRUCTURED_JSON_STRING_CHARS)}... (${trimmed.length} chars)`; + } + return value; +} + +function sanitizeStructuredJsonValue( + value: unknown, + depth = 0, + seen: WeakSet = new WeakSet(), +): unknown { + if (typeof value === "string") { + return summarizeStructuredJsonString(value); + } + if (!value || typeof value !== "object") { + return value; + } + if (seen.has(value)) { + return "[circular]"; + } + if (depth >= MAX_STRUCTURED_JSON_DEPTH) { + return "[max depth]"; + } + seen.add(value); + if (Array.isArray(value)) { + const limited = value + .slice(0, MAX_STRUCTURED_JSON_ARRAY_ITEMS) + .map((item) => sanitizeStructuredJsonValue(item, depth + 1, seen)); + if (value.length > MAX_STRUCTURED_JSON_ARRAY_ITEMS) { + limited.push(`[${value.length - MAX_STRUCTURED_JSON_ARRAY_ITEMS} more items]`); + } + seen.delete(value); + return limited; + } + const output: Record = {}; + let copied = 0; + let skipped = 0; + for (const key in value as Record) { + if (!Object.hasOwn(value, key)) { + continue; + } + if (copied >= MAX_STRUCTURED_JSON_OBJECT_KEYS) { + skipped += 1; + continue; + } + output[key] = sanitizeStructuredJsonValue( + (value as Record)[key], + depth + 1, + seen, + ); + copied += 1; + } + if (skipped > 0) { + output.__truncated = `${skipped} more keys`; + } + seen.delete(value); + return output; +} + +function stringifyStructuredJsonFallback(part: unknown): string | undefined { + try { + const serialized = JSON.stringify(sanitizeStructuredJsonValue(part)); + if (!serialized || serialized === "{}") { + return undefined; + } + const withoutInlineData = serialized.replace( + /data:[^"'\\\s]+/gi, + (match) => `[inline data URI: ${match.length} chars]`, + ); + return withoutInlineData.length > 1_000 + ? `${withoutInlineData.slice(0, 1_000)}... (${withoutInlineData.length} chars)` + : withoutInlineData; + } catch { + return undefined; + } +} + +function stringifyStructuredContentPart(part: unknown): string | undefined { + if (!part || typeof part !== "object") { + return undefined; + } + const record = part as Record; + if (record.type === "text") { + const text = typeof record.text === "string" ? record.text.trim() : ""; + return text || undefined; + } + if (record.type === "image_url") { + const imageUrl = record.image_url; + const url = + typeof imageUrl === "string" + ? imageUrl + : imageUrl && typeof imageUrl === "object" + ? (imageUrl as { url?: unknown }).url + : undefined; + return summarizeStructuredMediaRef("image_url", url); + } + if (record.type === "image" || record.type === "input_image") { + return ( + summarizeStructuredMediaRef(record.type, record.url) ?? + summarizeStructuredMediaRef(record.type, record.source) + ); + } + if (typeof record.type === "string") { + const typedRef = + summarizeStructuredMediaRef(record.type, record.audio_url) ?? + summarizeStructuredMediaRef(record.type, record.media_url) ?? + summarizeStructuredMediaRef(record.type, record.url) ?? + summarizeStructuredMediaRef(record.type, record.source); + if (typedRef) { + return typedRef; + } + } + return stringifyStructuredJsonFallback(part); +} + +function extractUserMessagePromptText(content: unknown): string | undefined { if (typeof content === "string") { const trimmed = content.trim(); return trimmed || undefined; @@ -131,38 +279,47 @@ function extractUserMessagePlainText(content: unknown): string | undefined { return undefined; } const text = content - .flatMap((part) => - part && typeof part === "object" && "type" in part && part.type === "text" - ? [typeof part.text === "string" ? part.text : ""] - : [], - ) + .flatMap((part) => { + const text = stringifyStructuredContentPart(part); + return text ? [text] : []; + }) .join("\n") .trim(); return text || undefined; } +function promptAlreadyIncludesQueuedUserMessage(prompt: string, orphanText: string): boolean { + const normalizedPrompt = prompt.replace(/\r\n/g, "\n"); + const normalizedOrphanText = orphanText.replace(/\r\n/g, "\n").trim(); + if (!normalizedOrphanText) { + return false; + } + const queuedBlockPrefix = `${QUEUED_USER_MESSAGE_MARKER}\n${normalizedOrphanText}`; + return ( + normalizedPrompt === queuedBlockPrefix || + normalizedPrompt.startsWith(`${queuedBlockPrefix}\n`) || + normalizedPrompt.includes(`\n${queuedBlockPrefix}\n`) || + `\n${normalizedPrompt}\n`.includes(`\n${normalizedOrphanText}\n`) + ); +} + export function mergeOrphanedTrailingUserPrompt(params: { prompt: string; trigger: EmbeddedRunAttemptParams["trigger"]; leafMessage: { content?: unknown }; -}): { prompt: string; merged: boolean } { - if (!shouldWarnOnOrphanedUserRepair(params.trigger)) { - return { prompt: params.prompt, merged: false }; +}): { prompt: string; merged: boolean; removeLeaf: boolean } { + const orphanText = extractUserMessagePromptText(params.leafMessage.content); + if (!orphanText) { + return { prompt: params.prompt, merged: false, removeLeaf: true }; } - - const orphanText = extractUserMessagePlainText(params.leafMessage.content); - if (!orphanText || orphanText.length < 4 || params.prompt.includes(orphanText)) { - return { prompt: params.prompt, merged: false }; + if (promptAlreadyIncludesQueuedUserMessage(params.prompt, orphanText)) { + return { prompt: params.prompt, merged: false, removeLeaf: true }; } return { - prompt: [ - "[Queued user message that arrived while the previous turn was still active]", - orphanText, - "", - params.prompt, - ].join("\n"), + prompt: [QUEUED_USER_MESSAGE_MARKER, orphanText, "", params.prompt].join("\n"), merged: true, + removeLeaf: true, }; } diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index ace2e34d3e8..2befcd7602e 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -317,6 +317,7 @@ describe("mergeOrphanedTrailingUserPrompt", () => { }), ).toEqual({ merged: true, + removeLeaf: true, prompt: "[Queued user message that arrived while the previous turn was still active]\n" + "older active-turn message\n\nnewest inbound message", @@ -334,11 +335,124 @@ describe("mergeOrphanedTrailingUserPrompt", () => { }), ).toEqual({ merged: false, + removeLeaf: true, prompt: "summary\nolder active-turn message\nnewest inbound message", }); }); - it("skips orphan prompt merging for non-user triggers", () => { + it("does not treat short orphan text as duplicate from a substring match", () => { + expect( + mergeOrphanedTrailingUserPrompt({ + prompt: "please inspect this token", + trigger: "user", + leafMessage: { + content: "ok", + } as never, + }), + ).toEqual({ + merged: true, + removeLeaf: true, + prompt: + "[Queued user message that arrived while the previous turn was still active]\n" + + "ok\n\nplease inspect this token", + }); + }); + + it("preserves structured orphaned user content before removing the leaf", () => { + expect( + mergeOrphanedTrailingUserPrompt({ + prompt: "newest inbound message", + trigger: "user", + leafMessage: { + content: [ + { type: "text", text: "please inspect this" }, + { type: "image_url", image_url: { url: "https://example.test/cat.png" } }, + { type: "input_audio", audio_url: "https://example.test/cat.wav" }, + ], + } as never, + }), + ).toEqual({ + merged: true, + removeLeaf: true, + prompt: + "[Queued user message that arrived while the previous turn was still active]\n" + + "please inspect this\n" + + "[image_url] https://example.test/cat.png\n" + + "[input_audio] https://example.test/cat.wav\n\n" + + "newest inbound message", + }); + }); + + it("summarizes inline structured media without embedding data URIs", () => { + const dataUri = `data:image/png;base64,${"a".repeat(4096)}`; + + const result = mergeOrphanedTrailingUserPrompt({ + prompt: "newest inbound message", + trigger: "user", + leafMessage: { + content: [ + { type: "text", text: "please inspect this inline image" }, + { type: "image_url", image_url: { url: dataUri } }, + ], + } as never, + }); + + expect(result).toMatchObject({ + merged: true, + removeLeaf: true, + }); + expect(result.prompt).toContain("please inspect this inline image"); + expect(result.prompt).toContain("[image_url] inline data URI (image/png, 4118 chars)"); + expect(result.prompt).not.toContain("base64"); + expect(result.prompt).not.toContain("aaaa"); + }); + + it("summarizes unknown structured data before JSON serialization", () => { + const dataUri = `data:image/png;base64,${"a".repeat(10_000)}`; + const result = mergeOrphanedTrailingUserPrompt({ + prompt: "newest inbound message", + trigger: "user", + leafMessage: { + content: [ + { + type: "unknown_content", + nested: { + inline: dataUri, + longText: "b".repeat(2_000), + }, + }, + ], + } as never, + }); + + expect(result).toMatchObject({ + merged: true, + removeLeaf: true, + }); + expect(result.prompt).toContain("[value] inline data URI (image/png, 10022 chars)"); + expect(result.prompt).toContain("bbbb"); + expect(result.prompt).toContain("(2000 chars)"); + expect(result.prompt).not.toContain("base64"); + expect(result.prompt).not.toContain("aaaa"); + }); + + it("removes an empty orphaned user leaf to prevent consecutive user turns", () => { + expect( + mergeOrphanedTrailingUserPrompt({ + prompt: "newest inbound message", + trigger: "user", + leafMessage: { + content: [], + } as never, + }), + ).toEqual({ + merged: false, + removeLeaf: true, + prompt: "newest inbound message", + }); + }); + + it("merges orphan prompt text for non-user triggers without warning policy changes", () => { expect( mergeOrphanedTrailingUserPrompt({ prompt: "HEARTBEAT_OK", @@ -348,8 +462,11 @@ describe("mergeOrphanedTrailingUserPrompt", () => { } as never, }), ).toEqual({ - merged: false, - prompt: "HEARTBEAT_OK", + merged: true, + removeLeaf: true, + prompt: + "[Queued user message that arrived while the previous turn was still active]\n" + + "older active-turn message\n\nHEARTBEAT_OK", }); }); }); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 5bceb3c0ea8..39083fd79a7 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -19,7 +19,10 @@ import { resolveHeartbeatSummaryForAgent } from "../../../infra/heartbeat-summar import { getMachineDisplayName } from "../../../infra/machine-name.js"; import { MAX_IMAGE_BYTES } from "../../../media/constants.js"; import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js"; -import { resolveToolCallArgumentsEncoding } from "../../../plugins/provider-model-compat.js"; +import { + extractModelCompat, + resolveToolCallArgumentsEncoding, +} from "../../../plugins/provider-model-compat.js"; import { resolveProviderSystemPromptContribution, resolveProviderTextTransforms, @@ -550,7 +553,7 @@ export async function runEmbeddedAttempt( abortSignal: runAbortController.signal, modelProvider: params.model.provider, modelId: params.modelId, - modelCompat: params.model.compat, + modelCompat: extractModelCompat(params.model), modelApi: params.model.api, modelContextWindowTokens: params.model.contextWindow, modelAuthMode: resolveModelAuthMode(params.model.provider, params.config), @@ -2069,15 +2072,23 @@ export async function runEmbeddedAttempt( leafMessage: leafEntry.message, }); effectivePrompt = orphanPromptMerge.prompt; - if (leafEntry.parentId) { - sessionManager.branch(leafEntry.parentId); - } else { - sessionManager.resetLeaf(); + if (orphanPromptMerge.removeLeaf) { + if (leafEntry.parentId) { + sessionManager.branch(leafEntry.parentId); + } else { + sessionManager.resetLeaf(); + } + const sessionContext = sessionManager.buildSessionContext(); + activeSession.agent.state.messages = sessionContext.messages; } - const sessionContext = sessionManager.buildSessionContext(); - activeSession.agent.state.messages = sessionContext.messages; const orphanRepairMessage = - `${orphanPromptMerge.merged ? "Merged and removed" : "Removed"} orphaned user message ` + + `${ + orphanPromptMerge.removeLeaf + ? orphanPromptMerge.merged + ? "Merged and removed" + : "Removed already-queued" + : "Preserved" + } orphaned user message ` + `to prevent consecutive user turns. ` + `runId=${params.runId} sessionId=${params.sessionId} trigger=${params.trigger}`; if (shouldWarnOnOrphanedUserRepair(params.trigger)) { diff --git a/src/agents/pi-model-discovery.synthetic-auth.test.ts b/src/agents/pi-model-discovery.synthetic-auth.test.ts index 8208fbaee38..207ba6b7d19 100644 --- a/src/agents/pi-model-discovery.synthetic-auth.test.ts +++ b/src/agents/pi-model-discovery.synthetic-auth.test.ts @@ -49,6 +49,8 @@ describe("pi model discovery synthetic auth", () => { beforeEach(() => { resolveRuntimeSyntheticAuthProviderRefs.mockClear(); resolveProviderSyntheticAuthWithPlugin.mockClear(); + vi.stubEnv("ANTHROPIC_API_KEY", ""); + vi.stubEnv("ANTHROPIC_OAUTH_TOKEN", ""); }); afterEach(() => { diff --git a/src/agents/provider-auth-aliases.test.ts b/src/agents/provider-auth-aliases.test.ts new file mode 100644 index 00000000000..6684632d511 --- /dev/null +++ b/src/agents/provider-auth-aliases.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it, vi } from "vitest"; + +const loadPluginManifestRegistry = vi.hoisted(() => vi.fn()); + +vi.mock("../plugins/manifest-registry.js", () => ({ + loadPluginManifestRegistry, +})); + +import { resolveProviderIdForAuth } from "./provider-auth-aliases.js"; + +describe("provider auth aliases", () => { + it("treats deprecated auth choice ids as provider auth aliases", () => { + loadPluginManifestRegistry.mockReturnValue({ + plugins: [ + { + id: "openai", + origin: "bundled", + providerAuthChoices: [ + { + provider: "openai-codex", + method: "oauth", + choiceId: "openai-codex", + deprecatedChoiceIds: ["codex-cli", "openai-codex-import"], + }, + ], + }, + ], + diagnostics: [], + }); + + expect(resolveProviderIdForAuth("codex-cli")).toBe("openai-codex"); + expect(resolveProviderIdForAuth("openai-codex-import")).toBe("openai-codex"); + expect(resolveProviderIdForAuth("openai-codex")).toBe("openai-codex"); + }); +}); diff --git a/src/agents/provider-auth-aliases.ts b/src/agents/provider-auth-aliases.ts index c3ebeb90989..120940ed69e 100644 --- a/src/agents/provider-auth-aliases.ts +++ b/src/agents/provider-auth-aliases.ts @@ -56,6 +56,30 @@ function shouldUsePluginAuthAliases( return isWorkspacePluginTrustedForAuthAliases(plugin, params?.config); } +function setPreferredAlias(params: { + aliases: Map; + alias: string; + origin?: PluginOrigin; + target: string; +}) { + const normalizedAlias = normalizeProviderId(params.alias); + const normalizedTarget = normalizeProviderId(params.target); + if (!normalizedAlias || !normalizedTarget) { + return; + } + const existing = params.aliases.get(normalizedAlias); + if ( + !existing || + resolveProviderAuthAliasOriginPriority(params.origin) < + resolveProviderAuthAliasOriginPriority(existing.origin) + ) { + params.aliases.set(normalizedAlias, { + origin: params.origin, + target: normalizedTarget, + }); + } +} + export function resolveProviderAuthAliasMap( params?: ProviderAuthAliasLookupParams, ): Record { @@ -73,20 +97,21 @@ export function resolveProviderAuthAliasMap( for (const [alias, target] of Object.entries(plugin.providerAuthAliases ?? {}).toSorted( ([left], [right]) => left.localeCompare(right), )) { - const normalizedAlias = normalizeProviderId(alias); - const normalizedTarget = normalizeProviderId(target); - if (normalizedAlias && normalizedTarget) { - const existing = preferredAliases.get(normalizedAlias); - if ( - !existing || - resolveProviderAuthAliasOriginPriority(plugin.origin) < - resolveProviderAuthAliasOriginPriority(existing.origin) - ) { - preferredAliases.set(normalizedAlias, { - origin: plugin.origin, - target: normalizedTarget, - }); - } + setPreferredAlias({ + aliases: preferredAliases, + alias, + origin: plugin.origin, + target, + }); + } + for (const choice of plugin.providerAuthChoices ?? []) { + for (const deprecatedChoiceId of choice.deprecatedChoiceIds ?? []) { + setPreferredAlias({ + aliases: preferredAliases, + alias: deprecatedChoiceId, + origin: plugin.origin, + target: choice.provider, + }); } } } diff --git a/src/agents/tools-effective-inventory.ts b/src/agents/tools-effective-inventory.ts index 21f0241c961..f43f915a5c7 100644 --- a/src/agents/tools-effective-inventory.ts +++ b/src/agents/tools-effective-inventory.ts @@ -1,4 +1,5 @@ import type { OpenClawConfig } from "../config/config.js"; +import { extractModelCompat } from "../plugins/provider-model-compat.js"; import { getPluginToolMeta } from "../plugins/tools.js"; import { normalizeLowercaseStringOrEmpty, @@ -95,7 +96,7 @@ function resolveEffectiveModelCompat(params: { return undefined; } try { - return resolveModel(provider, modelId, params.agentDir, params.cfg).model?.compat; + return extractModelCompat(resolveModel(provider, modelId, params.agentDir, params.cfg).model); } catch { return undefined; } diff --git a/src/agents/tools/message-tool.test.ts b/src/agents/tools/message-tool.test.ts index 993f1af3bfe..8b9c53b7971 100644 --- a/src/agents/tools/message-tool.test.ts +++ b/src/agents/tools/message-tool.test.ts @@ -802,6 +802,57 @@ describe("message tool description", () => { expect(tool.description).toContain("telegram (delete, edit, react, send, topic-create)"); }); + it("does not advertise cross-channel actions whose params are hidden by current-channel schema", () => { + const signalPlugin = createChannelPlugin({ + id: "signal", + label: "Signal", + docsPath: "/channels/signal", + blurb: "Signal test plugin.", + actions: ["send", "react"], + }); + const matrixProfilePlugin = createChannelPlugin({ + id: "matrix", + label: "Matrix", + docsPath: "/channels/matrix", + blurb: "Matrix test plugin.", + actions: ["send", "set-profile"], + toolSchema: { + properties: { + displayName: Type.Optional(Type.String()), + avatarUrl: Type.Optional(Type.String()), + }, + }, + }); + + setActivePluginRegistry( + createTestRegistry([ + { pluginId: "signal", source: "test", plugin: signalPlugin }, + { pluginId: "matrix", source: "test", plugin: matrixProfilePlugin }, + ]), + ); + + const crossChannelTool = createMessageTool({ + config: {} as never, + currentChannelProvider: "signal", + }); + const crossChannelProperties = getToolProperties(crossChannelTool); + + expect(getActionEnum(crossChannelProperties)).not.toContain("set-profile"); + expect(crossChannelProperties.displayName).toBeUndefined(); + expect(crossChannelProperties.avatarUrl).toBeUndefined(); + expect(crossChannelTool.description).not.toContain("matrix (send, set-profile)"); + + const currentChannelTool = createMessageTool({ + config: {} as never, + currentChannelProvider: "matrix", + }); + const currentChannelProperties = getToolProperties(currentChannelTool); + + expect(getActionEnum(currentChannelProperties)).toContain("set-profile"); + expect(currentChannelProperties.displayName).toBeDefined(); + expect(currentChannelProperties.avatarUrl).toBeDefined(); + }); + it("normalizes channel aliases before building the current channel description", () => { const signalPlugin = createChannelPlugin({ id: "signal", diff --git a/src/agents/tools/message-tool.ts b/src/agents/tools/message-tool.ts index d296ff1f54c..13b52c8b254 100644 --- a/src/agents/tools/message-tool.ts +++ b/src/agents/tools/message-tool.ts @@ -4,6 +4,7 @@ import { channelSupportsMessageCapability, channelSupportsMessageCapabilityForChannel, type ChannelMessageActionDiscoveryInput, + listCrossChannelSchemaSupportedMessageActions, resolveChannelMessageToolSchemaProperties, } from "../../channels/plugins/message-action-discovery.js"; import { CHANNEL_MESSAGE_ACTION_NAMES } from "../../channels/plugins/message-action-names.js"; @@ -491,7 +492,7 @@ function resolveMessageToolSchemaActions(params: MessageToolDiscoveryParams): st if (plugin.id === currentChannel) { continue; } - for (const action of listChannelSupportedActions( + for (const action of listCrossChannelSchemaSupportedMessageActions( buildMessageActionDiscoveryInput(params, plugin.id), )) { allActions.add(action); @@ -603,7 +604,7 @@ function buildMessageToolDescription(options?: { if (plugin.id === currentChannel) { continue; } - const actions = listChannelSupportedActions( + const actions = listCrossChannelSchemaSupportedMessageActions( buildMessageActionDiscoveryInput(messageToolDiscoveryParams, plugin.id), ); if (actions.length > 0) { diff --git a/src/auto-reply/reply/agent-runner-auth-profile.ts b/src/auto-reply/reply/agent-runner-auth-profile.ts index 87bb9cc2c9c..5e5820459b7 100644 --- a/src/auto-reply/reply/agent-runner-auth-profile.ts +++ b/src/auto-reply/reply/agent-runner-auth-profile.ts @@ -1,3 +1,7 @@ +import { + resolveProviderIdForAuth, + type ProviderAuthAliasLookupParams, +} from "../../agents/provider-auth-aliases.js"; import type { FollowupRun } from "./queue.js"; export function resolveProviderScopedAuthProfile(params: { @@ -5,20 +9,32 @@ export function resolveProviderScopedAuthProfile(params: { primaryProvider: string; authProfileId?: string; authProfileIdSource?: "auto" | "user"; + config?: ProviderAuthAliasLookupParams["config"]; + workspaceDir?: ProviderAuthAliasLookupParams["workspaceDir"]; }): { authProfileId?: string; authProfileIdSource?: "auto" | "user" } { + const aliasParams = { config: params.config, workspaceDir: params.workspaceDir }; const authProfileId = - params.provider === params.primaryProvider ? params.authProfileId : undefined; + resolveProviderIdForAuth(params.provider, aliasParams) === + resolveProviderIdForAuth(params.primaryProvider, aliasParams) + ? params.authProfileId + : undefined; return { authProfileId, authProfileIdSource: authProfileId ? params.authProfileIdSource : undefined, }; } -export function resolveRunAuthProfile(run: FollowupRun["run"], provider: string) { +export function resolveRunAuthProfile( + run: FollowupRun["run"], + provider: string, + params?: { config?: ProviderAuthAliasLookupParams["config"] }, +) { return resolveProviderScopedAuthProfile({ provider, primaryProvider: run.provider, authProfileId: run.authProfileId, authProfileIdSource: run.authProfileIdSource, + config: params?.config ?? run.config, + workspaceDir: run.workspaceDir, }); } diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts index d2189196317..e664e205997 100644 --- a/src/auto-reply/reply/agent-runner-execution.test.ts +++ b/src/auto-reply/reply/agent-runner-execution.test.ts @@ -3,7 +3,7 @@ import { LiveSessionModelSwitchError } from "../../agents/live-model-switch-erro import type { SessionEntry } from "../../config/sessions.js"; import { CommandLaneClearedError, GatewayDrainingError } from "../../process/command-queue.js"; import type { TemplateContext } from "../templating.js"; -import type { GetReplyOptions } from "../types.js"; +import type { GetReplyOptions, ReplyPayload } from "../types.js"; import { MAX_LIVE_SWITCH_RETRIES } from "./agent-runner-execution.js"; import type { FollowupRun } from "./queue.js"; import type { ReplyOperation } from "./reply-run-registry.js"; @@ -15,6 +15,7 @@ const state = vi.hoisted(() => ({ runWithModelFallbackMock: vi.fn(), isCliProviderMock: vi.fn((_: unknown) => false), isInternalMessageChannelMock: vi.fn((_: unknown) => false), + createBlockReplyDeliveryHandlerMock: vi.fn(), })); vi.mock("../../agents/pi-embedded.js", () => ({ @@ -133,7 +134,8 @@ vi.mock("./agent-runner-utils.js", () => ({ })); vi.mock("./reply-delivery.js", () => ({ - createBlockReplyDeliveryHandler: vi.fn(), + createBlockReplyDeliveryHandler: (params: unknown) => + state.createBlockReplyDeliveryHandlerMock(params), })); vi.mock("./reply-media-paths.runtime.js", () => ({ @@ -153,9 +155,17 @@ async function getApplyFallbackCandidateSelectionToEntry() { type FallbackRunnerParams = { run: (provider: string, model: string) => Promise; + classifyResult?: (params: { + result: { payloads?: Array<{ text?: string; isError?: boolean; isReasoning?: boolean }> }; + provider: string; + model: string; + attempt: number; + total: number; + }) => Promise; }; type EmbeddedAgentParams = { + onBlockReply?: (payload: { text?: string; mediaUrls?: string[] }) => Promise | void; onToolResult?: (payload: { text?: string; mediaUrls?: string[] }) => Promise | void; onItemEvent?: (payload: { itemId?: string; @@ -247,6 +257,35 @@ function createMockReplyOperation(): { }; } +function createMinimalRunAgentTurnParams(overrides?: { + followupRun?: FollowupRun; + opts?: GetReplyOptions; +}) { + return { + commandBody: "fix it", + followupRun: overrides?.followupRun ?? createFollowupRun(), + sessionCtx: { + Provider: "whatsapp", + MessageSid: "msg", + } as unknown as TemplateContext, + opts: overrides?.opts ?? ({} satisfies GetReplyOptions), + typingSignals: createMockTypingSignaler(), + blockReplyPipeline: null, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end" as const, + applyReplyToMode: (payload: ReplyPayload) => payload, + shouldEmitToolResult: () => true, + shouldEmitToolOutput: () => false, + pendingToolTasks: new Set>(), + resetSessionAfterCompactionFailure: async () => false, + resetSessionAfterRoleOrderingConflict: async () => false, + isHeartbeat: false, + sessionKey: "main", + getActiveSessionEntry: () => undefined, + resolvedVerboseLevel: "off" as const, + }; +} + describe("runAgentTurnWithFallback", () => { beforeEach(() => { state.runEmbeddedPiAgentMock.mockReset(); @@ -256,6 +295,8 @@ describe("runAgentTurnWithFallback", () => { state.isCliProviderMock.mockReturnValue(false); state.isInternalMessageChannelMock.mockReset(); state.isInternalMessageChannelMock.mockReturnValue(false); + state.createBlockReplyDeliveryHandlerMock.mockReset(); + state.createBlockReplyDeliveryHandlerMock.mockReturnValue(undefined); state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => ({ result: await params.run("anthropic", "claude"), provider: "anthropic", @@ -512,6 +553,253 @@ describe("runAgentTurnWithFallback", () => { }); }); + it("classifies GPT-5 plan-only terminal results as fallback-eligible", async () => { + const followupRun = createFollowupRun(); + followupRun.run.provider = "openai-codex"; + followupRun.run.model = "gpt-5.4"; + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [ + { + text: "agent stopped after repeated plan-only turns without taking a concrete action.", + isError: true, + }, + ], + meta: {}, + }); + state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => { + const first = (await params.run("openai-codex", "gpt-5.4")) as { + payloads?: Array<{ text?: string; isError?: boolean; isReasoning?: boolean }>; + }; + expect( + await params.classifyResult?.({ + result: first, + provider: "openai-codex", + model: "gpt-5.4", + attempt: 1, + total: 2, + }), + ).toMatchObject({ + reason: "format", + code: "planning_only_result", + }); + return { + result: { payloads: [{ text: "fallback ok" }], meta: {} }, + provider: "anthropic", + model: "claude", + attempts: [ + { + provider: "openai-codex", + model: "gpt-5.4", + error: "planning-only", + reason: "format", + }, + ], + }; + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback(createMinimalRunAgentTurnParams({ followupRun })); + + expect(result.kind).toBe("success"); + if (result.kind === "success") { + expect(result.runResult.payloads?.[0]?.text).toBe("fallback ok"); + expect(result.fallbackProvider).toBe("anthropic"); + expect(result.fallbackAttempts[0]?.reason).toBe("format"); + } + }); + + it("does not classify silent NO_REPLY terminal results for fallback", async () => { + state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => { + const result = { payloads: [{ text: "NO_REPLY" }], meta: {} }; + expect( + await params.classifyResult?.({ + result, + provider: "openai-codex", + model: "gpt-5.4", + attempt: 1, + total: 2, + }), + ).toBeNull(); + return { + result, + provider: "openai-codex", + model: "gpt-5.4", + attempts: [], + }; + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback(createMinimalRunAgentTurnParams()); + + expect(result.kind).toBe("success"); + }); + + it("does not classify empty final payloads after block replies were sent", async () => { + const followupRun = createFollowupRun(); + followupRun.run.provider = "openai-codex"; + followupRun.run.model = "gpt-5.4"; + state.createBlockReplyDeliveryHandlerMock.mockImplementationOnce( + (params: { directlySentBlockKeys?: Set }) => async () => { + params.directlySentBlockKeys?.add("block:1"); + }, + ); + state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => { + await params.onBlockReply?.({ text: "streamed block" }); + return { payloads: [], meta: {} }; + }); + state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => { + const result = (await params.run("openai-codex", "gpt-5.4")) as { + payloads?: Array<{ text?: string; isError?: boolean; isReasoning?: boolean }>; + }; + expect( + await params.classifyResult?.({ + result, + provider: "openai-codex", + model: "gpt-5.4", + attempt: 1, + total: 2, + }), + ).toBeNull(); + return { + result, + provider: "openai-codex", + model: "gpt-5.4", + attempts: [], + }; + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback( + createMinimalRunAgentTurnParams({ + followupRun, + opts: { onBlockReply: vi.fn() } satisfies GetReplyOptions, + }), + ); + + expect(result.kind).toBe("success"); + }); + + it("does not classify empty final payloads while block replies are buffered", async () => { + const followupRun = createFollowupRun(); + followupRun.run.provider = "openai-codex"; + followupRun.run.model = "gpt-5.4"; + const blockReplyPipeline = { + enqueue: vi.fn(), + flush: vi.fn(async () => {}), + stop: vi.fn(), + hasBuffered: vi.fn(() => true), + didStream: vi.fn(() => false), + isAborted: vi.fn(() => false), + hasSentPayload: vi.fn(() => false), + }; + state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => { + const result = { payloads: [], meta: {} }; + expect( + await params.classifyResult?.({ + result, + provider: "openai-codex", + model: "gpt-5.4", + attempt: 1, + total: 2, + }), + ).toBeNull(); + return { + result, + provider: "openai-codex", + model: "gpt-5.4", + attempts: [], + }; + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback({ + ...createMinimalRunAgentTurnParams({ followupRun }), + blockReplyPipeline, + blockStreamingEnabled: true, + opts: { onBlockReply: vi.fn() } satisfies GetReplyOptions, + }); + + expect(result.kind).toBe("success"); + }); + + it("classifies final GPT-5 terminal-empty results instead of silently succeeding", async () => { + state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => { + const result = { payloads: [], meta: {} }; + expect( + await params.classifyResult?.({ + result, + provider: "openai-codex", + model: "gpt-5.4", + attempt: 1, + total: 1, + }), + ).toMatchObject({ + reason: "format", + code: "empty_result", + }); + return { + result, + provider: "openai-codex", + model: "gpt-5.4", + attempts: [], + }; + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback(createMinimalRunAgentTurnParams()); + + expect(result.kind).toBe("success"); + }); + + it("rolls back persisted fallback selection when result classification rejects a candidate", async () => { + const followupRun = createFollowupRun(); + followupRun.run.provider = "anthropic"; + followupRun.run.model = "claude"; + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + totalTokens: 1, + compactionCount: 0, + }; + const activeSessionStore = { main: sessionEntry }; + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ payloads: [], meta: {} }); + state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => { + const failedResult = await params.run("openai-codex", "gpt-5.4"); + expect(sessionEntry.providerOverride).toBe("openai-codex"); + expect(sessionEntry.modelOverride).toBe("gpt-5.4"); + expect( + await params.classifyResult?.({ + result: failedResult as { payloads?: [] }, + provider: "openai-codex", + model: "gpt-5.4", + attempt: 1, + total: 2, + }), + ).toMatchObject({ + code: "empty_result", + }); + expect(sessionEntry.providerOverride).toBeUndefined(); + expect(sessionEntry.modelOverride).toBeUndefined(); + return { + result: { payloads: [{ text: "fallback ok" }], meta: {} }, + provider: "anthropic", + model: "claude", + attempts: [], + }; + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback({ + ...createMinimalRunAgentTurnParams({ followupRun }), + activeSessionStore, + getActiveSessionEntry: () => sessionEntry, + }); + + expect(result.kind).toBe("success"); + expect(sessionEntry.providerOverride).toBeUndefined(); + expect(sessionEntry.modelOverride).toBeUndefined(); + }); + it("strips a glued leading NO_REPLY token from streamed tool results", async () => { const onToolResult = vi.fn(); state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => { diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 13b27eefea8..fc71f783e05 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -26,6 +26,7 @@ import { isTransientHttpError, } from "../../agents/pi-embedded-helpers.js"; import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers/sanitize-user-facing-text.js"; +import { classifyEmbeddedPiRunResultForModelFallback } from "../../agents/pi-embedded-runner/result-fallback-classifier.js"; import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js"; import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js"; import { @@ -116,6 +117,8 @@ export type AgentRunLoopResult = } | { kind: "final"; payload: ReplyPayload }; +type EmbeddedAgentRunResult = Awaited>; + type FallbackSelectionState = Pick< SessionEntry, | "providerOverride" @@ -685,6 +688,32 @@ export async function runAgentTurnWithFallback(params: { let bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen( params.getActiveSessionEntry()?.systemPromptReport, ); + let pendingFallbackCandidateRollback: + | { + provider: string; + model: string; + rollback: () => Promise; + } + | undefined; + const clearPendingFallbackRollback = (rollback?: () => Promise) => { + if (!rollback || pendingFallbackCandidateRollback?.rollback === rollback) { + pendingFallbackCandidateRollback = undefined; + } + }; + const rollbackClassifiedFallbackCandidateSelection = async (provider: string, model: string) => { + const pending = pendingFallbackCandidateRollback; + if (!pending || pending.provider !== provider || pending.model !== model) { + return; + } + pendingFallbackCandidateRollback = undefined; + try { + await pending.rollback(); + } catch (rollbackError) { + logVerbose( + `failed to roll back classified fallback candidate selection (non-fatal): ${String(rollbackError)}`, + ); + } + }; const persistFallbackCandidateSelection = async ( provider: string, model: string, @@ -856,9 +885,24 @@ export async function runAgentTurnWithFallback(params: { }) : undefined; const onToolResult = params.opts?.onToolResult; - const fallbackResult = await runWithModelFallback({ + const fallbackResult = await runWithModelFallback({ ...resolveModelFallbackOptions(params.followupRun.run), runId, + classifyResult: async ({ result, provider, model }) => { + const classification = classifyEmbeddedPiRunResultForModelFallback({ + result, + provider, + model, + hasDirectlySentBlockReply: directlySentBlockKeys.size > 0, + hasBlockReplyPipelineOutput: Boolean( + blockReplyPipeline?.hasBuffered() || blockReplyPipeline?.didStream(), + ), + }); + if (classification) { + await rollbackClassifiedFallbackCandidateSelection(provider, model); + } + return classification; + }, run: async (provider, model, runOptions) => { // Notify that model selection is complete (including after fallback). // This allows responsePrefix template interpolation with the actual model. @@ -873,6 +917,13 @@ export async function runAgentTurnWithFallback(params: { provider, model, ); + if (rollbackFallbackCandidateSelection) { + pendingFallbackCandidateRollback = { + provider, + model, + rollback: rollbackFallbackCandidateSelection, + }; + } } catch (error) { logVerbose( `failed to persist fallback candidate selection (non-fatal): ${String(error)}`, @@ -894,10 +945,9 @@ export async function runAgentTurnWithFallback(params: { params.getActiveSessionEntry(), provider, ); - const authProfileId = - provider === params.followupRun.run.provider - ? params.followupRun.run.authProfileId - : undefined; + const authProfile = resolveRunAuthProfile(params.followupRun.run, provider, { + config: runtimeConfig, + }); const hookMessageProvider = resolveOriginMessageProvider({ originatingChannel: params.followupRun.originatingChannel, provider: params.sessionCtx.Provider, @@ -924,7 +974,7 @@ export async function runAgentTurnWithFallback(params: { ownerNumbers: params.followupRun.run.ownerNumbers, cliSessionId: cliSessionBinding?.sessionId, cliSessionBinding, - authProfileId, + authProfileId: authProfile.authProfileId, bootstrapPromptWarningSignaturesSeen, bootstrapPromptWarningSignature: bootstrapPromptWarningSignaturesSeen[ @@ -972,6 +1022,7 @@ export async function runAgentTurnWithFallback(params: { if (rollbackFallbackCandidateSelection) { try { await rollbackFallbackCandidateSelection(); + clearPendingFallbackRollback(rollbackFallbackCandidateSelection); } catch (rollbackError) { logVerbose( `failed to roll back fallback candidate selection (non-fatal): ${String(rollbackError)}`, @@ -1286,6 +1337,7 @@ export async function runAgentTurnWithFallback(params: { if (rollbackFallbackCandidateSelection) { try { await rollbackFallbackCandidateSelection(); + clearPendingFallbackRollback(rollbackFallbackCandidateSelection); } catch (rollbackError) { logVerbose( `failed to roll back fallback candidate selection (non-fatal): ${String(rollbackError)}`, diff --git a/src/auto-reply/reply/followup-runner.test.ts b/src/auto-reply/reply/followup-runner.test.ts index 5eb6722c93a..c24815e73db 100644 --- a/src/auto-reply/reply/followup-runner.test.ts +++ b/src/auto-reply/reply/followup-runner.test.ts @@ -1279,13 +1279,13 @@ describe("createFollowupRunner messaging tool dedupe", () => { persistSpy.mockRestore(); }); - it("does not fall back to dispatcher when cross-channel origin routing fails", async () => { - routeReplyMock.mockResolvedValueOnce({ + it("does not send cross-channel payload content to dispatcher when origin routing fails", async () => { + routeReplyMock.mockResolvedValue({ ok: false, error: "forced route failure", }); const { onBlockReply } = await runMessagingCase({ - agentResult: { payloads: [{ text: "hello world!" }] }, + agentResult: { payloads: [{ text: "hello world!" }, { text: "second payload" }] }, queued: { ...baseQueuedRun("webchat"), originatingChannel: "discord", @@ -1293,8 +1293,59 @@ describe("createFollowupRunner messaging tool dedupe", () => { } as FollowupRun, }); - expect(routeReplyMock).toHaveBeenCalled(); - expect(onBlockReply).not.toHaveBeenCalled(); + expect(routeReplyMock).toHaveBeenCalledTimes(2); + expect(onBlockReply).toHaveBeenCalledTimes(1); + expect(onBlockReply).toHaveBeenCalledWith( + expect.objectContaining({ + isError: true, + text: expect.stringContaining("could not deliver it to the originating channel"), + }), + ); + expect(onBlockReply).not.toHaveBeenCalledWith( + expect.objectContaining({ text: "hello world!" }), + ); + expect(onBlockReply).not.toHaveBeenCalledWith( + expect.objectContaining({ text: "second payload" }), + ); + }); + + it("does not emit cross-channel route-failure notice when a later payload routes", async () => { + routeReplyMock + .mockResolvedValueOnce({ + ok: false, + error: "transient route failure", + }) + .mockResolvedValueOnce({ ok: true }); + const { onBlockReply } = await runMessagingCase({ + agentResult: { payloads: [{ text: "hello world!" }, { text: "second payload" }] }, + queued: { + ...baseQueuedRun("webchat"), + originatingChannel: "discord", + originatingTo: "channel:C1", + } as FollowupRun, + }); + + expect(routeReplyMock).toHaveBeenCalledTimes(2); + expect(onBlockReply).not.toHaveBeenCalledWith( + expect.objectContaining({ + text: expect.stringContaining("could not deliver it to the originating channel"), + }), + ); + }); + + it("uses dispatcher when origin routing metadata is incomplete", async () => { + const { onBlockReply } = await runMessagingCase({ + agentResult: { payloads: [{ text: "hello world!" }] }, + queued: { + ...baseQueuedRun("webchat"), + originatingChannel: "discord", + originatingTo: undefined, + } as FollowupRun, + }); + + expect(routeReplyMock).not.toHaveBeenCalled(); + expect(onBlockReply).toHaveBeenCalledTimes(1); + expect(onBlockReply).toHaveBeenCalledWith(expect.objectContaining({ text: "hello world!" })); }); it("falls back to dispatcher when same-channel origin routing fails", async () => { diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts index 8badb983291..903daa6724a 100644 --- a/src/auto-reply/reply/followup-runner.ts +++ b/src/auto-reply/reply/followup-runner.ts @@ -9,6 +9,7 @@ import { resolveContextTokensForModel } from "../../agents/context.js"; import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js"; import { runWithModelFallback } from "../../agents/model-fallback.js"; import { isCliProvider } from "../../agents/model-selection.js"; +import { classifyEmbeddedPiRunResultForModelFallback } from "../../agents/pi-embedded-runner/result-fallback-classifier.js"; import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js"; import type { SessionEntry } from "../../config/sessions.js"; import type { TypingMode } from "../../config/types.js"; @@ -35,6 +36,8 @@ import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-r import { createTypingSignaler } from "./typing-mode.js"; import type { TypingController } from "./typing.js"; +type EmbeddedAgentRunResult = Awaited>; + export function createFollowupRunner(params: { opts?: GetReplyOptions; typing: TypingController; @@ -78,10 +81,14 @@ export function createFollowupRunner(params: { const shouldRouteToOriginating = isRoutableChannel(originatingChannel) && originatingTo; if (!shouldRouteToOriginating && !opts?.onBlockReply) { - logVerbose("followup queue: no onBlockReply handler; dropping payloads"); + defaultRuntime.error?.( + "followup queue: completed with payloads but no origin route or visible dispatcher is available", + ); return; } + let crossChannelRouteFailureNeedsNotice = false; + let routedAnyCrossChannelPayloadToOrigin = false; for (const payload of payloads) { if (!payload || !hasOutboundReplyContent(payload)) { continue; @@ -112,26 +119,49 @@ export function createFollowupRunner(params: { if (!result.ok) { const errorMsg = result.error ?? "unknown error"; logVerbose(`followup queue: route-reply failed: ${errorMsg}`); - // Fall back to the caller-provided dispatcher only when the - // originating channel matches the session's message provider. - // In that case onBlockReply was created by the same channel's - // handler and delivers to the correct destination. For true - // cross-channel routing (origin !== provider), falling back - // would send to the wrong channel, so we drop the payload. const provider = resolveOriginMessageProvider({ provider: queued.run.messageProvider, }); const origin = resolveOriginMessageProvider({ originatingChannel, }); - if (opts?.onBlockReply && origin && origin === provider) { - await opts.onBlockReply(payload); + if (opts?.onBlockReply) { + if (origin && origin === provider) { + await opts.onBlockReply(payload); + } else { + crossChannelRouteFailureNeedsNotice = true; + } + } else { + defaultRuntime.error?.(`followup queue: route-reply failed: ${errorMsg}`); + } + } else { + const provider = resolveOriginMessageProvider({ + provider: queued.run.messageProvider, + }); + const origin = resolveOriginMessageProvider({ + originatingChannel, + }); + if (origin && provider && origin !== provider) { + routedAnyCrossChannelPayloadToOrigin = true; } } } else if (opts?.onBlockReply) { await opts.onBlockReply(payload); } } + if ( + crossChannelRouteFailureNeedsNotice && + !routedAnyCrossChannelPayloadToOrigin && + opts?.onBlockReply + ) { + await opts.onBlockReply({ + text: + "Follow-up completed, but OpenClaw could not deliver it to the originating " + + "channel. The reply content was not forwarded to this channel to avoid " + + "cross-channel misdelivery.", + isError: true, + }); + } }; return async (queued: FollowupRun) => { @@ -195,7 +225,7 @@ export function createFollowupRunner(params: { ); replyOperation.setPhase("running"); try { - const fallbackResult = await runWithModelFallback({ + const fallbackResult = await runWithModelFallback({ cfg: runtimeConfig, provider: run.provider, model: run.model, @@ -206,8 +236,10 @@ export function createFollowupRunner(params: { agentId: run.agentId, sessionKey: run.sessionKey, }), + classifyResult: ({ result, provider, model }) => + classifyEmbeddedPiRunResultForModelFallback({ result, provider, model }), run: async (provider, model, runOptions) => { - const authProfile = resolveRunAuthProfile(run, provider); + const authProfile = resolveRunAuthProfile(run, provider, { config: runtimeConfig }); let attemptCompactionCount = 0; try { const result = await runEmbeddedPiAgent({ diff --git a/src/channels/plugins/message-action-discovery.ts b/src/channels/plugins/message-action-discovery.ts index d250b06c3e0..62087932946 100644 --- a/src/channels/plugins/message-action-discovery.ts +++ b/src/channels/plugins/message-action-discovery.ts @@ -231,6 +231,48 @@ export function listChannelMessageActions(cfg: OpenClawConfig): ChannelMessageAc return Array.from(actions); } +export function listCrossChannelSchemaSupportedMessageActions( + params: ChannelMessageActionDiscoveryParams & { + channel?: string; + }, +): ChannelMessageActionName[] { + const channelId = resolveMessageActionDiscoveryChannelId(params.channel); + if (!channelId) { + return []; + } + const pluginActions = resolveCurrentChannelMessageToolDiscoveryAdapter(channelId); + if (!pluginActions?.actions) { + return []; + } + const resolved = resolveMessageActionDiscoveryForPlugin({ + pluginId: pluginActions.pluginId, + actions: pluginActions.actions, + context: createMessageActionDiscoveryContext(params), + includeActions: true, + includeSchema: true, + }); + const schemaBlockedActions = new Set(); + for (const contribution of resolved.schemaContributions) { + if ((contribution.visibility ?? "current-channel") !== "current-channel") { + continue; + } + if (!Object.hasOwn(contribution, "actions")) { + return []; + } + const actions = contribution.actions; + if (!Array.isArray(actions)) { + return []; + } + if (actions.length === 0) { + continue; + } + for (const action of actions) { + schemaBlockedActions.add(action); + } + } + return resolved.actions.filter((action) => !schemaBlockedActions.has(action)); +} + export function listChannelMessageCapabilities(cfg: OpenClawConfig): ChannelMessageCapability[] { const capabilities = new Set(); for (const plugin of listChannelPlugins()) { diff --git a/src/channels/plugins/message-actions.test.ts b/src/channels/plugins/message-actions.test.ts index 177c2572333..c011c35b24f 100644 --- a/src/channels/plugins/message-actions.test.ts +++ b/src/channels/plugins/message-actions.test.ts @@ -11,6 +11,7 @@ import { __testing, channelSupportsMessageCapability, channelSupportsMessageCapabilityForChannel, + listCrossChannelSchemaSupportedMessageActions, listChannelMessageActions, listChannelMessageCapabilities, listChannelMessageCapabilitiesForChannel, @@ -192,6 +193,117 @@ describe("message action capability checks", () => { ).toHaveProperty("components"); }); + it("filters only actions that depend on current-channel-only schema", () => { + const scopedSchemaPlugin: ChannelPlugin = { + ...createChannelTestPluginBase({ + id: "demo-scoped-schema", + label: "Demo Scoped Schema", + capabilities: { chatTypes: ["direct", "group"] }, + config: { + listAccountIds: () => ["default"], + }, + }), + actions: { + describeMessageTool: () => ({ + actions: ["read", "list-pins", "unpin"], + schema: { + actions: ["unpin"], + properties: { + pinnedMessageId: Type.Optional(Type.String()), + }, + }, + }), + }, + }; + setActivePluginRegistry( + createTestRegistry([ + { pluginId: "demo-scoped-schema", source: "test", plugin: scopedSchemaPlugin }, + ]), + ); + + expect( + listCrossChannelSchemaSupportedMessageActions({ + cfg: {} as OpenClawConfig, + channel: "demo-scoped-schema", + }), + ).toEqual(["read", "list-pins"]); + }); + + it("keeps unscoped current-channel schema conservative for cross-channel actions", () => { + const unscopedSchemaPlugin: ChannelPlugin = { + ...createChannelTestPluginBase({ + id: "demo-unscoped-schema", + label: "Demo Unscoped Schema", + capabilities: { chatTypes: ["direct", "group"] }, + config: { + listAccountIds: () => ["default"], + }, + }), + actions: { + describeMessageTool: () => ({ + actions: ["read", "unpin"], + schema: { + properties: { + pinnedMessageId: Type.Optional(Type.String()), + }, + }, + }), + }, + }; + setActivePluginRegistry( + createTestRegistry([ + { pluginId: "demo-unscoped-schema", source: "test", plugin: unscopedSchemaPlugin }, + ]), + ); + + expect( + listCrossChannelSchemaSupportedMessageActions({ + cfg: {} as OpenClawConfig, + channel: "demo-unscoped-schema", + }), + ).toEqual([]); + }); + + it("treats empty current-channel schema action lists as blocking no cross-channel actions", () => { + const emptyScopedSchemaPlugin: ChannelPlugin = { + ...createChannelTestPluginBase({ + id: "demo-empty-scoped-schema", + label: "Demo Empty Scoped Schema", + capabilities: { chatTypes: ["direct", "group"] }, + config: { + listAccountIds: () => ["default"], + }, + }), + actions: { + describeMessageTool: () => ({ + actions: ["read", "list-pins"], + schema: { + actions: [], + properties: { + optionalChannelOnlyValue: Type.Optional(Type.String()), + }, + }, + }), + }, + }; + setActivePluginRegistry( + createTestRegistry([ + { + pluginId: "demo-empty-scoped-schema", + source: "test", + plugin: emptyScopedSchemaPlugin, + }, + ]), + ); + + expect( + listCrossChannelSchemaSupportedMessageActions({ + cfg: {} as OpenClawConfig, + channel: "demo-empty-scoped-schema", + }), + ).toEqual(["read", "list-pins"]); + }); + it("derives plugin-owned media-source params for the current action", () => { const mediaPlugin: ChannelPlugin = { ...createChannelTestPluginBase({ diff --git a/src/channels/plugins/types.core.ts b/src/channels/plugins/types.core.ts index b98840c7602..c476246f8fe 100644 --- a/src/channels/plugins/types.core.ts +++ b/src/channels/plugins/types.core.ts @@ -60,6 +60,12 @@ export type ChannelMessageActionDiscoveryContext = { */ export type ChannelMessageToolSchemaContribution = { properties: Record; + /** + * Actions whose validation depends on this schema fragment. Cross-channel + * discovery can hide only these actions when the fragment is current-channel + * scoped. Omit to keep the legacy conservative behavior. + */ + actions?: readonly ChannelMessageActionName[] | null; visibility?: "current-channel" | "all-configured"; }; diff --git a/src/plugins/provider-runtime.test.ts b/src/plugins/provider-runtime.test.ts index 8430d2221c5..39fe1230a9d 100644 --- a/src/plugins/provider-runtime.test.ts +++ b/src/plugins/provider-runtime.test.ts @@ -567,6 +567,71 @@ describe("provider-runtime", () => { expect(contribution?.sectionOverrides).toEqual({}); }); + it("ignores OpenAI plugin personality fallback for non-OpenAI GPT-5 providers", () => { + const contribution = resolveProviderSystemPromptContribution({ + provider: "openrouter", + config: { + plugins: { + entries: { + openai: { config: { personality: "off" } }, + }, + }, + }, + context: { + provider: "openrouter", + modelId: "openai/gpt-5.4", + promptMode: "full", + } as never, + }); + + expect(contribution?.stablePrefix).toContain(""); + expect(contribution?.sectionOverrides?.interaction_style).toContain( + "This is a live chat, not a memo.", + ); + }); + + it("keeps OpenAI plugin personality fallback for OpenAI-family GPT-5 providers", () => { + const contribution = resolveProviderSystemPromptContribution({ + provider: "openai-codex", + config: { + plugins: { + entries: { + openai: { config: { personality: "off" } }, + }, + }, + }, + context: { + provider: "openai-codex", + modelId: "gpt-5.4", + promptMode: "full", + } as never, + }); + + expect(contribution?.stablePrefix).toContain(""); + expect(contribution?.sectionOverrides).toEqual({}); + }); + + it("keeps OpenAI plugin personality fallback for Azure OpenAI GPT-5 providers", () => { + const contribution = resolveProviderSystemPromptContribution({ + provider: "azure-openai-responses", + config: { + plugins: { + entries: { + openai: { config: { personality: "off" } }, + }, + }, + }, + context: { + provider: "azure-openai-responses", + modelId: "gpt-5.4", + promptMode: "full", + } as never, + }); + + expect(contribution?.stablePrefix).toContain(""); + expect(contribution?.sectionOverrides).toEqual({}); + }); + it("does not apply the shared GPT-5 prompt overlay to non-GPT-5 models", () => { expect( resolveProviderSystemPromptContribution({ diff --git a/src/plugins/provider-runtime.ts b/src/plugins/provider-runtime.ts index 75e78ae7524..a6fc03ca69a 100644 --- a/src/plugins/provider-runtime.ts +++ b/src/plugins/provider-runtime.ts @@ -10,6 +10,7 @@ import type { OpenClawConfig } from "../config/types.openclaw.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { normalizeOptionalString } from "../shared/string-coerce.js"; import { sanitizeForLog } from "../terminal/ansi.js"; +import { resolvePluginDiscoveryProvidersRuntime } from "./provider-discovery.runtime.js"; import { __testing as providerHookRuntimeTesting, clearProviderRuntimeHookCache, @@ -20,7 +21,6 @@ import { resolveProviderRuntimePlugin, wrapProviderStreamFn, } from "./provider-hook-runtime.js"; -import { resolvePluginDiscoveryProvidersRuntime } from "./provider-discovery.runtime.js"; import { resolveBundledProviderPolicySurface } from "./provider-public-artifacts.js"; import type { ProviderRuntimeModel } from "./provider-runtime-model.types.js"; import type { ProviderThinkingProfile } from "./provider-thinking.types.js"; @@ -139,6 +139,7 @@ export function resolveProviderSystemPromptContribution(params: { return mergeProviderSystemPromptContributions( resolveGpt5SystemPromptContribution({ config: params.context.config ?? params.config, + providerId: params.context.provider ?? params.provider, modelId: params.context.modelId, }), resolveProviderRuntimePlugin(params)?.resolveSystemPromptContribution?.(params.context) ?? @@ -762,7 +763,9 @@ export function resolveProviderSyntheticAuthWithPlugin(params: { env?: NodeJS.ProcessEnv; context: ProviderResolveSyntheticAuthContext; }) { - const runtimeResolved = resolveProviderRuntimePlugin(params)?.resolveSyntheticAuth?.(params.context); + const runtimeResolved = resolveProviderRuntimePlugin(params)?.resolveSyntheticAuth?.( + params.context, + ); if (runtimeResolved) { return runtimeResolved; } @@ -770,7 +773,9 @@ export function resolveProviderSyntheticAuthWithPlugin(params: { config: params.config, workspaceDir: params.workspaceDir, env: params.env, - }).find((provider) => provider.id === params.provider)?.resolveSyntheticAuth?.(params.context); + }) + .find((provider) => provider.id === params.provider) + ?.resolveSyntheticAuth?.(params.context); } export function resolveExternalAuthProfilesWithPlugins(params: {