fix: harden OpenAI tool replay compatibility

2026-04-17 12:11:20 +00:00 · 2026-04-11 01:21:04 +01:00
parent f9a5e0a64f
commit ab687f4637
15 changed files with 170 additions and 26 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -117,6 +117,7 @@ Docs: https://docs.openclaw.ai
 - Agents/OpenAI replay: preserve malformed function-call arguments in stored assistant history, avoid double-encoding preserved raw strings on replay, and coerce replayed string args back to objects at Anthropic and Google provider boundaries. (#61956) Thanks @100yenadmin.
 - Heartbeat/config: accept and honor `agents.defaults.heartbeat.timeoutSeconds` and per-agent heartbeat timeout overrides for heartbeat agent turns. (#64491) Thanks @cedillarack.
 - OpenAI/Codex: add required Codex OAuth scopes, classify provider/runtime failures more clearly, and stop suggesting `/elevated full` when auto-approved host exec is unavailable. (#64439) Thanks @100yenadmin.
+- Providers/OpenAI: add OpenAI/Codex tool-schema compatibility and preserve embedded-run replay/liveness truth across compaction retries and mutating side effects. (#64300) Thanks @100yenadmin.

 ## 2026.4.9

--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -565,6 +565,7 @@ export async function runEmbeddedPiAgent(
        };
        let authRetryPending = false;
        let accumulatedReplayInvalid = false;
+        let accumulatedHadPotentialSideEffects = false;
        // Hoisted so the retry-limit error path can use the most recent API total.
        let lastTurnTotal: number | undefined;
        while (true) {
@@ -676,6 +677,7 @@ export async function runEmbeddedPiAgent(
            authProfileId: lastProfileId,
            authProfileIdSource: lockedProfileId ? "user" : "auto",
            initialReplayInvalid: accumulatedReplayInvalid,
+            initialHadPotentialSideEffects: accumulatedHadPotentialSideEffects,
            authStorage,
            modelRegistry,
            agentId: workspaceResolution.agentId,
@@ -760,6 +762,9 @@ export async function runEmbeddedPiAgent(
          if (resolveReplayInvalidForAttempt(null)) {
            accumulatedReplayInvalid = true;
          }
+          if (attempt.replayMetadata.hadPotentialSideEffects) {
+            accumulatedHadPotentialSideEffects = true;
+          }
          const formattedAssistantErrorText = lastAssistant
            ? formatAssistantErrorText(lastAssistant, {
                cfg: params.config,
--- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts
@@ -87,6 +87,8 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => {
        getMessagingToolSentMediaUrls: () => [] as string[],
        getMessagingToolSentTargets: () => [] as MessagingToolSend[],
        getSuccessfulCronAdds: () => 0,
+        getReplayInvalid: () => false,
+        getHadPotentialSideEffects: () => false,
        didSendViaMessagingTool: () => false,
        didSendDeterministicApprovalPrompt: () => false,
        getLastToolError: () => undefined,
@@ -627,6 +629,8 @@ export function createSubscriptionMock(): SubscriptionMock {
    getMessagingToolSentMediaUrls: () => [] as string[],
    getMessagingToolSentTargets: () => [] as MessagingToolSend[],
    getSuccessfulCronAdds: () => 0,
+    getReplayInvalid: () => false,
+    getHadPotentialSideEffects: () => false,
    didSendViaMessagingTool: () => false,
    didSendDeterministicApprovalPrompt: () => false,
    getLastToolError: () => undefined,
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -1409,6 +1409,7 @@ export async function runEmbeddedAttempt(
          session: activeSession,
          runId: params.runId,
          initialReplayInvalid: params.initialReplayInvalid,
+          initialHadPotentialSideEffects: params.initialHadPotentialSideEffects,
          hookRunner: getGlobalHookRunner() ?? undefined,
          verboseLevel: params.verboseLevel,
          reasoningMode: params.reasoningLevel ?? "off",
@@ -1446,6 +1447,8 @@ export async function runEmbeddedAttempt(
        getMessagingToolSentMediaUrls,
        getMessagingToolSentTargets,
        getSuccessfulCronAdds,
+        getReplayInvalid,
+        getHadPotentialSideEffects,
        didSendViaMessagingTool,
        getLastToolError,
        setTerminalLifecycleMeta,
@@ -2272,12 +2275,19 @@ export async function runEmbeddedAttempt(
          });
      }

+      const observedReplayMetadata = buildAttemptReplayMetadata({
+        toolMetas: toolMetasNormalized,
+        didSendViaMessagingTool: didSendViaMessagingTool(),
+        successfulCronAdds: getSuccessfulCronAdds(),
+      });
+      const replayMetadata = {
+        hadPotentialSideEffects:
+          observedReplayMetadata.hadPotentialSideEffects || getHadPotentialSideEffects(),
+        replaySafe: observedReplayMetadata.replaySafe && !getReplayInvalid(),
+      };
+
      return {
-        replayMetadata: buildAttemptReplayMetadata({
-          toolMetas: toolMetasNormalized,
-          didSendViaMessagingTool: didSendViaMessagingTool(),
-          successfulCronAdds: getSuccessfulCronAdds(),
-        }),
+        replayMetadata,
        itemLifecycle: getItemLifecycle(),
        setTerminalLifecycleMeta,
        aborted,
--- a/src/agents/pi-embedded-runner/run/types.ts
+++ b/src/agents/pi-embedded-runner/run/types.ts
@@ -19,6 +19,7 @@ type EmbeddedRunAttemptBase = Omit<

 export type EmbeddedRunAttemptParams = EmbeddedRunAttemptBase & {
  initialReplayInvalid?: boolean;
+  initialHadPotentialSideEffects?: boolean;
  /** Pluggable context engine for ingest/assemble/compact lifecycle. */
  contextEngine?: ContextEngine;
  /** Resolved model context window in tokens for assemble/compact budgeting. */
--- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts
@@ -233,6 +233,31 @@ describe("handleAgentEnd", () => {
    });
  });

+  it("marks incomplete tool-use lifecycle end state before runner finalization", async () => {
+    const onAgentEvent = vi.fn();
+    const ctx = createContext(
+      {
+        role: "assistant",
+        stopReason: "toolUse",
+        content: [],
+      },
+      { onAgentEvent },
+    );
+    ctx.state.livenessState = "working";
+    ctx.state.assistantTexts = [];
+
+    await handleAgentEnd(ctx);
+
+    expect(onAgentEvent).toHaveBeenCalledWith({
+      stream: "lifecycle",
+      data: {
+        phase: "end",
+        livenessState: "abandoned",
+        replayInvalid: true,
+      },
+    });
+  });
+
  it("flushes orphaned tool media as a media-only block reply", async () => {
    const ctx = createContext(undefined);
    ctx.state.pendingToolMediaUrls = ["/tmp/reply.opus"];
--- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts
@@ -39,17 +39,18 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise<
  const lastAssistant = ctx.state.lastAssistant;
  const isError = isAssistantMessage(lastAssistant) && lastAssistant.stopReason === "error";
  let lifecycleErrorText: string | undefined;
-  const replayInvalid = ctx.state.replayInvalid === true ? true : undefined;
  const hasAssistantVisibleText =
    Array.isArray(ctx.state.assistantTexts) &&
    ctx.state.assistantTexts.some((text) => hasAssistantVisibleReply({ text }));
-  const hadDeterministicSideEffect =
-    (ctx.state.messagingToolSentTexts?.length ?? 0) > 0 ||
-    (ctx.state.messagingToolSentMediaUrls?.length ?? 0) > 0 ||
-    (ctx.state.successfulCronAdds ?? 0) > 0;
+  const incompleteTerminalAssistant =
+    !hasAssistantVisibleText &&
+    isAssistantMessage(lastAssistant) &&
+    lastAssistant.stopReason === "toolUse";
+  const replayInvalid =
+    ctx.state.replayInvalid === true || incompleteTerminalAssistant ? true : undefined;
  const derivedWorkingTerminalState = isError
    ? "blocked"
-    : replayInvalid && !hasAssistantVisibleText && !hadDeterministicSideEffect
+    : replayInvalid && !hasAssistantVisibleText
      ? "abandoned"
      : ctx.state.livenessState;
  const livenessState =
--- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
@@ -49,6 +49,7 @@ function createTestContext(): {
      pendingToolAudioAsVoice: false,
      deterministicApprovalPromptPending: false,
      replayInvalid: false,
+      hadPotentialSideEffects: false,
      messagingToolSentTexts: [],
      messagingToolSentTextsNormalized: [],
      messagingToolSentMediaUrls: [],
@@ -278,6 +279,7 @@ describe("handleToolExecutionEnd mutating failure recovery", () => {
    );

    expect(ctx.state.replayInvalid).toBe(true);
+    expect(ctx.state.hadPotentialSideEffects).toBe(true);
  });

  it("keeps successful mutating retries replay-invalid after an earlier tool failure", async () => {
@@ -335,6 +337,7 @@ describe("handleToolExecutionEnd mutating failure recovery", () => {

    expect(ctx.state.lastToolError).toBeUndefined();
    expect(ctx.state.replayInvalid).toBe(true);
+    expect(ctx.state.hadPotentialSideEffects).toBe(true);
  });
 });

--- a/src/agents/pi-embedded-subscribe.handlers.tools.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts
@@ -796,6 +796,7 @@ export async function handleToolExecutionEnd(
  }
  if (completedMutatingAction) {
    ctx.state.replayInvalid = true;
+    ctx.state.hadPotentialSideEffects = true;
  }

  // Commit messaging tool text on success, discard on error.
--- a/src/agents/pi-embedded-subscribe.handlers.types.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.types.ts
@@ -66,6 +66,7 @@ export type EmbeddedPiSubscribeState = {
  compactionRetryPromise: Promise<void> | null;
  unsubscribed: boolean;
  replayInvalid?: boolean;
+  hadPotentialSideEffects?: boolean;
  livenessState?: EmbeddedRunLivenessState;

  messagingToolSentTexts: string[];
@@ -163,6 +164,7 @@ export type ToolHandlerState = Pick<
  | "pendingToolAudioAsVoice"
  | "deterministicApprovalPromptPending"
  | "replayInvalid"
+  | "hadPotentialSideEffects"
  | "messagingToolSentTexts"
  | "messagingToolSentTextsNormalized"
  | "messagingToolSentMediaUrls"
--- a/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.test.ts
+++ b/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.test.ts
@@ -554,7 +554,7 @@ describe("subscribeEmbeddedPiSession", () => {
    const { session, emit } = createStubSessionHarness();
    const onAgentEvent = vi.fn();

-    subscribeEmbeddedPiSession({
+    const subscription = subscribeEmbeddedPiSession({
      session,
      runId: "run-replay-invalid-compaction",
      onAgentEvent,
@@ -576,6 +576,8 @@ describe("subscribeEmbeddedPiSession", () => {
    emit({ type: "auto_compaction_end", willRetry: true, result: { summary: "compacted" } });
    emit({ type: "agent_end" });

+    expect(subscription.getReplayInvalid()).toBe(true);
+    expect(subscription.getHadPotentialSideEffects()).toBe(true);
    const payloads = extractAgentEventPayloads(onAgentEvent.mock.calls);
    expect(payloads).toContainEqual(
      expect.objectContaining({
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -106,6 +106,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
    compactionRetryPromise: null,
    unsubscribed: false,
    replayInvalid: params.initialReplayInvalid === true,
+    hadPotentialSideEffects: params.initialHadPotentialSideEffects === true,
    livenessState: "working",
    messagingToolSentTexts: [],
    messagingToolSentTextsNormalized: [],
@@ -695,6 +696,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
    state.deterministicApprovalPromptPending = false;
    state.deterministicApprovalPromptSent = false;
    state.replayInvalid = state.replayInvalid || params.initialReplayInvalid === true;
+    state.hadPotentialSideEffects =
+      state.hadPotentialSideEffects || params.initialHadPotentialSideEffects === true;
    state.livenessState = "working";
    resetAssistantMessageState(0);
  };
@@ -794,6 +797,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
    getMessagingToolSentMediaUrls: () => messagingToolSentMediaUrls.slice(),
    getMessagingToolSentTargets: () => messagingToolSentTargets.slice(),
    getSuccessfulCronAdds: () => state.successfulCronAdds,
+    getReplayInvalid: () => state.replayInvalid === true,
+    getHadPotentialSideEffects: () => state.hadPotentialSideEffects === true,
    // Returns true if any messaging tool successfully sent a message.
    // Used to suppress agent's confirmation text (e.g., "Respondi no Telegram!")
    // which is generated AFTER the tool sends the actual answer.
--- a/src/agents/pi-embedded-subscribe.types.ts
+++ b/src/agents/pi-embedded-subscribe.types.ts
@@ -13,6 +13,7 @@ export type SubscribeEmbeddedPiSessionParams = {
  session: AgentSession;
  runId: string;
  initialReplayInvalid?: boolean;
+  initialHadPotentialSideEffects?: boolean;
  hookRunner?: HookRunner;
  verboseLevel?: VerboseLevel;
  reasoningMode?: ReasoningLevel;
--- a/src/plugin-sdk/provider-tools.test.ts
+++ b/src/plugin-sdk/provider-tools.test.ts
@@ -101,6 +101,39 @@ describe("buildProviderToolCompatFamilyHooks", () => {
    });
  });

+  it("preserves nested empty property schemas and object annotations", () => {
+    const hooks = buildProviderToolCompatFamilyHooks("openai");
+    const parameters = {
+      type: "object",
+      properties: {
+        payload: {},
+        mode: {
+          type: "string",
+          default: {},
+          const: {},
+        },
+      },
+      required: ["payload", "mode"],
+      additionalProperties: false,
+    };
+    const tools = [{ name: "demo", description: "", parameters }] as never;
+
+    const normalized = hooks.normalizeToolSchemas({
+      provider: "openai",
+      modelId: "gpt-5.4",
+      modelApi: "openai-responses",
+      model: {
+        provider: "openai",
+        api: "openai-responses",
+        baseUrl: "https://api.openai.com/v1",
+        id: "gpt-5.4",
+      } as never,
+      tools,
+    });
+
+    expect(normalized[0]?.parameters).toEqual(parameters);
+  });
+
  it("does not tighten permissive object schemas just to satisfy strict mode", () => {
    const hooks = buildProviderToolCompatFamilyHooks("openai");
    const permissiveParameters = {
--- a/src/plugin-sdk/provider-tools.ts
+++ b/src/plugin-sdk/provider-tools.ts
@@ -183,7 +183,7 @@ export function normalizeOpenAIToolSchemas(
 }

 function normalizeOpenAIStrictCompatSchema(schema: unknown): unknown {
-  return normalizeOpenAIStrictCompatSchemaRecursive(schema);
+  return normalizeOpenAIStrictCompatSchemaRecursive(schema, { promoteEmptyObject: true });
 }

 function shouldApplyOpenAIToolCompat(ctx: ProviderNormalizeToolSchemasContext): boolean {
@@ -217,11 +217,62 @@ function isOpenAICodexBaseUrl(baseUrl: string): boolean {
  return /^https:\/\/chatgpt\.com\/backend-api(?:\/|$)/i.test(baseUrl);
 }

-function normalizeOpenAIStrictCompatSchemaRecursive(schema: unknown): unknown {
+type NormalizeOpenAIStrictCompatOptions = {
+  promoteEmptyObject: boolean;
+};
+
+const OPENAI_STRICT_COMPAT_SCHEMA_MAP_KEYS = new Set([
+  "$defs",
+  "definitions",
+  "dependentSchemas",
+  "patternProperties",
+  "properties",
+]);
+
+const OPENAI_STRICT_COMPAT_SCHEMA_NESTED_KEYS = new Set([
+  "additionalProperties",
+  "allOf",
+  "anyOf",
+  "contains",
+  "else",
+  "if",
+  "items",
+  "not",
+  "oneOf",
+  "prefixItems",
+  "propertyNames",
+  "then",
+  "unevaluatedItems",
+  "unevaluatedProperties",
+]);
+
+function normalizeOpenAIStrictCompatSchemaMap(schema: unknown): unknown {
+  if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
+    return schema;
+  }
+
+  let changed = false;
+  const normalized: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(schema as Record<string, unknown>)) {
+    const next = normalizeOpenAIStrictCompatSchemaRecursive(value, {
+      promoteEmptyObject: false,
+    });
+    normalized[key] = next;
+    changed ||= next !== value;
+  }
+  return changed ? normalized : schema;
+}
+
+function normalizeOpenAIStrictCompatSchemaRecursive(
+  schema: unknown,
+  options: NormalizeOpenAIStrictCompatOptions,
+): unknown {
  if (Array.isArray(schema)) {
    let changed = false;
    const normalized = schema.map((entry) => {
-      const next = normalizeOpenAIStrictCompatSchemaRecursive(entry);
+      const next = normalizeOpenAIStrictCompatSchemaRecursive(entry, {
+        promoteEmptyObject: false,
+      });
      changed ||= next !== entry;
      return next;
    });
@@ -235,22 +286,21 @@ function normalizeOpenAIStrictCompatSchemaRecursive(schema: unknown): unknown {
  let changed = false;
  const normalized: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(record)) {
-    const next =
-      key === "properties" && value && typeof value === "object" && !Array.isArray(value)
-        ? Object.fromEntries(
-            Object.entries(value as Record<string, unknown>).map(
-              ([propertyName, propertyValue]) => [
-                propertyName,
-                normalizeOpenAIStrictCompatSchemaRecursive(propertyValue),
-              ],
-            ),
-          )
-        : normalizeOpenAIStrictCompatSchemaRecursive(value);
+    const next = OPENAI_STRICT_COMPAT_SCHEMA_MAP_KEYS.has(key)
+      ? normalizeOpenAIStrictCompatSchemaMap(value)
+      : OPENAI_STRICT_COMPAT_SCHEMA_NESTED_KEYS.has(key)
+        ? normalizeOpenAIStrictCompatSchemaRecursive(value, {
+            promoteEmptyObject: false,
+          })
+        : value;
    normalized[key] = next;
    changed ||= next !== value;
  }

  if (Object.keys(normalized).length === 0) {
+    if (!options.promoteEmptyObject) {
+      return schema;
+    }
    return {
      type: "object",
      properties: {},