fix: harden OpenAI tool replay compatibility

This commit is contained in:
Peter Steinberger
2026-04-11 01:21:04 +01:00
parent f9a5e0a64f
commit ab687f4637
15 changed files with 170 additions and 26 deletions

View File

@@ -117,6 +117,7 @@ Docs: https://docs.openclaw.ai
- Agents/OpenAI replay: preserve malformed function-call arguments in stored assistant history, avoid double-encoding preserved raw strings on replay, and coerce replayed string args back to objects at Anthropic and Google provider boundaries. (#61956) Thanks @100yenadmin.
- Heartbeat/config: accept and honor `agents.defaults.heartbeat.timeoutSeconds` and per-agent heartbeat timeout overrides for heartbeat agent turns. (#64491) Thanks @cedillarack.
- OpenAI/Codex: add required Codex OAuth scopes, classify provider/runtime failures more clearly, and stop suggesting `/elevated full` when auto-approved host exec is unavailable. (#64439) Thanks @100yenadmin.
- Providers/OpenAI: add OpenAI/Codex tool-schema compatibility and preserve embedded-run replay/liveness truth across compaction retries and mutating side effects. (#64300) Thanks @100yenadmin.
## 2026.4.9

View File

@@ -565,6 +565,7 @@ export async function runEmbeddedPiAgent(
};
let authRetryPending = false;
let accumulatedReplayInvalid = false;
let accumulatedHadPotentialSideEffects = false;
// Hoisted so the retry-limit error path can use the most recent API total.
let lastTurnTotal: number | undefined;
while (true) {
@@ -676,6 +677,7 @@ export async function runEmbeddedPiAgent(
authProfileId: lastProfileId,
authProfileIdSource: lockedProfileId ? "user" : "auto",
initialReplayInvalid: accumulatedReplayInvalid,
initialHadPotentialSideEffects: accumulatedHadPotentialSideEffects,
authStorage,
modelRegistry,
agentId: workspaceResolution.agentId,
@@ -760,6 +762,9 @@ export async function runEmbeddedPiAgent(
if (resolveReplayInvalidForAttempt(null)) {
accumulatedReplayInvalid = true;
}
if (attempt.replayMetadata.hadPotentialSideEffects) {
accumulatedHadPotentialSideEffects = true;
}
const formattedAssistantErrorText = lastAssistant
? formatAssistantErrorText(lastAssistant, {
cfg: params.config,

View File

@@ -87,6 +87,8 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => {
getMessagingToolSentMediaUrls: () => [] as string[],
getMessagingToolSentTargets: () => [] as MessagingToolSend[],
getSuccessfulCronAdds: () => 0,
getReplayInvalid: () => false,
getHadPotentialSideEffects: () => false,
didSendViaMessagingTool: () => false,
didSendDeterministicApprovalPrompt: () => false,
getLastToolError: () => undefined,
@@ -627,6 +629,8 @@ export function createSubscriptionMock(): SubscriptionMock {
getMessagingToolSentMediaUrls: () => [] as string[],
getMessagingToolSentTargets: () => [] as MessagingToolSend[],
getSuccessfulCronAdds: () => 0,
getReplayInvalid: () => false,
getHadPotentialSideEffects: () => false,
didSendViaMessagingTool: () => false,
didSendDeterministicApprovalPrompt: () => false,
getLastToolError: () => undefined,

View File

@@ -1409,6 +1409,7 @@ export async function runEmbeddedAttempt(
session: activeSession,
runId: params.runId,
initialReplayInvalid: params.initialReplayInvalid,
initialHadPotentialSideEffects: params.initialHadPotentialSideEffects,
hookRunner: getGlobalHookRunner() ?? undefined,
verboseLevel: params.verboseLevel,
reasoningMode: params.reasoningLevel ?? "off",
@@ -1446,6 +1447,8 @@ export async function runEmbeddedAttempt(
getMessagingToolSentMediaUrls,
getMessagingToolSentTargets,
getSuccessfulCronAdds,
getReplayInvalid,
getHadPotentialSideEffects,
didSendViaMessagingTool,
getLastToolError,
setTerminalLifecycleMeta,
@@ -2272,12 +2275,19 @@ export async function runEmbeddedAttempt(
});
}
const observedReplayMetadata = buildAttemptReplayMetadata({
toolMetas: toolMetasNormalized,
didSendViaMessagingTool: didSendViaMessagingTool(),
successfulCronAdds: getSuccessfulCronAdds(),
});
const replayMetadata = {
hadPotentialSideEffects:
observedReplayMetadata.hadPotentialSideEffects || getHadPotentialSideEffects(),
replaySafe: observedReplayMetadata.replaySafe && !getReplayInvalid(),
};
return {
replayMetadata: buildAttemptReplayMetadata({
toolMetas: toolMetasNormalized,
didSendViaMessagingTool: didSendViaMessagingTool(),
successfulCronAdds: getSuccessfulCronAdds(),
}),
replayMetadata,
itemLifecycle: getItemLifecycle(),
setTerminalLifecycleMeta,
aborted,

View File

@@ -19,6 +19,7 @@ type EmbeddedRunAttemptBase = Omit<
export type EmbeddedRunAttemptParams = EmbeddedRunAttemptBase & {
initialReplayInvalid?: boolean;
initialHadPotentialSideEffects?: boolean;
/** Pluggable context engine for ingest/assemble/compact lifecycle. */
contextEngine?: ContextEngine;
/** Resolved model context window in tokens for assemble/compact budgeting. */

View File

@@ -233,6 +233,31 @@ describe("handleAgentEnd", () => {
});
});
it("marks incomplete tool-use lifecycle end state before runner finalization", async () => {
const onAgentEvent = vi.fn();
const ctx = createContext(
{
role: "assistant",
stopReason: "toolUse",
content: [],
},
{ onAgentEvent },
);
ctx.state.livenessState = "working";
ctx.state.assistantTexts = [];
await handleAgentEnd(ctx);
expect(onAgentEvent).toHaveBeenCalledWith({
stream: "lifecycle",
data: {
phase: "end",
livenessState: "abandoned",
replayInvalid: true,
},
});
});
it("flushes orphaned tool media as a media-only block reply", async () => {
const ctx = createContext(undefined);
ctx.state.pendingToolMediaUrls = ["/tmp/reply.opus"];

View File

@@ -39,17 +39,18 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext): void | Promise<
const lastAssistant = ctx.state.lastAssistant;
const isError = isAssistantMessage(lastAssistant) && lastAssistant.stopReason === "error";
let lifecycleErrorText: string | undefined;
const replayInvalid = ctx.state.replayInvalid === true ? true : undefined;
const hasAssistantVisibleText =
Array.isArray(ctx.state.assistantTexts) &&
ctx.state.assistantTexts.some((text) => hasAssistantVisibleReply({ text }));
const hadDeterministicSideEffect =
(ctx.state.messagingToolSentTexts?.length ?? 0) > 0 ||
(ctx.state.messagingToolSentMediaUrls?.length ?? 0) > 0 ||
(ctx.state.successfulCronAdds ?? 0) > 0;
const incompleteTerminalAssistant =
!hasAssistantVisibleText &&
isAssistantMessage(lastAssistant) &&
lastAssistant.stopReason === "toolUse";
const replayInvalid =
ctx.state.replayInvalid === true || incompleteTerminalAssistant ? true : undefined;
const derivedWorkingTerminalState = isError
? "blocked"
: replayInvalid && !hasAssistantVisibleText && !hadDeterministicSideEffect
: replayInvalid && !hasAssistantVisibleText
? "abandoned"
: ctx.state.livenessState;
const livenessState =

View File

@@ -49,6 +49,7 @@ function createTestContext(): {
pendingToolAudioAsVoice: false,
deterministicApprovalPromptPending: false,
replayInvalid: false,
hadPotentialSideEffects: false,
messagingToolSentTexts: [],
messagingToolSentTextsNormalized: [],
messagingToolSentMediaUrls: [],
@@ -278,6 +279,7 @@ describe("handleToolExecutionEnd mutating failure recovery", () => {
);
expect(ctx.state.replayInvalid).toBe(true);
expect(ctx.state.hadPotentialSideEffects).toBe(true);
});
it("keeps successful mutating retries replay-invalid after an earlier tool failure", async () => {
@@ -335,6 +337,7 @@ describe("handleToolExecutionEnd mutating failure recovery", () => {
expect(ctx.state.lastToolError).toBeUndefined();
expect(ctx.state.replayInvalid).toBe(true);
expect(ctx.state.hadPotentialSideEffects).toBe(true);
});
});

View File

@@ -796,6 +796,7 @@ export async function handleToolExecutionEnd(
}
if (completedMutatingAction) {
ctx.state.replayInvalid = true;
ctx.state.hadPotentialSideEffects = true;
}
// Commit messaging tool text on success, discard on error.

View File

@@ -66,6 +66,7 @@ export type EmbeddedPiSubscribeState = {
compactionRetryPromise: Promise<void> | null;
unsubscribed: boolean;
replayInvalid?: boolean;
hadPotentialSideEffects?: boolean;
livenessState?: EmbeddedRunLivenessState;
messagingToolSentTexts: string[];
@@ -163,6 +164,7 @@ export type ToolHandlerState = Pick<
| "pendingToolAudioAsVoice"
| "deterministicApprovalPromptPending"
| "replayInvalid"
| "hadPotentialSideEffects"
| "messagingToolSentTexts"
| "messagingToolSentTextsNormalized"
| "messagingToolSentMediaUrls"

View File

@@ -554,7 +554,7 @@ describe("subscribeEmbeddedPiSession", () => {
const { session, emit } = createStubSessionHarness();
const onAgentEvent = vi.fn();
subscribeEmbeddedPiSession({
const subscription = subscribeEmbeddedPiSession({
session,
runId: "run-replay-invalid-compaction",
onAgentEvent,
@@ -576,6 +576,8 @@ describe("subscribeEmbeddedPiSession", () => {
emit({ type: "auto_compaction_end", willRetry: true, result: { summary: "compacted" } });
emit({ type: "agent_end" });
expect(subscription.getReplayInvalid()).toBe(true);
expect(subscription.getHadPotentialSideEffects()).toBe(true);
const payloads = extractAgentEventPayloads(onAgentEvent.mock.calls);
expect(payloads).toContainEqual(
expect.objectContaining({

View File

@@ -106,6 +106,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
compactionRetryPromise: null,
unsubscribed: false,
replayInvalid: params.initialReplayInvalid === true,
hadPotentialSideEffects: params.initialHadPotentialSideEffects === true,
livenessState: "working",
messagingToolSentTexts: [],
messagingToolSentTextsNormalized: [],
@@ -695,6 +696,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
state.deterministicApprovalPromptPending = false;
state.deterministicApprovalPromptSent = false;
state.replayInvalid = state.replayInvalid || params.initialReplayInvalid === true;
state.hadPotentialSideEffects =
state.hadPotentialSideEffects || params.initialHadPotentialSideEffects === true;
state.livenessState = "working";
resetAssistantMessageState(0);
};
@@ -794,6 +797,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
getMessagingToolSentMediaUrls: () => messagingToolSentMediaUrls.slice(),
getMessagingToolSentTargets: () => messagingToolSentTargets.slice(),
getSuccessfulCronAdds: () => state.successfulCronAdds,
getReplayInvalid: () => state.replayInvalid === true,
getHadPotentialSideEffects: () => state.hadPotentialSideEffects === true,
// Returns true if any messaging tool successfully sent a message.
// Used to suppress agent's confirmation text (e.g., "Respondi no Telegram!")
// which is generated AFTER the tool sends the actual answer.

View File

@@ -13,6 +13,7 @@ export type SubscribeEmbeddedPiSessionParams = {
session: AgentSession;
runId: string;
initialReplayInvalid?: boolean;
initialHadPotentialSideEffects?: boolean;
hookRunner?: HookRunner;
verboseLevel?: VerboseLevel;
reasoningMode?: ReasoningLevel;

View File

@@ -101,6 +101,39 @@ describe("buildProviderToolCompatFamilyHooks", () => {
});
});
it("preserves nested empty property schemas and object annotations", () => {
const hooks = buildProviderToolCompatFamilyHooks("openai");
const parameters = {
type: "object",
properties: {
payload: {},
mode: {
type: "string",
default: {},
const: {},
},
},
required: ["payload", "mode"],
additionalProperties: false,
};
const tools = [{ name: "demo", description: "", parameters }] as never;
const normalized = hooks.normalizeToolSchemas({
provider: "openai",
modelId: "gpt-5.4",
modelApi: "openai-responses",
model: {
provider: "openai",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
id: "gpt-5.4",
} as never,
tools,
});
expect(normalized[0]?.parameters).toEqual(parameters);
});
it("does not tighten permissive object schemas just to satisfy strict mode", () => {
const hooks = buildProviderToolCompatFamilyHooks("openai");
const permissiveParameters = {

View File

@@ -183,7 +183,7 @@ export function normalizeOpenAIToolSchemas(
}
function normalizeOpenAIStrictCompatSchema(schema: unknown): unknown {
return normalizeOpenAIStrictCompatSchemaRecursive(schema);
return normalizeOpenAIStrictCompatSchemaRecursive(schema, { promoteEmptyObject: true });
}
function shouldApplyOpenAIToolCompat(ctx: ProviderNormalizeToolSchemasContext): boolean {
@@ -217,11 +217,62 @@ function isOpenAICodexBaseUrl(baseUrl: string): boolean {
return /^https:\/\/chatgpt\.com\/backend-api(?:\/|$)/i.test(baseUrl);
}
function normalizeOpenAIStrictCompatSchemaRecursive(schema: unknown): unknown {
type NormalizeOpenAIStrictCompatOptions = {
promoteEmptyObject: boolean;
};
const OPENAI_STRICT_COMPAT_SCHEMA_MAP_KEYS = new Set([
"$defs",
"definitions",
"dependentSchemas",
"patternProperties",
"properties",
]);
const OPENAI_STRICT_COMPAT_SCHEMA_NESTED_KEYS = new Set([
"additionalProperties",
"allOf",
"anyOf",
"contains",
"else",
"if",
"items",
"not",
"oneOf",
"prefixItems",
"propertyNames",
"then",
"unevaluatedItems",
"unevaluatedProperties",
]);
function normalizeOpenAIStrictCompatSchemaMap(schema: unknown): unknown {
if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
return schema;
}
let changed = false;
const normalized: Record<string, unknown> = {};
for (const [key, value] of Object.entries(schema as Record<string, unknown>)) {
const next = normalizeOpenAIStrictCompatSchemaRecursive(value, {
promoteEmptyObject: false,
});
normalized[key] = next;
changed ||= next !== value;
}
return changed ? normalized : schema;
}
function normalizeOpenAIStrictCompatSchemaRecursive(
schema: unknown,
options: NormalizeOpenAIStrictCompatOptions,
): unknown {
if (Array.isArray(schema)) {
let changed = false;
const normalized = schema.map((entry) => {
const next = normalizeOpenAIStrictCompatSchemaRecursive(entry);
const next = normalizeOpenAIStrictCompatSchemaRecursive(entry, {
promoteEmptyObject: false,
});
changed ||= next !== entry;
return next;
});
@@ -235,22 +286,21 @@ function normalizeOpenAIStrictCompatSchemaRecursive(schema: unknown): unknown {
let changed = false;
const normalized: Record<string, unknown> = {};
for (const [key, value] of Object.entries(record)) {
const next =
key === "properties" && value && typeof value === "object" && !Array.isArray(value)
? Object.fromEntries(
Object.entries(value as Record<string, unknown>).map(
([propertyName, propertyValue]) => [
propertyName,
normalizeOpenAIStrictCompatSchemaRecursive(propertyValue),
],
),
)
: normalizeOpenAIStrictCompatSchemaRecursive(value);
const next = OPENAI_STRICT_COMPAT_SCHEMA_MAP_KEYS.has(key)
? normalizeOpenAIStrictCompatSchemaMap(value)
: OPENAI_STRICT_COMPAT_SCHEMA_NESTED_KEYS.has(key)
? normalizeOpenAIStrictCompatSchemaRecursive(value, {
promoteEmptyObject: false,
})
: value;
normalized[key] = next;
changed ||= next !== value;
}
if (Object.keys(normalized).length === 0) {
if (!options.promoteEmptyObject) {
return schema;
}
return {
type: "object",
properties: {},