From 2fb05bc402f1c5574c01b7e23142159f79bf805a Mon Sep 17 00:00:00 2001 From: stain lu <109842185+stainlu@users.noreply.github.com> Date: Sat, 9 May 2026 19:29:08 +0800 Subject: [PATCH] fix: harden agent recovery failures (#79729) Fixes #79688. Fixes #79712. --- CHANGELOG.md | 1 + src/agents/pi-embedded-runner/run.ts | 32 +++++++++++-------- .../run/assistant-failover.ts | 2 +- .../run/attempt.transcript-policy.test.ts | 19 +++++++++++ src/agents/sandbox/fs-paths.test.ts | 28 ++++++++++++++-- src/agents/sandbox/fs-paths.ts | 29 +++++++++++++---- src/agents/transcript-policy.test.ts | 19 +++++++++++ src/agents/transcript-policy.ts | 20 +++++++++++- 8 files changed, 126 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a19ebe48fb..d365f933bfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -167,6 +167,7 @@ Docs: https://docs.openclaw.ai - OpenAI/Codex: install the Codex runtime plugin from npm during OpenAI onboarding and load it automatically for implicit OpenAI model routes, while preserving manual PI runtime overrides. Fixes #79358. - OpenAI/realtime voice: defer `response.create` while a realtime response is still active, retry after `response.done`/`response.cancelled`, and align GA input transcription/noise-reduction defaults with the Codex realtime reference so Discord/Voice Call consult results can resume speaking instead of tripping the active-response race. - OpenAI/realtime voice: avoid duplicate barge-in cancellation requests, log realtime model interruption/cutoff events in Discord voice logs, and treat OpenAI's no-active-response cancellation reply as a completed cancel so Discord voice sessions do not wedge pending speech after fast interruptions. +- Agents/runtime: strip trailing assistant prefill for Claude-family OpenAI Responses routes, persist prompt/assistant profile cooldown marks before fallback, and show the configured container root in sandbox escape diagnostics. Fixes #79688 and #79712. Thanks @stainlu and @mushuiyu886. - Gateway: avoid false degraded event-loop health during rapid health/readiness/status probes unless sustained load has delay co-evidence, while keeping hard delay detection immediate. (#77028) Thanks @rubencu. - Markdown: keep blockquote spans off trailing paragraph separators. Fixes #79646. - Plugin SDK/LM Studio: recover Harmony plain-text tool calls from LM Studio streams. Fixes #78326. diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index da91920652e..93cf14dbf8a 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -2042,13 +2042,15 @@ export async function runEmbeddedPiAgent( (await advanceAuthProfile()) ) { if (failedPromptProfileId && promptProfileFailureReason) { - maybeMarkAuthProfileFailure({ - profileId: failedPromptProfileId, - reason: promptProfileFailureReason, - modelId, - }).catch((err) => - log.warn(`deferred prompt profile failure mark failed: ${String(err)}`), - ); + try { + await maybeMarkAuthProfileFailure({ + profileId: failedPromptProfileId, + reason: promptProfileFailureReason, + modelId, + }); + } catch (err) { + log.warn(`prompt profile failure mark failed: ${String(err)}`); + } } traceAttempts.push({ provider, @@ -2077,13 +2079,15 @@ export async function runEmbeddedPiAgent( }); } if (failedPromptProfileId && promptProfileFailureReason) { - maybeMarkAuthProfileFailure({ - profileId: failedPromptProfileId, - reason: promptProfileFailureReason, - modelId, - }).catch((err) => - log.warn(`deferred prompt profile failure mark failed: ${String(err)}`), - ); + try { + await maybeMarkAuthProfileFailure({ + profileId: failedPromptProfileId, + reason: promptProfileFailureReason, + modelId, + }); + } catch (err) { + log.warn(`prompt profile failure mark failed: ${String(err)}`); + } } const fallbackThinking = pickFallbackThinkingLevel({ message: errorText, diff --git a/src/agents/pi-embedded-runner/run/assistant-failover.ts b/src/agents/pi-embedded-runner/run/assistant-failover.ts index 5c19b7c3eef..db931f48201 100644 --- a/src/agents/pi-embedded-runner/run/assistant-failover.ts +++ b/src/agents/pi-embedded-runner/run/assistant-failover.ts @@ -153,7 +153,7 @@ export async function handleAssistantFailover(params: { } const rotated = await params.advanceAuthProfile(); - void markFailedProfile(); + await markFailedProfile(); if (params.timedOut && !params.isProbeSession && failedProfileId) { params.warn(`Profile ${failedProfileId} timed out. Trying next account...`); } diff --git a/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts index 64263c9aa3c..0a53ba2ea29 100644 --- a/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts @@ -81,6 +81,7 @@ describe("resolveAttemptTranscriptPolicy", () => { sanitizeToolCallIds: true, toolCallIdMode: "strict", repairToolUseResultPairing: true, + validateAnthropicTurns: false, allowSyntheticToolResults: false, }); expect(resolveProviderRuntimePluginMock).toHaveBeenCalledWith({ @@ -90,4 +91,22 @@ describe("resolveAttemptTranscriptPolicy", () => { env, }); }); + + it("inherits Claude-family OpenAI Responses turn validation from legacy fallback", () => { + const policy = resolveAttemptTranscriptPolicy({ + runtimePlanModelContext: { + workspaceDir: "/tmp/openclaw-transcript-policy", + modelApi: "openai-responses", + }, + provider: "anthropic-foundry", + modelId: "anthropic-foundry/claude-opus-4-7", + }); + + expect(policy).toMatchObject({ + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + validateAnthropicTurns: true, + validateGeminiTurns: false, + }); + }); }); diff --git a/src/agents/sandbox/fs-paths.test.ts b/src/agents/sandbox/fs-paths.test.ts index c6d2232363d..8780428272f 100644 --- a/src/agents/sandbox/fs-paths.test.ts +++ b/src/agents/sandbox/fs-paths.test.ts @@ -89,7 +89,7 @@ describe("resolveSandboxFsPathWithMounts", () => { expect(resolved.writable).toBe(true); }); - it("preserves legacy sandbox-root error for outside paths", () => { + it("includes the container workspace root in outside-path errors", () => { const sandbox = createSandbox(); const mounts = buildSandboxFsMounts(sandbox); expect(() => @@ -100,7 +100,31 @@ describe("resolveSandboxFsPathWithMounts", () => { defaultContainerRoot: sandbox.containerWorkdir, mounts, }), - ).toThrow(/Path escapes sandbox root/); + ).toThrow( + /Path escapes sandbox root \(.*container root \/workspace\): \/etc\/passwd\. Use a path under \/workspace\/ instead\./, + ); + }); + + it("uses the configured custom container root in outside-path errors", () => { + const sandbox = createSandbox({ + containerWorkdir: "/sandbox-root", + docker: { + ...createSandbox().docker, + workdir: "/sandbox-root", + }, + }); + const mounts = buildSandboxFsMounts(sandbox); + expect(() => + resolveSandboxFsPathWithMounts({ + filePath: "/tmp/healthcheck-alert/config.json", + cwd: sandbox.workspaceDir, + defaultWorkspaceRoot: sandbox.workspaceDir, + defaultContainerRoot: sandbox.containerWorkdir, + mounts, + }), + ).toThrow( + /Path escapes sandbox root \(.*container root \/sandbox-root\): \/tmp\/healthcheck-alert\/config\.json\. Use a path under \/sandbox-root\/ instead\./, + ); }); it("prefers custom bind mounts over default workspace mount at /workspace", () => { diff --git a/src/agents/sandbox/fs-paths.ts b/src/agents/sandbox/fs-paths.ts index 1f134e511ee..991840b319f 100644 --- a/src/agents/sandbox/fs-paths.ts +++ b/src/agents/sandbox/fs-paths.ts @@ -151,13 +151,30 @@ export function resolveSandboxFsPathWithMounts(params: { }; } - // Preserve legacy error wording for out-of-sandbox paths. - resolveSandboxPath({ - filePath: input, - cwd: params.cwd, - root: params.defaultWorkspaceRoot, + const escapeMessage = formatSandboxRootEscapeMessage({ + input, + defaultWorkspaceRoot: params.defaultWorkspaceRoot, + defaultContainerRoot: params.defaultContainerRoot, }); - throw new Error(`Path escapes sandbox root (${params.defaultWorkspaceRoot}): ${input}`); + try { + resolveSandboxPath({ + filePath: input, + cwd: params.cwd, + root: params.defaultWorkspaceRoot, + }); + } catch { + throw new Error(escapeMessage); + } + throw new Error(escapeMessage); +} + +function formatSandboxRootEscapeMessage(params: { + input: string; + defaultWorkspaceRoot: string; + defaultContainerRoot: string; +}): string { + const containerRoot = normalizeContainerPath(params.defaultContainerRoot); + return `Path escapes sandbox root (${params.defaultWorkspaceRoot}; container root ${containerRoot}): ${params.input}. Use a path under ${containerRoot}/ instead.`; } function compareMountsByContainerPath(a: SandboxFsMount, b: SandboxFsMount): number { diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts index 10b3cc437d3..20bba832d84 100644 --- a/src/agents/transcript-policy.test.ts +++ b/src/agents/transcript-policy.test.ts @@ -366,6 +366,25 @@ describe("resolveTranscriptPolicy", () => { expectStrictOpenAiCompatibleReplayDefaults("custom-openai-proxy"); }); + it("enables assistant prefill stripping for unowned Claude OpenAI Responses routes (#79688)", () => { + const claudePolicy = resolveTranscriptPolicy({ + provider: "anthropic-foundry", + modelId: "anthropic-foundry/claude-opus-4-7", + modelApi: "openai-responses", + }); + expect(claudePolicy.sanitizeToolCallIds).toBe(true); + expect(claudePolicy.toolCallIdMode).toBe("strict"); + expect(claudePolicy.validateAnthropicTurns).toBe(true); + expect(claudePolicy.validateGeminiTurns).toBe(false); + + const gptPolicy = resolveTranscriptPolicy({ + provider: "custom-openai-proxy", + modelId: "gpt-5.4", + modelApi: "openai-responses", + }); + expect(gptPolicy.validateAnthropicTurns).toBe(false); + }); + it("preserves thinking blocks for newer Claude models in unowned Anthropic transport fallback", () => { // Opus 4.6 via custom proxy: should NOT drop thinking blocks const opus46 = resolveTranscriptPolicy({ diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index 5107249312c..dd44ef1d943 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -71,6 +71,19 @@ function isAnthropicApi(modelApi?: string | null): boolean { return modelApi === "anthropic-messages" || modelApi === "bedrock-converse-stream"; } +function isOpenAiResponsesCompatibleApi(modelApi?: string | null): boolean { + return ( + modelApi === "openai-responses" || + modelApi === "openai-codex-responses" || + modelApi === "azure-openai-responses" + ); +} + +function isClaudeFamilyModelId(modelId?: string | null): boolean { + const id = normalizeLowercaseStringOrEmpty(modelId); + return /(?:^|[./:_-])claude(?:$|[./:_-])/.test(id); +} + /** * Provides a narrow replay-policy fallback for providers that do not have an * owning runtime plugin. @@ -101,6 +114,9 @@ function buildUnownedProviderTransportReplayFallback(params: { } const modelId = normalizeLowercaseStringOrEmpty(params.modelId); + const isClaudeOpenAiResponses = isOpenAiResponsesCompatibleApi(params.modelApi) + ? isClaudeFamilyModelId(modelId) + : false; return { ...(isGoogle || isAnthropic ? { sanitizeMode: "full" as const } : {}), ...(isGoogle || isAnthropic || requiresOpenAiCompatibleToolIdSanitization @@ -126,7 +142,9 @@ function buildUnownedProviderTransportReplayFallback(params: { : {}), ...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}), - ...(isAnthropic || isStrictOpenAiCompatible ? { validateAnthropicTurns: true } : {}), + ...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses + ? { validateAnthropicTurns: true } + : {}), ...(isGoogle || isAnthropic ? { allowSyntheticToolResults: true } : {}), }; }