From 2fb05bc402f1c5574c01b7e23142159f79bf805a Mon Sep 17 00:00:00 2001
From: stain lu <109842185+stainlu@users.noreply.github.com>
Date: Sat, 9 May 2026 19:29:08 +0800
Subject: [PATCH] fix: harden agent recovery failures (#79729)

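Await auth-profile failure marks during prompt and assistant failover so
the cooldown mark is persisted before the fallback attempt proceeds,
instead of being fired without awaiting.

Enable Anthropic turn validation in the unowned-provider transcript
fallback when a Claude-family model id is routed over an OpenAI
Responses-compatible API, while leaving non-Claude model ids unchanged.

Include the configured container root in the sandbox
"Path escapes sandbox root" error, with a hint to use a path under it.

Fixes #79688. Fixes #79712.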
---
 CHANGELOG.md                                  |  1 +
 src/agents/pi-embedded-runner/run.ts          | 32 +++++++++++--------
 .../run/assistant-failover.ts                 |  2 +-
 .../run/attempt.transcript-policy.test.ts     | 19 +++++++++++
 src/agents/sandbox/fs-paths.test.ts           | 28 ++++++++++++++--
 src/agents/sandbox/fs-paths.ts                | 29 +++++++++++++----
 src/agents/transcript-policy.test.ts          | 19 +++++++++++
 src/agents/transcript-policy.ts               | 20 +++++++++++-
 8 files changed, 126 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7a19ebe48fb..d365f933bfe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -167,6 +167,7 @@ Docs: https://docs.openclaw.ai
 - OpenAI/Codex: install the Codex runtime plugin from npm during OpenAI onboarding and load it automatically for implicit OpenAI model routes, while preserving manual PI runtime overrides. Fixes #79358.
 - OpenAI/realtime voice: defer `response.create` while a realtime response is still active, retry after `response.done`/`response.cancelled`, and align GA input transcription/noise-reduction defaults with the Codex realtime reference so Discord/Voice Call consult results can resume speaking instead of tripping the active-response race.
 - OpenAI/realtime voice: avoid duplicate barge-in cancellation requests, log realtime model interruption/cutoff events in Discord voice logs, and treat OpenAI's no-active-response cancellation reply as a completed cancel so Discord voice sessions do not wedge pending speech after fast interruptions.
+- Agents/runtime: strip trailing assistant prefill for Claude-family OpenAI Responses routes, persist prompt/assistant profile cooldown marks before fallback, and show the configured container root in sandbox escape diagnostics. Fixes #79688 and #79712. Thanks @stainlu and @mushuiyu886.
 - Gateway: avoid false degraded event-loop health during rapid health/readiness/status probes unless sustained load has delay co-evidence, while keeping hard delay detection immediate. (#77028) Thanks @rubencu.
 - Markdown: keep blockquote spans off trailing paragraph separators. Fixes #79646.
 - Plugin SDK/LM Studio: recover Harmony plain-text tool calls from LM Studio streams. Fixes #78326.
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index da91920652e..93cf14dbf8a 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -2042,13 +2042,15 @@ export async function runEmbeddedPiAgent(
               (await advanceAuthProfile())
             ) {
               if (failedPromptProfileId && promptProfileFailureReason) {
-                maybeMarkAuthProfileFailure({
-                  profileId: failedPromptProfileId,
-                  reason: promptProfileFailureReason,
-                  modelId,
-                }).catch((err) =>
-                  log.warn(`deferred prompt profile failure mark failed: ${String(err)}`),
-                );
+                try {
+                  await maybeMarkAuthProfileFailure({
+                    profileId: failedPromptProfileId,
+                    reason: promptProfileFailureReason,
+                    modelId,
+                  });
+                } catch (err) {
+                  log.warn(`prompt profile failure mark failed: ${String(err)}`);
+                }
               }
               traceAttempts.push({
                 provider,
@@ -2077,13 +2079,15 @@ export async function runEmbeddedPiAgent(
               });
             }
             if (failedPromptProfileId && promptProfileFailureReason) {
-              maybeMarkAuthProfileFailure({
-                profileId: failedPromptProfileId,
-                reason: promptProfileFailureReason,
-                modelId,
-              }).catch((err) =>
-                log.warn(`deferred prompt profile failure mark failed: ${String(err)}`),
-              );
+              try {
+                await maybeMarkAuthProfileFailure({
+                  profileId: failedPromptProfileId,
+                  reason: promptProfileFailureReason,
+                  modelId,
+                });
+              } catch (err) {
+                log.warn(`prompt profile failure mark failed: ${String(err)}`);
+              }
             }
             const fallbackThinking = pickFallbackThinkingLevel({
               message: errorText,
diff --git a/src/agents/pi-embedded-runner/run/assistant-failover.ts b/src/agents/pi-embedded-runner/run/assistant-failover.ts
index 5c19b7c3eef..db931f48201 100644
--- a/src/agents/pi-embedded-runner/run/assistant-failover.ts
+++ b/src/agents/pi-embedded-runner/run/assistant-failover.ts
@@ -153,7 +153,7 @@ export async function handleAssistantFailover(params: {
     }
 
     const rotated = await params.advanceAuthProfile();
-    void markFailedProfile();
+    await markFailedProfile();
     if (params.timedOut && !params.isProbeSession && failedProfileId) {
       params.warn(`Profile ${failedProfileId} timed out. Trying next account...`);
     }
diff --git a/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts
index 64263c9aa3c..0a53ba2ea29 100644
--- a/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.transcript-policy.test.ts
@@ -81,6 +81,7 @@ describe("resolveAttemptTranscriptPolicy", () => {
       sanitizeToolCallIds: true,
       toolCallIdMode: "strict",
       repairToolUseResultPairing: true,
+      validateAnthropicTurns: false,
       allowSyntheticToolResults: false,
     });
     expect(resolveProviderRuntimePluginMock).toHaveBeenCalledWith({
@@ -90,4 +91,22 @@ describe("resolveAttemptTranscriptPolicy", () => {
       env,
     });
   });
+
+  it("inherits Claude-family OpenAI Responses turn validation from legacy fallback", () => {
+    const policy = resolveAttemptTranscriptPolicy({
+      runtimePlanModelContext: {
+        workspaceDir: "/tmp/openclaw-transcript-policy",
+        modelApi: "openai-responses",
+      },
+      provider: "anthropic-foundry",
+      modelId: "anthropic-foundry/claude-opus-4-7",
+    });
+
+    expect(policy).toMatchObject({
+      sanitizeToolCallIds: true,
+      toolCallIdMode: "strict",
+      validateAnthropicTurns: true,
+      validateGeminiTurns: false,
+    });
+  });
 });
diff --git a/src/agents/sandbox/fs-paths.test.ts b/src/agents/sandbox/fs-paths.test.ts
index c6d2232363d..8780428272f 100644
--- a/src/agents/sandbox/fs-paths.test.ts
+++ b/src/agents/sandbox/fs-paths.test.ts
@@ -89,7 +89,7 @@ describe("resolveSandboxFsPathWithMounts", () => {
     expect(resolved.writable).toBe(true);
   });
 
-  it("preserves legacy sandbox-root error for outside paths", () => {
+  it("includes the container workspace root in outside-path errors", () => {
     const sandbox = createSandbox();
     const mounts = buildSandboxFsMounts(sandbox);
     expect(() =>
@@ -100,7 +100,31 @@ describe("resolveSandboxFsPathWithMounts", () => {
         defaultContainerRoot: sandbox.containerWorkdir,
         mounts,
       }),
-    ).toThrow(/Path escapes sandbox root/);
+    ).toThrow(
+      /Path escapes sandbox root \(.*container root \/workspace\): \/etc\/passwd\. Use a path under \/workspace\/ instead\./,
+    );
+  });
+
+  it("uses the configured custom container root in outside-path errors", () => {
+    const sandbox = createSandbox({
+      containerWorkdir: "/sandbox-root",
+      docker: {
+        ...createSandbox().docker,
+        workdir: "/sandbox-root",
+      },
+    });
+    const mounts = buildSandboxFsMounts(sandbox);
+    expect(() =>
+      resolveSandboxFsPathWithMounts({
+        filePath: "/tmp/healthcheck-alert/config.json",
+        cwd: sandbox.workspaceDir,
+        defaultWorkspaceRoot: sandbox.workspaceDir,
+        defaultContainerRoot: sandbox.containerWorkdir,
+        mounts,
+      }),
+    ).toThrow(
+      /Path escapes sandbox root \(.*container root \/sandbox-root\): \/tmp\/healthcheck-alert\/config\.json\. Use a path under \/sandbox-root\/ instead\./,
+    );
   });
 
   it("prefers custom bind mounts over default workspace mount at /workspace", () => {
diff --git a/src/agents/sandbox/fs-paths.ts b/src/agents/sandbox/fs-paths.ts
index 1f134e511ee..991840b319f 100644
--- a/src/agents/sandbox/fs-paths.ts
+++ b/src/agents/sandbox/fs-paths.ts
@@ -151,13 +151,30 @@ export function resolveSandboxFsPathWithMounts(params: {
     };
   }
 
-  // Preserve legacy error wording for out-of-sandbox paths.
-  resolveSandboxPath({
-    filePath: input,
-    cwd: params.cwd,
-    root: params.defaultWorkspaceRoot,
+  const escapeMessage = formatSandboxRootEscapeMessage({
+    input,
+    defaultWorkspaceRoot: params.defaultWorkspaceRoot,
+    defaultContainerRoot: params.defaultContainerRoot,
   });
-  throw new Error(`Path escapes sandbox root (${params.defaultWorkspaceRoot}): ${input}`);
+  try {
+    resolveSandboxPath({
+      filePath: input,
+      cwd: params.cwd,
+      root: params.defaultWorkspaceRoot,
+    });
+  } catch {
+    throw new Error(escapeMessage);
+  }
+  throw new Error(escapeMessage);
+}
+
+function formatSandboxRootEscapeMessage(params: {
+  input: string;
+  defaultWorkspaceRoot: string;
+  defaultContainerRoot: string;
+}): string {
+  const containerRoot = normalizeContainerPath(params.defaultContainerRoot);
+  return `Path escapes sandbox root (${params.defaultWorkspaceRoot}; container root ${containerRoot}): ${params.input}. Use a path under ${containerRoot}/ instead.`;
 }
 
 function compareMountsByContainerPath(a: SandboxFsMount, b: SandboxFsMount): number {
diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts
index 10b3cc437d3..20bba832d84 100644
--- a/src/agents/transcript-policy.test.ts
+++ b/src/agents/transcript-policy.test.ts
@@ -366,6 +366,25 @@ describe("resolveTranscriptPolicy", () => {
     expectStrictOpenAiCompatibleReplayDefaults("custom-openai-proxy");
   });
 
+  it("enables assistant prefill stripping for unowned Claude OpenAI Responses routes (#79688)", () => {
+    const claudePolicy = resolveTranscriptPolicy({
+      provider: "anthropic-foundry",
+      modelId: "anthropic-foundry/claude-opus-4-7",
+      modelApi: "openai-responses",
+    });
+    expect(claudePolicy.sanitizeToolCallIds).toBe(true);
+    expect(claudePolicy.toolCallIdMode).toBe("strict");
+    expect(claudePolicy.validateAnthropicTurns).toBe(true);
+    expect(claudePolicy.validateGeminiTurns).toBe(false);
+
+    const gptPolicy = resolveTranscriptPolicy({
+      provider: "custom-openai-proxy",
+      modelId: "gpt-5.4",
+      modelApi: "openai-responses",
+    });
+    expect(gptPolicy.validateAnthropicTurns).toBe(false);
+  });
+
   it("preserves thinking blocks for newer Claude models in unowned Anthropic transport fallback", () => {
     // Opus 4.6 via custom proxy: should NOT drop thinking blocks
     const opus46 = resolveTranscriptPolicy({
diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts
index 5107249312c..dd44ef1d943 100644
--- a/src/agents/transcript-policy.ts
+++ b/src/agents/transcript-policy.ts
@@ -71,6 +71,19 @@ function isAnthropicApi(modelApi?: string | null): boolean {
   return modelApi === "anthropic-messages" || modelApi === "bedrock-converse-stream";
 }
 
+function isOpenAiResponsesCompatibleApi(modelApi?: string | null): boolean {
+  return (
+    modelApi === "openai-responses" ||
+    modelApi === "openai-codex-responses" ||
+    modelApi === "azure-openai-responses"
+  );
+}
+
+function isClaudeFamilyModelId(modelId?: string | null): boolean {
+  const id = normalizeLowercaseStringOrEmpty(modelId);
+  return /(?:^|[./:_-])claude(?:$|[./:_-])/.test(id);
+}
+
 /**
  * Provides a narrow replay-policy fallback for providers that do not have an
  * owning runtime plugin.
@@ -101,6 +114,9 @@ function buildUnownedProviderTransportReplayFallback(params: {
   }
 
   const modelId = normalizeLowercaseStringOrEmpty(params.modelId);
+  const isClaudeOpenAiResponses = isOpenAiResponsesCompatibleApi(params.modelApi)
+    ? isClaudeFamilyModelId(modelId)
+    : false;
   return {
     ...(isGoogle || isAnthropic ? { sanitizeMode: "full" as const } : {}),
     ...(isGoogle || isAnthropic || requiresOpenAiCompatibleToolIdSanitization
@@ -126,7 +142,9 @@ function buildUnownedProviderTransportReplayFallback(params: {
       : {}),
     ...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
     ...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
-    ...(isAnthropic || isStrictOpenAiCompatible ? { validateAnthropicTurns: true } : {}),
+    ...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses
+      ? { validateAnthropicTurns: true }
+      : {}),
     ...(isGoogle || isAnthropic ? { allowSyntheticToolResults: true } : {}),
   };
 }