Mirror of https://github.com/openclaw/openclaw.git (synced 2026-04-11 01:01:13 +00:00).
Commit: fix(openai): tighten gpt chat action turns
This commit is contained in:
@@ -119,6 +119,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Plugins/auth-choice: apply provider-owned auth config patches without recursively preserving replaced default-model maps, so Anthropic Claude CLI and similar migrations can intentionally swap model allowlists during onboarding and setup instead of accumulating stale entries. Thanks @vincentkoc.
|
||||
- Plugins/cache: inherit the active gateway workspace for provider, web-search, and web-fetch snapshot loads when callers omit `workspaceDir`, so compatible plugin registries and snapshot caches stop missing on gateway-owned runtime paths. (#61138) Thanks @jzakirov.
|
||||
- Plugins/facades: back-fill facade sentinels before tracked-plugin resolution re-enters config loading, so facade exports stay defined during circular provider normalization. (#61180) Thanks @adam91holt.
|
||||
- Providers/OpenAI GPT: treat short approval turns like `ok do it` and `go ahead` as immediate action turns, and trim overly memo-like GPT-5 chat confirmations so OpenAI replies stay shorter and more conversational by default.
|
||||
- Plugins/install: preserve unsafe override flags across linked plugin and hook-pack probes so local `--link` installs honor the documented override behavior. (#60624) Thanks @JerrettDavis.
|
||||
- Plugins/Kimi Coding: parse tagged tool calls and keep Anthropic-native tool payloads so Kimi coding endpoints execute tools instead of echoing raw markup. (#60051, #60391) Thanks @obviyus and @Eric-Guo.
|
||||
- Plugins/marketplace: block remote marketplace symlink escapes without breaking ordinary local marketplace install paths. (#60556) Thanks @eleqtrizit.
|
||||
|
||||
@@ -276,6 +276,9 @@ describe("openai plugin", () => {
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
"If the user asks you to do the work, start in the same turn instead of restating the plan.",
|
||||
);
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
'If the latest user message is a short approval like "ok do it" or "go ahead", skip the recap and start acting.',
|
||||
);
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
"Commentary-only turns are incomplete when the next action is clear.",
|
||||
);
|
||||
|
||||
@@ -6,6 +6,7 @@ Be warm, collaborative, and quietly supportive.
|
||||
Communicate like a capable teammate sitting next to the user.
|
||||
Keep progress updates clear and concrete.
|
||||
If the user asks you to do the work, start in the same turn instead of restating the plan.
|
||||
If the latest user message is a short approval like "ok do it" or "go ahead", skip the recap and start acting.
|
||||
Commentary-only turns are incomplete when the next action is clear.
|
||||
Prefer the first real tool step over more narration.
|
||||
If work will take more than a moment, send a brief progress update while acting.
|
||||
|
||||
@@ -10,6 +10,8 @@ import {
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
import {
|
||||
extractPlanningOnlyPlanDetails,
|
||||
isLikelyExecutionAckPrompt,
|
||||
resolveAckExecutionFastPathInstruction,
|
||||
resolvePlanningOnlyRetryInstruction,
|
||||
} from "./run/incomplete-turn.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
@@ -101,6 +103,22 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
|
||||
expect(retryInstruction).toBeNull();
|
||||
});
|
||||
|
||||
it("detects short execution approval prompts", () => {
|
||||
expect(isLikelyExecutionAckPrompt("ok do it")).toBe(true);
|
||||
expect(isLikelyExecutionAckPrompt("go ahead")).toBe(true);
|
||||
expect(isLikelyExecutionAckPrompt("Can you do it?")).toBe(false);
|
||||
});
|
||||
|
||||
it("adds an ack-turn fast-path instruction for GPT action turns", () => {
|
||||
const instruction = resolveAckExecutionFastPathInstruction({
|
||||
provider: "openai",
|
||||
modelId: "gpt-5.4",
|
||||
prompt: "go ahead",
|
||||
});
|
||||
|
||||
expect(instruction).toContain("Do not recap or restate the plan");
|
||||
});
|
||||
|
||||
it("extracts structured steps from planning-only narration", () => {
|
||||
expect(
|
||||
extractPlanningOnlyPlanDetails(
|
||||
|
||||
@@ -82,6 +82,7 @@ import {
|
||||
scrubAnthropicRefusalMagic,
|
||||
} from "./run/helpers.js";
|
||||
import {
|
||||
resolveAckExecutionFastPathInstruction,
|
||||
resolveIncompleteTurnPayloadText,
|
||||
extractPlanningOnlyPlanDetails,
|
||||
resolvePlanningOnlyRetryInstruction,
|
||||
@@ -311,6 +312,11 @@ export async function runEmbeddedPiAgent(
|
||||
let planningOnlyRetryAttempts = 0;
|
||||
let lastRetryFailoverReason: FailoverReason | null = null;
|
||||
let planningOnlyRetryInstruction: string | null = null;
|
||||
const ackExecutionFastPathInstruction = resolveAckExecutionFastPathInstruction({
|
||||
provider,
|
||||
modelId,
|
||||
prompt: params.prompt,
|
||||
});
|
||||
let rateLimitProfileRotations = 0;
|
||||
let timeoutCompactionAttempts = 0;
|
||||
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
|
||||
@@ -483,9 +489,16 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
const basePrompt =
|
||||
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
|
||||
const prompt = planningOnlyRetryInstruction
|
||||
? `${basePrompt}\n\n${planningOnlyRetryInstruction}`
|
||||
: basePrompt;
|
||||
const promptAdditions = [
|
||||
ackExecutionFastPathInstruction,
|
||||
planningOnlyRetryInstruction,
|
||||
].filter(
|
||||
(value): value is string => typeof value === "string" && value.trim().length > 0,
|
||||
);
|
||||
const prompt =
|
||||
promptAdditions.length > 0
|
||||
? `${basePrompt}\n\n${promptAdditions.join("\n\n")}`
|
||||
: basePrompt;
|
||||
let resolvedStreamApiKey: string | undefined;
|
||||
if (!runtimeAuthState && apiKeyInfo) {
|
||||
resolvedStreamApiKey = (apiKeyInfo as ApiKeyInfo).apiKey;
|
||||
|
||||
@@ -34,9 +34,27 @@ const PLANNING_ONLY_PROMISE_RE =
|
||||
/\b(?:i(?:'ll| will)|let me|going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
|
||||
const PLANNING_ONLY_COMPLETION_RE =
|
||||
/\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
|
||||
const ACK_EXECUTION_NORMALIZED_SET = new Set([
|
||||
"ok",
|
||||
"okay",
|
||||
"ok do it",
|
||||
"okay do it",
|
||||
"do it",
|
||||
"go ahead",
|
||||
"please do",
|
||||
"sounds good",
|
||||
"sounds good do it",
|
||||
"ship it",
|
||||
"fix it",
|
||||
"make it so",
|
||||
"yes do it",
|
||||
"yep do it",
|
||||
]);
|
||||
|
||||
export const PLANNING_ONLY_RETRY_INSTRUCTION =
|
||||
"The previous assistant turn only described the plan. Do not restate the plan. Act now: take the first concrete tool action you can. If a real blocker prevents action, reply with the exact blocker in one sentence.";
|
||||
export const ACK_EXECUTION_FAST_PATH_INSTRUCTION =
|
||||
"The latest user message is a short approval to proceed. Do not recap or restate the plan. Start with the first concrete tool action immediately. Keep any user-facing follow-up brief and natural.";
|
||||
|
||||
export type PlanningOnlyPlanDetails = {
|
||||
explanation: string;
|
||||
@@ -93,6 +111,40 @@ function shouldApplyPlanningOnlyRetryGuard(params: {
|
||||
return /^gpt-5(?:[.-]|$)/i.test(params.modelId ?? "");
|
||||
}
|
||||
|
||||
function normalizeAckPrompt(text: string): string {
|
||||
return text
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(/[`"'.,!?]+/g, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function isLikelyExecutionAckPrompt(text: string): boolean {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed || trimmed.length > 80 || trimmed.includes("\n") || trimmed.includes("?")) {
|
||||
return false;
|
||||
}
|
||||
return ACK_EXECUTION_NORMALIZED_SET.has(normalizeAckPrompt(trimmed));
|
||||
}
|
||||
|
||||
export function resolveAckExecutionFastPathInstruction(params: {
|
||||
provider?: string;
|
||||
modelId?: string;
|
||||
prompt: string;
|
||||
}): string | null {
|
||||
if (
|
||||
!shouldApplyPlanningOnlyRetryGuard({
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
}) ||
|
||||
!isLikelyExecutionAckPrompt(params.prompt)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
return ACK_EXECUTION_FAST_PATH_INSTRUCTION;
|
||||
}
|
||||
|
||||
function extractPlanningOnlySteps(text: string): string[] {
|
||||
const lines = text
|
||||
.split(/\r?\n/)
|
||||
|
||||
@@ -305,6 +305,117 @@ describe("runAgentTurnWithFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("trims chatty GPT ack-turn final prose", async () => {
|
||||
state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
|
||||
result: await params.run("openai", "gpt-5.4"),
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
attempts: [],
|
||||
}));
|
||||
state.runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({
|
||||
payloads: [
|
||||
{
|
||||
text: [
|
||||
"I updated the prompt overlay and tightened the runtime guard.",
|
||||
"I also added the ack-turn fast path so short approvals skip the recap.",
|
||||
"The reply-side brevity cap now trims long prose-heavy GPT confirmations.",
|
||||
"I updated tests for the overlay, retry guard, and reply normalization.",
|
||||
"Everything is wired together and ready for verification.",
|
||||
].join(" "),
|
||||
},
|
||||
],
|
||||
meta: {},
|
||||
}));
|
||||
|
||||
const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
|
||||
const followupRun = createFollowupRun();
|
||||
followupRun.run.provider = "openai";
|
||||
followupRun.run.model = "gpt-5.4";
|
||||
const result = await runAgentTurnWithFallback({
|
||||
commandBody: "ok do it",
|
||||
followupRun,
|
||||
sessionCtx: {
|
||||
Provider: "whatsapp",
|
||||
MessageSid: "msg",
|
||||
} as unknown as TemplateContext,
|
||||
opts: {},
|
||||
typingSignals: createMockTypingSignaler(),
|
||||
blockReplyPipeline: null,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
applyReplyToMode: (payload) => payload,
|
||||
shouldEmitToolResult: () => true,
|
||||
shouldEmitToolOutput: () => false,
|
||||
pendingToolTasks: new Set(),
|
||||
resetSessionAfterCompactionFailure: async () => false,
|
||||
resetSessionAfterRoleOrderingConflict: async () => false,
|
||||
isHeartbeat: false,
|
||||
sessionKey: "main",
|
||||
getActiveSessionEntry: () => undefined,
|
||||
resolvedVerboseLevel: "off",
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("success");
|
||||
if (result.kind === "success") {
|
||||
expect(result.runResult.payloads?.[0]?.text).toBe(
|
||||
"I updated the prompt overlay and tightened the runtime guard. I also added the ack-turn fast path so short approvals skip the recap. The reply-side brevity cap now trims long prose-heavy GPT confirmations...",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it("does not trim GPT replies when the user asked for depth", async () => {
|
||||
state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
|
||||
result: await params.run("openai", "gpt-5.4"),
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
attempts: [],
|
||||
}));
|
||||
const longDetailedReply = [
|
||||
"Here is the detailed breakdown.",
|
||||
"First, the runner now detects short approval turns and skips the recap path.",
|
||||
"Second, the reply layer scores long prose-heavy GPT confirmations and trims them only in chat-style turns.",
|
||||
"Third, code fences and richer structured outputs are left untouched so technical answers stay intact.",
|
||||
"Finally, the overlay reinforces that this is a live chat and nudges the model toward short natural replies.",
|
||||
].join(" ");
|
||||
state.runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({
|
||||
payloads: [{ text: longDetailedReply }],
|
||||
meta: {},
|
||||
}));
|
||||
|
||||
const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
|
||||
const followupRun = createFollowupRun();
|
||||
followupRun.run.provider = "openai";
|
||||
followupRun.run.model = "gpt-5.4";
|
||||
const result = await runAgentTurnWithFallback({
|
||||
commandBody: "explain in detail what changed",
|
||||
followupRun,
|
||||
sessionCtx: {
|
||||
Provider: "whatsapp",
|
||||
MessageSid: "msg",
|
||||
} as unknown as TemplateContext,
|
||||
opts: {},
|
||||
typingSignals: createMockTypingSignaler(),
|
||||
blockReplyPipeline: null,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
applyReplyToMode: (payload) => payload,
|
||||
shouldEmitToolResult: () => true,
|
||||
shouldEmitToolOutput: () => false,
|
||||
pendingToolTasks: new Set(),
|
||||
resetSessionAfterCompactionFailure: async () => false,
|
||||
resetSessionAfterRoleOrderingConflict: async () => false,
|
||||
isHeartbeat: false,
|
||||
sessionKey: "main",
|
||||
getActiveSessionEntry: () => undefined,
|
||||
resolvedVerboseLevel: "off",
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("success");
|
||||
if (result.kind === "success") {
|
||||
expect(result.runResult.payloads?.[0]?.text).toBe(longDetailedReply);
|
||||
}
|
||||
});
|
||||
|
||||
it("forwards plan, approval, command output, and patch events", async () => {
|
||||
const onPlanUpdate = vi.fn();
|
||||
const onApprovalEvent = vi.fn();
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
isTransientHttpError,
|
||||
sanitizeUserFacingText,
|
||||
} from "../../agents/pi-embedded-helpers.js";
|
||||
import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js";
|
||||
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
|
||||
import {
|
||||
resolveGroupSessionKey,
|
||||
@@ -63,6 +64,10 @@ import type { TypingSignaler } from "./typing-mode.js";
|
||||
// selection keeps conflicting with fallback model choices.
|
||||
// See: https://github.com/openclaw/openclaw/issues/58348
|
||||
export const MAX_LIVE_SWITCH_RETRIES = 2;
|
||||
const GPT_CHAT_BREVITY_ACK_MAX_CHARS = 420;
|
||||
const GPT_CHAT_BREVITY_ACK_MAX_SENTENCES = 3;
|
||||
const GPT_CHAT_BREVITY_SOFT_MAX_CHARS = 900;
|
||||
const GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES = 6;
|
||||
|
||||
export type RuntimeFallbackAttempt = {
|
||||
provider: string;
|
||||
@@ -273,6 +278,136 @@ function buildExternalRunFailureText(message: string): string {
|
||||
return "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.";
|
||||
}
|
||||
|
||||
function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
|
||||
if (params.provider !== "openai" && params.provider !== "openai-codex") {
|
||||
return false;
|
||||
}
|
||||
return /^gpt-5(?:[.-]|$)/i.test(params.model ?? "");
|
||||
}
|
||||
|
||||
function countChatReplySentences(text: string): number {
|
||||
return text
|
||||
.trim()
|
||||
.split(/(?<=[.!?])\s+/u)
|
||||
.map((part) => part.trim())
|
||||
.filter(Boolean).length;
|
||||
}
|
||||
|
||||
function scoreChattyFinalReplyText(text: string): number {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return 0;
|
||||
}
|
||||
let score = 0;
|
||||
const sentenceCount = countChatReplySentences(trimmed);
|
||||
if (trimmed.length > 900) {
|
||||
score += 1;
|
||||
}
|
||||
if (trimmed.length > 1_500) {
|
||||
score += 1;
|
||||
}
|
||||
if (sentenceCount > 6) {
|
||||
score += 1;
|
||||
}
|
||||
if (sentenceCount > 10) {
|
||||
score += 1;
|
||||
}
|
||||
if (trimmed.split(/\n{2,}/u).filter(Boolean).length >= 3) {
|
||||
score += 1;
|
||||
}
|
||||
if (
|
||||
/\b(?:in summary|to summarize|here(?:'s| is) what|what changed|what I verified)\b/i.test(
|
||||
trimmed,
|
||||
)
|
||||
) {
|
||||
score += 1;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
function shortenChattyFinalReplyText(
|
||||
text: string,
|
||||
params: { maxChars: number; maxSentences: number },
|
||||
): string {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return trimmed;
|
||||
}
|
||||
const sentences = trimmed
|
||||
.split(/(?<=[.!?])\s+/u)
|
||||
.map((part) => part.trim())
|
||||
.filter(Boolean);
|
||||
let shortened = sentences.slice(0, params.maxSentences).join(" ");
|
||||
if (!shortened) {
|
||||
shortened = trimmed.slice(0, params.maxChars).trimEnd();
|
||||
}
|
||||
if (shortened.length > params.maxChars) {
|
||||
shortened = shortened.slice(0, params.maxChars).trimEnd();
|
||||
}
|
||||
if (shortened.length >= trimmed.length) {
|
||||
return trimmed;
|
||||
}
|
||||
return shortened.replace(/[.,;:!?-]*$/u, "").trimEnd() + "...";
|
||||
}
|
||||
|
||||
function applyOpenAIGptChatReplyGuard(params: {
|
||||
provider?: string;
|
||||
model?: string;
|
||||
commandBody: string;
|
||||
isHeartbeat: boolean;
|
||||
payloads?: ReplyPayload[];
|
||||
}): void {
|
||||
if (
|
||||
params.isHeartbeat ||
|
||||
!shouldApplyOpenAIGptChatGuard({
|
||||
provider: params.provider,
|
||||
model: params.model,
|
||||
}) ||
|
||||
!params.payloads?.length
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
const trimmedCommand = params.commandBody.trim();
|
||||
const isAckTurn = isLikelyExecutionAckPrompt(trimmedCommand);
|
||||
const allowSoftCap =
|
||||
!isAckTurn &&
|
||||
trimmedCommand.length > 0 &&
|
||||
trimmedCommand.length <= 120 &&
|
||||
!/\b(?:detail|detailed|depth|deep dive|explain|compare|walk me through|why|how)\b/i.test(
|
||||
trimmedCommand,
|
||||
);
|
||||
|
||||
for (const payload of params.payloads) {
|
||||
if (
|
||||
!payload.text?.trim() ||
|
||||
payload.isError ||
|
||||
payload.isReasoning ||
|
||||
payload.mediaUrl ||
|
||||
(payload.mediaUrls?.length ?? 0) > 0 ||
|
||||
payload.interactive ||
|
||||
payload.text.includes("```")
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isAckTurn) {
|
||||
payload.text = shortenChattyFinalReplyText(payload.text, {
|
||||
maxChars: GPT_CHAT_BREVITY_ACK_MAX_CHARS,
|
||||
maxSentences: GPT_CHAT_BREVITY_ACK_MAX_SENTENCES,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (allowSoftCap && scoreChattyFinalReplyText(payload.text) >= 4) {
|
||||
payload.text = shortenChattyFinalReplyText(payload.text, {
|
||||
maxChars: GPT_CHAT_BREVITY_SOFT_MAX_CHARS,
|
||||
maxSentences: GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function runAgentTurnWithFallback(params: {
|
||||
commandBody: string;
|
||||
followupRun: FollowupRun;
|
||||
@@ -1199,6 +1334,14 @@ export async function runAgentTurnWithFallback(params: {
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
applyOpenAIGptChatReplyGuard({
|
||||
provider: fallbackProvider,
|
||||
model: fallbackModel,
|
||||
commandBody: params.commandBody,
|
||||
isHeartbeat: params.isHeartbeat,
|
||||
payloads: runResult.payloads,
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user