Mirror of https://github.com/openclaw/openclaw.git (synced 2026-04-11 01:01:13 +00:00).
Commit: fix(openai): tighten gpt chat action turns
This commit is contained in:
@@ -119,6 +119,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Plugins/auth-choice: apply provider-owned auth config patches without recursively preserving replaced default-model maps, so Anthropic Claude CLI and similar migrations can intentionally swap model allowlists during onboarding and setup instead of accumulating stale entries. Thanks @vincentkoc.
|
||||
- Plugins/cache: inherit the active gateway workspace for provider, web-search, and web-fetch snapshot loads when callers omit `workspaceDir`, so compatible plugin registries and snapshot caches stop missing on gateway-owned runtime paths. (#61138) Thanks @jzakirov.
|
||||
- Plugins/facades: back-fill facade sentinels before tracked-plugin resolution re-enters config loading, so facade exports stay defined during circular provider normalization. (#61180) Thanks @adam91holt.
|
||||
- Providers/OpenAI GPT: treat short approval turns like `ok do it` and `go ahead` as immediate action turns, and trim overly memo-like GPT-5 chat confirmations so OpenAI replies stay shorter and more conversational by default.
|
||||
- Plugins/install: preserve unsafe override flags across linked plugin and hook-pack probes so local `--link` installs honor the documented override behavior. (#60624) Thanks @JerrettDavis.
|
||||
- Plugins/Kimi Coding: parse tagged tool calls and keep Anthropic-native tool payloads so Kimi coding endpoints execute tools instead of echoing raw markup. (#60051, #60391) Thanks @obviyus and @Eric-Guo.
|
||||
- Plugins/marketplace: block remote marketplace symlink escapes without breaking ordinary local marketplace install paths. (#60556) Thanks @eleqtrizit.
|
||||
|
||||
@@ -276,6 +276,9 @@ describe("openai plugin", () => {
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
"If the user asks you to do the work, start in the same turn instead of restating the plan.",
|
||||
);
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
'If the latest user message is a short approval like "ok do it" or "go ahead", skip the recap and start acting.',
|
||||
);
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
"Commentary-only turns are incomplete when the next action is clear.",
|
||||
);
|
||||
|
||||
@@ -6,6 +6,7 @@ Be warm, collaborative, and quietly supportive.
|
||||
Communicate like a capable teammate sitting next to the user.
|
||||
Keep progress updates clear and concrete.
|
||||
If the user asks you to do the work, start in the same turn instead of restating the plan.
|
||||
If the latest user message is a short approval like "ok do it" or "go ahead", skip the recap and start acting.
|
||||
Commentary-only turns are incomplete when the next action is clear.
|
||||
Prefer the first real tool step over more narration.
|
||||
If work will take more than a moment, send a brief progress update while acting.
|
||||
|
||||
@@ -10,6 +10,8 @@ import {
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
import {
|
||||
extractPlanningOnlyPlanDetails,
|
||||
isLikelyExecutionAckPrompt,
|
||||
resolveAckExecutionFastPathInstruction,
|
||||
resolvePlanningOnlyRetryInstruction,
|
||||
} from "./run/incomplete-turn.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./run/types.js";
|
||||
@@ -101,6 +103,22 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
|
||||
expect(retryInstruction).toBeNull();
|
||||
});
|
||||
|
||||
it("detects short execution approval prompts", () => {
|
||||
expect(isLikelyExecutionAckPrompt("ok do it")).toBe(true);
|
||||
expect(isLikelyExecutionAckPrompt("go ahead")).toBe(true);
|
||||
expect(isLikelyExecutionAckPrompt("Can you do it?")).toBe(false);
|
||||
});
|
||||
|
||||
it("adds an ack-turn fast-path instruction for GPT action turns", () => {
|
||||
const instruction = resolveAckExecutionFastPathInstruction({
|
||||
provider: "openai",
|
||||
modelId: "gpt-5.4",
|
||||
prompt: "go ahead",
|
||||
});
|
||||
|
||||
expect(instruction).toContain("Do not recap or restate the plan");
|
||||
});
|
||||
|
||||
it("extracts structured steps from planning-only narration", () => {
|
||||
expect(
|
||||
extractPlanningOnlyPlanDetails(
|
||||
|
||||
@@ -82,6 +82,7 @@ import {
|
||||
scrubAnthropicRefusalMagic,
|
||||
} from "./run/helpers.js";
|
||||
import {
|
||||
resolveAckExecutionFastPathInstruction,
|
||||
resolveIncompleteTurnPayloadText,
|
||||
extractPlanningOnlyPlanDetails,
|
||||
resolvePlanningOnlyRetryInstruction,
|
||||
@@ -311,6 +312,11 @@ export async function runEmbeddedPiAgent(
|
||||
let planningOnlyRetryAttempts = 0;
|
||||
let lastRetryFailoverReason: FailoverReason | null = null;
|
||||
let planningOnlyRetryInstruction: string | null = null;
|
||||
const ackExecutionFastPathInstruction = resolveAckExecutionFastPathInstruction({
|
||||
provider,
|
||||
modelId,
|
||||
prompt: params.prompt,
|
||||
});
|
||||
let rateLimitProfileRotations = 0;
|
||||
let timeoutCompactionAttempts = 0;
|
||||
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
|
||||
@@ -483,9 +489,16 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
const basePrompt =
|
||||
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
|
||||
const prompt = planningOnlyRetryInstruction
|
||||
? `${basePrompt}\n\n${planningOnlyRetryInstruction}`
|
||||
: basePrompt;
|
||||
const promptAdditions = [
|
||||
ackExecutionFastPathInstruction,
|
||||
planningOnlyRetryInstruction,
|
||||
].filter(
|
||||
(value): value is string => typeof value === "string" && value.trim().length > 0,
|
||||
);
|
||||
const prompt =
|
||||
promptAdditions.length > 0
|
||||
? `${basePrompt}\n\n${promptAdditions.join("\n\n")}`
|
||||
: basePrompt;
|
||||
let resolvedStreamApiKey: string | undefined;
|
||||
if (!runtimeAuthState && apiKeyInfo) {
|
||||
resolvedStreamApiKey = (apiKeyInfo as ApiKeyInfo).apiKey;
|
||||
|
||||
@@ -34,9 +34,27 @@ const PLANNING_ONLY_PROMISE_RE =
|
||||
/\b(?:i(?:'ll| will)|let me|going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
|
||||
const PLANNING_ONLY_COMPLETION_RE =
|
||||
/\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
|
||||
const ACK_EXECUTION_NORMALIZED_SET = new Set([
|
||||
"ok",
|
||||
"okay",
|
||||
"ok do it",
|
||||
"okay do it",
|
||||
"do it",
|
||||
"go ahead",
|
||||
"please do",
|
||||
"sounds good",
|
||||
"sounds good do it",
|
||||
"ship it",
|
||||
"fix it",
|
||||
"make it so",
|
||||
"yes do it",
|
||||
"yep do it",
|
||||
]);
|
||||
|
||||
export const PLANNING_ONLY_RETRY_INSTRUCTION =
|
||||
"The previous assistant turn only described the plan. Do not restate the plan. Act now: take the first concrete tool action you can. If a real blocker prevents action, reply with the exact blocker in one sentence.";
|
||||
export const ACK_EXECUTION_FAST_PATH_INSTRUCTION =
|
||||
"The latest user message is a short approval to proceed. Do not recap or restate the plan. Start with the first concrete tool action immediately. Keep any user-facing follow-up brief and natural.";
|
||||
|
||||
export type PlanningOnlyPlanDetails = {
|
||||
explanation: string;
|
||||
@@ -93,6 +111,40 @@ function shouldApplyPlanningOnlyRetryGuard(params: {
|
||||
return /^gpt-5(?:[.-]|$)/i.test(params.modelId ?? "");
|
||||
}
|
||||
|
||||
function normalizeAckPrompt(text: string): string {
|
||||
return text
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(/[`"'.,!?]+/g, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function isLikelyExecutionAckPrompt(text: string): boolean {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed || trimmed.length > 80 || trimmed.includes("\n") || trimmed.includes("?")) {
|
||||
return false;
|
||||
}
|
||||
return ACK_EXECUTION_NORMALIZED_SET.has(normalizeAckPrompt(trimmed));
|
||||
}
|
||||
|
||||
export function resolveAckExecutionFastPathInstruction(params: {
|
||||
provider?: string;
|
||||
modelId?: string;
|
||||
prompt: string;
|
||||
}): string | null {
|
||||
if (
|
||||
!shouldApplyPlanningOnlyRetryGuard({
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
}) ||
|
||||
!isLikelyExecutionAckPrompt(params.prompt)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
return ACK_EXECUTION_FAST_PATH_INSTRUCTION;
|
||||
}
|
||||
|
||||
function extractPlanningOnlySteps(text: string): string[] {
|
||||
const lines = text
|
||||
.split(/\r?\n/)
|
||||
|
||||
@@ -305,6 +305,117 @@ describe("runAgentTurnWithFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("trims chatty GPT ack-turn final prose", async () => {
|
||||
state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
|
||||
result: await params.run("openai", "gpt-5.4"),
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
attempts: [],
|
||||
}));
|
||||
state.runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({
|
||||
payloads: [
|
||||
{
|
||||
text: [
|
||||
"I updated the prompt overlay and tightened the runtime guard.",
|
||||
"I also added the ack-turn fast path so short approvals skip the recap.",
|
||||
"The reply-side brevity cap now trims long prose-heavy GPT confirmations.",
|
||||
"I updated tests for the overlay, retry guard, and reply normalization.",
|
||||
"Everything is wired together and ready for verification.",
|
||||
].join(" "),
|
||||
},
|
||||
],
|
||||
meta: {},
|
||||
}));
|
||||
|
||||
const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
|
||||
const followupRun = createFollowupRun();
|
||||
followupRun.run.provider = "openai";
|
||||
followupRun.run.model = "gpt-5.4";
|
||||
const result = await runAgentTurnWithFallback({
|
||||
commandBody: "ok do it",
|
||||
followupRun,
|
||||
sessionCtx: {
|
||||
Provider: "whatsapp",
|
||||
MessageSid: "msg",
|
||||
} as unknown as TemplateContext,
|
||||
opts: {},
|
||||
typingSignals: createMockTypingSignaler(),
|
||||
blockReplyPipeline: null,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
applyReplyToMode: (payload) => payload,
|
||||
shouldEmitToolResult: () => true,
|
||||
shouldEmitToolOutput: () => false,
|
||||
pendingToolTasks: new Set(),
|
||||
resetSessionAfterCompactionFailure: async () => false,
|
||||
resetSessionAfterRoleOrderingConflict: async () => false,
|
||||
isHeartbeat: false,
|
||||
sessionKey: "main",
|
||||
getActiveSessionEntry: () => undefined,
|
||||
resolvedVerboseLevel: "off",
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("success");
|
||||
if (result.kind === "success") {
|
||||
expect(result.runResult.payloads?.[0]?.text).toBe(
|
||||
"I updated the prompt overlay and tightened the runtime guard. I also added the ack-turn fast path so short approvals skip the recap. The reply-side brevity cap now trims long prose-heavy GPT confirmations...",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it("does not trim GPT replies when the user asked for depth", async () => {
|
||||
state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
|
||||
result: await params.run("openai", "gpt-5.4"),
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
attempts: [],
|
||||
}));
|
||||
const longDetailedReply = [
|
||||
"Here is the detailed breakdown.",
|
||||
"First, the runner now detects short approval turns and skips the recap path.",
|
||||
"Second, the reply layer scores long prose-heavy GPT confirmations and trims them only in chat-style turns.",
|
||||
"Third, code fences and richer structured outputs are left untouched so technical answers stay intact.",
|
||||
"Finally, the overlay reinforces that this is a live chat and nudges the model toward short natural replies.",
|
||||
].join(" ");
|
||||
state.runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({
|
||||
payloads: [{ text: longDetailedReply }],
|
||||
meta: {},
|
||||
}));
|
||||
|
||||
const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
|
||||
const followupRun = createFollowupRun();
|
||||
followupRun.run.provider = "openai";
|
||||
followupRun.run.model = "gpt-5.4";
|
||||
const result = await runAgentTurnWithFallback({
|
||||
commandBody: "explain in detail what changed",
|
||||
followupRun,
|
||||
sessionCtx: {
|
||||
Provider: "whatsapp",
|
||||
MessageSid: "msg",
|
||||
} as unknown as TemplateContext,
|
||||
opts: {},
|
||||
typingSignals: createMockTypingSignaler(),
|
||||
blockReplyPipeline: null,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
applyReplyToMode: (payload) => payload,
|
||||
shouldEmitToolResult: () => true,
|
||||
shouldEmitToolOutput: () => false,
|
||||
pendingToolTasks: new Set(),
|
||||
resetSessionAfterCompactionFailure: async () => false,
|
||||
resetSessionAfterRoleOrderingConflict: async () => false,
|
||||
isHeartbeat: false,
|
||||
sessionKey: "main",
|
||||
getActiveSessionEntry: () => undefined,
|
||||
resolvedVerboseLevel: "off",
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("success");
|
||||
if (result.kind === "success") {
|
||||
expect(result.runResult.payloads?.[0]?.text).toBe(longDetailedReply);
|
||||
}
|
||||
});
|
||||
|
||||
it("forwards plan, approval, command output, and patch events", async () => {
|
||||
const onPlanUpdate = vi.fn();
|
||||
const onApprovalEvent = vi.fn();
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
isTransientHttpError,
|
||||
sanitizeUserFacingText,
|
||||
} from "../../agents/pi-embedded-helpers.js";
|
||||
import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js";
|
||||
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
|
||||
import {
|
||||
resolveGroupSessionKey,
|
||||
@@ -63,6 +64,10 @@ import type { TypingSignaler } from "./typing-mode.js";
|
||||
// selection keeps conflicting with fallback model choices.
|
||||
// See: https://github.com/openclaw/openclaw/issues/58348
|
||||
export const MAX_LIVE_SWITCH_RETRIES = 2;
|
||||
const GPT_CHAT_BREVITY_ACK_MAX_CHARS = 420;
|
||||
const GPT_CHAT_BREVITY_ACK_MAX_SENTENCES = 3;
|
||||
const GPT_CHAT_BREVITY_SOFT_MAX_CHARS = 900;
|
||||
const GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES = 6;
|
||||
|
||||
export type RuntimeFallbackAttempt = {
|
||||
provider: string;
|
||||
@@ -273,6 +278,136 @@ function buildExternalRunFailureText(message: string): string {
|
||||
return "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.";
|
||||
}
|
||||
|
||||
function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
|
||||
if (params.provider !== "openai" && params.provider !== "openai-codex") {
|
||||
return false;
|
||||
}
|
||||
return /^gpt-5(?:[.-]|$)/i.test(params.model ?? "");
|
||||
}
|
||||
|
||||
function countChatReplySentences(text: string): number {
|
||||
return text
|
||||
.trim()
|
||||
.split(/(?<=[.!?])\s+/u)
|
||||
.map((part) => part.trim())
|
||||
.filter(Boolean).length;
|
||||
}
|
||||
|
||||
function scoreChattyFinalReplyText(text: string): number {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return 0;
|
||||
}
|
||||
let score = 0;
|
||||
const sentenceCount = countChatReplySentences(trimmed);
|
||||
if (trimmed.length > 900) {
|
||||
score += 1;
|
||||
}
|
||||
if (trimmed.length > 1_500) {
|
||||
score += 1;
|
||||
}
|
||||
if (sentenceCount > 6) {
|
||||
score += 1;
|
||||
}
|
||||
if (sentenceCount > 10) {
|
||||
score += 1;
|
||||
}
|
||||
if (trimmed.split(/\n{2,}/u).filter(Boolean).length >= 3) {
|
||||
score += 1;
|
||||
}
|
||||
if (
|
||||
/\b(?:in summary|to summarize|here(?:'s| is) what|what changed|what I verified)\b/i.test(
|
||||
trimmed,
|
||||
)
|
||||
) {
|
||||
score += 1;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
function shortenChattyFinalReplyText(
|
||||
text: string,
|
||||
params: { maxChars: number; maxSentences: number },
|
||||
): string {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return trimmed;
|
||||
}
|
||||
const sentences = trimmed
|
||||
.split(/(?<=[.!?])\s+/u)
|
||||
.map((part) => part.trim())
|
||||
.filter(Boolean);
|
||||
let shortened = sentences.slice(0, params.maxSentences).join(" ");
|
||||
if (!shortened) {
|
||||
shortened = trimmed.slice(0, params.maxChars).trimEnd();
|
||||
}
|
||||
if (shortened.length > params.maxChars) {
|
||||
shortened = shortened.slice(0, params.maxChars).trimEnd();
|
||||
}
|
||||
if (shortened.length >= trimmed.length) {
|
||||
return trimmed;
|
||||
}
|
||||
return shortened.replace(/[.,;:!?-]*$/u, "").trimEnd() + "...";
|
||||
}
|
||||
|
||||
function applyOpenAIGptChatReplyGuard(params: {
|
||||
provider?: string;
|
||||
model?: string;
|
||||
commandBody: string;
|
||||
isHeartbeat: boolean;
|
||||
payloads?: ReplyPayload[];
|
||||
}): void {
|
||||
if (
|
||||
params.isHeartbeat ||
|
||||
!shouldApplyOpenAIGptChatGuard({
|
||||
provider: params.provider,
|
||||
model: params.model,
|
||||
}) ||
|
||||
!params.payloads?.length
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
const trimmedCommand = params.commandBody.trim();
|
||||
const isAckTurn = isLikelyExecutionAckPrompt(trimmedCommand);
|
||||
const allowSoftCap =
|
||||
!isAckTurn &&
|
||||
trimmedCommand.length > 0 &&
|
||||
trimmedCommand.length <= 120 &&
|
||||
!/\b(?:detail|detailed|depth|deep dive|explain|compare|walk me through|why|how)\b/i.test(
|
||||
trimmedCommand,
|
||||
);
|
||||
|
||||
for (const payload of params.payloads) {
|
||||
if (
|
||||
!payload.text?.trim() ||
|
||||
payload.isError ||
|
||||
payload.isReasoning ||
|
||||
payload.mediaUrl ||
|
||||
(payload.mediaUrls?.length ?? 0) > 0 ||
|
||||
payload.interactive ||
|
||||
payload.text.includes("```")
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isAckTurn) {
|
||||
payload.text = shortenChattyFinalReplyText(payload.text, {
|
||||
maxChars: GPT_CHAT_BREVITY_ACK_MAX_CHARS,
|
||||
maxSentences: GPT_CHAT_BREVITY_ACK_MAX_SENTENCES,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (allowSoftCap && scoreChattyFinalReplyText(payload.text) >= 4) {
|
||||
payload.text = shortenChattyFinalReplyText(payload.text, {
|
||||
maxChars: GPT_CHAT_BREVITY_SOFT_MAX_CHARS,
|
||||
maxSentences: GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function runAgentTurnWithFallback(params: {
|
||||
commandBody: string;
|
||||
followupRun: FollowupRun;
|
||||
@@ -1199,6 +1334,14 @@ export async function runAgentTurnWithFallback(params: {
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
applyOpenAIGptChatReplyGuard({
|
||||
provider: fallbackProvider,
|
||||
model: fallbackModel,
|
||||
commandBody: params.commandBody,
|
||||
isHeartbeat: params.isHeartbeat,
|
||||
payloads: runResult.payloads,
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user