fix(openai): remove GPT reply brevity cap

This commit is contained in:
Peter Steinberger
2026-05-17 09:28:39 +01:00
parent 37806afd2d
commit 69d588cf2a
5 changed files with 34 additions and 151 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.7.1, bringing Codex hook approval compatibility, pre-tool command wrapping fixes, and Rolldown/Vitest output compaction improvements into the OpenClaw plugin.
- Agents/OpenAI: stop post-processing GPT-5 final replies with hardcoded brevity caps, preserving full channel responses instead of appending synthetic ellipses, and log when strict-agentic GPT-5 execution activates. Fixes #82910.
- Agents/media: deliver failed async image, music, and video generation completions directly when requester-session completion handoff fails, so channel users see provider errors instead of silent fallback stalls.
- Agents/music: steer song, jingle, beat, anthem, and instrumental requests toward `music_generate` audio creation instead of lyric-only replies, and reserve `lyrics` for exact sung words.
- Codex app-server: record native Codex tool calls and results into trajectory artifacts so debug/trajectory exports capture the full Codex-native tool history, not just OpenClaw-bridged turns. Thanks @vyctorbrzezowski.

View File

@@ -53,6 +53,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
return mockedLog.warn.mock.calls.map(([message]) => String(message));
}
/** Returns the first argument of every recorded `mockedLog.info` call, coerced to a string. */
function infoMessages(): string[] {
  const recorded: string[] = [];
  for (const [message] of mockedLog.info.mock.calls) {
    recorded.push(String(message));
  }
  return recorded;
}
/** Asserts that the newline-joined warn messages contain `text` as a substring. */
function expectWarnMessageWith(text: string): void {
  const combinedWarnings = warnMessages().join("\n");
  expect(combinedWarnings).toContain(text);
}
@@ -349,6 +353,12 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
},
]);
expect(result.meta.livenessState).toBe("blocked");
expect(infoMessages().join("\n")).toContain(
"strict-agentic execution contract active: runId=run-strict-agentic-auto-activated",
);
expect(infoMessages().join("\n")).toContain(
"provider=openai-codex/gpt-5.4 harness=codex configured=unspecified",
);
});
it("respects explicit default contract opt-out on GPT-5 openai runs", async () => {

View File

@@ -980,8 +980,10 @@ export async function runEmbeddedPiAgent(
config: params.config,
agentId: params.agentId,
});
const configuredExecutionContract =
resolveAgentExecutionContract(params.config, sessionAgentId) ?? "default";
const configuredExecutionContract = resolveAgentExecutionContract(
params.config,
sessionAgentId,
);
const strictAgenticActive = isStrictAgenticExecutionContractActive({
config: params.config,
sessionKey: params.sessionKey,
@@ -990,6 +992,14 @@ export async function runEmbeddedPiAgent(
modelId,
});
const executionContract = strictAgenticActive ? "strict-agentic" : "default";
const configuredExecutionContractForLog = configuredExecutionContract ?? "default";
if (strictAgenticActive) {
log.info(
`strict-agentic execution contract active: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} harness=${sanitizeForLog(agentHarness.id)} ` +
`configured=${configuredExecutionContract ?? "unspecified"}`,
);
}
const maxPlanningOnlyRetryAttempts = resolvePlanningOnlyRetryLimit(executionContract);
const maxReasoningOnlyRetryAttempts = DEFAULT_REASONING_ONLY_RETRY_LIMIT;
const maxEmptyResponseRetryAttempts = DEFAULT_EMPTY_RESPONSE_RETRY_LIMIT;
@@ -2790,7 +2800,7 @@ export async function runEmbeddedPiAgent(
planningOnlyRetryInstruction = nextPlanningOnlyRetryInstruction;
log.warn(
`planning-only turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContract} — retrying ` +
`provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContractForLog} — retrying ` +
`${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} with act-now steer`,
);
continue;
@@ -2869,7 +2879,7 @@ export async function runEmbeddedPiAgent(
if (!incompleteTurnText && nextPlanningOnlyRetryInstruction && strictAgenticActive) {
log.warn(
`strict-agentic run exhausted planning-only retries: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${provider}/${modelId} configured=${configuredExecutionContract} — surfacing blocked state`,
`provider=${provider}/${modelId} configured=${configuredExecutionContractForLog} — surfacing blocked state`,
);
// Criterion 4 of the GPT-5.4 parity gate requires every terminal
// exit path to emit an explicit livenessState + replayInvalid so

View File

@@ -2734,7 +2734,7 @@ describe("runAgentTurnWithFallback", () => {
});
});
it("trims chatty GPT ack-turn final prose", async () => {
it("preserves GPT ack-turn final prose without reply-side truncation", async () => {
state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
result: await params.run("openai", "gpt-5.4"),
provider: "openai",
@@ -2747,7 +2747,7 @@ describe("runAgentTurnWithFallback", () => {
text: [
"I updated the prompt overlay and tightened the runtime guard.",
"I also added the ack-turn fast path so short approvals skip the recap.",
"The reply-side brevity cap now trims long prose-heavy GPT confirmations.",
"The reply-side output now keeps long prose-heavy GPT confirmations intact.",
"I updated tests for the overlay, retry guard, and reply normalization.",
"Everything is wired together and ready for verification.",
].join(" "),
@@ -2787,7 +2787,13 @@ describe("runAgentTurnWithFallback", () => {
expect(result.kind).toBe("success");
if (result.kind === "success") {
expect(result.runResult.payloads?.[0]?.text).toBe(
"I updated the prompt overlay and tightened the runtime guard. I also added the ack-turn fast path so short approvals skip the recap. The reply-side brevity cap now trims long prose-heavy GPT confirmations...",
[
"I updated the prompt overlay and tightened the runtime guard.",
"I also added the ack-turn fast path so short approvals skip the recap.",
"The reply-side output now keeps long prose-heavy GPT confirmations intact.",
"I updated tests for the overlay, retry guard, and reply normalization.",
"Everything is wired together and ready for verification.",
].join(" "),
);
}
});

View File

@@ -42,7 +42,6 @@ import {
isTransientHttpError,
} from "../../agents/pi-embedded-helpers.js";
import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers/sanitize-user-facing-text.js";
import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js";
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
import { buildAgentRuntimeOutcomePlan } from "../../agents/runtime-plan/build.js";
import {
@@ -112,10 +111,6 @@ import type { TypingSignaler } from "./typing-mode.js";
// selection keeps conflicting with fallback model choices.
// See: https://github.com/openclaw/openclaw/issues/58348
export const MAX_LIVE_SWITCH_RETRIES = 2;
const GPT_CHAT_BREVITY_ACK_MAX_CHARS = 420;
const GPT_CHAT_BREVITY_ACK_MAX_SENTENCES = 3;
const GPT_CHAT_BREVITY_SOFT_MAX_CHARS = 900;
const GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES = 6;
function readApprovalScopeValue(value: unknown): "turn" | "session" | undefined {
return value === "turn" || value === "session" ? value : undefined;
@@ -857,137 +852,6 @@ export function buildContextOverflowRecoveryText(params: {
);
}
/**
 * Reports whether the GPT chat reply guard applies to a provider/model pair.
 *
 * The provider must be exactly "openai" or "openai-codex", and the model id
 * must start with "gpt-5" followed by ".", "-", or the end of the string
 * (case-insensitive). A missing model is treated as the empty string.
 */
function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
  const isOpenAIProvider = params.provider === "openai" || params.provider === "openai-codex";
  const modelId = params.model ?? "";
  return isOpenAIProvider && /^gpt-5(?:[.-]|$)/i.test(modelId);
}
/**
 * Counts sentence-like segments in `text`.
 *
 * The trimmed text is split at whitespace that directly follows ".", "!" or
 * "?"; segments that are empty after trimming are not counted.
 */
function countChatReplySentences(text: string): number {
  let sentenceCount = 0;
  for (const segment of text.trim().split(/(?<=[.!?])\s+/u)) {
    if (segment.trim()) {
      sentenceCount += 1;
    }
  }
  return sentenceCount;
}
/**
 * Scores how verbose a final reply looks; higher means chattier.
 *
 * Blank text scores 0. Otherwise one point is awarded for each signal that
 * holds on the trimmed text: longer than 900 characters, longer than 1,500
 * characters, more than 6 sentences, more than 10 sentences, at least three
 * non-empty chunks separated by blank lines, and the presence of a recap
 * phrase such as "in summary" or "what changed".
 */
function scoreChattyFinalReplyText(text: string): number {
  const body = text.trim();
  if (body.length === 0) {
    return 0;
  }

  const sentences = countChatReplySentences(body);
  const paragraphs = body.split(/\n{2,}/u).filter(Boolean).length;
  const hasRecapPhrase =
    /\b(?:in summary|to summarize|here(?:'s| is) what|what changed|what I verified)\b/i.test(body);

  const signals: boolean[] = [
    body.length > 900,
    body.length > 1_500,
    sentences > 6,
    sentences > 10,
    paragraphs >= 3,
    hasRecapPhrase,
  ];
  return signals.filter(Boolean).length;
}
/**
 * Returns the first `maxChars` UTF-16 code units of `text`, dropping one extra
 * unit when the cut would otherwise separate a high surrogate from its low
 * surrogate (which would leave an unpaired, unrenderable code unit).
 */
function sliceWithoutSplittingSurrogatePair(text: string, maxChars: number): string {
  const sliced = text.slice(0, maxChars);
  if (sliced.length === 0 || sliced.length === text.length) {
    return sliced;
  }
  const lastUnit = sliced.charCodeAt(sliced.length - 1);
  const nextUnit = text.charCodeAt(sliced.length);
  const splitsPair =
    lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  return splitsPair ? sliced.slice(0, -1) : sliced;
}

/**
 * Shortens a prose reply to at most `maxSentences` sentences and `maxChars`
 * UTF-16 code units, appending "..." when anything was removed.
 *
 * @param text - Reply text; surrounding whitespace is ignored.
 * @param params.maxChars - Upper bound on the kept prefix length (the "..."
 *   suffix is added on top of this bound).
 * @param params.maxSentences - Number of leading sentences to keep.
 * @returns The trimmed text unchanged when it already fits; otherwise the
 *   shortened prefix with trailing punctuation stripped and "..." appended.
 */
function shortenChattyFinalReplyText(
  text: string,
  params: { maxChars: number; maxSentences: number },
): string {
  const trimmed = text.trim();
  if (!trimmed) {
    return trimmed;
  }
  const sentences = trimmed
    .split(/(?<=[.!?])\s+/u)
    .map((part) => part.trim())
    .filter(Boolean);
  let shortened = sentences.slice(0, params.maxSentences).join(" ");
  if (!shortened) {
    // No sentences were kept (e.g. maxSentences is 0): fall back to a plain character cap.
    shortened = sliceWithoutSplittingSurrogatePair(trimmed, params.maxChars).trimEnd();
  }
  if (shortened.length > params.maxChars) {
    shortened = sliceWithoutSplittingSurrogatePair(shortened, params.maxChars).trimEnd();
  }
  if (shortened.length >= trimmed.length) {
    // Nothing was removed, so do not append an ellipsis.
    return trimmed;
  }
  // Drop trailing punctuation so the result does not end in "....", "!..." and the like.
  return shortened.replace(/[.,;:!?-]*$/u, "").trimEnd() + "...";
}
/**
 * Shortens over-long prose reply payloads in place for GPT-5 runs on the
 * "openai" / "openai-codex" providers.
 *
 * Nothing happens for heartbeat turns, for other provider/model pairs, or when
 * there are no payloads. Payloads are skipped when they have no text, are
 * flagged as error or reasoning, carry media or interactive content, or
 * contain a fenced code block.
 *
 * Two caps exist:
 * - ack turns (as classified by `isLikelyExecutionAckPrompt`) always get the
 *   tighter ack cap;
 * - otherwise the soft cap applies only when the command is non-empty, at most
 *   120 characters, does not ask for detail/explanation, and the reply scores
 *   at least 4 on `scoreChattyFinalReplyText`.
 *
 * @param params.commandBody - The user command; trimmed before classification.
 * @param params.payloads - Reply payloads; `text` is overwritten on eligible entries.
 */
function applyOpenAIGptChatReplyGuard(params: {
  provider?: string;
  model?: string;
  commandBody: string;
  isHeartbeat: boolean;
  payloads?: ReplyPayload[];
}): void {
  if (params.isHeartbeat) {
    return;
  }
  if (!shouldApplyOpenAIGptChatGuard({ provider: params.provider, model: params.model })) {
    return;
  }
  const payloads = params.payloads;
  if (!payloads?.length) {
    return;
  }

  const command = params.commandBody.trim();
  const ackTurn = isLikelyExecutionAckPrompt(command);
  const commandIsShort = command.length > 0 && command.length <= 120;
  const softCapAllowed =
    !ackTurn &&
    commandIsShort &&
    !/\b(?:detail|detailed|depth|deep dive|explain|compare|walk me through|why|how)\b/i.test(
      command,
    );

  // The cap is fixed for the whole turn: ack turns use the ack limits, everything else the soft limits.
  const limits = ackTurn
    ? {
        maxChars: GPT_CHAT_BREVITY_ACK_MAX_CHARS,
        maxSentences: GPT_CHAT_BREVITY_ACK_MAX_SENTENCES,
      }
    : {
        maxChars: GPT_CHAT_BREVITY_SOFT_MAX_CHARS,
        maxSentences: GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES,
      };

  for (const payload of payloads) {
    const text = normalizeOptionalString(payload.text);
    if (!text) {
      continue;
    }
    if (payload.isError || payload.isReasoning) {
      continue;
    }
    if (payload.mediaUrl || (payload.mediaUrls?.length ?? 0) > 0) {
      continue;
    }
    if (payload.interactive || text.includes("```")) {
      continue;
    }
    if (ackTurn || (softCapAllowed && scoreChattyFinalReplyText(text) >= 4)) {
      payload.text = shortenChattyFinalReplyText(text, limits);
    }
  }
}
/** Returns the fixed notice telling the user the gateway is restarting and to retry shortly. */
function buildRestartLifecycleReplyText(): string {
  const restartNotice = "⚠️ Gateway is restarting. Please wait a few seconds and try again.";
  return restartNotice;
}
@@ -2521,14 +2385,6 @@ export async function runAgentTurnWithFallback(params: {
];
}
}
applyOpenAIGptChatReplyGuard({
provider: fallbackProvider,
model: fallbackModel,
commandBody: params.commandBody,
isHeartbeat: params.isHeartbeat,
payloads: runResult.payloads,
});
}
return {