agents: split GPT-5 prompt and retry behavior (#65597)

* agents: split GPT-5 prompt and retry behavior

* agents: fix GPT-5 review follow-ups

* agents: address GPT-5 review follow-ups

* agents: avoid replaying side-effectful GPT retries

* agents: mark subagent control as mutating

* agents: fail closed on single-action retries

* commands: stabilize channel legacy doctor migration test

* agents: narrow single-action retry promise trigger
This commit is contained in:
pashpashpash
2026-04-12 18:52:22 -07:00
committed by GitHub
parent d0c83777fb
commit c848ebc8ce
8 changed files with 352 additions and 17 deletions

View File

@@ -14,6 +14,7 @@ import {
OPENAI_FRIENDLY_PROMPT_OVERLAY,
OPENAI_GPT5_EXECUTION_BIAS,
OPENAI_GPT5_OUTPUT_CONTRACT,
OPENAI_GPT5_TOOL_CALL_STYLE,
} from "./prompt-overlay.js";
const runtimeMocks = vi.hoisted(() => ({
@@ -365,7 +366,7 @@ describe("openai plugin", () => {
};
expect(openaiProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -382,7 +383,7 @@ describe("openai plugin", () => {
"Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
);
expect(codexProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -454,9 +455,22 @@ describe("openai plugin", () => {
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
"Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
);
expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
"Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.",
);
expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
"If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.",
);
expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
"Do prerequisite lookup or discovery before dependent actions.",
);
expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
"Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.",
);
expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
"When a first-class tool exists for an action, use the tool instead of asking the user to run a command.",
);
expect(OPENAI_GPT5_TOOL_CALL_STYLE).not.toContain("/approve");
expect(OPENAI_GPT5_OUTPUT_CONTRACT).toContain(
"Return the requested sections only, in the requested order.",
);
@@ -486,7 +500,7 @@ describe("openai plugin", () => {
agentId: undefined,
}),
).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -514,7 +528,7 @@ describe("openai plugin", () => {
agentId: undefined,
}),
).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
},
@@ -540,7 +554,7 @@ describe("openai plugin", () => {
agentId: undefined,
}),
).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
},
@@ -567,7 +581,7 @@ describe("openai plugin", () => {
agentId: undefined,
}),
).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -594,7 +608,7 @@ describe("openai plugin", () => {
agentId: undefined,
}),
).toEqual({
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,

View File

@@ -64,11 +64,20 @@ Do not use em dashes unless the user explicitly asks for them or they are requir
/**
 * Prompt text for the "Execution Bias" section. Elsewhere in this file it is
 * supplied as the execution_bias entry of sectionOverrides in the system
 * prompt contribution.
 *
 * NOTE(review): the lines starting "Before the final answer" and "Act first,
 * then verify" both carry the closing delimiter of this literal; presumably
 * only one of them is the intended final line. Confirm against the committed
 * file before relying on the exact text.
 */
export const OPENAI_GPT5_EXECUTION_BIAS = `## Execution Bias
Start the real work in the same turn when the next step is clear.
Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.
Commentary-only turns are incomplete when tools are available and the next action is clear.
If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.
Do prerequisite lookup or discovery before dependent actions.
If another tool call would likely improve correctness or completeness, keep going instead of stopping at partial progress.
Multi-part requests stay incomplete until every requested item is handled or clearly marked blocked.
Before the final answer, quickly verify correctness, coverage, formatting, and obvious side effects.`;
Act first, then verify if needed. Do not pause to summarize or verify before taking the next action.`;
/**
 * Prompt text for the "Tool Call Style" guidance: call tools directly, prefer
 * a first-class tool over asking the user to run a command, and narrate only
 * when it helps. Each array entry is one line of the prompt; the entries are
 * joined with a single newline.
 */
export const OPENAI_GPT5_TOOL_CALL_STYLE = [
  "## Tool Call Style",
  "Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.",
  "When a first-class tool exists for an action, use the tool instead of asking the user to run a command.",
  "If multiple tool calls are needed, call them in sequence without stopping to explain between calls.",
  "Default: do not narrate routine, low-risk tool calls (just call the tool).",
  "Narrate only when it genuinely helps: complex multi-step work, sensitive actions like deletions, or when the user explicitly asks for commentary.",
].join("\n");
/**
 * Selects the prompt overlay variant. When the mode is "friendly", the prompt
 * contribution built in this file adds an interaction_style section override
 * (OPENAI_FRIENDLY_PROMPT_OVERLAY) next to the execution_bias override.
 *
 * NOTE(review): "off" presumably leaves out only the interaction_style
 * override; confirm against the resolver.
 */
export type OpenAIPromptOverlayMode = "friendly" | "off";
@@ -103,8 +112,14 @@ export function resolveOpenAISystemPromptContribution(params: {
) {
return undefined;
}
// tool_call_style is NOT overridden via sectionOverrides because the
// default section includes dynamic channel-specific approval guidance
// from buildExecApprovalPromptGuidance() that varies per runtime
// channel. Overriding it with a static string would lose that dynamic
// content. Instead, the tool-first reinforcement lives in stablePrefix
// so it's always present alongside the default tool_call_style section.
return {
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
sectionOverrides: {
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
...(params.mode === "friendly" ? { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY } : {}),