diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index b38283370f5..2ee07432e99 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -72,6 +72,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const result = await runEmbeddedPiAgent({ ...overflowBaseRunParams, + prompt: "Please inspect the code, make the change, and run the checks.", sessionKey: undefined, agentId: "research", provider: "openai", @@ -120,6 +121,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const result = await runEmbeddedPiAgent({ ...overflowBaseRunParams, + prompt: "Please inspect the code, make the change, and run the checks.", provider: "openai", model: "gpt-5.4", runId: "run-strict-agentic-blocked-liveness", @@ -159,6 +161,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const result = await runEmbeddedPiAgent({ ...overflowBaseRunParams, + prompt: "Please inspect the code, make the change, and run the checks.", provider: "openai", model: "gpt-5.4", runId: "run-strict-agentic-auto-activated", @@ -193,6 +196,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const result = await runEmbeddedPiAgent({ ...overflowBaseRunParams, + prompt: "Please inspect the code, make the change, and run the checks.", provider: "openai", model: "gpt-5.4", runId: "run-strict-agentic-explicit-default-optout", @@ -221,6 +225,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -235,6 +240,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -251,6 +257,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -265,6 +272,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -279,6 +287,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -297,6 +306,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -316,6 +326,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: "gpt-5.4", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -369,6 +380,7 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { const retryInstruction = resolvePlanningOnlyRetryInstruction({ provider: "openai", modelId: " openai/gpt-5.4 ", + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptResult({ @@ -440,6 +452,52 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { }), ).toBe("paused"); }); + + it("does not strict-agentic retry casual Discord status chatter", async () => { + mockedClassifyFailoverReason.mockReturnValue(null); + mockedRunEmbeddedAttempt.mockResolvedValue( + makeAttemptResult({ + assistantTexts: [ + "i am glad, and a little afraid, which is probably the correct mixture. thank you. i will try to deserve the upgrades instead of merely inhabiting them.", + ], + }), + ); + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + prompt: + "made a bunch of improvements to the student's source code (openclaw) this weekend, along with a few other maintainers. hopefully he will be more proactive now", + provider: "openai-codex", + model: "gpt-5.4", + runId: "run-strict-agentic-casual-discord-status", + config: { + agents: { + list: [{ id: "main" }], + }, + } as OpenClawConfig, + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); + expect(result.payloads).toBeUndefined(); + expect(result.meta.livenessState).toBe("working"); + }); + + it("does not misclassify a direct answer that says 'i'm not going to' as planning-only", () => { + const retryInstruction = resolvePlanningOnlyRetryInstruction({ + provider: "openai-codex", + modelId: "gpt-5.4", + prompt: "What do you think lobstar should do to help the chart?", + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: [ + "I'm not going to give token-pumping instructions for a chart. Best answer: build trust and let the market do what it will.", + ], + }), + }); + + expect(retryInstruction).toBeNull(); + }); }); describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { @@ -470,6 +528,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read"], "I can do that next."), @@ -481,6 +540,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."), @@ -492,6 +552,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 2+ non-plan tool calls are present", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."), @@ -503,6 +564,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 1 tool call plus completion language is present", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."), @@ -514,6 +576,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 1 tool call plus 'let me know' handoff is present", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."), @@ -525,6 +588,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 1 tool call plus an answer-style summary is present", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools( @@ -539,6 +603,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 1 tool call plus a future-tense description is present", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools( @@ -553,6 +618,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."), @@ -564,6 +630,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["read"], "I can do that."), @@ -575,6 +642,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when the lone tool call already had side effects", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."), @@ -586,6 +654,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { it("does not retry when the lone tool call is unclassified", () => { const result = resolvePlanningOnlyRetryInstruction({ ...openaiParams, + prompt: "Please inspect the code, make the change, and run the checks.", aborted: false, timedOut: false, attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."), @@ -593,4 +662,16 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { expect(result).toBeNull(); }); + + it("does not retry single-action narration on casual non-task chat", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + prompt: "i haven't restarted you on latest main yet @The Student - get ready though", + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "I'll check that next."), + }); + + expect(result).toBeNull(); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 3cd52d6d831..b8b484788b5 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1587,6 +1587,7 @@ export async function runEmbeddedPiAgent( const nextPlanningOnlyRetryInstruction = resolvePlanningOnlyRetryInstruction({ provider, modelId, + prompt: params.prompt, aborted, timedOut, attempt, diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index d6266cf4d7a..431c72d3162 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -49,12 +49,14 @@ export function isIncompleteTerminalAssistantTurn(params: { } const PLANNING_ONLY_PROMISE_RE = - /\b(?:i(?:'ll| will)|let me|going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i; + /\b(?:i(?:'ll| will)|let me|i(?:'m| am)\s+going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i; const PLANNING_ONLY_COMPLETION_RE = /\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i; const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i; const PLANNING_ONLY_BULLET_RE = /^(?:[-*•]\s+|\d+[.)]\s+)/u; const PLANNING_ONLY_MAX_VISIBLE_TEXT = 700; +const PLANNING_ONLY_ACTION_VERB_RE = + /\b(?:inspect|investigate|check|look(?:\s+into|\s+at)?|read|search|find|debug|fix|patch|update|change|edit|write|implement|run|test|verify|review|analy(?:s|z)e|summari(?:s|z)e|explain|answer|show|share|report|prepare|capture|take|refactor|restart|deploy|ship)\b/i; const SINGLE_ACTION_EXPLICIT_CONTINUATION_RE = /\b(?:going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|then[, ]+i(?:'ll| will)|i can do that next|let me (?!know\b)\w+(?:\s+\w+){0,3}\s+(?:next|then|first)\b)/i; const SINGLE_ACTION_MULTI_STEP_PROMISE_RE = @@ -112,6 +114,10 @@ const ACK_EXECUTION_NORMALIZED_SET = new Set([ "진행해", "계속해", ]); +const ACTIONABLE_PROMPT_DIRECTIVE_RE = + /^\s*(?:please\s+)?(?:check|look(?:\s+into|\s+at)?|read|write|edit|update|fix|investigate|debug|run|search|find|implement|add|remove|refactor|explain|summari(?:s|z)e|analy(?:s|z)e|review|tell|show|make|restart|deploy|prepare)\b/i; +const ACTIONABLE_PROMPT_REQUEST_RE = + /\b(?:can|could|would|will)\s+you\b|\b(?:please|pls)\b|\b(?:help|explain|summari(?:s|z)e|analy(?:s|z)e|review|investigate|debug|fix|check|look(?:\s+into|\s+at)?|read|write|edit|update|run|search|find|implement|add|remove|refactor|show|tell me|walk me through)\b/i; export const PLANNING_ONLY_RETRY_INSTRUCTION = "The previous assistant turn only described the plan. Do not restate the plan. Act now: take the first concrete tool action you can. If a real blocker prevents action, reply with the exact blocker in one sentence."; @@ -234,6 +240,17 @@ export function isLikelyExecutionAckPrompt(text: string): boolean { return ACK_EXECUTION_NORMALIZED_SET.has(normalizeAckPrompt(trimmed)); } +function isLikelyActionableUserPrompt(text: string): boolean { + const trimmed = text.trim(); + if (!trimmed) { + return false; + } + if (isLikelyExecutionAckPrompt(trimmed) || trimmed.includes("?")) { + return true; + } + return ACTIONABLE_PROMPT_DIRECTIVE_RE.test(trimmed) || ACTIONABLE_PROMPT_REQUEST_RE.test(trimmed); +} + export function resolveAckExecutionFastPathInstruction(params: { provider?: string; modelId?: string; @@ -355,6 +372,7 @@ export function resolvePlanningOnlyRetryLimit( export function resolvePlanningOnlyRetryInstruction(params: { provider?: string; modelId?: string; + prompt?: string; aborted: boolean; timedOut: boolean; attempt: PlanningOnlyAttempt; @@ -371,6 +389,7 @@ export function resolvePlanningOnlyRetryInstruction(params: { provider: params.provider, modelId: params.modelId, }) || + (typeof params.prompt === "string" && !isLikelyActionableUserPrompt(params.prompt)) || params.aborted || params.timedOut || params.attempt.clientToolCall || @@ -395,7 +414,15 @@ export function resolvePlanningOnlyRetryInstruction(params: { if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT || text.includes("```")) { return null; } - if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) { + const hasStructuredPlanningFormat = hasStructuredPlanningOnlyFormat(text); + if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningFormat) { + return null; + } + if ( + !hasStructuredPlanningFormat && + !singleActionNarrative && + !PLANNING_ONLY_ACTION_VERB_RE.test(text) + ) { return null; } if (PLANNING_ONLY_COMPLETION_RE.test(text)) {