From 4f4be666eb7041b00656681c4ef835c43cf78ebf Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 17 May 2026 13:11:27 +0100 Subject: [PATCH] chore: update codex review fallback handling --- .agents/skills/codex-review/SKILL.md | 21 +- .../skills/codex-review/scripts/codex-review | 263 +++++++++++++++++- 2 files changed, 265 insertions(+), 19 deletions(-) diff --git a/.agents/skills/codex-review/SKILL.md b/.agents/skills/codex-review/SKILL.md index 10fdbd33c06..bd44165ba7d 100644 --- a/.agents/skills/codex-review/SKILL.md +++ b/.agents/skills/codex-review/SKILL.md @@ -19,9 +19,9 @@ Use when: - Read dependency docs/source/types when the finding depends on external behavior. - Reject unrealistic edge cases, speculative risks, broad rewrites, and fixes that over-complicate the codebase. - Prefer small fixes at the right ownership boundary; no refactor unless it clearly improves the bug class. -- Keep going until Codex review returns no accepted/actionable findings. -- If a review-triggered fix changes code, rerun focused tests and rerun Codex review. -- Never switch or override the review model. If the review hits model capacity, retry the same command a few times with the same model. The helper runs nested review in yolo/full-access mode by default; use `--no-yolo` only when intentionally testing sandbox behavior. +- Keep going until the selected review path returns no accepted/actionable findings. +- If a review-triggered fix changes code, rerun focused tests and rerun the review helper. +- Default to Codex review. If Codex is unavailable or exits with an error, the helper may fall back to `claude -p`; `pi -p` and `opencode run` are explicit reviewer/fallback options. The helper runs nested Codex review in yolo/full-access mode by default; use `--no-yolo` only when intentionally testing sandbox behavior. - Stop as soon as the review command/helper exits 0 with no accepted/actionable findings. Do not run an extra direct `codex review` just to get a nicer "clean" line, a second opinion, or clearer closeout wording. - Treat the helper's successful exit plus absence of actionable findings as the clean review result, even if the underlying Codex CLI output is terse. - If rejecting a finding as intentional/not worth fixing, add a brief inline code comment only when it explains a real invariant or ownership decision that future reviewers should know. @@ -66,7 +66,7 @@ codex review --commit HEAD or with the helper: ```bash -/Users/steipete/Projects/agent-scripts/skills/codex-review/scripts/codex-review --mode commit --commit HEAD +.agents/skills/codex-review/scripts/codex-review --mode commit --commit HEAD ``` Use commit review for already-landed or already-pushed work on `main`. Reviewing @@ -79,7 +79,7 @@ with `--base`. Format first if formatting can change line locations. Then it is OK to run tests and review in parallel: ```bash -scripts/codex-review --parallel-tests "" +.agents/skills/codex-review/scripts/codex-review --parallel-tests "" ``` Tradeoff: tests may force code changes that stale the review. If tests or review lead to code edits, rerun the affected tests and rerun review until no accepted/actionable findings remain. Once that rerun exits cleanly, stop; do not spend another long review cycle on redundant confirmation. @@ -98,13 +98,7 @@ Run inline only for tiny changes or when subagents are unavailable. Bundled helper: ```bash -~/.codex/skills/codex-review/scripts/codex-review --help -``` - -If installed from `agent-scripts`, path is: - -```bash -/Users/steipete/Projects/agent-scripts/skills/codex-review/scripts/codex-review --help +.agents/skills/codex-review/scripts/codex-review --help ``` The helper: @@ -113,6 +107,9 @@ The helper: - otherwise uses `origin/main` for non-main branches - use `--mode commit --commit ` for already-committed work, especially clean `main` after landing - should be left in `--mode auto` or forced to `--mode branch` for PR/branch work; do not force `--mode local` after committing +- supports `--reviewer codex|claude|pi|opencode|auto`; `auto` runs Codex first +- supports `--fallback-reviewer claude|pi|opencode|none`; default is `claude` +- falls back only when Codex is unavailable or exits nonzero, not when Codex reports findings - writes only to stdout unless `--output` or `CODEX_REVIEW_OUTPUT` is set - supports `--dry-run`, `--parallel-tests`, and commit refs - runs nested review with `--dangerously-bypass-approvals-and-sandbox` by default diff --git a/.agents/skills/codex-review/scripts/codex-review b/.agents/skills/codex-review/scripts/codex-review index 808d6d15673..1d3363ba7d2 100755 --- a/.agents/skills/codex-review/scripts/codex-review +++ b/.agents/skills/codex-review/scripts/codex-review @@ -10,7 +10,14 @@ Options: Target selection. Default: auto. --base REF Base ref for branch review. Default: PR base or origin/main. --commit REF Commit ref for commit review. Default: HEAD. + --reviewer codex|claude|pi|opencode|auto + Review engine. Default: auto (Codex, fallback reviewer on error). + --fallback-reviewer claude|pi|opencode|none + Fallback when Codex is unavailable or exits nonzero. Default: claude. --codex-bin PATH Codex binary. Default: codex. + --claude-bin PATH Claude binary. Default: claude. + --pi-bin PATH Pi binary. Default: pi. + --opencode-bin PATH OpenCode binary. Default: opencode. --full-access Keep yolo/full-access mode enabled. Default. --no-yolo Run nested Codex review with normal sandbox/approval prompts. --output FILE Also save output to file. @@ -29,7 +36,12 @@ EOF mode=auto base_ref= commit_ref=HEAD +reviewer=${CODEX_REVIEW_REVIEWER:-auto} +fallback_reviewer=${CODEX_REVIEW_FALLBACK_REVIEWER:-claude} codex_bin=${CODEX_BIN:-codex} +claude_bin=${CLAUDE_BIN:-claude} +pi_bin=${PI_BIN:-pi} +opencode_bin=${OPENCODE_BIN:-opencode} codex_args=() yolo=${CODEX_REVIEW_YOLO:-1} output=${CODEX_REVIEW_OUTPUT:-} @@ -50,10 +62,30 @@ while [[ $# -gt 0 ]]; do commit_ref=${2:-} shift 2 ;; + --reviewer) + reviewer=${2:-} + shift 2 + ;; + --fallback-reviewer) + fallback_reviewer=${2:-} + shift 2 + ;; --codex-bin) codex_bin=${2:-} shift 2 ;; + --claude-bin) + claude_bin=${2:-} + shift 2 + ;; + --pi-bin) + pi_bin=${2:-} + shift 2 + ;; + --opencode-bin) + opencode_bin=${2:-} + shift 2 + ;; --full-access) yolo=1 shift @@ -98,7 +130,23 @@ case "$mode" in ;; esac -git rev-parse --show-toplevel >/dev/null +case "$reviewer" in + auto|codex|claude|pi|opencode) ;; + *) + echo "invalid --reviewer: $reviewer" >&2 + exit 2 + ;; +esac + +case "$fallback_reviewer" in + claude|pi|opencode|none) ;; + *) + echo "invalid --fallback-reviewer: $fallback_reviewer" >&2 + exit 2 + ;; +esac + +repo_root=$(git rev-parse --show-toplevel) current_branch=$(git branch --show-current 2>/dev/null || true) dirty=false @@ -146,9 +194,17 @@ printf 'branch: %s\n' "${current_branch:-detached}" if [[ -n "$pr_url" ]]; then printf 'pr: %s\n' "$pr_url" fi -printf 'review:' -printf ' %q' "${review_cmd[@]}" -printf '\n' +printf 'reviewer: %s\n' "$reviewer" +if [[ "$reviewer" == auto ]]; then + printf 'fallback-reviewer: %s\n' "$fallback_reviewer" +fi +if [[ "$reviewer" == auto || "$reviewer" == codex ]]; then + printf 'review:' + printf ' %q' "${review_cmd[@]}" + printf '\n' +else + printf 'review: %s prompt review\n' "$reviewer" +fi if [[ -n "$parallel_tests" ]]; then printf 'tests: %s\n' "$parallel_tests" fi @@ -175,11 +231,16 @@ if [[ -z "$review_output" ]]; then review_output=$(mktemp) review_output_is_temp=true fi +mkdir -p "$(dirname "$review_output")" +: > "$review_output" cleanup() { if [[ "${review_output_is_temp:-false}" == true && -n "${review_output:-}" ]]; then rm -f "$review_output" fi + if [[ -n "${prompt_file:-}" ]]; then + rm -f "$prompt_file" + fi } trap cleanup EXIT @@ -188,6 +249,166 @@ run_review() { "${review_cmd[@]}" 2>&1 | tee "$review_output" } +diff_for_review() { + case "$review_kind" in + local) + git -C "$repo_root" diff --stat + git -C "$repo_root" diff --cached --stat + git -C "$repo_root" diff --find-renames + git -C "$repo_root" diff --cached --find-renames + while IFS= read -r untracked_file; do + [[ -n "$untracked_file" ]] || continue + git -C "$repo_root" diff --no-index -- /dev/null "$untracked_file" || true + done < <(git -C "$repo_root" ls-files --others --exclude-standard) + ;; + commit) + git -C "$repo_root" show --find-renames --stat --format=fuller "$commit_ref" + git -C "$repo_root" show --find-renames --format=medium "$commit_ref" + ;; + branch) + git -C "$repo_root" diff --find-renames --stat "$base_ref"...HEAD + git -C "$repo_root" diff --find-renames "$base_ref"...HEAD + ;; + esac +} + +build_prompt_file() { + prompt_file=$(mktemp) + { + cat <] Short title + File: path:line + Why: one sentence + Fix: one sentence +- If no accepted/actionable findings, output exactly: + codex-review clean: no accepted/actionable findings reported + +Diff: +EOF + diff_for_review + } > "$prompt_file" || return +} + +run_prompt_reviewer() { + local selected=$1 + local reviewer_output + local status=0 + + if ! build_prompt_file; then + rm -f "${prompt_file:-}" + prompt_file= + return 1 + fi + reviewer_output=$(mktemp) + mkdir -p "$(dirname "$review_output")" + + case "$selected" in + claude) + if ! command -v "$claude_bin" >/dev/null 2>&1; then + echo "fallback reviewer unavailable: $claude_bin" >&2 + status=127 + elif printf 'fallback: claude -p\n' | tee -a "$review_output"; then + "$claude_bin" --tools "" --no-session-persistence -p < "$prompt_file" 2>&1 | tee -a "$review_output" "$reviewer_output" + status=$? + else + status=$? + fi + ;; + pi) + if ! command -v "$pi_bin" >/dev/null 2>&1; then + echo "fallback reviewer unavailable: $pi_bin" >&2 + status=127 + elif printf 'fallback: pi -p\n' | tee -a "$review_output"; then + "$pi_bin" --no-tools --no-session -p < "$prompt_file" 2>&1 | tee -a "$review_output" "$reviewer_output" + status=$? + else + status=$? + fi + ;; + opencode) + if ! command -v "$opencode_bin" >/dev/null 2>&1; then + echo "fallback reviewer unavailable: $opencode_bin" >&2 + status=127 + elif printf 'fallback: opencode run\n' | tee -a "$review_output"; then + "$opencode_bin" run --pure --dir "$(dirname "$prompt_file")" --file "$prompt_file" \ + "Review the attached prompt file. Do not modify files." 2>&1 | tee -a "$review_output" "$reviewer_output" + status=$? + else + status=$? + fi + ;; + *) + echo "unsupported prompt reviewer: $selected" >&2 + status=2 + ;; + esac + if [[ "$status" == 0 ]]; then + if grep -Eq '\[P[0-3]\]' "$reviewer_output"; then + status=1 + elif ! grep -q '[^[:space:]]' "$reviewer_output"; then + status=1 + elif ! grep -Fxq 'codex-review clean: no accepted/actionable findings reported' "$reviewer_output"; then + status=1 + fi + fi + rm -f "$reviewer_output" + rm -f "$prompt_file" + prompt_file= + return "$status" +} + +run_selected_review() { + local selected=$1 + case "$selected" in + codex) + if ! command -v "$codex_bin" >/dev/null 2>&1; then + echo "codex reviewer unavailable: $codex_bin" >&2 + return 127 + fi + run_review + ;; + claude|pi|opencode) + run_prompt_reviewer "$selected" + ;; + *) + echo "unsupported reviewer: $selected" >&2 + return 2 + ;; + esac +} + +run_auto_review() { + run_selected_review codex + local status=$? + if [[ "$status" == 0 ]]; then + return 0 + fi + if (( status > 128 && status < 192 )); then + return "$status" + fi + if review_output_has_findings; then + return "$status" + fi + if [[ "$fallback_reviewer" == none ]]; then + return "$status" + fi + printf 'codex-review warning: codex exited %s; falling back to %s\n' "$status" "$fallback_reviewer" >&2 + run_selected_review "$fallback_reviewer" +} + elapsed_since() { local started_at=$1 local finished_at @@ -208,8 +429,28 @@ review_output_empty() { [[ ! -s "$review_output" ]] || ! grep -q '[^[:space:]]' "$review_output" } +review_findings_text() { + if grep -Fxq 'codex' "$review_output"; then + awk ' + $0 == "codex" { + capture = 1 + output = $0 ORS + next + } + capture { + output = output $0 ORS + } + END { + printf "%s", output + } + ' "$review_output" + return + fi + cat "$review_output" +} + review_output_has_findings() { - grep -Eq '\[P[0-3]\]' "$review_output" + review_findings_text | grep -Eq '\[P[0-3]\]' } report_clean_review_or_fail() { @@ -232,7 +473,11 @@ report_clean_review_or_fail() { if [[ -z "$parallel_tests" ]]; then review_started_at=$(date +%s) set +e - run_review + if [[ "$reviewer" == auto ]]; then + run_auto_review + else + run_selected_review "$reviewer" + fi review_status=$? review_elapsed_seconds=$(elapsed_since "$review_started_at") set -e @@ -250,7 +495,11 @@ tests_status_file=$(mktemp) ( set +e review_started_at=$(date +%s) - run_review + if [[ "$reviewer" == auto ]]; then + run_auto_review + else + run_selected_review "$reviewer" + fi status=$? elapsed=$(elapsed_since "$review_started_at") printf '%s\n' "$status" > "$review_status_file"