chore: update codex review fallback handling

This commit is contained in:
Peter Steinberger
2026-05-17 13:11:27 +01:00
parent e3621f5057
commit 4f4be666eb
2 changed files with 265 additions and 19 deletions

View File

@@ -19,9 +19,9 @@ Use when:
- Read dependency docs/source/types when the finding depends on external behavior.
- Reject unrealistic edge cases, speculative risks, broad rewrites, and fixes that over-complicate the codebase.
- Prefer small fixes at the right ownership boundary; no refactor unless it clearly improves the bug class.
- Keep going until Codex review returns no accepted/actionable findings.
- If a review-triggered fix changes code, rerun focused tests and rerun Codex review.
- Never switch or override the review model. If the review hits model capacity, retry the same command a few times with the same model. The helper runs nested review in yolo/full-access mode by default; use `--no-yolo` only when intentionally testing sandbox behavior.
- Keep going until the selected review path returns no accepted/actionable findings.
- If a review-triggered fix changes code, rerun focused tests and rerun the review helper.
- Default to Codex review. If Codex is unavailable or exits with an error, the helper may fall back to `claude -p`; `pi -p` and `opencode run` are explicit reviewer/fallback options. The helper runs nested Codex review in yolo/full-access mode by default; use `--no-yolo` only when intentionally testing sandbox behavior.
- Stop as soon as the review command/helper exits 0 with no accepted/actionable findings. Do not run an extra direct `codex review` just to get a nicer "clean" line, a second opinion, or clearer closeout wording.
- Treat the helper's successful exit plus absence of actionable findings as the clean review result, even if the underlying Codex CLI output is terse.
- If rejecting a finding as intentional/not worth fixing, add a brief inline code comment only when it explains a real invariant or ownership decision that future reviewers should know.
@@ -66,7 +66,7 @@ codex review --commit HEAD
or with the helper:
```bash
/Users/steipete/Projects/agent-scripts/skills/codex-review/scripts/codex-review --mode commit --commit HEAD
.agents/skills/codex-review/scripts/codex-review --mode commit --commit HEAD
```
Use commit review for already-landed or already-pushed work on `main`. Reviewing
@@ -79,7 +79,7 @@ with `--base`.
Format first if formatting can change line locations. Then it is OK to run tests and review in parallel:
```bash
scripts/codex-review --parallel-tests "<focused test command>"
.agents/skills/codex-review/scripts/codex-review --parallel-tests "<focused test command>"
```
Tradeoff: tests may force code changes that stale the review. If tests or review lead to code edits, rerun the affected tests and rerun review until no accepted/actionable findings remain. Once that rerun exits cleanly, stop; do not spend another long review cycle on redundant confirmation.
@@ -98,13 +98,7 @@ Run inline only for tiny changes or when subagents are unavailable.
Bundled helper:
```bash
~/.codex/skills/codex-review/scripts/codex-review --help
```
If installed from `agent-scripts`, path is:
```bash
/Users/steipete/Projects/agent-scripts/skills/codex-review/scripts/codex-review --help
.agents/skills/codex-review/scripts/codex-review --help
```
The helper:
@@ -113,6 +107,9 @@ The helper:
- otherwise uses `origin/main` for non-main branches
- use `--mode commit --commit <ref>` for already-committed work, especially clean `main` after landing
- should be left in `--mode auto` or forced to `--mode branch` for PR/branch work; do not force `--mode local` after committing
- supports `--reviewer codex|claude|pi|opencode|auto`; `auto` runs Codex first
- supports `--fallback-reviewer claude|pi|opencode|none`; default is `claude`
- falls back only when Codex is unavailable or exits nonzero, not when Codex reports findings
- writes only to stdout unless `--output` or `CODEX_REVIEW_OUTPUT` is set
- supports `--dry-run`, `--parallel-tests`, and commit refs
- runs nested review with `--dangerously-bypass-approvals-and-sandbox` by default

View File

@@ -10,7 +10,14 @@ Options:
Target selection. Default: auto.
--base REF Base ref for branch review. Default: PR base or origin/main.
--commit REF Commit ref for commit review. Default: HEAD.
--reviewer codex|claude|pi|opencode|auto
Review engine. Default: auto (Codex, fallback reviewer on error).
--fallback-reviewer claude|pi|opencode|none
Fallback when Codex is unavailable or exits nonzero. Default: claude.
--codex-bin PATH Codex binary. Default: codex.
--claude-bin PATH Claude binary. Default: claude.
--pi-bin PATH Pi binary. Default: pi.
--opencode-bin PATH OpenCode binary. Default: opencode.
--full-access Keep yolo/full-access mode enabled. Default.
--no-yolo Run nested Codex review with normal sandbox/approval prompts.
--output FILE Also save output to file.
@@ -29,7 +36,12 @@ EOF
mode=auto
base_ref=
commit_ref=HEAD
reviewer=${CODEX_REVIEW_REVIEWER:-auto}
fallback_reviewer=${CODEX_REVIEW_FALLBACK_REVIEWER:-claude}
codex_bin=${CODEX_BIN:-codex}
claude_bin=${CLAUDE_BIN:-claude}
pi_bin=${PI_BIN:-pi}
opencode_bin=${OPENCODE_BIN:-opencode}
codex_args=()
yolo=${CODEX_REVIEW_YOLO:-1}
output=${CODEX_REVIEW_OUTPUT:-}
@@ -50,10 +62,30 @@ while [[ $# -gt 0 ]]; do
commit_ref=${2:-}
shift 2
;;
--reviewer)
reviewer=${2:-}
shift 2
;;
--fallback-reviewer)
fallback_reviewer=${2:-}
shift 2
;;
--codex-bin)
codex_bin=${2:-}
shift 2
;;
--claude-bin)
claude_bin=${2:-}
shift 2
;;
--pi-bin)
pi_bin=${2:-}
shift 2
;;
--opencode-bin)
opencode_bin=${2:-}
shift 2
;;
--full-access)
yolo=1
shift
@@ -98,7 +130,23 @@ case "$mode" in
;;
esac
git rev-parse --show-toplevel >/dev/null
case "$reviewer" in
auto|codex|claude|pi|opencode) ;;
*)
echo "invalid --reviewer: $reviewer" >&2
exit 2
;;
esac
case "$fallback_reviewer" in
claude|pi|opencode|none) ;;
*)
echo "invalid --fallback-reviewer: $fallback_reviewer" >&2
exit 2
;;
esac
repo_root=$(git rev-parse --show-toplevel)
current_branch=$(git branch --show-current 2>/dev/null || true)
dirty=false
@@ -146,9 +194,17 @@ printf 'branch: %s\n' "${current_branch:-detached}"
if [[ -n "$pr_url" ]]; then
printf 'pr: %s\n' "$pr_url"
fi
printf 'review:'
printf ' %q' "${review_cmd[@]}"
printf '\n'
printf 'reviewer: %s\n' "$reviewer"
if [[ "$reviewer" == auto ]]; then
printf 'fallback-reviewer: %s\n' "$fallback_reviewer"
fi
if [[ "$reviewer" == auto || "$reviewer" == codex ]]; then
printf 'review:'
printf ' %q' "${review_cmd[@]}"
printf '\n'
else
printf 'review: %s prompt review\n' "$reviewer"
fi
if [[ -n "$parallel_tests" ]]; then
printf 'tests: %s\n' "$parallel_tests"
fi
@@ -175,11 +231,16 @@ if [[ -z "$review_output" ]]; then
review_output=$(mktemp)
review_output_is_temp=true
fi
mkdir -p "$(dirname "$review_output")"
: > "$review_output"
cleanup() {
if [[ "${review_output_is_temp:-false}" == true && -n "${review_output:-}" ]]; then
rm -f "$review_output"
fi
if [[ -n "${prompt_file:-}" ]]; then
rm -f "$prompt_file"
fi
}
trap cleanup EXIT
@@ -188,6 +249,166 @@ run_review() {
"${review_cmd[@]}" 2>&1 | tee "$review_output"
}
diff_for_review() {
case "$review_kind" in
local)
git -C "$repo_root" diff --stat
git -C "$repo_root" diff --cached --stat
git -C "$repo_root" diff --find-renames
git -C "$repo_root" diff --cached --find-renames
while IFS= read -r untracked_file; do
[[ -n "$untracked_file" ]] || continue
git -C "$repo_root" diff --no-index -- /dev/null "$untracked_file" || true
done < <(git -C "$repo_root" ls-files --others --exclude-standard)
;;
commit)
git -C "$repo_root" show --find-renames --stat --format=fuller "$commit_ref"
git -C "$repo_root" show --find-renames --format=medium "$commit_ref"
;;
branch)
git -C "$repo_root" diff --find-renames --stat "$base_ref"...HEAD
git -C "$repo_root" diff --find-renames "$base_ref"...HEAD
;;
esac
}
build_prompt_file() {
prompt_file=$(mktemp)
{
cat <<EOF
You are performing a closeout code review for the current repository.
Review target: $review_kind
Branch: ${current_branch:-detached}
Base: ${base_ref:-}
Commit: ${commit_ref:-}
Rules:
- Review only the diff below.
- Do not modify files.
- Prioritize correctness bugs, regressions, security issues, and missing tests.
- Ignore speculative edge cases and broad rewrites.
- For each accepted/actionable finding, use exactly this format:
[P<0-3>] Short title
File: path:line
Why: one sentence
Fix: one sentence
- If no accepted/actionable findings, output exactly:
codex-review clean: no accepted/actionable findings reported
Diff:
EOF
diff_for_review
} > "$prompt_file" || return
}
run_prompt_reviewer() {
local selected=$1
local reviewer_output
local status=0
if ! build_prompt_file; then
rm -f "${prompt_file:-}"
prompt_file=
return 1
fi
reviewer_output=$(mktemp)
mkdir -p "$(dirname "$review_output")"
case "$selected" in
claude)
if ! command -v "$claude_bin" >/dev/null 2>&1; then
echo "fallback reviewer unavailable: $claude_bin" >&2
status=127
elif printf 'fallback: claude -p\n' | tee -a "$review_output"; then
"$claude_bin" --tools "" --no-session-persistence -p < "$prompt_file" 2>&1 | tee -a "$review_output" "$reviewer_output"
status=$?
else
status=$?
fi
;;
pi)
if ! command -v "$pi_bin" >/dev/null 2>&1; then
echo "fallback reviewer unavailable: $pi_bin" >&2
status=127
elif printf 'fallback: pi -p\n' | tee -a "$review_output"; then
"$pi_bin" --no-tools --no-session -p < "$prompt_file" 2>&1 | tee -a "$review_output" "$reviewer_output"
status=$?
else
status=$?
fi
;;
opencode)
if ! command -v "$opencode_bin" >/dev/null 2>&1; then
echo "fallback reviewer unavailable: $opencode_bin" >&2
status=127
elif printf 'fallback: opencode run\n' | tee -a "$review_output"; then
"$opencode_bin" run --pure --dir "$(dirname "$prompt_file")" --file "$prompt_file" \
"Review the attached prompt file. Do not modify files." 2>&1 | tee -a "$review_output" "$reviewer_output"
status=$?
else
status=$?
fi
;;
*)
echo "unsupported prompt reviewer: $selected" >&2
status=2
;;
esac
if [[ "$status" == 0 ]]; then
if grep -Eq '\[P[0-3]\]' "$reviewer_output"; then
status=1
elif ! grep -q '[^[:space:]]' "$reviewer_output"; then
status=1
elif ! grep -Fxq 'codex-review clean: no accepted/actionable findings reported' "$reviewer_output"; then
status=1
fi
fi
rm -f "$reviewer_output"
rm -f "$prompt_file"
prompt_file=
return "$status"
}
run_selected_review() {
local selected=$1
case "$selected" in
codex)
if ! command -v "$codex_bin" >/dev/null 2>&1; then
echo "codex reviewer unavailable: $codex_bin" >&2
return 127
fi
run_review
;;
claude|pi|opencode)
run_prompt_reviewer "$selected"
;;
*)
echo "unsupported reviewer: $selected" >&2
return 2
;;
esac
}
run_auto_review() {
run_selected_review codex
local status=$?
if [[ "$status" == 0 ]]; then
return 0
fi
if (( status > 128 && status < 192 )); then
return "$status"
fi
if review_output_has_findings; then
return "$status"
fi
if [[ "$fallback_reviewer" == none ]]; then
return "$status"
fi
printf 'codex-review warning: codex exited %s; falling back to %s\n' "$status" "$fallback_reviewer" >&2
run_selected_review "$fallback_reviewer"
}
elapsed_since() {
local started_at=$1
local finished_at
@@ -208,8 +429,28 @@ review_output_empty() {
[[ ! -s "$review_output" ]] || ! grep -q '[^[:space:]]' "$review_output"
}
review_findings_text() {
if grep -Fxq 'codex' "$review_output"; then
awk '
$0 == "codex" {
capture = 1
output = $0 ORS
next
}
capture {
output = output $0 ORS
}
END {
printf "%s", output
}
' "$review_output"
return
fi
cat "$review_output"
}
review_output_has_findings() {
grep -Eq '\[P[0-3]\]' "$review_output"
review_findings_text | grep -Eq '\[P[0-3]\]'
}
report_clean_review_or_fail() {
@@ -232,7 +473,11 @@ report_clean_review_or_fail() {
if [[ -z "$parallel_tests" ]]; then
review_started_at=$(date +%s)
set +e
run_review
if [[ "$reviewer" == auto ]]; then
run_auto_review
else
run_selected_review "$reviewer"
fi
review_status=$?
review_elapsed_seconds=$(elapsed_since "$review_started_at")
set -e
@@ -250,7 +495,11 @@ tests_status_file=$(mktemp)
(
set +e
review_started_at=$(date +%s)
run_review
if [[ "$reviewer" == auto ]]; then
run_auto_review
else
run_selected_review "$reviewer"
fi
status=$?
elapsed=$(elapsed_since "$review_started_at")
printf '%s\n' "$status" > "$review_status_file"