Skip to content

Commit a72ff8d

Browse files
alexeyr and claude committed
Fix Bencher reporting permanently broken on pushes to main
The benchmark workflow passed --start-point main --start-point-hash <github.event.before> for push-to-main events. Since main IS the base branch, Bencher tried to look up a version of main at the "before" hash — which often didn't exist (e.g., docs-only commits skipped by paths-ignore). This caused a 404, the report was never stored, and subsequent pushes also failed because their "before" hash was also missing. This cascading failure meant no main data was stored after the first version (Jan 18).

Fix: don't pass --start-point args for pushes to main (thresholds are defined inline via --threshold-* args). For PRs/dispatch where the start-point hash may be missing, retry without --start-point-hash so the report still gets stored using the latest available baseline.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent de5b53a commit a72ff8d

1 file changed

Lines changed: 81 additions & 74 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 81 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -553,102 +553,109 @@ jobs:
553553
BOUNDARY=0.95
554554
MAX_SAMPLE=64
555555
556-
# Set branch and start-point based on event type
556+
# Set branch and start-point based on event type.
557+
# Main pushes don't need --start-point because main IS the base
558+
# branch — thresholds are defined inline via --threshold-* args.
559+
# Feature branches (PRs, dispatch) use --start-point to clone
560+
# thresholds from main and pin the comparison baseline.
557561
if [ "${{ github.event_name }}" = "push" ]; then
558562
BRANCH="main"
559-
START_POINT="main"
560-
START_POINT_HASH="${{ github.event.before }}"
561-
EXTRA_ARGS=""
563+
START_POINT_ARGS=""
562564
elif [ "${{ github.event_name }}" = "pull_request" ]; then
563565
BRANCH="$GITHUB_HEAD_REF"
564-
START_POINT="$GITHUB_BASE_REF"
565-
START_POINT_HASH="${{ github.event.pull_request.base.sha }}"
566-
EXTRA_ARGS="--start-point-reset"
566+
START_POINT_ARGS="--start-point $GITHUB_BASE_REF --start-point-hash ${{ github.event.pull_request.base.sha }} --start-point-clone-thresholds --start-point-reset"
567567
elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
568-
# Get merge-base from GitHub API (avoids needing deep fetch)
569-
# See: https://stackoverflow.com/a/74710919
570568
BRANCH="${{ github.ref_name }}"
571-
START_POINT="main"
572-
START_POINT_HASH=$(gh api "repos/${{ github.repository }}/compare/main...$BRANCH" --jq '.merge_base_commit.sha' || true)
573-
574-
if [ -n "$START_POINT_HASH" ]; then
575-
echo "Found merge-base via API: $START_POINT_HASH"
569+
if [ "$BRANCH" = "main" ]; then
570+
START_POINT_ARGS=""
576571
else
577-
echo "⚠️ Could not find merge-base with main via GitHub API, continuing without it"
572+
# Get merge-base from GitHub API (avoids needing deep fetch)
573+
# See: https://stackoverflow.com/a/74710919
574+
START_POINT_HASH=$(gh api "repos/${{ github.repository }}/compare/main...$BRANCH" --jq '.merge_base_commit.sha' || true)
575+
if [ -n "$START_POINT_HASH" ]; then
576+
echo "Found merge-base via API: $START_POINT_HASH"
577+
START_POINT_ARGS="--start-point main --start-point-hash $START_POINT_HASH --start-point-clone-thresholds --start-point-reset"
578+
else
579+
echo "⚠️ Could not find merge-base with main via GitHub API, continuing without hash"
580+
START_POINT_ARGS="--start-point main --start-point-clone-thresholds --start-point-reset"
581+
fi
578582
fi
579-
EXTRA_ARGS=""
580583
else
581584
echo "❌ ERROR: Unexpected event type: ${{ github.event_name }}"
582585
exit 1
583586
fi
584587
585-
# Run bencher and capture HTML output (stdout) while letting stderr go to a file
586-
# so we can distinguish missing baselines (404) from actual regression alerts.
587-
# Use set +e to capture exit code without failing immediately.
588+
# Wrap bencher run in a function so we can retry with different
589+
# start-point args if the pinned hash isn't found in Bencher.
590+
run_bencher() {
591+
local sp_args="$1"
592+
# shellcheck disable=SC2086
593+
bencher run \
594+
--project react-on-rails-t8a9ncxo \
595+
--token '${{ secrets.BENCHER_API_TOKEN }}' \
596+
--branch "$BRANCH" \
597+
$sp_args \
598+
--testbed github-actions \
599+
--adapter json \
600+
--file bench_results/benchmark.json \
601+
--err \
602+
--quiet \
603+
--format html \
604+
--threshold-measure rps \
605+
--threshold-test t_test \
606+
--threshold-max-sample-size $MAX_SAMPLE \
607+
--threshold-lower-boundary $BOUNDARY \
608+
--threshold-upper-boundary _ \
609+
--threshold-measure p50_latency \
610+
--threshold-test t_test \
611+
--threshold-max-sample-size $MAX_SAMPLE \
612+
--threshold-lower-boundary _ \
613+
--threshold-upper-boundary $BOUNDARY \
614+
--threshold-measure p90_latency \
615+
--threshold-test t_test \
616+
--threshold-max-sample-size $MAX_SAMPLE \
617+
--threshold-lower-boundary _ \
618+
--threshold-upper-boundary $BOUNDARY \
619+
--threshold-measure p99_latency \
620+
--threshold-test t_test \
621+
--threshold-max-sample-size $MAX_SAMPLE \
622+
--threshold-lower-boundary _ \
623+
--threshold-upper-boundary $BOUNDARY \
624+
--threshold-measure failed_pct \
625+
--threshold-test t_test \
626+
--threshold-max-sample-size $MAX_SAMPLE \
627+
--threshold-lower-boundary _ \
628+
--threshold-upper-boundary $BOUNDARY
629+
}
630+
631+
# Run bencher and capture HTML output (stdout) while letting stderr
632+
# go to a file so we can inspect failure reasons.
588633
BENCHER_STDERR=$(mktemp)
589634
trap 'rm -f "$BENCHER_STDERR"' EXIT
590635
set +e
591-
bencher run \
592-
--project react-on-rails-t8a9ncxo \
593-
--token '${{ secrets.BENCHER_API_TOKEN }}' \
594-
--branch "$BRANCH" \
595-
--start-point "$START_POINT" \
596-
--start-point-hash "$START_POINT_HASH" \
597-
--start-point-clone-thresholds \
598-
--testbed github-actions \
599-
--adapter json \
600-
--file bench_results/benchmark.json \
601-
--err \
602-
--quiet \
603-
--format html \
604-
--threshold-measure rps \
605-
--threshold-test t_test \
606-
--threshold-max-sample-size $MAX_SAMPLE \
607-
--threshold-lower-boundary $BOUNDARY \
608-
--threshold-upper-boundary _ \
609-
--threshold-measure p50_latency \
610-
--threshold-test t_test \
611-
--threshold-max-sample-size $MAX_SAMPLE \
612-
--threshold-lower-boundary _ \
613-
--threshold-upper-boundary $BOUNDARY \
614-
--threshold-measure p90_latency \
615-
--threshold-test t_test \
616-
--threshold-max-sample-size $MAX_SAMPLE \
617-
--threshold-lower-boundary _ \
618-
--threshold-upper-boundary $BOUNDARY \
619-
--threshold-measure p99_latency \
620-
--threshold-test t_test \
621-
--threshold-max-sample-size $MAX_SAMPLE \
622-
--threshold-lower-boundary _ \
623-
--threshold-upper-boundary $BOUNDARY \
624-
--threshold-measure failed_pct \
625-
--threshold-test t_test \
626-
--threshold-max-sample-size $MAX_SAMPLE \
627-
--threshold-lower-boundary _ \
628-
--threshold-upper-boundary $BOUNDARY \
629-
$EXTRA_ARGS > bench_results/bencher_report.html 2>"$BENCHER_STDERR"
636+
run_bencher "$START_POINT_ARGS" > bench_results/bencher_report.html 2>"$BENCHER_STDERR"
630637
BENCHER_EXIT_CODE=$?
631638
set -e
632639
633640
# Print stderr for visibility in logs
634641
cat "$BENCHER_STDERR" >&2
635642
636-
# If bencher failed due to missing baseline data (404 Not Found) and there
637-
# are no regression alerts, treat as a warning instead of failing the workflow.
638-
# This commonly happens when the PR base commit was a docs-only change
639-
# skipped by paths-ignore, so no benchmark data exists in Bencher.
640-
#
641-
# Safety checks before overriding exit code:
642-
# 1. stderr must contain "404 Not Found" (HTTP status from Bencher API)
643-
# 2. stderr must NOT contain regression indicators ("alert", "threshold",
644-
# or "boundary") to avoid suppressing actual performance regressions
645-
if [ $BENCHER_EXIT_CODE -ne 0 ] && grep -q "404 Not Found" "$BENCHER_STDERR" && ! grep -qiE "alert|threshold violation|boundary violation" "$BENCHER_STDERR"; then
646-
echo "⚠️ Bencher baseline not found for start-point hash '$START_POINT_HASH' — this is expected when the base commit was not benchmarked (e.g., docs-only changes skipped by paths-ignore)"
647-
echo "⚠️ Benchmark data was collected but regression comparison is unavailable for this run"
648-
echo "📋 Bencher stderr output:"
649-
cat "$BENCHER_STDERR"
650-
echo "::warning::Bencher baseline not found for start-point hash '$START_POINT_HASH' — regression comparison unavailable for this run"
651-
BENCHER_EXIT_CODE=0
643+
# If bencher failed because the start-point hash doesn't exist in
644+
# Bencher (e.g., the base commit was a docs-only change skipped by
645+
# paths-ignore), retry without --start-point-hash so the report
646+
# still gets stored using the latest available baseline.
647+
if [ $BENCHER_EXIT_CODE -ne 0 ] && grep -q "Head Version" "$BENCHER_STDERR" && grep -q "not found" "$BENCHER_STDERR"; then
648+
RETRY_ARGS=$(echo "$START_POINT_ARGS" | sed 's/--start-point-hash [^ ]*//')
649+
if [ "$RETRY_ARGS" != "$START_POINT_ARGS" ]; then
650+
echo ""
651+
echo "⚠️ Start-point hash not found in Bencher — retrying without --start-point-hash (will use latest baseline)"
652+
echo "::warning::Start-point hash not found in Bencher — falling back to latest baseline for comparison"
653+
set +e
654+
run_bencher "$RETRY_ARGS" > bench_results/bencher_report.html 2>"$BENCHER_STDERR"
655+
BENCHER_EXIT_CODE=$?
656+
set -e
657+
cat "$BENCHER_STDERR" >&2
658+
fi
652659
fi
653660
rm -f "$BENCHER_STDERR"
654661

0 commit comments

Comments
 (0)