Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 82 additions & 26 deletions .github/actions/pr-review/scripts/fetch-pr-context.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@
import re
import subprocess
import sys
import time
from typing import Optional

# Matches an HTML comment of the form `<!-- review-state: {...} -->` and
# captures the brace-delimited payload. re.DOTALL lets the payload span
# multiple lines.
REVIEW_STATE_PATTERN = re.compile(
r"<!--\s*review-state:\s*(\{.*?\})\s*-->", re.DOTALL
)
# Captures the three-digit code from text such as "HTTP 404"; applied to the
# captured output of failed `gh api` calls to decide whether to retry.
HTTP_STATUS_PATTERN = re.compile(r"HTTP\s+(\d{3})")

# Bot logins that post review comments via GitHub Actions.
BOT_LOGINS = {"github-actions[bot]", "github-actions"}
# Heading strings for review summary comments: the current default and the
# legacy form that comment headings are compared against.
DEFAULT_REVIEW_SUMMARY_HEADING = "### Connector PR Review:"
LEGACY_REVIEW_SUMMARY_HEADING = "### PR Review:"
# Default upper bound on how many times gh_api() runs a request.
DEFAULT_API_ATTEMPTS = 3


def review_comment_heading(comment: dict, summary_heading: str) -> Optional[str]:
Expand All @@ -47,18 +50,75 @@ def is_legacy_review_comment(comment: dict, summary_heading: str) -> bool:
return review_comment_heading(comment, summary_heading) == LEGACY_REVIEW_SUMMARY_HEADING


def command_error_summary(error: subprocess.CalledProcessError) -> str:
    """Return a one-line description of a failed command.

    The captured stderr is preferred, then stdout. A stream that is empty,
    ``None`` or contains only whitespace is skipped, so a blank stderr no
    longer hides useful text on stdout. When neither stream has content the
    exit status is reported instead.

    Args:
        error: The exception raised by ``subprocess.run(..., check=True)``.

    Returns:
        The last line of the first non-blank stream, or
        ``"exit status <returncode>"`` if both streams are blank.
    """
    for stream in (error.stderr, error.stdout):
        detail = (stream or "").strip()
        if detail:
            # The final line is used as the summary of multi-line output.
            return detail.splitlines()[-1]
    return f"exit status {error.returncode}"


def error_http_status(error: subprocess.CalledProcessError) -> Optional[int]:
    """Extract the HTTP status code mentioned in a failed command's output.

    Looks at stderr, or stdout when stderr is empty, for the first
    ``HTTP <ddd>`` occurrence.

    Returns:
        The status code as an int, or ``None`` when no status is present.
    """
    captured = error.stderr or error.stdout or ""
    found = HTTP_STATUS_PATTERN.search(captured)
    return int(found.group(1)) if found else None


def retry_limit_for_error(
    error: subprocess.CalledProcessError,
    attempts: int,
) -> int:
    """Return how many total attempts are allowed for this kind of failure.

    Args:
        error: The failed ``gh`` invocation.
        attempts: The caller's configured maximum number of attempts.

    Returns:
        ``attempts`` when no HTTP status can be found in the output or the
        status is 408, 429 or 5xx; at most 2 for a 404; and 1 (no retry)
        for any other status.
    """
    http_status = error_http_status(error)
    if http_status is None:
        return attempts
    if http_status == 404:
        return min(attempts, 2)
    retry_in_full = http_status in (408, 429) or http_status >= 500
    return attempts if retry_in_full else 1


def gh_api(args: list[str], *, attempts: int = DEFAULT_API_ATTEMPTS) -> subprocess.CompletedProcess:
    """Run gh api with a small retry window for transient GitHub API failures.

    Args:
        args: Arguments appended after ``gh api``.
        attempts: Maximum number of times to run the command. The effective
            limit for a given failure comes from ``retry_limit_for_error``
            and may be lower.

    Returns:
        The completed process, with stdout and stderr captured as text.

    Raises:
        ValueError: If ``attempts`` is less than 1. Without this check the
            loop would not run and ``raise None`` would surface as an
            unrelated ``TypeError``.
        subprocess.CalledProcessError: The last failure once the retry limit
            is reached. The exception carries a ``retry_limit`` attribute
            holding the limit that was applied.
    """
    if attempts < 1:
        raise ValueError(f"attempts must be at least 1, got {attempts}")
    last_error = None
    for attempt in range(1, attempts + 1):
        try:
            return subprocess.run(
                ["gh", "api", *args],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            last_error = e
            retry_limit = retry_limit_for_error(e, attempts)
            # Expose the applied limit so callers can report it.
            e.retry_limit = retry_limit
            if attempt >= retry_limit:
                break
            print(
                "::warning::GitHub API request failed; "
                f"retrying ({attempt}/{retry_limit}): gh api {' '.join(args)}: "
                f"{command_error_summary(e)}",
                file=sys.stderr,
            )
            # Linear backoff: wait 1s after the first failure, 2s after the second.
            time.sleep(attempt)
    raise last_error


def gh_api_paginate(endpoint: str) -> list[dict]:
    """Fetch all pages from a gh api endpoint.

    The request goes through ``gh_api`` with ``--paginate`` and the captured
    stdout is handed to ``parse_paginated_json``.

    Args:
        endpoint: The API path passed to ``gh api``.

    Returns:
        The entries parsed from the paginated output.

    Raises:
        subprocess.CalledProcessError: Propagated from ``gh_api`` when the
            request keeps failing.
    """
    result = gh_api([endpoint, "--paginate"])
    return parse_paginated_json(result.stdout)


def parse_paginated_json(output: str) -> list[dict]:
"""Parse gh api --paginate output."""
# --paginate concatenates JSON arrays; each page is a JSON array
# Parse by finding all top-level arrays
entries = []
for line in result.stdout.strip().splitlines():
for line in output.strip().splitlines():
line = line.strip()
if not line:
continue
Expand All @@ -73,7 +133,7 @@ def gh_api_paginate(endpoint: str) -> list[dict]:
# If the whole output is a single JSON array, handle that too
if not entries:
try:
entries = json.loads(result.stdout)
entries = json.loads(output)
except json.JSONDecodeError:
pass
return entries
Expand All @@ -83,12 +143,7 @@ def fetch_compare_diff(head_repo: str, base_sha: str, head_sha: str) -> Optional
"""Fetch a compare diff from the PR head repo without checking out PR code."""
endpoint = f"repos/{head_repo}/compare/{base_sha}...{head_sha}"
try:
metadata = subprocess.run(
["gh", "api", endpoint],
capture_output=True,
text=True,
check=True,
)
metadata = gh_api([endpoint])
compare = json.loads(metadata.stdout)
status = compare.get("status", "")
if status != "ahead":
Expand All @@ -97,12 +152,7 @@ def fetch_compare_diff(head_repo: str, base_sha: str, head_sha: str) -> Optional
file=sys.stderr,
)
return None
result = subprocess.run(
["gh", "api", "-H", "Accept: application/vnd.github.diff", endpoint],
capture_output=True,
text=True,
check=True,
)
result = gh_api(["-H", "Accept: application/vnd.github.diff", endpoint])
except subprocess.CalledProcessError as e:
print(
f"Could not fetch incremental diff from {head_repo}: {e.stderr}",
Expand Down Expand Up @@ -134,7 +184,18 @@ def main():

endpoint = f"repos/{repo}/issues/{pr_number}/comments"
print(f"Fetching comments from {endpoint}...")
raw_comments = gh_api_paginate(endpoint)
try:
raw_comments = gh_api_paginate(endpoint)
except subprocess.CalledProcessError as e:
retry_limit = getattr(e, "retry_limit", DEFAULT_API_ATTEMPTS)
print(
"::error::Could not fetch prior PR comments after "
f"{retry_limit} attempt(s). Endpoint: {endpoint}. "
f"Repository: {repo}. PR: {pr_number}. Last error: "
f"{command_error_summary(e)}",
file=sys.stderr,
)
raise
print(f"Found {len(raw_comments)} comments")

# Extract comment summaries
Expand Down Expand Up @@ -183,12 +244,7 @@ def main():
summary_comment_id = legacy_summary_comment_id

pr_endpoint = f"repos/{repo}/pulls/{pr_number}"
pr_result = subprocess.run(
["gh", "api", pr_endpoint],
capture_output=True,
text=True,
check=True,
)
pr_result = gh_api([pr_endpoint])
pr = json.loads(pr_result.stdout)
current_sha = pr["head"]["sha"]
current_base_sha = pr["base"]["sha"]
Expand Down
77 changes: 69 additions & 8 deletions .github/actions/pr-review/scripts/resolve-outdated-threads.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@

import json
import os
import re
import subprocess
import sys
import time
from typing import Optional

# Prefixes that mark review comments by severity.
# NOTE(review): presumably used to pick which threads to resolve; confirm
# against should_resolve().
REVIEW_PREFIXES = ("🔴 Security:", "🟠 Bug:", "🟡 Suggestion:")
# Upper bound on how many times a GraphQL request is run.
DEFAULT_API_ATTEMPTS = 3
# Captures the three-digit code from text such as "HTTP 502" in gh output.
HTTP_STATUS_PATTERN = re.compile(r"HTTP\s+(\d{3})")

LIST_THREADS_QUERY = """
query($owner: String!, $repo: String!, $number: Int!, $after: String) {
Expand Down Expand Up @@ -50,14 +55,56 @@
"""


def command_error_summary(error: subprocess.CalledProcessError) -> str:
    """Return a one-line description of a failed command.

    The captured stderr is preferred, then stdout. A stream that is empty,
    ``None`` or contains only whitespace is skipped, so a blank stderr no
    longer hides useful text on stdout. When neither stream has content the
    exit status is reported instead.

    Args:
        error: The exception raised by ``subprocess.run(..., check=True)``.

    Returns:
        The last line of the first non-blank stream, or
        ``"exit status <returncode>"`` if both streams are blank.
    """
    for stream in (error.stderr, error.stdout):
        detail = (stream or "").strip()
        if detail:
            # The final line is used as the summary of multi-line output.
            return detail.splitlines()[-1]
    return f"exit status {error.returncode}"


def error_http_status(error: subprocess.CalledProcessError) -> Optional[int]:
    """Extract the HTTP status code mentioned in a failed command's output.

    Looks at stderr, or stdout when stderr is empty, for the first
    ``HTTP <ddd>`` occurrence.

    Returns:
        The status code as an int, or ``None`` when no status is present.
    """
    captured = error.stderr or error.stdout or ""
    found = HTTP_STATUS_PATTERN.search(captured)
    return int(found.group(1)) if found else None


def retry_limit_for_error(error: subprocess.CalledProcessError) -> int:
    """Return how many total attempts are allowed for this kind of failure.

    Returns:
        ``DEFAULT_API_ATTEMPTS`` when no HTTP status can be found in the
        output or the status is 408, 429 or 5xx; at most 2 for a 404; and
        1 (no retry) for any other status.
    """
    http_status = error_http_status(error)
    if http_status is None:
        return DEFAULT_API_ATTEMPTS
    if http_status == 404:
        return min(DEFAULT_API_ATTEMPTS, 2)
    retry_in_full = http_status in (408, 429) or http_status >= 500
    return DEFAULT_API_ATTEMPTS if retry_in_full else 1


def gh_graphql(query: str, **variables: str) -> dict:
    """Call gh api graphql and return parsed JSON.

    Failed invocations are retried up to the limit chosen by
    ``retry_limit_for_error``, sleeping 1s, then 2s, between attempts.

    Args:
        query: The GraphQL document, sent as the ``query`` field.
        **variables: GraphQL variables. Integer values are passed with
            ``-F`` and all other values with ``-f``.

    Returns:
        The decoded JSON response.

    Raises:
        subprocess.CalledProcessError: The last failure once the retry limit
            is reached. The exception carries a ``retry_limit`` attribute
            holding the limit that was applied.
        json.JSONDecodeError: If a successful call returns output that is not
            valid JSON. This is not retried.
    """
    cmd = ["gh", "api", "graphql", "-f", f"query={query}"]
    for key, value in variables.items():
        flag = "-F" if isinstance(value, int) else "-f"
        cmd.extend([flag, f"{key}={value}"])
    last_error = None
    for attempt in range(1, DEFAULT_API_ATTEMPTS + 1):
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            return json.loads(result.stdout)
        except subprocess.CalledProcessError as e:
            last_error = e
            retry_limit = retry_limit_for_error(e)
            # Expose the applied limit so callers can report it.
            e.retry_limit = retry_limit
            if attempt >= retry_limit:
                break
            print(
                "::warning::GitHub GraphQL request failed; "
                f"retrying ({attempt}/{retry_limit}): "
                f"{command_error_summary(e)}",
                file=sys.stderr,
            )
            # Linear backoff between attempts.
            time.sleep(attempt)
    raise last_error


def get_all_threads(owner: str, repo: str, number: int) -> list[dict]:
Expand Down Expand Up @@ -101,6 +148,14 @@ def resolve_thread(thread_id: str) -> bool:
return False


def write_summary(summary: dict) -> None:
    """Write *summary* as indented JSON to ``.github/resolved-threads.json``.

    The path is relative to the current working directory; the ``.github``
    directory is created if it does not exist. The destination is printed
    once the file has been written.
    """
    target = os.path.join(".github", "resolved-threads.json")
    parent_dir = os.path.dirname(target)
    os.makedirs(parent_dir, exist_ok=True)
    with open(target, "w") as handle:
        json.dump(summary, handle, indent=2)
    print(f"Summary written to {target}")


def main():
repo = os.environ.get("GITHUB_REPOSITORY", "")
pr_number = os.environ.get("PR_NUMBER", "")
Expand All @@ -112,7 +167,17 @@ def main():
number = int(pr_number)

print(f"Fetching review threads for {owner}/{repo_name}#{number}...")
threads = get_all_threads(owner, repo_name, number)
try:
threads = get_all_threads(owner, repo_name, number)
except subprocess.CalledProcessError as e:
retry_limit = getattr(e, "retry_limit", DEFAULT_API_ATTEMPTS)
print(
"::error::Could not fetch review threads after "
f"{retry_limit} attempt(s). Repository: {repo}. "
f"PR: {pr_number}. Last error: {command_error_summary(e)}",
file=sys.stderr,
)
raise
print(f"Found {len(threads)} total review threads")

to_resolve = [t for t in threads if should_resolve(t)]
Expand All @@ -137,13 +202,9 @@ def main():
"resolved": resolved,
}

output_path = os.path.join(".github", "resolved-threads.json")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w") as f:
json.dump(summary, f, indent=2)
write_summary(summary)

print(f"\nDone: resolved {len(resolved)}/{len(to_resolve)} threads")
print(f"Summary written to {output_path}")


if __name__ == "__main__":
Expand Down
Loading