Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 98 additions & 32 deletions optunahub/hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import importlib.util
import logging
import os
import re
import shutil
import sys
import types
from urllib.parse import urlparse

from ga4mp import GtagMP # type: ignore
from git import Repo
from github import Auth
from github import Github
from github.ContentFile import ContentFile
Expand Down Expand Up @@ -69,7 +71,7 @@
repo_owner: str = "optuna",
repo_name: str = "optunahub-registry",
ref: str = "main",
base_url: str = "https://api.github.com",
base_url: str | None = None,
force_reload: bool = False,
auth: Auth.Auth | None = None,
) -> types.ModuleType:
Expand All @@ -88,54 +90,50 @@
ref:
The Git reference (branch, tag, or commit SHA) for the package.
base_url:
The base URL for the GitHub API.
If ``auth`` is :obj:`None` and the ``git`` command is available, this should be the base URI for the remote repository.
In this case, specifying ``git@github.com`` allows access to private/internal repositories via SSH.
Otherwise, this should be the base URL for the GitHub API.
Copy link
Copy Markdown
Contributor

@nabenabe0928 nabenabe0928 May 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
If ``auth`` is :obj:`None` and the ``git`` command is available, this should be the base URI for the remote repository.
In this case, specifying ``git@github.com`` allows access to private/internal repositories via SSH.
Otherwise, this should be the base URL for the GitHub API.
If ``auth`` is :obj:`None` and the ``git`` command is available, this should be the
base URI, e.g., ``github.enterprise.com`` and ``gitlab.com``, for the remote
repository.
In this case, specifying ``git@github.com`` allows access to private/internal repositories via SSH.
Otherwise, this should be the base URL for the GitHub API.

Copy link
Copy Markdown
Contributor Author

@kAIto47802 kAIto47802 May 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your suggestion! However, the examples you suggested are actually the opposite, and the URLs are incorrect: https://api.github.com, https://github.enterprise.com/api/v3, and https://gitlab.com/api/v4 are examples of base URLs for the GitHub API. Also, the endpoints for GitHub Enterprise, GitLab, and other services are not limited to these, since they support custom domains. [1][2]
So let me update the docstring with modifications.

Footnotes

  1. https://docs.gitlab.com/user/ssh/

  2. https://docs.gitlab.com/api/rest/

force_reload:
If :obj:`True`, the package will be downloaded from the repository.
If :obj:`False`, the package cached in the local directory will be
loaded if available.
auth:
`The authentication object <https://pygithub.readthedocs.io/en/latest/examples/Authentication.html>`__ for the GitHub API.
It is required to access private/internal repositories.
It also allows access to private/internal repositories via the GitHub API.

Returns:
The module object of the package.
"""
registry_root = "package"
dir_path = f"{registry_root}/{package}"
hostname = urlparse(base_url).hostname
hostname = _extract_hostname(base_url) if base_url else "github.com"
Comment thread
nabenabe0928 marked this conversation as resolved.
if hostname is None:
raise ValueError(f"Invalid base URL: {base_url}")
raise ValueError(f"Invalid base URI: {base_url}")

Check warning on line 111 in optunahub/hub.py

View check run for this annotation

Codecov / codecov/patch

optunahub/hub.py#L111

Added line #L111 was not covered by tests
Comment thread
nabenabe0928 marked this conversation as resolved.
cache_dir_prefix = os.path.join(_conf.cache_home(), hostname, repo_owner, repo_name, ref)
package_cache_dir = os.path.join(cache_dir_prefix, dir_path)
use_cache = not force_reload and os.path.exists(package_cache_dir)

if not use_cache:
# Download package from GitHub.
g = Github(auth=auth, base_url=base_url)
repo = g.get_repo(f"{repo_owner}/{repo_name}")

package_contents = repo.get_contents(dir_path, ref)

if isinstance(package_contents, ContentFile):
package_contents = [package_contents]

shutil.rmtree(package_cache_dir, ignore_errors=True)
os.makedirs(cache_dir_prefix, exist_ok=True)
for m in package_contents:
file_path = os.path.join(cache_dir_prefix, m.path)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
if m.type == "dir":
dir_contents = repo.get_contents(m.path, ref)
if isinstance(dir_contents, ContentFile):
dir_contents = [dir_contents]
package_contents.extend(dir_contents)
else:
with open(file_path, "wb") as f:
try:
decoded_content = m.decoded_content
except AssertionError:
continue
f.write(decoded_content)
if auth is None and shutil.which("git") is not None:
_download_via_git(
repo_owner=repo_owner,
repo_name=repo_name,
dir_path=dir_path,
ref=ref,
base_url=base_url or "https://github.com",
cache_dir_prefix=cache_dir_prefix,
)
else:
_download_via_github_api(

Check warning on line 127 in optunahub/hub.py

View check run for this annotation

Codecov / codecov/patch

optunahub/hub.py#L127

Added line #L127 was not covered by tests
Comment thread
kAIto47802 marked this conversation as resolved.
auth=auth,
base_url=base_url or "https://api.github.com",
repo_owner=repo_owner,
repo_name=repo_name,
dir_path=dir_path,
ref=ref,
package_cache_dir=package_cache_dir,
cache_dir_prefix=cache_dir_prefix,
)

local_registry_root = os.path.join(cache_dir_prefix, registry_root)
module = load_local_module(
Expand All @@ -147,14 +145,82 @@
is_official_registry = (
repo_owner == "optuna"
and repo_name == "optunahub-registry"
and base_url == "https://api.github.com"
and base_url == "https://github.com"
)
if not _conf.is_no_analytics() and not use_cache and is_official_registry:
_report_stats(package, ref)

return module


def _extract_hostname(url: str) -> str | None:
Comment thread
kAIto47802 marked this conversation as resolved.
if "://" in url:
return urlparse(url).hostname

Check warning on line 158 in optunahub/hub.py

View check run for this annotation

Codecov / codecov/patch

optunahub/hub.py#L157-L158

Added lines #L157 - L158 were not covered by tests
else:
# NOTE(kAIto47802) Extract hostname: skip optional user@, capture up to `:`, ignore the rest.
match = re.match(r"(?:.+@)?([^:]+)(?::.*)?", url)
Comment thread
kAIto47802 marked this conversation as resolved.
return match and match.group(1)

Check warning on line 162 in optunahub/hub.py

View check run for this annotation

Codecov / codecov/patch

optunahub/hub.py#L161-L162

Added lines #L161 - L162 were not covered by tests


def _download_via_git(
    repo_owner: str,
    repo_name: str,
    dir_path: str,
    ref: str,
    base_url: str,
    cache_dir_prefix: str,
) -> None:
    """Fetch ``dir_path`` of a remote repository into ``cache_dir_prefix`` using git.

    A repository is initialised in ``cache_dir_prefix``, its ``origin`` remote is pointed
    at ``<base_url>/<repo_owner>/<repo_name>`` (or ``<base_url>:<repo_owner>/<repo_name>``
    when ``base_url`` has no ``://`` scheme, i.e. the scp-like SSH form), a cone-mode
    sparse checkout is configured for ``dir_path``, and ``ref`` is fetched with depth 1
    and checked out.

    Args:
        repo_owner:
            The owner of the remote repository.
        repo_name:
            The name of the remote repository.
        dir_path:
            The directory inside the repository to materialise.
        ref:
            The Git reference passed to ``fetch`` as the refspec.
        base_url:
            The base URI of the remote; trailing slashes are stripped.
        cache_dir_prefix:
            The local directory used as the git working tree.
    """
    # URLs with a scheme join the path with "/", scp-like locations with ":".
    if "://" in base_url:
        joiner = "/"
    else:
        joiner = ":"
    remote_url = f"{base_url.rstrip('/')}{joiner}{repo_owner}/{repo_name}"

    repo = Repo.init(cache_dir_prefix)

    # Reuse the existing ``origin`` remote if there is one, otherwise create it.
    if "origin" in repo.remotes:
        origin = repo.remotes.origin
    else:
        origin = repo.create_remote("origin", remote_url)

    # An existing remote may still point somewhere else; retarget it if so.
    if repo.remotes.origin.url != remote_url:
        repo.remotes.origin.set_url(remote_url)

    # NOTE(review): ``sparse-checkout set`` replaces the sparse patterns, so presumably
    # only ``dir_path`` stays materialised in this working tree — confirm this is intended
    # when several packages share the same ``cache_dir_prefix``.
    repo.git.sparse_checkout("init", "--cone")
    repo.git.sparse_checkout("set", dir_path)

    # Shallow-fetch just the requested ref and check out what was fetched.
    origin.fetch(refspec=ref, depth=1)
    repo.git.checkout("FETCH_HEAD")


def _download_via_github_api(
    auth: Auth.Auth | None,
    base_url: str,
    repo_owner: str,
    repo_name: str,
    dir_path: str,
    ref: str,
    package_cache_dir: str,
    cache_dir_prefix: str,
) -> None:
    """Download ``dir_path`` of a repository through the GitHub API into the local cache.

    The contents of ``dir_path`` at ``ref`` are listed via the API, sub-directories are
    listed in turn, and every file is written below ``cache_dir_prefix`` at the same
    relative path it has in the repository. ``package_cache_dir`` is removed beforehand.

    Args:
        auth:
            The authentication object passed to ``Github``; may be :obj:`None`.
        base_url:
            The base URL of the GitHub API.
        repo_owner:
            The owner of the repository.
        repo_name:
            The name of the repository.
        dir_path:
            The directory inside the repository to download.
        ref:
            The Git reference used for every contents request.
        package_cache_dir:
            The local directory of the package; deleted before downloading.
        cache_dir_prefix:
            The local root directory under which repository paths are recreated.
    """
    remote_repo = Github(auth=auth, base_url=base_url).get_repo(f"{repo_owner}/{repo_name}")

    # ``get_contents`` yields a single ContentFile for a file path; normalise to a list.
    pending = remote_repo.get_contents(dir_path, ref)
    if isinstance(pending, ContentFile):
        pending = [pending]

    # Drop any previously cached copy before writing the fresh one.
    shutil.rmtree(package_cache_dir, ignore_errors=True)
    os.makedirs(cache_dir_prefix, exist_ok=True)

    # ``pending`` doubles as a work queue: listings of sub-directories are appended to
    # it while it is being walked, so nested directories are visited as well.
    position = 0
    while position < len(pending):
        entry = pending[position]
        position += 1

        destination = os.path.join(cache_dir_prefix, entry.path)
        os.makedirs(os.path.dirname(destination), exist_ok=True)

        if entry.type == "dir":
            children = remote_repo.get_contents(entry.path, ref)
            if isinstance(children, ContentFile):
                children = [children]
            pending.extend(children)
            continue

        with open(destination, "wb") as out:
            try:
                payload = entry.decoded_content
            except AssertionError:
                # The content could not be decoded; skip this entry. The destination
                # file has already been created at this point and is left empty.
                continue
            out.write(payload)

Check warning on line 221 in optunahub/hub.py

View check run for this annotation

Codecov / codecov/patch

optunahub/hub.py#L216-L221

Added lines #L216 - L221 were not covered by tests


def load_local_module(
package: str,
*,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ requires-python = ">=3.8"
dependencies = [
"ga4mp",
"optuna",
"GitPython",
"PyGithub>=1.59",
]
dynamic = ["version"]
Expand Down