Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions netkan/netkan/cli/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def download_counter(common: SharedArgs) -> None:
logging.info('Starting Download Count Calculation (%s)...', game_id)
DownloadCounter(game_id,
common.game(game_id).ckanmeta_repo,
common.game(game_id).netkan_repo,
common.token).update_counts()
logging.info('Download Counter completed! (%s)', game_id)

Expand Down
53 changes: 35 additions & 18 deletions netkan/netkan/download_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
from requests.exceptions import ConnectTimeout

from .utils import repo_file_add_or_changed, legacy_read_text
from .repos import CkanMetaRepo
from .metadata import Ckan

from .repos import CkanMetaRepo, NetkanRepo
from .metadata import Ckan, Netkan

class GitHubBatchedQuery:

Expand All @@ -40,7 +39,8 @@ class GitHubBatchedQuery:
def __init__(self, github_token: str) -> None:
self.repos: Dict[str, Tuple[str, str]] = {}
self.requests: Dict[Tuple[str, str], str] = {}
self.cache: Dict[Tuple[str, str], int] = {}
self.cache: Dict[Tuple[str, str, bool], int] = {}
self.include_parents: Dict[str, bool] = {}
self.github_token = github_token
logging.info('Starting new GraphQL query')

Expand All @@ -52,18 +52,21 @@ def full(self) -> bool:
# We only need to do requests for uncached mods
return len(self.requests) >= self.MODULES_PER_GRAPHQL

def add(self, identifier: str, user: str, repo: str, include_parents: bool = True) -> None:
    """Register a module's GitHub repo so its downloads get counted.

    The identifier is always recorded in ``self.repos`` and
    ``self.include_parents``; a new request is queued in ``self.requests``
    only when no cached count and no pending request already covers it.

    :param identifier: module identifier the count will be reported under
    :param user: GitHub account that owns the repo
    :param repo: GitHub repo name
    :param include_parents: whether downloads of the repo's parent should be
        added to the total. Defaults to ``True`` so existing three-argument
        callers keep working, matching the fallback used wherever this flag
        is looked up.
    """
    user_repo = (user, repo)
    cache_key = (user, repo, include_parents)
    # Look at the pending request before registering this identifier.
    pending_ident = self.requests.get(user_repo)
    self.repos[identifier] = user_repo
    self.include_parents[identifier] = include_parents
    if cache_key not in self.cache and pending_ident is None:
        # Queue this request if we haven't already
        self.requests[user_repo] = identifier
    elif (cache_key in self.cache
          or self.include_parents.get(pending_ident, True) == include_parents):
        logging.debug('Skipping duplicate request for %s, %s, %s',
                      identifier, user, repo)
    else:
        # self.requests is keyed by (user, repo) only, so it cannot hold a
        # second request for the same repo with the opposite flag. The
        # pending result will be cached under the other flag, which means
        # this identifier will not find a matching cache entry.
        logging.warning('Request for %s, %s is already queued by %s with '
                        'include_parents=%s; %s (include_parents=%s) '
                        'will not be counted by that request',
                        user, repo, pending_ident, not include_parents,
                        identifier, include_parents)

def remove(self, identifier: str, user_repo: Tuple[str, str]) -> None:
    """Forget a finished request.

    Drops the identifier's bookkeeping entries and the pending request for
    ``user_repo``. Missing entries are ignored.

    :param identifier: module identifier to drop
    :param user_repo: ``(user, repo)`` key of the request to drop
    """
    for per_identifier in (self.repos, self.include_parents):
        per_identifier.pop(identifier, None)
    self.requests.pop(user_repo, None)
    # Keep self.cache for shared $krefs

Expand Down Expand Up @@ -120,20 +123,24 @@ def get_result(self, counts: Optional[Dict[str, int]] = None) -> Dict[str, int]:
if apidata:
real_ident = self.from_graphql_safe_identifier(fake_ident)
try:
count = self.sum_graphql_result(apidata)
user_repo = self.repos[real_ident]
include_parents = self.include_parents.get(real_ident, True)
count = self.sum_graphql_result(apidata, include_parents)
user, repo = self.repos[real_ident]
cache_key = (user, repo, include_parents)
# Cache results per repo, for shared $krefs
self.cache[user_repo] = count
self.cache[cache_key] = count
except Exception: # pylint: disable=broad-except
pass
# Retrieve everything from the cache, new and old alike
for ident, user_repo in list(self.repos.items()):
if user_repo in self.cache:
count = self.cache[user_repo]
for ident, (user, repo) in list(self.repos.items()):
include_parents = self.include_parents.get(ident, True)
cache_key = (user, repo, include_parents)
if cache_key in self.cache:
count = self.cache[cache_key]
logging.info('Count for %s is %s', ident, count)
counts[ident] = counts.get(ident, 0) + count
# Purge completed requests
self.remove(ident, user_repo)
self.remove(ident, (user, repo))
return counts

def graphql_to_github(self, query: str) -> Optional[Dict[str, Any]]:
Expand Down Expand Up @@ -168,10 +175,10 @@ def _retry_interval(self, response: requests.Response) -> Optional[timedelta]:

return None

def sum_graphql_result(self, apidata: Dict[str, Any]) -> int:
def sum_graphql_result(self, apidata: Dict[str, Any], include_parents: bool) -> int:
total = 0
if apidata.get('parent', None):
total += self.sum_graphql_result(apidata['parent'])
if include_parents and apidata.get('parent', None):
total += self.sum_graphql_result(apidata['parent'], include_parents)
for release in apidata['releases']['nodes']:
for asset in release['releaseAssets']['nodes']:
total += asset['downloadCount']
Expand Down Expand Up @@ -282,9 +289,10 @@ def get_result(cls, ident: str, proj_id: str,

class DownloadCounter:

def __init__(self, game_id: str, ckm_repo: CkanMetaRepo, github_token: str) -> None:
def __init__(self, game_id: str, ckm_repo: CkanMetaRepo, nk_repo: NetkanRepo, github_token: str) -> None:
self.game_id = game_id
self.ckm_repo = ckm_repo
self.nk_repo = nk_repo
self.counts: Dict[str, Any] = {}
self.github_token = github_token
if self.ckm_repo.git_repo.working_dir:
Expand All @@ -299,14 +307,23 @@ def get_counts(self) -> None:
for ckan in self.ckm_repo.all_latest_modules(): # pylint: disable=too-many-nested-blocks
if ckan.kind == 'dlc':
continue
if self.nk_repo.nk_path(ckan.identifier).exists():
nk = Netkan(self.nk_repo.nk_path(ckan.identifier), game_id=self.game_id)
else:
nk = None
for download in ckan.downloads:
try:
url_parse = urllib.parse.urlparse(download)
if nk is not None:
include_parents = nk.check_parent_downloads()
else:
include_parents = True
if url_parse.netloc == 'github.com':
match = GitHubBatchedQuery.PATH_PATTERN.match(url_parse.path)
if match:
# Process GitHub modules together in big batches
graph_query.add(ckan.identifier, *match.groups())
user, repo = match.groups()
graph_query.add(ckan.identifier, user, repo, include_parents)
if graph_query.full():
# Run the query
graph_query.get_result(self.counts)
Expand Down
3 changes: 3 additions & 0 deletions netkan/netkan/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ def sqs_message(
'MessageAttributes': self.sqs_message_attribs(high_ver, high_ver_pre),
}

def check_parent_downloads(self) -> bool:
    """Return the ``x_netkan_check_parent_downloads`` setting.

    Falls back to ``True`` when the attribute is not present on this object.
    """
    fallback = True
    return getattr(self, 'x_netkan_check_parent_downloads', fallback)


class Ckan:

Expand Down