88
99import scrapy
1010
11- from variables import *
11+ from variables import * # noqa: F403
1212from assets import res as ori_res
1313from ComicSpider .items import ComicspiderItem
1414from ComicSpider .runtime .job_models import create_job_context , iter_download_items
1515from GUI .core .font import font_color
1616from utils import PresetHtmlEl , temp_p , conf
17- from utils .processed_class import TextBrowserState , ProcessState , Url
17+ from utils .processed_class import TextBrowserState , ProcessState
1818
19- from utils .protocol import SpiderDownloadJob , JobContext , LogEvent , ProcessStateEvent , TasksObjEvent
19+ from utils .protocol import SpiderDownloadJob , JobContext , LogEvent , ProcessStateEvent
2020from utils .website import (
21- correct_domain ,
22- InfoMinix , BookInfo , Episode
21+ correct_domain , BookInfo , Episode
22+ )
23+ from utils .website .registry import (
24+ resolve_provider_descriptor_by_spider ,
25+ create_spider_site_runtime ,
2326)
24- from utils .website .registry import resolve_spider_adapter
2527from utils .website .schema import BodyFormat
2628from utils .sql import SqlRecorder , SqlrV
2729from utils .meta import MetaRecorder
@@ -71,8 +73,8 @@ class BaseComicSpider(scrapy.Spider):
7173 text_browser_state = TextBrowserState (text = '' )
7274 process_state = ProcessState (process = 'init' )
7375 say : SayToGui = None
74- adapter = None
75- site = None
76+ provider_descriptor = None
77+ spider_site_runtime = None
7678 record_sql : SqlRecorder = None
7779 rv_sql : SqlrV = None
7880 ua = {}
@@ -96,6 +98,7 @@ class BaseComicSpider(scrapy.Spider):
9698 turn_page_search : str = None
9799 turn_page_info : tuple = None
98100 _enable_episode_dispatch = False
101+ remove_domain_cache_on_finished_miss = True
99102
def preready(self):
    """Pre-crawl hook; the base implementation is intentionally a no-op.

    Subclasses override this to perform site-specific preparation before
    the spider starts requesting pages.
    """
@@ -152,10 +155,7 @@ def _bind_runtime_context(self, job: SpiderDownloadJob):
152155 getattr (getattr (item , "from_book" , None ), "preview_url" , None ),
153156 ])
154157 elif isinstance (item , BookInfo ):
155- candidates .extend ([
156- getattr (item , "url" , None ),
157- getattr (item , "preview_url" , None ),
158- ])
158+ candidates .extend ([getattr (item , "url" , None ), getattr (item , "preview_url" , None )])
159159 for candidate in candidates :
160160 if origin := self ._url_origin (candidate ):
161161 self ._runtime_origin = origin
@@ -275,12 +275,7 @@ def parse_section(self, response):
275275 if isinstance (url_or_ep , Episode ):
276276 yield from self ._process_episode (url_or_ep )
277277 elif isinstance (url_or_ep , str ):
278- yield scrapy .Request (
279- url = url_or_ep ,
280- callback = self .parse_fin_page ,
281- meta = {'book' : book , 'page' : page },
282- dont_filter = True ,
283- )
278+ yield scrapy .Request (url = url_or_ep , callback = self .parse_fin_page , meta = {'book' : book , 'page' : page }, dont_filter = True )
284279
def need_sec_next_page(self, resp):
    """Pagination hook for section pages; base implementation opts out.

    Subclasses that support multi-page sections override this to return a
    next-page trigger derived from *resp*; returning None (the default)
    means no further section page is requested.
    """
    return None
@@ -353,8 +348,8 @@ def from_crawler(cls, crawler, *args, **kwargs):
353348
354349 spider .record_sql = SqlRecorder ()
355350 spider .rv_sql = SqlrV (1 if spider .name in spider .settings .get ('SPECIAL' ) else 0 ).connect ()
356- spider .adapter = resolve_spider_adapter (spider .name )
357- spider .site = spider .adapter . create_session ( conf )
351+ spider .provider_descriptor = resolve_provider_descriptor_by_spider (spider .name )
352+ spider .spider_site_runtime = create_spider_site_runtime ( spider .name , conf_state = conf )
358353 spider .mr = MetaRecorder (conf )
359354
360355 if job :
@@ -373,7 +368,7 @@ def _remove_cache(self):
373368 os .remove (domain_cache )
374369
375370 def _finish_counters (self , stats ):
376- downloaded_count = stats .get_value ('image /downloaded' , 0 )
371+ downloaded_count = stats .get_value ('file_status_count /downloaded' , 0 )
377372 uptodate_count = stats .get_value ('file_status_count/uptodate' , 0 )
378373 total = self .job_context .total if self .job_context else self .total
379374 return downloaded_count , uptodate_count , downloaded_count + uptodate_count , total
@@ -412,11 +407,13 @@ def _handle_finished_status(self, stats):
412407 return
413408 downloaded_count , uptodate_count , processed_count , total = self ._finish_counters (stats )
414409 exception_count = stats .get_value ('process_exception/count' , 0 )
410+ remove_domain_cache = bool (self .remove_domain_cache_on_finished_miss )
415411 if total and processed_count < total :
416412 missing_count = total - processed_count
417413 self .say (font_color (f'miss: new[{ downloaded_count } ], cache[{ uptodate_count } ], miss[{ missing_count } ]<br>' ,
418414 cls = 'theme-err' , size = 3 ))
419- self ._remove_cache ()
415+ if remove_domain_cache :
416+ self ._remove_cache ()
420417 elif total != 0 and processed_count > 0 :
421418 if downloaded_count :
422419 _str = f'{ self .res .finished_success % downloaded_count } '
@@ -428,7 +425,8 @@ def _handle_finished_status(self, stats):
428425 self .say (font_color (
429426 f'<br>{ self .res .finished_err % last_exception } <br>log path/日志文件地址: [{ self .settings .get ("LOG_FILE" )} ]' ,
430427 cls = 'theme-err' , size = 3 ))
431- self ._remove_cache ()
428+ if remove_domain_cache :
429+ self ._remove_cache ()
432430 else :
433431 self .say (font_color (f'{ self .res .finished_empty } <br>' , cls = 'theme-highlight' , size = 4 ))
434432
0 commit comments