@@ -381,6 +381,19 @@ def _is_business_day_only_product(product: str) -> bool:
381381
382382 return normalize_product_name (product ) in BUSINESS_DAY_ONLY_PRODUCTS
383383
384+
def _normalize_cached_api_dates(raw_dates: object, product: str) -> List[str]:
    """Coerce a cached API date payload into a normalized list of dates.

    The cache may hold either a single date string or a collection of
    dates, so both shapes are accepted here and funnelled through
    ``_normalize_date_queue``.

    Args:
        raw_dates: The cached value. A ``str`` is treated as one date; a
            ``list``, ``tuple`` or ``set`` has every element converted
            with ``str()``; any other type is treated as "no dates".
        product: Product name forwarded to ``_normalize_date_queue``.

    Returns:
        Whatever ``_normalize_date_queue`` returns for the collected
        candidates, with the business-day filter explicitly disabled.
    """
    candidates: List[str] = []
    if isinstance(raw_dates, str):
        # Single-string payload: wrap it so downstream sees one candidate.
        candidates.append(raw_dates)
    elif isinstance(raw_dates, (list, tuple, set)):
        # Collection payload: stringify each entry before normalization.
        candidates.extend(str(entry) for entry in raw_dates)
    # Any other payload type leaves the candidate list empty.

    return _normalize_date_queue(
        candidates,
        product=product,
        apply_business_day_filter=False,
    )
395+
396+
384397def _normalize_date_queue (
385398 raw_dates : Sequence [str ],
386399 * ,
@@ -499,7 +512,7 @@ def _resolve_requested_dates_for_plan(
499512 t_product_start : float ,
500513 catch_up_to_latest : bool = False ,
501514 lock : Optional [threading .Lock ] = None ,
502- api_date_cache : Optional [Dict [str , Tuple [str , str ]]] = None ,
515+ api_date_cache : Optional [Dict [str , Tuple [List [ str ] , str ]]] = None ,
503516) -> Tuple [List [str ], bool ]:
504517 """
505518 解析单产品执行日期列表,并处理 timestamp 门控。
@@ -522,19 +535,19 @@ def _resolve_requested_dates_for_plan(
522535 if api_date_cache :
523536 cached = api_date_cache .get (product_name ) or api_date_cache .get (plan .name )
524537 if cached :
525- cached_date , checked_at_str = cached
538+ cached_dates , checked_at_str = cached
526539 if _is_cache_fresh (checked_at_str ):
540+ api_latest_candidates = _normalize_cached_api_dates (cached_dates , product_name )
527541 # 计算缓存年龄用于日志
528542 try :
529543 checked_at = datetime .strptime (checked_at_str , "%Y-%m-%dT%H:%M:%S" )
530544 age_seconds = (datetime .now () - checked_at ).total_seconds ()
531545 except ValueError :
532546 age_seconds = 0.0
533547 log_info (
534- f"[{ plan .name } ] 使用缓存 API 日期 { cached_date } ({ int (age_seconds )} s 前查询)" ,
548+ f"[{ plan .name } ] 使用缓存 API 日期 { api_latest_candidates [ - 1 ] } ({ int (age_seconds )} s 前查询)" ,
535549 event = "PRODUCT_PLAN" , decision = "cache_hit" ,
536550 )
537- api_latest_candidates = [cached_date ]
538551 cache_hit = True
539552
540553 if not cache_hit :
@@ -703,8 +716,14 @@ def _upsert_product_status_after_success(
703716 status .last_update_time = utc_now_iso ()
704717 status .data_time = actual_time
705718 status .data_content_time = actual_time
706- upsert_product_status (conn , status )
707- write_local_timestamp (command_ctx .data_root , product , actual_time )
719+ try :
720+ upsert_product_status (conn , status , commit_immediately = False )
721+ write_local_timestamp (command_ctx .data_root , product , actual_time )
722+ conn .commit ()
723+ except Exception :
724+ with contextlib .suppress (Exception ):
725+ conn .rollback ()
726+ raise
708727
709728
710729def _collect_preprocess_source_successes (report : RunReport ) -> List [ProductRunResult ]:
@@ -916,7 +935,7 @@ def _prefetch_api_dates(
916935 hid : str ,
917936 headers : Dict [str , str ],
918937 max_workers : int = 8 ,
919- ) -> Dict [str , Tuple [str , str ]]:
938+ ) -> Dict [str , Tuple [List [ str ] , str ]]:
920939 """并发预取产品的 API 最新日期,写入缓存并返回。
921940
922941 已在缓存中且未过期的产品跳过。失败的产品静默跳过,
@@ -947,18 +966,18 @@ def _prefetch_api_dates(
947966 f"[预取] 并发查询 { len (uncached )} /{ len (products )} 个产品" ,
948967 event = "PREFETCH" , decision = "fetching" ,
949968 )
950- fetched : Dict [str , str ] = {} # 写入仅在主线程的 as_completed 循环内,无并发写入
969+ fetched : Dict [str , List [ str ] ] = {} # 写入仅在主线程的 as_completed 循环内,无并发写入
951970 # abort_event 只能拦截尚未开始的 worker,已在执行的请求会自然完成或超时
952971 abort_event = threading .Event ()
953972 t_start = time .time ()
954973
955- def _fetch_one (product : str ) -> Tuple [str , Optional [str ]]:
974+ def _fetch_one (product : str ) -> Tuple [str , Optional [List [ str ] ]]:
956975 """单产品 HTTP 查询,401/403 触发全局中止。"""
957976 if abort_event .is_set ():
958977 return product , None
959978 try :
960- date_str = get_latest_time (api_base , product , hid , headers )
961- return product , date_str
979+ date_list = get_latest_times (api_base , product , hid , headers )
980+ return product , date_list
962981 except FatalRequestError as exc :
963982 # 认证失败时中止整个预取
964983 if exc .status_code in (401 , 403 ):
@@ -973,9 +992,9 @@ def _fetch_one(product: str) -> Tuple[str, Optional[str]]:
973992 futures = {executor .submit (_fetch_one , p ): p for p in uncached }
974993 for future in as_completed (futures , timeout = 30 ):
975994 try :
976- product , date_str = future .result ()
977- if date_str :
978- fetched [product ] = date_str
995+ product , date_list = future .result ()
996+ if date_list :
997+ fetched [product ] = date_list
979998 except Exception :
980999 pass
9811000 if abort_event .is_set ():
@@ -999,15 +1018,15 @@ def _fetch_one(product: str) -> Tuple[str, Optional[str]]:
9991018 pass
10001019 # 合并:保留新鲜的已有缓存 + 刚预取的结果
10011020 checked_at_now = datetime .now ().strftime ("%Y-%m-%dT%H:%M:%S" )
1002- merged : Dict [str , Tuple [str , str ]] = dict (existing_cache )
1003- for product , date_str in fetched .items ():
1004- merged [product ] = (date_str , checked_at_now )
1021+ merged : Dict [str , Tuple [List [ str ] , str ]] = dict (existing_cache )
1022+ for product , date_list in fetched .items ():
1023+ merged [product ] = (list ( date_list ) , checked_at_now )
10051024 return merged
10061025
10071026
10081027def _estimate_sync_workload (
10091028 plans : Sequence [ProductPlan ],
1010- api_date_cache : Dict [str , Tuple [str , str ]],
1029+ api_date_cache : Dict [str , Tuple [List [ str ] , str ]],
10111030 data_root : Path ,
10121031 api_call_limit : int = 50 ,
10131032 course_type : str = "" ,
@@ -1032,7 +1051,8 @@ def _estimate_sync_workload(
10321051 local_date = infer_local_date_from_csv (data_root , product_name , rule )
10331052 # 读 API 最新日期(来自预取缓存)
10341053 cached = api_date_cache .get (product_name )
1035- api_date = cached [0 ] if cached else None
1054+ api_dates = _normalize_cached_api_dates (cached [0 ], product_name ) if cached else []
1055+ api_date = api_dates [- 1 ] if api_dates else None
10361056 if not api_date :
10371057 # 无 API 日期,计为 1 次
10381058 products_list .append ({
@@ -1053,9 +1073,10 @@ def _estimate_sync_workload(
10531073 try :
10541074 local_d = date .fromisoformat (local_date )
10551075 api_d = date .fromisoformat (api_date )
1056- gap = max (0 , (api_d - local_d ).days )
1076+ calendar_gap = max (0 , (api_d - local_d ).days )
10571077 except ValueError :
1058- gap = 1
1078+ calendar_gap = 1
1079+ gap = len (_expected_catchup_dates (local_date , api_date , product_name )) or calendar_gap
10591080 if gap == 0 :
10601081 continue # 已是最新,不计入
10611082 products_list .append ({
@@ -1238,7 +1259,6 @@ def _run_one_plan(plan: ProductPlan) -> Tuple[bool, float, SyncStats, str, str]:
12381259 continue
12391260
12401261 # 成功路径:total.merge + 状态持久化 + _append_result 在同一锁作用域
1241- status_persist_warning = ""
12421262 with _lock :
12431263 total .merge (stats )
12441264 product_stats .merge (stats ) # 累积本产品 stats 用于进度回调
@@ -1250,9 +1270,13 @@ def _run_one_plan(plan: ProductPlan) -> Tuple[bool, float, SyncStats, str, str]:
12501270 actual_time = actual_time ,
12511271 )
12521272 except Exception as status_exc :
1253- status_persist_warning = (
1254- f"状态持久化失败(已忽略,不影响本次成功结果): { status_exc } "
1255- )
1273+ raise ProductSyncError (
1274+ message = (
1275+ f"产品 { product } 状态持久化失败;"
1276+ f"为避免数据文件与 timestamp/状态库不一致,本次按失败处理。原始错误:{ status_exc } "
1277+ ),
1278+ reason_code = REASON_MERGE_ERROR ,
1279+ ) from status_exc
12561280 _append_result (
12571281 report ,
12581282 product = product ,
@@ -1265,12 +1289,6 @@ def _run_one_plan(plan: ProductPlan) -> Tuple[bool, float, SyncStats, str, str]:
12651289 source_path = source_path ,
12661290 )
12671291 report .phase_sync_seconds += max (0.0 , time .time () - t_sync_phase )
1268- if status_persist_warning :
1269- log_info (
1270- f"[{ plan .name } ] { status_persist_warning } " ,
1271- event = "SYNC_WARN" ,
1272- reason_code = reason_code ,
1273- )
12741292 continue
12751293 except ProductSyncError as exc :
12761294 # 可预期业务错误:带有明确 reason_code。
0 commit comments