import os
import sys
import threading
from contextlib import asynccontextmanager, contextmanager
from typing import Optional, Dict, List
3432
3533from utils .logging import get_logger
3634
5250# Semaphore for strict concurrency control
5351_context_semaphore : Optional [asyncio .Semaphore ] = None
5452
# Sync singleton globals for Windows threading support.
# These back the synchronous Playwright path: the helpers that launch, reuse
# and tear down the sync browser declare them ``global`` and guard every
# access with ``_sync_lock``, so they must exist at module level.
_sync_browser = None  # shared sync browser, or None when not launched
_sync_playwright_instance = None  # started sync Playwright driver, or None
_sync_lock = threading.Lock()  # serializes launch/teardown of the two above
59-
6053
6154async def _get_semaphore () -> asyncio .Semaphore :
6255 """Get or create the context semaphore."""
@@ -185,6 +178,132 @@ async def _get_browser(headless: bool = True):
185178 )
186179
187180
def _get_sync_browser(headless: bool = True):
    """Get or create the shared SYNC browser instance (thread-safe).

    The whole check-and-launch sequence runs while holding ``_sync_lock``,
    so concurrent callers cannot launch two browsers at once.

    Args:
        headless: Passed to ``chromium.launch`` when a new browser has to be
            started. It is NOT applied when a connected browser already
            exists; that instance is returned unchanged.

    Returns:
        The shared sync browser object.

    Raises:
        Exception: Whatever starting Playwright or launching Chromium raises;
            launch failures are not caught here.
    """
    global _sync_browser, _sync_playwright_instance

    with _sync_lock:
        if _sync_browser is not None:
            try:
                if _sync_browser.is_connected():
                    return _sync_browser
            except Exception as exc:
                # A failing liveness probe is treated the same as a dead
                # browser, but leave a trace instead of swallowing silently.
                logger.debug(f"Sync browser liveness check failed: {exc}")
            # Browser died, drop the stale handle and launch a new one below
            _sync_browser = None

        logger.info(f"🚀 Launching shared SYNC browser instance (headless={headless} )...")

        # Imported lazily so the sync API is only loaded when this path is used
        from playwright.sync_api import sync_playwright

        if _sync_playwright_instance is None:
            _sync_playwright_instance = sync_playwright().start()

        _sync_browser = _sync_playwright_instance.chromium.launch(
            headless=headless,
            args=BROWSER_ARGS,
        )
        logger.info("✅ Sync browser launched and ready")
        return _sync_browser
208+
209+
def _force_sync_cleanup():
    """Tear down the shared sync browser and Playwright driver, best-effort.

    Meant for recovering from a stale sync browser (e.g. after a greenlet
    error). Errors raised while closing or stopping are ignored, and both
    module-level handles are reset to ``None`` regardless of the outcome.
    Runs entirely under ``_sync_lock``.
    """
    global _sync_browser, _sync_playwright_instance

    with _sync_lock:
        # Browser first, then the driver that owns it
        if _sync_browser:
            try:
                _sync_browser.close()
            except Exception:
                pass
        _sync_browser = None

        if _sync_playwright_instance:
            try:
                _sync_playwright_instance.stop()
            except Exception:
                pass
        _sync_playwright_instance = None
226+
227+
@contextmanager
def get_sync_browser_context(
    headless: bool = True,
    viewport: Optional[Dict] = None,
    user_agent: Optional[str] = None,
    stealth_script: Optional[str] = None,
    block_resources: Optional[List[str]] = None,
    locale: str = "en-US",
):
    """
    Sync context manager for a browser context.

    Auto-recovers from greenlet errors (stale browser from dead thread)
    by tearing down and re-launching: context creation is attempted at most
    twice, and only errors whose message mentions "greenlet",
    "different thread" or "exited" trigger a retry.
    Used on Windows via asyncio.to_thread to bypass subprocess issues.

    Args:
        headless: Forwarded to ``_get_sync_browser``.
        viewport: Viewport dict; defaults to 1280x800 when falsy.
        user_agent: User-agent string; a desktop Chrome UA is used when falsy.
        stealth_script: Optional init script added to the context.
        block_resources: Optional resource types whose requests are aborted.
        locale: Locale passed to ``new_context``.

    Yields:
        The newly created browser context. It is closed on exit, including
        when setup (init script / routing) or the caller's body raises.

    Raises:
        Exception: Any context-creation error that is not recognized as a
            stale-browser error, or a stale-browser error on the second
            attempt.
    """
    default_viewport = {'width': 1280, 'height': 800}
    default_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    )
    # Substrings that identify a browser left behind by another/dead thread
    stale_markers = ("greenlet", "different thread", "exited")

    context = None
    for attempt in range(2):
        browser = _get_sync_browser(headless=headless)
        try:
            context = browser.new_context(
                viewport=viewport or default_viewport,
                user_agent=user_agent or default_user_agent,
                locale=locale,
            )
            break  # Success
        except Exception as e:
            message = str(e).lower()
            if not any(marker in message for marker in stale_markers):
                raise
            logger.info(f"🔄 Browser context stale (attempt {attempt + 1} /2), re-launching...")
            _force_sync_cleanup()
            if attempt == 1:
                # Second failure: the stale state is cleaned up, but give up
                raise

    try:
        # Setup lives inside the try so a failure here still closes the
        # context instead of leaking it.
        if stealth_script:
            context.add_init_script(stealth_script)

        if block_resources:
            blocked = set(block_resources)

            def _filter_route(route):
                if route.request.resource_type in blocked:
                    route.abort()
                else:
                    route.continue_()

            context.route("**/*", _filter_route)

        yield context
    finally:
        if context:
            try:
                context.close()
            except Exception:
                # Best-effort close; the browser may already be gone
                pass
283+
284+
def close_sync_browser_pool():
    """Close the sync browser pool.

    Delegates the teardown to ``_force_sync_cleanup``, which closes the
    shared sync browser, stops the sync Playwright driver (ignoring errors
    from either step) and resets both module-level handles to ``None`` under
    ``_sync_lock``. Safe to call when nothing was ever launched.
    """
    # Single source of truth for teardown; avoids a second copy of the
    # close/stop/reset sequence drifting out of sync with the helper.
    _force_sync_cleanup()
    logger.info("🛑 Sync browser pool closed")
305+
306+
188307@asynccontextmanager
189308async def get_browser_context (
190309 headless : bool = True ,
@@ -251,74 +370,7 @@ async def handle_route(route):
251370 pass
252371
253372
254- @contextmanager
255- def get_sync_browser_context (
256- headless : bool = True ,
257- viewport : Optional [Dict ] = None ,
258- user_agent : Optional [str ] = None ,
259- stealth_script : Optional [str ] = None ,
260- block_resources : Optional [List [str ]] = None ,
261- locale : str = "en-US" ,
262- ):
263- """
264- Sync context manager for a browser context.
265- Used on Windows via asyncio.to_thread to bypass subprocess issues.
266- """
267- global _sync_browser , _sync_playwright_instance
268-
269- with _sync_lock :
270- if _sync_playwright_instance is None :
271- try :
272- from playwright .sync_api import sync_playwright
273- _sync_playwright_instance = sync_playwright ().start ()
274- logger .info ("✅ Sync Playwright started" )
275- except Exception as e :
276- logger .error (f"❌ Failed to start sync Playwright: { e } " )
277- raise
278-
279- if _sync_browser is None or not _sync_browser .is_connected ():
280- try :
281- logger .info (f"🚀 Launching shared SYNC browser (headless={ headless } )..." )
282- _sync_browser = _sync_playwright_instance .chromium .launch (
283- headless = headless ,
284- args = BROWSER_ARGS
285- )
286- except Exception as e :
287- logger .error (f"❌ Failed to launch sync browser: { e } " )
288- raise
289-
290- context = None
291- try :
292- # Create context
293- context = _sync_browser .new_context (
294- viewport = viewport or {'width' : 1280 , 'height' : 800 },
295- user_agent = user_agent or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36" ,
296- locale = locale
297- )
298-
299- if stealth_script :
300- context .add_init_script (stealth_script )
301-
302- if block_resources :
303- def sync_handle_route (route ):
304- if route .request .resource_type in block_resources :
305- route .abort ()
306- else :
307- route .continue_ ()
308- context .route ("**/*" , sync_handle_route )
309-
310- yield context
311-
312- finally :
313- if context :
314- try :
315- context .close ()
316- except :
317- pass
318-
319-
320373async def close_browser_pool ():
321-
322374 """
323375 Close the browser pool and release all resources.
324376 Resets ALL globals including lock and semaphore to prevent
@@ -344,29 +396,17 @@ async def close_browser_pool():
344396 logger .debug (f"Error stopping playwright: { e } " )
345397 _playwright = None
346398
399+ # Also clean up sync resources
400+ close_sync_browser_pool ()
401+
347402 # Reset all synchronization primitives so they are recreated fresh
348403 # This prevents stale locks/semaphores from surviving across reloads
349404 _browser_lock = None
350405 _context_semaphore = None
351406 _active_contexts = 0
352-
353- # Also clean up sync resources
354- if _sync_browser :
355- try :
356- _sync_browser .close ()
357- except : pass
358- _sync_browser = None
359-
360- if _sync_playwright_instance :
361- try :
362- _sync_playwright_instance .stop ()
363- except : pass
364- _sync_playwright_instance = None
365-
366407 logger .info ("Browser pool: All resources released and reset" )
367408
368409
369-
370410def get_pool_status () -> dict :
371411 """Get current browser pool status."""
372412 browser_running = False
0 commit comments