Skip to content

Commit 8b13490

Browse files
committed
Restore robust Windows sync Playwright handling in browser_pool.py
1 parent 05daa68 commit 8b13490

File tree

1 file changed

+130
-90
lines changed

1 file changed

+130
-90
lines changed

form-flow-backend/services/form/browser_pool.py

Lines changed: 130 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,7 @@
2828
import sys
2929
import os
3030
from typing import Optional, Dict, List
31-
from contextlib import asynccontextmanager, contextmanager
32-
import threading
33-
31+
from contextlib import asynccontextmanager
3432

3533
from utils.logging import get_logger
3634

@@ -52,11 +50,6 @@
5250
# Semaphore for strict concurrency control
5351
_context_semaphore: Optional[asyncio.Semaphore] = None
5452

55-
# Sync singleton globals for Windows threading support
56-
_sync_browser = None
57-
_sync_playwright_instance = None
58-
_sync_lock = threading.Lock()
59-
6053

6154
async def _get_semaphore() -> asyncio.Semaphore:
6255
"""Get or create the context semaphore."""
@@ -185,6 +178,132 @@ async def _get_browser(headless: bool = True):
185178
)
186179

187180

181+
def _get_sync_browser(headless: bool = True):
    """Return the shared sync Playwright browser, launching it on demand.

    The whole check-and-launch sequence runs while holding ``_sync_lock``.
    A cached browser is reused only if it still reports being connected;
    otherwise the cached reference is dropped and a new Chromium instance
    is launched with ``BROWSER_ARGS``.

    Args:
        headless: Forwarded to ``chromium.launch``. Only consulted when a
            new browser actually has to be launched.

    Returns:
        The shared sync browser instance.
    """
    global _sync_browser, _sync_playwright_instance

    with _sync_lock:
        cached = _sync_browser
        if cached is not None:
            # A failing liveness probe is treated the same as "disconnected".
            try:
                still_alive = cached.is_connected()
            except Exception:
                still_alive = False
            if still_alive:
                return cached
            # The cached browser is unusable; forget it before relaunching.
            _sync_browser = None

        logger.info(f"🚀 Launching shared SYNC browser instance (headless={headless})...")

        from playwright.sync_api import sync_playwright

        # The Playwright driver is started once and reused across relaunches.
        if _sync_playwright_instance is None:
            _sync_playwright_instance = sync_playwright().start()

        chromium = _sync_playwright_instance.chromium
        _sync_browser = chromium.launch(headless=headless, args=BROWSER_ARGS)
        logger.info("✅ Sync browser launched and ready")
        return _sync_browser
208+
209+
210+
def _force_sync_cleanup():
    """Tear down the shared sync browser and Playwright driver, best-effort.

    Intended for recovery after the cached objects have gone stale (e.g. a
    greenlet error). Errors raised while closing or stopping are ignored,
    and both module-level references are always reset to ``None``.
    """
    global _sync_browser, _sync_playwright_instance

    with _sync_lock:
        browser = _sync_browser
        if browser:
            try:
                browser.close()
            except Exception:
                # The browser may already be dead; nothing more to do.
                pass
        _sync_browser = None

        driver = _sync_playwright_instance
        if driver:
            try:
                driver.stop()
            except Exception:
                # Same best-effort policy as for the browser above.
                pass
        _sync_playwright_instance = None
226+
227+
228+
@contextmanager
def get_sync_browser_context(
    headless: bool = True,
    viewport: Optional[Dict] = None,
    user_agent: Optional[str] = None,
    stealth_script: Optional[str] = None,
    block_resources: Optional[List[str]] = None,
    locale: str = "en-US",
):
    """
    Sync context manager yielding a fresh browser context.

    The context is created on the shared sync browser. If creation fails
    with an error that looks like a stale browser (message mentions
    "greenlet", "different thread" or "exited"), the shared browser is torn
    down and creation is retried once. Any other error, or a second stale
    failure, is re-raised.

    Used on Windows via asyncio.to_thread to bypass subprocess issues.

    Args:
        headless: Forwarded to the shared browser launch.
        viewport: Viewport dict; defaults to 1280x800 when falsy.
        user_agent: User-agent string; a desktop Chrome UA is used when falsy.
        stealth_script: Optional script installed via ``add_init_script``.
        block_resources: Optional resource types whose requests are aborted.
        locale: Locale passed to ``new_context``.

    Yields:
        The newly created browser context. It is closed on exit, including
        when post-creation setup (init script / routing) fails.
    """
    default_viewport = {'width': 1280, 'height': 800}
    default_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    )
    # Substrings (lower-case) that mark an error as "stale browser".
    stale_markers = ("greenlet", "different thread", "exited")
    max_attempts = 2

    context = None
    for attempt in range(max_attempts):
        browser = _get_sync_browser(headless=headless)
        try:
            context = browser.new_context(
                viewport=viewport or default_viewport,
                user_agent=user_agent or default_user_agent,
                locale=locale,
            )
            break  # Success
        except Exception as e:
            message = str(e).lower()
            if not any(marker in message for marker in stale_markers):
                raise
            logger.info(f"🔄 Browser context stale (attempt {attempt+1}/2), re-launching...")
            # Drop the stale browser so the next attempt (or caller) relaunches.
            _force_sync_cleanup()
            if attempt == max_attempts - 1:
                raise

    try:
        # Setup lives inside the try so a failure here still closes the
        # context instead of leaking it on the shared browser.
        if stealth_script:
            context.add_init_script(stealth_script)

        if block_resources:
            blocked = set(block_resources)

            def _filter_route(route):
                if route.request.resource_type in blocked:
                    route.abort()
                else:
                    route.continue_()

            context.route("**/*", _filter_route)

        yield context
    finally:
        if context is not None:
            try:
                context.close()
            except Exception as e:
                # Best-effort close: the browser may already be gone.
                logger.debug(f"Error closing sync browser context: {e}")
283+
284+
285+
def close_sync_browser_pool():
    """Shut down the sync browser pool.

    While holding ``_sync_lock``, closes the shared sync browser and stops
    the sync Playwright driver if they are set, ignoring errors from either
    call, and resets the corresponding module-level references to ``None``.
    Logs a message once done.
    """
    global _sync_browser, _sync_playwright_instance

    with _sync_lock:
        browser = _sync_browser
        if browser:
            try:
                browser.close()
            except Exception:
                # Best-effort: the browser may already be disconnected.
                pass
            _sync_browser = None

        driver = _sync_playwright_instance
        if driver:
            try:
                driver.stop()
            except Exception:
                # Best-effort: the driver may already be stopped.
                pass
            _sync_playwright_instance = None

    logger.info("🛑 Sync browser pool closed")
305+
306+
188307
@asynccontextmanager
189308
async def get_browser_context(
190309
headless: bool = True,
@@ -251,74 +370,7 @@ async def handle_route(route):
251370
pass
252371

253372

254-
@contextmanager
def get_sync_browser_context(
    headless: bool = True,
    viewport: Optional[Dict] = None,
    user_agent: Optional[str] = None,
    stealth_script: Optional[str] = None,
    block_resources: Optional[List[str]] = None,
    locale: str = "en-US",
):
    """
    Sync context manager yielding a fresh browser context.

    Starts the sync Playwright driver and launches the shared sync browser
    under ``_sync_lock`` if either is missing (or the browser reports being
    disconnected), then creates a new context on it.

    Used on Windows via asyncio.to_thread to bypass subprocess issues.

    Args:
        headless: Forwarded to ``chromium.launch`` when a launch is needed.
        viewport: Viewport dict; defaults to 1280x800 when falsy.
        user_agent: User-agent string; a desktop Chrome UA is used when falsy.
        stealth_script: Optional script installed via ``add_init_script``.
        block_resources: Optional resource types whose requests are aborted.
        locale: Locale passed to ``new_context``.

    Yields:
        The newly created browser context; it is closed on exit.

    Raises:
        Exception: Re-raises any failure to start Playwright or launch the
            browser after logging it.
    """
    global _sync_browser, _sync_playwright_instance

    with _sync_lock:
        if _sync_playwright_instance is None:
            try:
                from playwright.sync_api import sync_playwright
                _sync_playwright_instance = sync_playwright().start()
                logger.info("✅ Sync Playwright started")
            except Exception as e:
                logger.error(f"❌ Failed to start sync Playwright: {e}")
                raise

        if _sync_browser is None or not _sync_browser.is_connected():
            try:
                logger.info(f"🚀 Launching shared SYNC browser (headless={headless})...")
                _sync_browser = _sync_playwright_instance.chromium.launch(
                    headless=headless,
                    args=BROWSER_ARGS
                )
            except Exception as e:
                logger.error(f"❌ Failed to launch sync browser: {e}")
                raise

        # Snapshot under the lock so a concurrent reset of the global cannot
        # turn the reference into None before the context is created.
        browser = _sync_browser

    context = None
    try:
        # Create context
        context = browser.new_context(
            viewport=viewport or {'width': 1280, 'height': 800},
            user_agent=user_agent or (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
            ),
            locale=locale
        )

        if stealth_script:
            context.add_init_script(stealth_script)

        if block_resources:
            # Set membership keeps the per-request check O(1).
            blocked = set(block_resources)

            def sync_handle_route(route):
                if route.request.resource_type in blocked:
                    route.abort()
                else:
                    route.continue_()

            context.route("**/*", sync_handle_route)

        yield context

    finally:
        if context:
            try:
                context.close()
            except Exception:
                # Best-effort close; narrowed from a bare ``except`` so that
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                pass
318-
319-
320373
async def close_browser_pool():
321-
322374
"""
323375
Close the browser pool and release all resources.
324376
Resets ALL globals including lock and semaphore to prevent
@@ -344,29 +396,17 @@ async def close_browser_pool():
344396
logger.debug(f"Error stopping playwright: {e}")
345397
_playwright = None
346398

399+
# Also clean up sync resources
400+
close_sync_browser_pool()
401+
347402
# Reset all synchronization primitives so they are recreated fresh
348403
# This prevents stale locks/semaphores from surviving across reloads
349404
_browser_lock = None
350405
_context_semaphore = None
351406
_active_contexts = 0
352-
353-
# Also clean up sync resources
354-
if _sync_browser:
355-
try:
356-
_sync_browser.close()
357-
except: pass
358-
_sync_browser = None
359-
360-
if _sync_playwright_instance:
361-
try:
362-
_sync_playwright_instance.stop()
363-
except: pass
364-
_sync_playwright_instance = None
365-
366407
logger.info("Browser pool: All resources released and reset")
367408

368409

369-
370410
def get_pool_status() -> dict:
371411
"""Get current browser pool status."""
372412
browser_running = False

0 commit comments

Comments
 (0)