// essence/engines/browser.rs

1use crate::{
2    engines::{RawScrapeResult, ScrapeEngine, stealth::{apply_stealth_techniques, StealthMode}},
3    error::{Result, ScrapeError},
4    types::{BrowserAction, ScrapeRequest},
5    utils::{url_rewrites::rewrite_url, user_agents::random_user_agent},
6};
7use async_trait::async_trait;
8use base64::{engine::general_purpose, Engine as _};
9use chromiumoxide::{
10    browser::{Browser, BrowserConfig},
11    cdp::browser_protocol::{
12        fetch::{
13            EnableParams, EventRequestPaused, FailRequestParams, ContinueRequestParams,
14        },
15        network::ErrorReason,
16        page::CaptureScreenshotFormat,
17    },
18    Page,
19};
20use futures::StreamExt;
21use std::{env, path::PathBuf, sync::Arc, time::{Duration, Instant}};
22use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
23use tracing::{debug, info, warn};
24
/// Domains to block for ads, trackers, and analytics
/// This speeds up page loads by 10-20% and reduces noise in scraped content
///
/// Consulted by `BrowserEngine::setup_request_blocking`, which fails intercepted
/// requests that match an entry and lets all others continue. Blocking is skipped
/// entirely when the `BROWSER_BLOCK_ADS` environment variable parses to `false`.
// NOTE(review): the 10-20% figure is not measured anywhere in this file — confirm.
const BLOCKED_DOMAINS: &[&str] = &[
    "googletagmanager.com",
    "google-analytics.com",
    "analytics.google.com",
    "facebook.net",
    "connect.facebook.net",
    "doubleclick.net",
    "googlesyndication.com",
    "adservice.google.com",
    "static.ads-twitter.com",
    "ads-twitter.com",
    "ads.linkedin.com",
    "bat.bing.com",
    "stats.wp.com",
    "scorecardresearch.com",
    "quantserve.com",
    "chartbeat.com",
    "hotjar.com",
    "mouseflow.com",
    "mixpanel.com",
    "segment.io",
    "segment.com",
    "analytics.tiktok.com",
    "mktoresp.com",
    "pardot.com",
];
53
/// Browser pool for managing multiple browser instances
///
/// Checked-out browsers are handed to callers wrapped in a `BrowserGuard`; idle
/// browsers wait in `browsers` until the next `get_browser` call picks them up.
pub struct BrowserPool {
    /// Limits how many browsers may be checked out at the same time.
    semaphore: Arc<Semaphore>,
    // Stored for reference only; the limit itself is enforced by `semaphore`,
    // so nothing reads this field and the lint has to be silenced.
    #[allow(dead_code)]
    max_instances: usize,
    /// Whether launched browsers should run without a visible window.
    headless: bool,
    /// Fixed user agent for every launched browser; `None` picks a random one per launch.
    user_agent: Option<String>,
    /// Idle browsers available for reuse.
    browsers: Arc<Mutex<Vec<Browser>>>,
    /// Chrome/Chromium executable located once when the pool is created.
    chrome_path: PathBuf,
}
64
65impl BrowserPool {
66    /// Create a new browser pool
67    pub async fn new(max_instances: usize) -> Result<Self> {
68        let headless = std::env::var("BROWSER_HEADLESS")
69            .ok()
70            .and_then(|v| v.parse().ok())
71            .unwrap_or(true);
72
73        let user_agent = std::env::var("BROWSER_USER_AGENT").ok();
74
75        // Find Chrome executable at initialization
76        let chrome_path = Self::find_chrome_executable()?;
77        info!("Found Chrome at: {}", chrome_path.display());
78
79        Ok(Self {
80            semaphore: Arc::new(Semaphore::new(max_instances)),
81            max_instances,
82            headless,
83            user_agent,
84            browsers: Arc::new(Mutex::new(Vec::new())),
85            chrome_path,
86        })
87    }
88
89    /// Get a browser from the pool (creates new if needed)
90    pub async fn get_browser(self: &Arc<Self>) -> Result<BrowserGuard> {
91        // Acquire permit (blocks if pool full)
92        let permit = self
93            .semaphore
94            .clone()
95            .acquire_owned()
96            .await
97            .map_err(|e| ScrapeError::Internal(format!("Failed to acquire browser: {}", e)))?;
98
99        // Try to reuse existing browser
100        let browser = {
101            let mut browsers = self.browsers.lock().await;
102
103            // Try to find a healthy browser
104            while let Some(mut b) = browsers.pop() {
105                if Self::is_browser_healthy(&b).await {
106                    debug!("Reusing existing browser instance");
107                    return Ok(BrowserGuard {
108                        browser: Some(b),
109                        pool: self.browsers.clone(),
110                        _permit: permit,
111                    });
112                } else {
113                    // Unhealthy browser, discard it
114                    warn!("Discarding unhealthy browser");
115                    let _ = b.close().await;
116                }
117            }
118
119            // No healthy browser available, create new one
120            debug!("Creating new browser instance");
121            self.create_browser().await?
122        };
123
124        Ok(BrowserGuard {
125            browser: Some(browser),
126            pool: self.browsers.clone(),
127            _permit: permit,
128        })
129    }
130
131    /// Check if a browser is healthy
132    async fn is_browser_healthy(browser: &Browser) -> bool {
133        // Try to get version (simple health check)
134        match browser.version().await {
135            Ok(_) => true,
136            Err(_) => {
137                warn!("Browser health check failed");
138                false
139            }
140        }
141    }
142
143    /// Create a new browser instance
144    async fn create_browser(&self) -> Result<Browser> {
145        info!("Launching new browser instance (headless: {})", self.headless);
146
147        let mut browser_config = BrowserConfig::builder()
148            .chrome_executable(&self.chrome_path);
149
150        // Set headless mode
151        if self.headless {
152            browser_config = browser_config.no_sandbox().disable_default_args();
153        }
154
155        // Add launch arguments for stealth and stability
156        let mut args = vec![
157            "--disable-blink-features=AutomationControlled",
158            "--disable-dev-shm-usage",
159            "--disable-web-security",
160            "--disable-features=IsolateOrigins,site-per-process",
161            "--allow-running-insecure-content",
162            "--disable-setuid-sandbox",
163            "--no-first-run",
164            "--no-default-browser-check",
165            "--disable-popup-blocking",
166        ];
167
168        if self.headless {
169            args.push("--headless=new");
170        }
171
172        // Set viewport with some variance for anti-bot
173        args.push("--window-size=1920,1080");
174
175        for arg in args {
176            browser_config = browser_config.arg(arg);
177        }
178
179        // Use a unique temp directory for each browser instance to prevent
180        // SingletonLock conflicts when multiple instances launch concurrently
181        let unique_dir = std::env::temp_dir()
182            .join(format!("essence-browser-{}", uuid::Uuid::new_v4()));
183        browser_config = browser_config.arg(format!(
184            "--user-data-dir={}",
185            unique_dir.display()
186        ));
187
188        // Set user agent - use provided one or randomize
189        let user_agent = match &self.user_agent {
190            Some(ua) => ua.as_str(),
191            None => random_user_agent(),
192        };
193        debug!("Using User-Agent for browser: {}", user_agent);
194        browser_config = browser_config.arg(format!("--user-agent={}", user_agent));
195
196        let (browser, mut handler) = Browser::launch(browser_config.build().map_err(|e| {
197            ScrapeError::BrowserLaunchFailed(format!("Failed to build browser config: {}", e))
198        })?)
199        .await
200        .map_err(|e| {
201            ScrapeError::BrowserLaunchFailed(format!("Failed to launch browser: {}", e))
202        })?;
203
204        // Spawn a task to handle browser events
205        tokio::spawn(async move {
206            while let Some(event) = handler.next().await {
207                if let Err(e) = event {
208                    warn!("Browser handler error: {}", e);
209                }
210            }
211            debug!("Browser handler task finished");
212        });
213
214        info!("Browser instance created successfully");
215        Ok(browser)
216    }
217
218    /// Find Chrome executable across platforms
219    fn find_chrome_executable() -> Result<PathBuf> {
220        // Check environment variable first
221        if let Ok(path) = std::env::var("CHROME_PATH") {
222            let path_buf = PathBuf::from(&path);
223            if path_buf.exists() {
224                info!("Using Chrome from CHROME_PATH: {}", path);
225                return Ok(path_buf);
226            }
227        }
228
229        // Platform-specific paths
230        let paths: Vec<&str> = if cfg!(target_os = "macos") {
231            vec![
232                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
233                "/Applications/Chromium.app/Contents/MacOS/Chromium",
234                "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
235                "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
236            ]
237        } else if cfg!(target_os = "linux") {
238            vec![
239                "/usr/bin/google-chrome",
240                "/usr/bin/google-chrome-stable",
241                "/usr/bin/chromium",
242                "/usr/bin/chromium-browser",
243                "/snap/bin/chromium",
244                "/usr/bin/microsoft-edge",
245                "/usr/bin/microsoft-edge-stable",
246            ]
247        } else if cfg!(target_os = "windows") {
248            vec![
249                "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
250                "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
251                "C:\\Program Files\\Chromium\\Application\\chrome.exe",
252                "C:\\Program Files (x86)\\Chromium\\Application\\chrome.exe",
253                "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
254                "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
255            ]
256        } else {
257            vec![]
258        };
259
260        for path in paths {
261            if std::fs::metadata(path).is_ok() {
262                info!("Found Chrome at: {}", path);
263                return Ok(PathBuf::from(path));
264            }
265        }
266
267        // Platform-specific error message
268        let install_msg = if cfg!(target_os = "macos") {
269            "brew install --cask chromium"
270        } else if cfg!(target_os = "linux") {
271            "sudo apt-get install chromium-browser  # Ubuntu/Debian\nsudo yum install chromium  # RHEL/CentOS"
272        } else if cfg!(target_os = "windows") {
273            "Install Chrome from https://www.google.com/chrome/"
274        } else {
275            "Install Chrome or Chromium"
276        };
277
278        Err(ScrapeError::BrowserNotFound(format!(
279            "Chrome/Chromium not found. Please install it:\n{}",
280            install_msg
281        )))
282    }
283}
284
285impl Drop for BrowserPool {
286    fn drop(&mut self) {
287        info!("Shutting down browser pool");
288
289        // Note: We can't use async in Drop, so we'll just log
290        // The browsers will be cleaned up when the Arc is dropped
291        if let Ok(browsers) = self.browsers.try_lock() {
292            info!("Browser pool had {} instances at shutdown", browsers.len());
293        } else {
294            info!("Browser pool shutting down (browsers still in use)");
295        }
296    }
297}
298
/// Guard that returns browser to pool when dropped
///
/// Dereferences to the wrapped `Browser` for the duration of a checkout.
pub struct BrowserGuard {
    /// The checked-out browser; taken out (left as `None`) when the guard is dropped.
    browser: Option<Browser>,
    /// Shared idle list the browser is pushed back onto.
    pool: Arc<Mutex<Vec<Browser>>>,
    /// Semaphore permit held for the lifetime of the guard; never read directly.
    _permit: OwnedSemaphorePermit,
}
305
306impl Drop for BrowserGuard {
307    fn drop(&mut self) {
308        // Return browser to pool for reuse
309        if let Some(browser) = self.browser.take() {
310            let pool = self.pool.clone();
311
312            tokio::spawn(async move {
313                let mut browsers = pool.lock().await;
314                browsers.push(browser);
315                debug!("Browser returned to pool (total: {})", browsers.len());
316            });
317        }
318    }
319}
320
321impl std::ops::Deref for BrowserGuard {
322    type Target = Browser;
323
324    fn deref(&self) -> &Self::Target {
325        self.browser
326            .as_ref()
327            .expect("Browser guard must have a browser")
328    }
329}
330
/// Browser engine using the browser pool
///
/// Implements `ScrapeEngine` by checking a browser out of `pool` for each scrape.
pub struct BrowserEngine {
    /// Shared pool every scrape draws its browser from.
    pool: Arc<BrowserPool>,
}
335
336impl BrowserEngine {
337    /// Create a new browser engine with default settings
338    pub async fn new() -> Result<Self> {
339        Self::with_config(BrowserEngineConfig::default()).await
340    }
341
342    /// Create a new browser engine with custom configuration
343    pub async fn with_config(config: BrowserEngineConfig) -> Result<Self> {
344        let pool = Arc::new(BrowserPool::new(config.pool_size).await?);
345
346        info!("Browser engine initialized with pool size: {}", config.pool_size);
347
348        Ok(Self { pool })
349    }
350
351    /// Setup request blocking for ads and analytics
352    async fn setup_request_blocking(&self, page: &Page) -> Result<()> {
353        // Check if ad blocking is enabled
354        let block_ads = std::env::var("BROWSER_BLOCK_ADS")
355            .ok()
356            .and_then(|v| v.parse().ok())
357            .unwrap_or(true); // Default: enabled
358
359        if !block_ads {
360            debug!("Ad blocking disabled");
361            return Ok(());
362        }
363
364        info!("Enabling ad/analytics blocking for {} domains", BLOCKED_DOMAINS.len());
365
366        // Enable fetch domain for request interception
367        page.execute(EnableParams::default())
368            .await
369            .map_err(|e| ScrapeError::BrowserError(format!("Failed to enable fetch domain: {}", e)))?;
370
371        // Clone page for async task
372        let page = page.clone();
373
374        // Spawn task to handle request interception
375        tokio::spawn(async move {
376            // Listen for request paused events
377            let mut event_stream = match page.event_listener::<EventRequestPaused>().await {
378                Ok(stream) => stream,
379                Err(e) => {
380                    warn!("Failed to create request event listener: {}", e);
381                    return;
382                }
383            };
384
385            while let Some(event) = event_stream.next().await {
386                let url = &event.request.url;
387
388                // Check if URL matches blocked domain
389                let should_block = BLOCKED_DOMAINS.iter().any(|domain| url.contains(domain));
390
391                if should_block {
392                    debug!("Blocking request to: {}", url);
393                    // Abort the request
394                    let params = FailRequestParams::new(
395                        event.request_id.clone(),
396                        ErrorReason::BlockedByClient,
397                    );
398                    if let Err(e) = page.execute(params).await {
399                        warn!("Failed to block request: {}", e);
400                    }
401                } else {
402                    // Allow the request to continue
403                    let params = ContinueRequestParams::new(event.request_id.clone());
404                    if let Err(e) = page.execute(params).await {
405                        warn!("Failed to continue request: {}", e);
406                    }
407                }
408            }
409
410            debug!("Request interception task finished");
411        });
412
413        Ok(())
414    }
415
416    /// Execute browser actions on a page
417    async fn execute_actions(&self, page: &Page, actions: &[BrowserAction]) -> Result<()> {
418        for action in actions {
419            match action {
420                BrowserAction::Click { selector } => {
421                    debug!("Clicking element: {}", selector);
422                    let element = page.find_element(selector).await.map_err(|e| {
423                        ScrapeError::ElementNotFound(format!(
424                            "Failed to find element {}: {}",
425                            selector, e
426                        ))
427                    })?;
428
429                    element.click().await.map_err(|e| {
430                        ScrapeError::BrowserError(format!("Failed to click element: {}", e))
431                    })?;
432                }
433                BrowserAction::Type { selector, text } => {
434                    debug!("Typing '{}' into element: {}", text, selector);
435                    let element = page.find_element(selector).await.map_err(|e| {
436                        ScrapeError::ElementNotFound(format!(
437                            "Failed to find element {}: {}",
438                            selector, e
439                        ))
440                    })?;
441
442                    element.click().await.map_err(|e| {
443                        ScrapeError::BrowserError(format!("Failed to focus element: {}", e))
444                    })?;
445
446                    element.type_str(text).await.map_err(|e| {
447                        ScrapeError::BrowserError(format!("Failed to type text: {}", e))
448                    })?;
449                }
450                BrowserAction::Scroll { direction } => {
451                    debug!("Scrolling: {}", direction);
452                    let script = match direction.as_str() {
453                        "down" => "window.scrollBy(0, window.innerHeight);",
454                        "up" => "window.scrollBy(0, -window.innerHeight);",
455                        "bottom" => "window.scrollTo(0, document.body.scrollHeight);",
456                        "top" => "window.scrollTo(0, 0);",
457                        _ => {
458                            return Err(ScrapeError::BrowserError(format!(
459                                "Invalid scroll direction: {}",
460                                direction
461                            )))
462                        }
463                    };
464
465                    page.evaluate(script).await.map_err(|e| {
466                        ScrapeError::BrowserError(format!("Failed to scroll: {}", e))
467                    })?;
468                }
469                BrowserAction::Wait { milliseconds } => {
470                    debug!("Waiting for {} ms", milliseconds);
471                    tokio::time::sleep(Duration::from_millis(*milliseconds)).await;
472                }
473                BrowserAction::WaitForSelector { selector } => {
474                    debug!("Waiting for selector: {}", selector);
475                    page.find_element(selector).await.map_err(|e| {
476                        ScrapeError::ElementNotFound(format!(
477                            "Element not found after waiting: {}",
478                            e
479                        ))
480                    })?;
481                }
482            }
483        }
484
485        Ok(())
486    }
487
488    /// Capture screenshot of the page
489    /// Reserved for future screenshot functionality
490    #[allow(dead_code)]
491    async fn capture_screenshot(&self, page: &Page, format: &str) -> Result<String> {
492        let screenshot_format = match format {
493            "jpeg" | "jpg" => CaptureScreenshotFormat::Jpeg,
494            _ => CaptureScreenshotFormat::Png,
495        };
496
497        let screenshot_bytes = page
498            .screenshot(
499                chromiumoxide::page::ScreenshotParams::builder()
500                    .format(screenshot_format)
501                    .full_page(true)
502                    .build(),
503            )
504            .await
505            .map_err(|e| {
506                ScrapeError::BrowserError(format!("Failed to capture screenshot: {}", e))
507            })?;
508
509        Ok(general_purpose::STANDARD.encode(&screenshot_bytes))
510    }
511}
512
513#[async_trait]
514impl ScrapeEngine for BrowserEngine {
515    async fn scrape(&self, request: &ScrapeRequest) -> Result<RawScrapeResult> {
516        let start = Instant::now();
517
518        info!("Starting browser scrape for URL: {}", request.url);
519
520        // Get browser from pool
521        let browser = self.pool.get_browser().await?;
522
523        // Set timeout for entire operation
524        let timeout = Duration::from_millis(request.timeout);
525
526        let result = tokio::time::timeout(timeout, async {
527            // Create a new page
528            debug!("Creating new page");
529            let page = browser
530                .new_page("about:blank")
531                .await
532                .map_err(|e| ScrapeError::BrowserError(format!("Failed to create page: {}", e)))?;
533
534            // Apply stealth techniques (hide webdriver, inject JS, randomize fingerprints)
535            let stealth_mode = StealthMode::from_env();
536            apply_stealth_techniques(&page, stealth_mode).await?;
537
538            // Setup request blocking for ads/analytics before navigation
539            self.setup_request_blocking(&page).await?;
540
541            // Rewrite URL if needed (e.g., Google Docs → export URL)
542            let url_to_scrape = rewrite_url(&request.url);
543
544            // Navigate to URL
545            debug!("Navigating to URL: {}", url_to_scrape);
546            page.goto(&url_to_scrape)
547                .await
548                .map_err(|e| ScrapeError::NavigationFailed(format!("Failed to navigate: {}", e)))?;
549
550            // Wait for network idle
551            debug!("Waiting for network idle");
552            page.wait_for_navigation()
553                .await
554                .map_err(|e| ScrapeError::NavigationFailed(format!("Navigation timeout: {}", e)))?;
555
556            // Wait for selector if specified
557            if let Some(selector) = &request.wait_for_selector {
558                debug!("Waiting for selector: {}", selector);
559                page.find_element(selector)
560                    .await
561                    .map_err(|e| ScrapeError::ElementNotFound(format!("Selector not found: {}", e)))?;
562            }
563
564            // Additional wait time if specified
565            if request.wait_for > 0 {
566                debug!("Additional wait for {} ms", request.wait_for);
567                tokio::time::sleep(Duration::from_millis(request.wait_for)).await;
568            }
569
570            // Execute browser actions if any
571            if !request.actions.is_empty() {
572                debug!("Executing {} browser actions", request.actions.len());
573                self.execute_actions(&page, &request.actions).await?;
574            }
575
576            // Get final URL after any redirects
577            let final_url = page
578                .url()
579                .await
580                .map_err(|e| ScrapeError::BrowserError(format!("Failed to get URL: {}", e)))?
581                .unwrap_or_else(|| request.url.clone());
582
583            // Get HTML content
584            debug!("Extracting HTML content");
585            let html = page
586                .content()
587                .await
588                .map_err(|e| ScrapeError::BrowserError(format!("Failed to get HTML: {}", e)))?;
589
590            // Check response size limit
591            let max_response_size_mb = std::env::var("MAX_RESPONSE_SIZE_MB")
592                .ok()
593                .and_then(|s| s.parse::<usize>().ok())
594                .unwrap_or(50);
595
596            let max_size_bytes = max_response_size_mb * 1024 * 1024;
597
598            if html.len() > max_size_bytes {
599                return Err(ScrapeError::ResourceLimit(format!(
600                    "Response too large: {:.2}MB > {}MB",
601                    html.len() as f64 / (1024.0 * 1024.0),
602                    max_response_size_mb
603                )));
604            }
605
606            info!("Successfully scraped URL with browser: {}", final_url);
607
608            // Close the page to free resources
609            if let Err(e) = page.close().await {
610                warn!("Failed to close page: {}", e);
611            }
612
613            Ok::<_, ScrapeError>(RawScrapeResult {
614                url: final_url,
615                status_code: 200, // Browser always returns 200 if navigation succeeds
616                content_type: Some("text/html".to_string()),
617                html,
618                headers: vec![],
619            })
620        })
621        .await;
622
623        let _duration = start.elapsed().as_secs_f64();
624
625        // Handle timeout and unwrap result
626        match result {
627            Ok(Ok(result)) => Ok(result),
628            Ok(Err(e)) => Err(e),
629            Err(_) => Err(ScrapeError::Timeout),
630        }
631    }
632}
633
/// Settings used to construct a browser engine.
#[derive(Debug, Clone)]
pub struct BrowserEngineConfig {
    /// Run browsers without a visible window.
    pub headless: bool,
    /// Maximum number of browsers in the pool.
    pub pool_size: usize,
    /// Optional fixed user agent string.
    pub user_agent: Option<String>,
}

impl Default for BrowserEngineConfig {
    /// Reads defaults from the environment:
    /// `BROWSER_HEADLESS` (fallback `true`), `BROWSER_POOL_SIZE` (fallback `5`)
    /// and `BROWSER_USER_AGENT` (fallback `None`). Unparsable values use the fallback.
    fn default() -> Self {
        let headless = env::var("BROWSER_HEADLESS")
            .ok()
            .and_then(|raw| raw.parse::<bool>().ok())
            .unwrap_or(true);
        let pool_size = env::var("BROWSER_POOL_SIZE")
            .ok()
            .and_then(|raw| raw.parse::<usize>().ok())
            .unwrap_or(5);
        let user_agent = env::var("BROWSER_USER_AGENT").ok();

        Self {
            headless,
            pool_size,
            user_agent,
        }
    }
}

impl BrowserEngineConfig {
    /// Same as [`Default::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder-style setter for `headless`.
    pub fn headless(self, headless: bool) -> Self {
        Self { headless, ..self }
    }

    /// Builder-style setter for `pool_size`.
    pub fn pool_size(self, size: usize) -> Self {
        Self {
            pool_size: size,
            ..self
        }
    }

    /// Builder-style setter for `user_agent`; accepts anything convertible to `String`.
    pub fn user_agent(self, agent: impl Into<String>) -> Self {
        Self {
            user_agent: Some(agent.into()),
            ..self
        }
    }
}
678
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults are headless with a pool of five (assumes the related env vars are unset).
    #[test]
    fn test_config_creation() {
        let defaults = BrowserEngineConfig::new();
        assert!(defaults.headless);
        assert_eq!(defaults.pool_size, 5);
    }

    /// Each builder setter overrides exactly its own field.
    #[test]
    fn test_config_builder() {
        let custom = BrowserEngineConfig::new()
            .headless(false)
            .pool_size(10)
            .user_agent("Custom Agent");

        assert!(!custom.headless);
        assert_eq!(custom.pool_size, 10);
        assert_eq!(custom.user_agent.as_deref(), Some("Custom Agent"));
    }

    /// Chrome may or may not be installed (e.g. in CI), so both outcomes are
    /// accepted as long as they are well-formed.
    #[test]
    fn test_chrome_detection() {
        match BrowserPool::find_chrome_executable() {
            // Chrome found, check it exists
            Ok(path) => assert!(path.exists(), "Chrome path should exist: {:?}", path),
            // Not found: only the dedicated error with its message is acceptable
            Err(ScrapeError::BrowserNotFound(msg)) => {
                assert!(msg.contains("Chrome/Chromium not found"));
            }
            Err(_) => panic!("Expected BrowserNotFound error"),
        }
    }

    #[tokio::test]
    #[ignore] // Requires Chrome to be installed
    async fn test_browser_pool_creation() {
        let created = BrowserPool::new(2).await;
        assert!(created.is_ok(), "Browser pool creation failed: {:?}", created.err());
    }

    /// A third checkout from a pool of two must wait until one guard is released.
    #[tokio::test]
    #[ignore] // Requires Chrome to be installed
    async fn test_browser_pool_limit() {
        let pool = Arc::new(BrowserPool::new(2).await.unwrap());

        // Exhaust the pool
        let first = pool.get_browser().await.unwrap();
        let second = pool.get_browser().await.unwrap();

        let started = std::time::Instant::now();
        let waiter_pool = Arc::clone(&pool);

        // Release one browser after 500ms from a separate task
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(500)).await;
            drop(first);
        });

        // This call can only complete once `first` has been released
        let _third = waiter_pool.get_browser().await.unwrap();
        let waited = started.elapsed();

        assert!(
            waited.as_millis() >= 400,
            "Should have waited for browser release, got: {}ms",
            waited.as_millis()
        );

        drop(second);
    }

    /// A browser returned to the pool is handed out again instead of launching a new one.
    #[tokio::test]
    #[ignore] // Requires Chrome to be installed
    async fn test_browser_reuse() {
        let pool = Arc::new(BrowserPool::new(1).await.unwrap());

        // Check out and immediately release a browser
        let first = pool.get_browser().await.unwrap();
        drop(first);

        // Give time for the browser to be returned
        tokio::time::sleep(Duration::from_millis(100)).await;

        let started = std::time::Instant::now();
        let _second = pool.get_browser().await.unwrap();
        let took = started.elapsed();

        // Reuse is fast; launching a new browser typically takes over a second
        assert!(
            took.as_millis() < 500,
            "Browser should be reused (fast), but took: {}ms",
            took.as_millis()
        );
    }

    #[tokio::test]
    #[ignore] // Requires Chrome to be installed and network
    async fn test_browser_health_check() {
        let pool = Arc::new(BrowserPool::new(1).await.unwrap());
        let guard = pool.get_browser().await.unwrap();

        // A freshly launched browser must pass the health check
        let healthy = BrowserPool::is_browser_healthy(&guard).await;
        assert!(healthy, "Browser should be healthy after creation");
    }
}