Skip to main content

html2pdf_api/
pool.rs

1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//!   ├─ BrowserPoolInner (shared state)
21//!   │   ├─ available: Vec<TrackedBrowser>  (pooled, ready to use)
//!   │   ├─ active: HashMap<id, TrackedBrowser>  (every tracked browser, pooled or checked out)
23//!   │   └─ replacement_tasks: Vec<JoinHandle>  (async replacement creators)
24//!   └─ keep_alive_handle: JoinHandle  (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//!     // Create pool
41//!     let mut pool = BrowserPool::builder()
42//!         .config(
43//!             BrowserPoolConfigBuilder::new()
44//!                 .max_pool_size(5)
45//!                 .warmup_count(3)
46//!                 .build()?
47//!         )
48//!         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//!         .build()?;
50//!
51//!     // Warmup
52//!     pool.warmup().await?;
53//!
54//!     // Use browsers
55//!     {
56//!         let browser = pool.get()?;
57//!         let tab = browser.new_tab()?;
58//!         // ... do work ...
59//!     } // browser returned to pool automatically
60//!
61//!     // Shutdown
62//!     pool.shutdown_async().await;
63//!
64//!     Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
/// Internal shared state for the browser pool.
///
/// This struct contains all shared state and is wrapped in Arc for thread-safe
/// sharing between the pool, handles, and background threads.
///
/// # Lock Ordering (CRITICAL)
///
/// When both collection locks are needed at the same time, always acquire
/// them in this order to prevent deadlocks:
/// 1. `active` (every tracked browser, whether pooled or checked out)
/// 2. `available` (the subset currently sitting in the pool, ready for use)
///
/// Never hold locks during I/O operations or browser creation.
///
/// # Thread Safety
///
/// All fields are protected by appropriate synchronization primitives:
/// - `Mutex` for mutable collections
/// - `AtomicBool` for shutdown flag
/// - `Arc` for shared ownership
pub(crate) struct BrowserPoolInner {
    /// Configuration (immutable after creation).
    config: BrowserPoolConfig,

    /// Browsers available for checkout (not currently in use).
    ///
    /// Protected by Mutex. Browsers are popped from here when checked out
    /// and pushed back here when released (if the pool is not full).
    available: Mutex<Vec<Arc<TrackedBrowser>>>,

    /// All browsers that exist (both pooled and checked out).
    ///
    /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
    /// Maps browser ID -> TrackedBrowser for fast lookup. A browser is inserted
    /// as soon as it is created and removed only when it is retired.
    active: Mutex<HashMap<u64, Arc<TrackedBrowser>>>,

    /// Factory for creating new browser instances.
    factory: Box<dyn BrowserFactory>,

    /// Atomic flag indicating shutdown in progress.
    ///
    /// Checked before expensive operations. Once set, no new operations start.
    shutting_down: AtomicBool,

    /// Background tasks creating replacement browsers.
    ///
    /// Tracked so we can abort them during shutdown.
    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,

    /// Handle to tokio runtime for spawning async tasks.
    ///
    /// Captured at creation time to allow spawning from any context.
    runtime_handle: tokio::runtime::Handle,

    /// Shutdown signaling mechanism for keep-alive thread.
    ///
    /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
    /// instead of waiting for full ping_interval.
    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
}
146
147impl BrowserPoolInner {
148    /// Create a new browser pool inner state.
149    ///
150    /// # Parameters
151    ///
152    /// * `config` - Validated configuration.
153    /// * `factory` - Browser factory for creating instances.
154    ///
155    /// # Panics
156    ///
157    /// Panics if called outside a tokio runtime context.
158    pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159        log::info!(
160            "🚀 Initializing browser pool with capacity {}",
161            config.max_pool_size
162        );
163        log::debug!(
164            "📋 Pool config: warmup={}, TTL={}s, ping_interval={}s",
165            config.warmup_count,
166            config.browser_ttl.as_secs(),
167            config.ping_interval.as_secs()
168        );
169
170        // Capture runtime handle for spawning async tasks
171        // This allows us to spawn from sync contexts (like Drop)
172        let runtime_handle = tokio::runtime::Handle::current();
173
174        Arc::new(Self {
175            config,
176            available: Mutex::new(Vec::new()),
177            active: Mutex::new(HashMap::new()),
178            factory,
179            shutting_down: AtomicBool::new(false),
180            replacement_tasks: Mutex::new(Vec::new()),
181            runtime_handle,
182            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183        })
184    }
185
186    /// Create a lightweight mock pool for testing without background threads.
187    #[cfg(test)]
188    pub(crate) fn new_for_test(
189        config: BrowserPoolConfig,
190        factory: Box<dyn BrowserFactory>,
191        runtime_handle: tokio::runtime::Handle,
192    ) -> Self {
193        Self {
194            config,
195            available: Mutex::new(Vec::new()),
196            active: Mutex::new(HashMap::new()),
197            factory,
198            shutting_down: AtomicBool::new(false),
199            replacement_tasks: Mutex::new(Vec::new()),
200            runtime_handle,
201            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
202        }
203    }
204
205    /// Create a browser directly without using the pool.
206    ///
207    /// Used for:
208    /// - Initial warmup
209    /// - Replacing failed browsers
210    /// - When pool is empty
211    ///
212    /// # Important
213    ///
214    /// Adds the browser to `active` tracking immediately for health monitoring.
215    ///
216    /// # Errors
217    ///
218    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
219    /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
220    pub(crate) fn create_browser_direct(&self) -> Result<Arc<TrackedBrowser>> {
221        // Early exit if shutting down (don't waste time creating browsers)
222        if self.shutting_down.load(Ordering::Acquire) {
223            log::debug!("🛑 Skipping browser creation - pool is shutting down");
224            return Err(BrowserPoolError::ShuttingDown);
225        }
226
227        log::debug!("📦 Creating new browser directly via factory...");
228
229        // Factory handles all Chrome launch complexity
230        let browser = self.factory.create()?;
231
232        // Wrap with tracking metadata and Arc immediately
233        let tracked = Arc::new(TrackedBrowser::new(browser)?);
234        let id = tracked.id();
235
236        // Add to active tracking immediately for health monitoring
237        // This ensures keep-alive thread will monitor it
238        if let Ok(mut active) = self.active.lock() {
239            active.insert(id, Arc::clone(&tracked));
240            log::debug!(
241                "📊 Browser {} added to active tracking (total active: {})",
242                id,
243                active.len()
244            );
245        } else {
246            log::warn!(
247                "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
248                id
249            );
250        }
251
252        log::info!("✅ Created new browser with ID {}", id);
253        Ok(tracked)
254    }
255
256    /// Get a browser from pool or create a new one.
257    ///
258    /// # Algorithm
259    ///
260    /// 1. Loop through pooled browsers
261    /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
262    ///    - If near expiry: Skip (drop) it immediately.
263    ///    - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
264    /// 3. For valid browsers, perform detailed health check (without holding locks)
265    /// 4. If healthy, return it
266    /// 5. If unhealthy, remove from active tracking and try next
267    /// 6. If pool empty or all skipped/unhealthy, create new browser
268    ///
269    /// # Critical: Lock-Free Health Checks
270    ///
271    /// Health checks are performed WITHOUT holding locks to avoid blocking
272    /// other threads. This is why we use a loop pattern instead of iterator.
273    ///
274    /// # Returns
275    ///
276    /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
277    ///
278    /// # Errors
279    ///
280    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
281    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
282    pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
283        log::debug!("🔍 Attempting to get browser from pool...");
284
285        // Try to get from pool - LOOP pattern to avoid holding lock during health checks
286        // This is critical for concurrency: we release the lock between attempts
287        loop {
288            // Acquire lock briefly to pop one browser
289            let tracked_opt = {
290                let mut available = self.available.lock().unwrap_or_else(|poisoned| {
291                    log::warn!("Pool available lock poisoned, recovering");
292                    poisoned.into_inner()
293                });
294                let popped = available.pop();
295                log::trace!("📊 Pool size after pop: {}", available.len());
296                popped
297            }; // Lock released here - critical for performance
298
299            if let Some(tracked) = tracked_opt {
300                // === LOGIC START: Grace Period Check ===
301                let age = tracked.created_at().elapsed();
302                let ttl = self.config.browser_ttl;
303
304                // Safety margin matching your stagger interval
305                let safety_margin = Duration::from_secs(30);
306
307                // If browser is about to expire, don't use it.
308                if age + safety_margin > ttl {
309                    log::debug!(
310                        "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
311                        tracked.id(),
312                        age.as_secs()
313                    );
314
315                    // CRITICAL: We do NOT remove/recreate here.
316                    // By simply 'continuing', we drop this 'tracked' instance.
317                    // 1. It is NOT returned to 'available' (so no user gets it).
318                    // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
319                    // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
320                    continue;
321                }
322                // === LOGIC END: Grace Period Check ===
323
324                // Get pool size for logging (brief lock)
325                let pool_size = {
326                    let available = self.available.lock().unwrap_or_else(|poisoned| {
327                        log::warn!("Pool available lock poisoned, recovering");
328                        poisoned.into_inner()
329                    });
330                    available.len()
331                };
332
333                log::info!(
334                    "♻️ Reusing healthy browser {} from pool (pool size: {})",
335                    tracked.id(),
336                    pool_size
337                );
338
339                // Return healthy browser wrapped in RAII handle
340                return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
341            } else {
342                // Pool is empty, break to create new browser
343                log::debug!("📥 Pool is empty, will create new browser");
344                break;
345            }
346        }
347
348        // Pool is empty or no healthy browsers found
349        log::info!("📦 Creating new browser (pool was empty or all browsers unhealthy)");
350
351        let tracked = self.create_browser_direct()?;
352
353        log::info!("✅ Returning newly created browser {}", tracked.id());
354        Ok(BrowserHandle::new(tracked, Arc::clone(self)))
355    }
356
357    /// Return a browser to the pool (called by BrowserHandle::drop).
358    ///
359    /// # Critical Lock Ordering
360    ///
361    /// Always acquires locks in order: active -> available.
362    /// Both locks are held together to prevent race conditions.
363    ///
364    /// # Algorithm
365    ///
366    /// 1. Acquire both locks (order: active, then available)
367    /// 2. Verify browser is in active tracking
368    /// 3. Check TTL - if expired, retire and trigger replacement
369    /// 4. If pool has space, add to available pool
370    /// 5. If pool full, remove from active (browser gets dropped)
371    ///
372    /// # Parameters
373    ///
374    /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
375    /// * `tracked` - The browser being returned.
376    pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: Arc<TrackedBrowser>) {
377        log::debug!("♻️ Returning browser {} to pool...", tracked.id());
378
379        // Early exit if shutting down (don't waste time managing pool)
380        if self_arc.shutting_down.load(Ordering::Acquire) {
381            log::debug!(
382                "🛑 Pool shutting down, not returning browser {}",
383                tracked.id()
384            );
385            return;
386        }
387
388        // CRITICAL: Always acquire in order: active -> pool
389        // Holding both locks prevents ALL race conditions:
390        // - Prevents concurrent modifications to browser state
391        // - Prevents duplicate returns
392        // - Ensures pool size limits are respected
393        let mut active = self_arc.active.lock().unwrap_or_else(|poisoned| {
394            log::warn!("Pool active lock poisoned, recovering");
395            poisoned.into_inner()
396        });
397        let mut pool = self_arc.available.lock().unwrap_or_else(|poisoned| {
398            log::warn!("Pool available lock poisoned, recovering");
399            poisoned.into_inner()
400        });
401
402        // Verify browser is actually tracked (sanity check)
403        if !active.contains_key(&tracked.id()) {
404            log::warn!(
405                "❌ Browser {} not in active tracking (probably already removed), skipping return",
406                tracked.id()
407            );
408            return;
409        }
410
411        // Check TTL before returning to pool
412        // Expired browsers should be retired to prevent memory leaks
413        if tracked.is_expired(self_arc.config.browser_ttl) {
414            log::info!(
415                "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
416                tracked.id(),
417                tracked.age_minutes(),
418                self_arc.config.browser_ttl.as_secs() / 60
419            );
420
421            // Remove from active tracking
422            active.remove(&tracked.id());
423            log::debug!("📊 Active browsers after TTL retirement: {}", active.len());
424
425            // Release locks before spawning replacement task
426            drop(active);
427            drop(pool);
428
429            // Trigger async replacement creation (non-blocking)
430            log::debug!("🔍 Triggering replacement browser creation for expired browser");
431            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
432            return;
433        }
434
435        // Check health marker before returning to pool
436        // Crashed browsers must be retired to prevent poison pill loops
437        if !tracked.is_healthy() {
438            log::warn!(
439                "⚕️ Browser {} marked unhealthy, retiring instead of returning",
440                tracked.id()
441            );
442
443            // Remove from active tracking
444            active.remove(&tracked.id());
445            log::debug!(
446                "📊 Active browsers after health retirement: {}",
447                active.len()
448            );
449
450            // Release locks before spawning replacement task
451            drop(active);
452            drop(pool);
453
454            // Trigger async replacement creation (non-blocking)
455            log::debug!("🔍 Triggering replacement browser creation for unhealthy browser");
456            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
457            return;
458        }
459
460        // Prevent duplicate returns (defensive programming)
461        if pool.iter().any(|b| b.id() == tracked.id()) {
462            log::warn!(
463                "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
464                tracked.id()
465            );
466            return;
467        }
468
469        // Check if pool has space for this browser
470        if pool.len() < self_arc.config.max_pool_size {
471            // Add to pool for reuse
472            pool.push(tracked.clone());
473            log::info!(
474                "♻️ Browser {} returned to pool (pool size: {}/{})",
475                tracked.id(),
476                pool.len(),
477                self_arc.config.max_pool_size
478            );
479        } else {
480            // Pool is full, remove from tracking (browser will be dropped)
481            log::debug!(
482                "️ Pool full ({}/{}), removing browser {} from system",
483                pool.len(),
484                self_arc.config.max_pool_size,
485                tracked.id()
486            );
487            active.remove(&tracked.id());
488            log::debug!("📊 Active browsers after removal: {}", active.len());
489        }
490    }
491
492    /// Asynchronously create replacement browsers (internal helper).
493    ///
494    /// This is the async work function that actually creates browsers.
495    /// It's spawned as a tokio task by `spawn_replacement_creation`.
496    ///
497    /// # Algorithm
498    ///
499    /// 1. Check shutdown flag before each creation
500    /// 2. Check pool space before each creation
501    /// 3. Use spawn_blocking for CPU-bound browser creation
502    /// 4. Add successful browsers to pool
503    /// 5. Log detailed status
504    ///
505    /// # Parameters
506    ///
507    /// * `inner` - Arc reference to pool state.
508    /// * `count` - Number of browsers to attempt to create.
509    async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
510        log::info!(
511            "🔍 Starting async replacement creation for {} browsers",
512            count
513        );
514
515        let mut created_count = 0;
516        let mut failed_count = 0;
517
518        for i in 0..count {
519            // Check shutdown flag before each expensive operation
520            if inner.shutting_down.load(Ordering::Acquire) {
521                log::info!(
522                    "🛑 Shutdown detected during replacement creation, stopping at {}/{}",
523                    i,
524                    count
525                );
526                break;
527            }
528
529            // Check if pool has space BEFORE creating (avoid wasted work)
530            let pool_has_space = {
531                let pool = inner.available.lock().unwrap_or_else(|poisoned| {
532                    log::warn!("Pool available lock poisoned, recovering");
533                    poisoned.into_inner()
534                });
535                let has_space = pool.len() < inner.config.max_pool_size;
536                log::trace!(
537                    "📊 Pool space check: {}/{} (has space: {})",
538                    pool.len(),
539                    inner.config.max_pool_size,
540                    has_space
541                );
542                has_space
543            };
544
545            if !pool_has_space {
546                log::warn!(
547                    "⚠️ Pool is full, stopping replacement creation at {}/{}",
548                    i,
549                    count
550                );
551                break;
552            }
553
554            log::debug!("📦 Creating replacement browser {}/{}", i + 1, count);
555
556            // Use spawn_blocking for CPU-bound browser creation
557            // This prevents blocking the async runtime
558            let inner_clone = Arc::clone(&inner);
559            let result =
560                tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;
561
562            match result {
563                Ok(Ok(tracked)) => {
564                    let id = tracked.id();
565
566                    // Add to pool (with space check to handle race conditions)
567                    let mut pool = inner.available.lock().unwrap_or_else(|poisoned| {
568                        log::warn!("Pool available lock poisoned, recovering");
569                        poisoned.into_inner()
570                    });
571
572                    // Double-check space (another thread might have added browsers)
573                    if pool.len() < inner.config.max_pool_size {
574                        pool.push(tracked);
575                        created_count += 1;
576                        log::info!(
577                            "✅ Created replacement browser {} and added to pool ({}/{})",
578                            id,
579                            i + 1,
580                            count
581                        );
582                    } else {
583                        log::warn!(
584                            "⚠️ Pool became full during creation, replacement browser {} kept in active only",
585                            id
586                        );
587                        created_count += 1; // Still count as created (just not pooled)
588                    }
589                }
590                Ok(Err(e)) => {
591                    failed_count += 1;
592                    log::error!(
593                        "❌ Failed to create replacement browser {}/{}: {}",
594                        i + 1,
595                        count,
596                        e
597                    );
598                }
599                Err(e) => {
600                    failed_count += 1;
601                    log::error!(
602                        "❌ Replacement browser {}/{} task panicked: {:?}",
603                        i + 1,
604                        count,
605                        e
606                    );
607                }
608            }
609        }
610
611        // Final status report
612        let pool_size = inner
613            .available
614            .lock()
615            .unwrap_or_else(|poisoned| {
616                log::warn!("Pool available lock poisoned, recovering");
617                poisoned.into_inner()
618            })
619            .len();
620        let active_size = inner
621            .active
622            .lock()
623            .unwrap_or_else(|poisoned| {
624                log::warn!("Pool active lock poisoned, recovering");
625                poisoned.into_inner()
626            })
627            .len();
628
629        log::info!(
630            "🏁 Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
631            created_count,
632            count,
633            failed_count,
634            pool_size,
635            active_size
636        );
637    }
638
639    /// Spawn a background task to create replacement browsers.
640    ///
641    /// This is non-blocking and returns immediately. The actual browser
642    /// creation happens in a tokio task tracked in `replacement_tasks`.
643    ///
644    /// # Why Async
645    ///
646    /// Browser creation is slow (1-3 seconds per browser). Spawning async
647    /// tasks prevents blocking the caller.
648    ///
649    /// # Task Tracking
650    ///
651    /// Tasks are tracked so we can abort them during shutdown.
652    ///
653    /// # Parameters
654    ///
655    /// * `inner` - Arc reference to pool state.
656    /// * `count` - Number of replacement browsers to create.
657    pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
658        log::info!(
659            "📥 Spawning async task to create {} replacement browsers",
660            count
661        );
662
663        // Clone Arc for moving into async task
664        let inner_for_task = Arc::clone(&inner);
665
666        // Spawn async task on the captured runtime
667        let task_handle = inner.runtime_handle.spawn(async move {
668            Self::spawn_replacement_creation_async(inner_for_task, count).await;
669        });
670
671        // Track task handle for shutdown cleanup
672        if let Ok(mut tasks) = inner.replacement_tasks.lock() {
673            // Clean up finished tasks while we have the lock (housekeeping)
674            let original_count = tasks.len();
675            tasks.retain(|h| !h.is_finished());
676            let cleaned = original_count - tasks.len();
677
678            if cleaned > 0 {
679                log::trace!("🧹 Cleaned up {} finished replacement tasks", cleaned);
680            }
681
682            // Add new task
683            tasks.push(task_handle);
684
685            log::debug!("📋 Now tracking {} active replacement tasks", tasks.len());
686        } else {
687            log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
688        }
689    }
690
691    /// Get the pool configuration.
692    #[inline]
693    pub(crate) fn config(&self) -> &BrowserPoolConfig {
694        &self.config
695    }
696
697    /// Check if the pool is shutting down.
698    #[inline]
699    pub(crate) fn is_shutting_down(&self) -> bool {
700        self.shutting_down.load(Ordering::Acquire)
701    }
702
703    /// Set the shutdown flag.
704    #[inline]
705    pub(crate) fn set_shutting_down(&self, value: bool) {
706        self.shutting_down.store(value, Ordering::Release);
707    }
708
709    /// Get the shutdown signal for the keep-alive thread.
710    #[inline]
711    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
712        &self.shutdown_signal
713    }
714
715    /// Get the available browsers count.
716    pub(crate) fn available_count(&self) -> usize {
717        self.available.lock().map(|g| g.len()).unwrap_or(0)
718    }
719
720    /// Get the active browsers count.
721    pub(crate) fn active_count(&self) -> usize {
722        self.active.lock().map(|g| g.len()).unwrap_or(0)
723    }
724
725    /// Get a snapshot of active browsers for health checking.
726    ///
727    /// Returns a cloned list to avoid holding locks during I/O.
728    pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, Arc<TrackedBrowser>)> {
729        let active = self.active.lock().unwrap_or_else(|poisoned| {
730            log::warn!("Pool active lock poisoned, recovering");
731            poisoned.into_inner()
732        });
733        active
734            .iter()
735            .map(|(id, tracked)| (*id, Arc::clone(tracked)))
736            .collect()
737    }
738
739    /// Remove a browser from active tracking.
740    pub(crate) fn remove_from_active(&self, id: u64) -> Option<Arc<TrackedBrowser>> {
741        let mut active = self.active.lock().unwrap_or_else(|poisoned| {
742            log::warn!("Pool active lock poisoned, recovering");
743            poisoned.into_inner()
744        });
745        active.remove(&id)
746    }
747
748    /// Remove browsers from the available pool by ID.
749    pub(crate) fn remove_from_available(&self, ids: &[u64]) {
750        let mut pool = self.available.lock().unwrap_or_else(|poisoned| {
751            log::warn!("Pool available lock poisoned, recovering");
752            poisoned.into_inner()
753        });
754        let original_size = pool.len();
755        pool.retain(|b| !ids.contains(&b.id()));
756        let removed = original_size - pool.len();
757        if removed > 0 {
758            log::debug!("🗑️ Removed {} browsers from available pool", removed);
759        }
760    }
761
762    /// Abort all replacement tasks.
763    pub(crate) fn abort_replacement_tasks(&self) -> usize {
764        if let Ok(mut tasks) = self.replacement_tasks.lock() {
765            let count = tasks.len();
766            for handle in tasks.drain(..) {
767                handle.abort();
768            }
769            count
770        } else {
771            0
772        }
773    }
774}
775
776// ============================================================================
777// BrowserPool
778// ============================================================================
779
/// Main browser pool with lifecycle management.
///
/// This is the public-facing API for the browser pool. It wraps the internal
/// state and manages the keep-alive thread.
///
/// # Overview
///
/// `BrowserPool` provides:
/// - Browser checkout via [`get()`](Self::get)
/// - Pool warmup via [`warmup()`](Self::warmup)
/// - Statistics via [`stats()`](Self::stats)
/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
///
/// # Example
///
/// ```rust,no_run
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create pool
///     let mut pool = BrowserPool::builder()
///         .config(
///             BrowserPoolConfigBuilder::new()
///                 .max_pool_size(5)
///                 .warmup_count(3)
///                 .build()?
///         )
///         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///         .build()?;
///
///     // Warmup
///     pool.warmup().await?;
///
///     // Use browsers
///     {
///         let browser = pool.get()?;
///         let tab = browser.new_tab()?;
///         // ... do work ...
///     } // browser returned to pool automatically
///
///     // Shutdown
///     pool.shutdown_async().await;
///
///     Ok(())
/// }
/// ```
///
/// # Thread Safety
///
/// `BrowserPool` uses fine-grained internal locks (`Mutex<Vec>`, `Mutex<HashMap>`)
/// so it is safe to share as `Arc<BrowserPool>` without an outer `Mutex`.
/// Use [`into_shared()`](Self::into_shared) for convenience.
pub struct BrowserPool {
    /// Shared internal state.
    ///
    /// Held behind an `Arc` so the same state can also be referenced by
    /// browser handles and background tasks.
    inner: Arc<BrowserPoolInner>,

    /// Handle to keep-alive monitoring thread.
    ///
    /// Option allows taking during shutdown. None means keep-alive disabled.
    keep_alive_handle: Option<JoinHandle<()>>,
}
843
844impl BrowserPool {
845    /// Convert pool into a shared `Arc<BrowserPool>` for use in web handlers.
846    ///
847    /// This is convenient for web frameworks that need shared state.
848    /// No outer `Mutex` is needed — the pool uses fine-grained internal locks.
849    ///
850    /// # Example
851    ///
852    /// ```rust,ignore
853    /// let pool = BrowserPool::builder()
854    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
855    ///     .build()?
856    ///     .into_shared();
857    ///
858    /// // Can now be cloned and shared across handlers
859    /// let pool_clone = Arc::clone(&pool);
860    /// ```
861    pub fn into_shared(self) -> Arc<BrowserPool> {
862        log::debug!("🔍 Converting BrowserPool into shared Arc<BrowserPool>");
863        Arc::new(self)
864    }
865
866    /// Create a new builder for constructing a BrowserPool.
867    ///
868    /// This is the recommended way to create a pool.
869    ///
870    /// # Example
871    ///
872    /// ```rust,ignore
873    /// let pool = BrowserPool::builder()
874    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
875    ///     .build()?;
876    /// ```
877    pub fn builder() -> BrowserPoolBuilder {
878        BrowserPoolBuilder::new()
879    }
880
881    /// Get a browser from the pool (or create one if empty).
882    ///
883    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
884    /// allowing transparent access to browser methods.
885    ///
886    /// # Automatic Return
887    ///
888    /// The browser is automatically returned to the pool when the handle
889    /// is dropped, even if your code panics (RAII pattern).
890    ///
891    /// # Errors
892    ///
893    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
894    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
895    /// - Returns [`BrowserPoolError::HealthCheckFailed`] if all pooled browsers are unhealthy.
896    ///
897    /// # Example
898    ///
899    /// ```rust,ignore
900    /// let browser = pool.get()?;
901    /// let tab = browser.new_tab()?;
902    /// tab.navigate_to("https://example.com")?;
903    /// // browser returned automatically when it goes out of scope
904    /// ```
905    pub fn get(&self) -> Result<BrowserHandle> {
906        log::trace!("🎯 BrowserPool::get() called");
907        self.inner.get_or_create_browser()
908    }
909
910    /// Get pool statistics snapshot.
911    ///
912    /// # Returns
913    ///
914    /// [`PoolStats`] containing:
915    /// - `available`: Browsers in pool ready for checkout
916    /// - `active`: All browsers (pooled + checked out)
917    /// - `total`: Currently same as `active` (for future expansion)
918    ///
919    /// # Example
920    ///
921    /// ```rust,ignore
922    /// let stats = pool.stats();
923    /// println!("Available: {}, Active: {}", stats.available, stats.active);
924    /// ```
925    pub fn stats(&self) -> PoolStats {
926        let available = self.inner.available_count();
927        let active = self.inner.active_count();
928
929        log::trace!("📊 Pool stats: available={}, active={}", available, active);
930
931        PoolStats {
932            available,
933            active,
934            total: active,
935        }
936    }
937
938    /// Get a reference to the pool configuration.
939    ///
940    /// Returns the configuration that was used to create this pool.
941    /// The configuration is immutable after pool creation.
942    ///
943    /// # Example
944    ///
945    /// ```rust,ignore
946    /// let pool = BrowserPool::builder()
947    ///     .config(
948    ///         BrowserPoolConfigBuilder::new()
949    ///             .max_pool_size(10)
950    ///             .build()?
951    ///     )
952    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
953    ///     .build()?;
954    ///
955    /// println!("Max pool size: {}", pool.config().max_pool_size);
956    /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
957    /// ```
958    ///
959    /// # Use Cases
960    ///
961    /// - Logging configuration at startup
962    /// - Monitoring/metrics collection
963    /// - Readiness checks (comparing active count vs max_pool_size)
964    /// - Debugging pool behavior
965    #[inline]
966    pub fn config(&self) -> &BrowserPoolConfig {
967        self.inner.config()
968    }
969
970    /// Warmup the pool by pre-creating browsers.
971    ///
972    /// This is highly recommended to reduce first-request latency.
973    /// Should be called during application startup.
974    ///
975    /// # Process
976    ///
977    /// 1. Creates `warmup_count` browsers sequentially with staggered timing
978    /// 2. Tests each browser with navigation
979    /// 3. Returns all browsers to pool
980    /// 4. Entire process has timeout (configurable via `warmup_timeout`)
981    ///
982    /// # Staggered Creation
983    ///
984    /// Browsers are created with a 30-second delay between them to ensure
985    /// their TTLs are offset. This prevents all browsers from expiring
986    /// at the same time.
987    ///
988    /// # Errors
989    ///
990    /// - Returns error if warmup times out.
991    /// - Returns error if browser creation fails.
992    ///
993    /// # Example
994    ///
995    /// ```rust,ignore
996    /// let pool = BrowserPool::builder()
997    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
998    ///     .build()?;
999    ///
1000    /// // Warmup during startup
1001    /// pool.warmup().await?;
1002    /// ```
1003    pub async fn warmup(&self) -> Result<()> {
1004        let count = self.inner.config().warmup_count;
1005        let warmup_timeout = self.inner.config().warmup_timeout;
1006
1007        log::info!(
1008            "🔥 Starting browser pool warmup with {} instances (timeout: {}s)",
1009            count,
1010            warmup_timeout.as_secs()
1011        );
1012
1013        // Wrap entire warmup in timeout to prevent hanging forever
1014        let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
1015
1016        match warmup_result {
1017            Ok(Ok(())) => {
1018                let stats = self.stats();
1019                log::info!(
1020                    "✅ Warmup completed successfully - Available: {}, Active: {}",
1021                    stats.available,
1022                    stats.active
1023                );
1024                Ok(())
1025            }
1026            Ok(Err(e)) => {
1027                log::error!("❌ Warmup failed with error: {}", e);
1028                Err(e)
1029            }
1030            Err(_) => {
1031                log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1032                Err(BrowserPoolError::Configuration(format!(
1033                    "Warmup timed out after {}s",
1034                    warmup_timeout.as_secs()
1035                )))
1036            }
1037        }
1038    }
1039
1040    /// Internal warmup implementation (separated for cleaner timeout wrapping).
1041    ///
1042    /// Creates browsers sequentially with a delay between them.
1043    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
1044    async fn warmup_internal(&self, count: usize) -> Result<()> {
1045        log::debug!("🛠️ Starting internal warmup process for {} browsers", count);
1046
1047        // STAGGER CONFIGURATION
1048        // We wait this long between creations to distribute expiration times
1049        let stagger_interval = self.config().warmup_stagger;
1050
1051        let mut handles = Vec::new();
1052        let mut created_count = 0;
1053        let mut failed_count = 0;
1054
1055        for i in 0..count {
1056            log::debug!("🌐 Creating startup browser instance {}/{}", i + 1, count);
1057
1058            // Per-browser timeout (15s per browser is reasonable)
1059            // This prevents one slow browser from blocking entire warmup
1060            let browser_result = tokio::time::timeout(
1061                Duration::from_secs(15),
1062                tokio::task::spawn_blocking({
1063                    let inner = Arc::clone(&self.inner);
1064                    move || inner.create_browser_direct()
1065                }),
1066            )
1067            .await;
1068
1069            match browser_result {
1070                Ok(Ok(Ok(tracked))) => {
1071                    log::debug!(
1072                        "✅ Browser {} created, performing validation test...",
1073                        tracked.id()
1074                    );
1075
1076                    // Test the browser with actual navigation
1077                    match tracked.browser().new_tab() {
1078                        Ok(tab) => {
1079                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());
1080
1081                            // Navigate to test page
1082                            let nav_result = tab.navigate_to(
1083                                "data:text/html,<html><body>Warmup test</body></html>",
1084                            );
1085                            if let Err(e) = nav_result {
1086                                log::warn!(
1087                                    "⚠️ Browser {} test navigation failed: {}",
1088                                    tracked.id(),
1089                                    e
1090                                );
1091                            } else {
1092                                log::trace!(
1093                                    "✅ Browser {} test: navigation successful",
1094                                    tracked.id()
1095                                );
1096                            }
1097
1098                            // Clean up test tab
1099                            let _ = tab.close(true);
1100
1101                            // Keep handle so browser stays alive
1102                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));
1103
1104                            created_count += 1;
1105                            log::info!(
1106                                "✅ Browser instance {}/{} ready and validated",
1107                                i + 1,
1108                                count
1109                            );
1110                        }
1111                        Err(e) => {
1112                            failed_count += 1;
1113                            log::error!(
1114                                "❌ Browser {} validation test failed: {}",
1115                                tracked.id(),
1116                                e
1117                            );
1118
1119                            // Remove from active tracking since it's broken
1120                            self.inner.remove_from_active(tracked.id());
1121                        }
1122                    }
1123                }
1124                Ok(Ok(Err(e))) => {
1125                    failed_count += 1;
1126                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
1127                }
1128                Ok(Err(e)) => {
1129                    failed_count += 1;
1130                    log::error!(
1131                        "❌ Browser {}/{} creation task panicked: {:?}",
1132                        i + 1,
1133                        count,
1134                        e
1135                    );
1136                }
1137                Err(_) => {
1138                    failed_count += 1;
1139                    log::error!(
1140                        "❌ Browser {}/{} creation timed out (15s limit)",
1141                        i + 1,
1142                        count
1143                    );
1144                }
1145            }
1146
1147            // === STAGGER LOGIC ===
1148            // If this is not the last browser, wait before creating the next one.
1149            // This ensures their TTLs are offset by `stagger_interval`.
1150            if i < count - 1 {
1151                log::info!(
1152                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
1153                    stagger_interval.as_secs()
1154                );
1155                tokio::time::sleep(stagger_interval).await;
1156            }
1157        }
1158
1159        log::info!(
1160            "📊 Warmup creation phase: {} created, {} failed",
1161            created_count,
1162            failed_count
1163        );
1164
1165        // Return all browsers to pool by dropping handles
1166        log::debug!("🔍 Returning {} warmup browsers to pool...", handles.len());
1167        drop(handles);
1168
1169        // No delay needed: return_browser() is synchronous in the happy path,
1170        // and warmup browsers are never TTL-expired (which is the only path
1171        // that spawns async work via spawn_replacement_creation).
1172
1173        let final_stats = self.stats();
1174        log::info!(
1175            "🏁 Warmup internal completed - Pool: {}, Active: {}",
1176            final_stats.available,
1177            final_stats.active
1178        );
1179
1180        Ok(())
1181    }
1182
1183    /// Start the keep-alive monitoring thread.
1184    ///
1185    /// This background thread:
1186    /// - Pings all active browsers periodically
1187    /// - Removes unresponsive browsers after max_ping_failures
1188    /// - Retires browsers that exceed TTL
1189    /// - Spawns replacement browsers as needed
1190    ///
1191    /// # Critical Design Notes
1192    ///
1193    /// - Uses condvar for immediate shutdown signaling
1194    /// - Never holds locks during I/O operations
1195    /// - Uses consistent lock ordering (active -> pool)
1196    ///
1197    /// # Parameters
1198    ///
1199    /// * `inner` - Arc reference to pool state.
1200    ///
1201    /// # Returns
1202    ///
1203    /// JoinHandle for the background thread.
1204    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
1205        let ping_interval = inner.config().ping_interval;
1206        let max_failures = inner.config().max_ping_failures;
1207        let browser_ttl = inner.config().browser_ttl;
1208        let shutdown_signal = Arc::clone(inner.shutdown_signal());
1209
1210        log::info!(
1211            "🚀 Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
1212            ping_interval.as_secs(),
1213            max_failures,
1214            browser_ttl.as_secs() / 60
1215        );
1216
1217        thread::spawn(move || {
1218            log::info!("🏁 Keep-alive thread started successfully");
1219
1220            // Track consecutive failures per browser ID
1221            let mut failure_counts: HashMap<u64, u32> = HashMap::new();
1222
1223            loop {
1224                // Wait for next ping interval OR shutdown signal (whichever comes first)
1225                // Using condvar instead of sleep allows immediate wake-up on shutdown
1226                let (lock, cvar) = &*shutdown_signal;
1227                let wait_result = {
1228                    let shutdown = lock.lock().unwrap_or_else(|poisoned| {
1229                        log::warn!("Shutdown lock poisoned, recovering");
1230                        poisoned.into_inner()
1231                    });
1232                    cvar.wait_timeout(shutdown, ping_interval)
1233                        .unwrap_or_else(|poisoned| {
1234                            log::warn!("Condvar wait_timeout lock poisoned, recovering");
1235                            poisoned.into_inner()
1236                        })
1237                };
1238
1239                let shutdown_flag = *wait_result.0;
1240                let timed_out = wait_result.1.timed_out();
1241
1242                // Check if we were signaled to shutdown
1243                if shutdown_flag {
1244                    log::info!("🛑 Keep-alive received shutdown signal via condvar");
1245                    break;
1246                }
1247
1248                // Double-check atomic shutdown flag (belt and suspenders)
1249                if inner.is_shutting_down() {
1250                    log::info!("🛑 Keep-alive detected shutdown via atomic flag");
1251                    break;
1252                }
1253
1254                // If spuriously woken (not timeout, not shutdown), continue waiting
1255                if !timed_out {
1256                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
1257                    continue;
1258                }
1259
1260                log::trace!("⚡ Keep-alive ping cycle starting...");
1261
1262                // Collect browsers to ping WITHOUT holding locks
1263                // This is critical: we clone the list and release the lock
1264                // before doing any I/O operations
1265                let browsers_to_ping = inner.get_active_browsers_snapshot();
1266                log::trace!(
1267                    "Keep-alive checking {} active browsers",
1268                    browsers_to_ping.len()
1269                );
1270
1271                // Now ping browsers without holding any locks
1272                let mut to_remove = Vec::new();
1273                let mut expired_browsers = Vec::new();
1274
1275                for (id, tracked) in browsers_to_ping {
1276                    // Check shutdown during ping loop (allows early exit)
1277                    if inner.is_shutting_down() {
1278                        log::info!("Shutdown detected during ping loop, exiting immediately");
1279                        return;
1280                    }
1281
1282                    // Check TTL before pinging (no point pinging expired browsers)
1283                    if tracked.is_expired(browser_ttl) {
1284                        log::info!(
1285                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
1286                            id,
1287                            tracked.age_minutes(),
1288                            browser_ttl.as_secs() / 60
1289                        );
1290                        expired_browsers.push(id);
1291                        continue; // Skip ping for expired browsers
1292                    }
1293
1294                    // Perform health check (this is I/O, no locks held)
1295                    use crate::traits::Healthcheck;
1296                    match tracked.ping() {
1297                        Ok(_) => {
1298                            // Reset failure count on success
1299                            if failure_counts.remove(&id).is_some() {
1300                                log::debug!("Browser {} ping successful, failure count reset", id);
1301                            }
1302                        }
1303                        Err(e) => {
1304                            // Only process failures if NOT shutting down
1305                            // (during shutdown, browsers may legitimately fail)
1306                            if !inner.is_shutting_down() {
1307                                let failures = failure_counts.entry(id).or_insert(0);
1308                                *failures += 1;
1309
1310                                log::warn!(
1311                                    "Browser {} ping failed (attempt {}/{}): {}",
1312                                    id,
1313                                    failures,
1314                                    max_failures,
1315                                    e
1316                                );
1317
1318                                // Remove if exceeded max failures
1319                                if *failures >= max_failures {
1320                                    log::error!(
1321                                        "Browser {} exceeded max ping failures ({}), marking for removal",
1322                                        id,
1323                                        max_failures
1324                                    );
1325                                    to_remove.push(id);
1326                                }
1327                            }
1328                        }
1329                    }
1330                }
1331
1332                // Check shutdown before cleanup (avoid work if shutting down)
1333                if inner.is_shutting_down() {
1334                    log::info!("Shutdown detected before cleanup, skipping and exiting");
1335                    break;
1336                }
1337
1338                // Handle TTL retirements first (they need replacement browsers)
1339                if !expired_browsers.is_empty() {
1340                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
1341                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
1342                }
1343
1344                // Handle failed browsers (remove from tracking and pool)
1345                if !to_remove.is_empty() {
1346                    log::warn!("Removing {} failed browsers from pool", to_remove.len());
1347
1348                    // Track how many were actually removed so we know how many to replace
1349                    let mut actual_removed_count = 0;
1350
1351                    // Remove dead browsers from active tracking
1352                    for id in &to_remove {
1353                        if inner.remove_from_active(*id).is_some() {
1354                            actual_removed_count += 1;
1355                            log::debug!("Removed failed browser {} from active tracking", id);
1356                        }
1357                        failure_counts.remove(id);
1358                    }
1359
1360                    log::debug!(
1361                        "Active browsers after failure cleanup: {}",
1362                        inner.active_count()
1363                    );
1364
1365                    // Clean up pool (remove dead browsers)
1366                    inner.remove_from_available(&to_remove);
1367
1368                    log::debug!("Pool size after cleanup: {}", inner.available_count());
1369
1370                    // Trigger replacement for the browsers we just removed
1371                    if actual_removed_count > 0 {
1372                        log::info!(
1373                            "Spawning {} replacement browsers for failed ones",
1374                            actual_removed_count
1375                        );
1376                        BrowserPoolInner::spawn_replacement_creation(
1377                            Arc::clone(&inner),
1378                            actual_removed_count,
1379                        );
1380                    }
1381                }
1382
1383                // Log keep-alive cycle summary
1384                log::debug!(
1385                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
1386                    inner.active_count(),
1387                    inner.available_count(),
1388                    failure_counts.len()
1389                );
1390            }
1391
1392            log::info!("Keep-alive thread exiting cleanly");
1393        })
1394    }
1395
1396    /// Handle browser retirement due to TTL expiration.
1397    ///
1398    /// This function:
1399    /// 1. Removes expired browsers from active and pool tracking
1400    /// 2. Spawns async tasks to create replacement browsers
1401    /// 3. Maintains pool target size
1402    ///
1403    /// # Critical Lock Ordering
1404    ///
1405    /// Acquires active -> pool locks together to prevent races.
1406    ///
1407    /// # Parameters
1408    ///
1409    /// * `inner` - Arc reference to pool state.
1410    /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1411    /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1412    fn handle_browser_retirement(
1413        inner: &Arc<BrowserPoolInner>,
1414        expired_ids: Vec<u64>,
1415        failure_counts: &mut HashMap<u64, u32>,
1416    ) {
1417        log::info!(
1418            "Retiring {} expired browsers (TTL enforcement)",
1419            expired_ids.len()
1420        );
1421
1422        // Remove expired browsers from active tracking
1423        let mut retired_count = 0;
1424        for id in &expired_ids {
1425            if inner.remove_from_active(*id).is_some() {
1426                retired_count += 1;
1427                log::debug!("Removed expired browser {} from active tracking", id);
1428            }
1429            // Clean up failure tracking
1430            failure_counts.remove(id);
1431        }
1432
1433        // Remove from pool as well
1434        inner.remove_from_available(&expired_ids);
1435
1436        log::debug!(
1437            "After retirement - Active: {}, Pooled: {}",
1438            inner.active_count(),
1439            inner.available_count()
1440        );
1441
1442        // Create replacement browsers to maintain target count
1443        if retired_count > 0 {
1444            log::info!(
1445                "Spawning {} replacement browsers for retired ones",
1446                retired_count
1447            );
1448            BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1449        } else {
1450            log::debug!("No browsers were actually retired (already removed)");
1451        }
1452    }
1453
1454    /// Asynchronously shutdown the pool (recommended method).
1455    ///
1456    /// This is the preferred shutdown method as it can properly await
1457    /// async task cancellation. Should be called during application shutdown.
1458    ///
1459    /// # Shutdown Process
1460    ///
1461    /// 1. Set atomic shutdown flag (stops new operations)
1462    /// 2. Signal condvar to wake keep-alive thread immediately
1463    /// 3. Wait for keep-alive thread to exit (with timeout)
1464    /// 4. Abort all replacement creation tasks
1465    /// 5. Wait briefly for cleanup
1466    /// 6. Log final statistics
1467    ///
1468    /// # Timeout
1469    ///
1470    /// Keep-alive thread is given 5 seconds to exit gracefully.
1471    /// If it doesn't exit, we log an error but continue shutdown.
1472    ///
1473    /// # Example
1474    ///
1475    /// ```rust,ignore
1476    /// let mut pool = /* ... */;
1477    ///
1478    /// // During application shutdown
1479    /// pool.shutdown_async().await;
1480    /// ```
1481    pub async fn shutdown_async(&mut self) {
1482        log::info!("Shutting down browser pool (async mode)...");
1483
1484        // Step 1: Set shutdown flag (prevents new operations)
1485        self.inner.set_shutting_down(true);
1486        log::debug!("Shutdown flag set");
1487
1488        // Step 2: Signal condvar to wake keep-alive thread immediately
1489        // This is critical - without this, keep-alive waits for full ping_interval
1490        {
1491            let (lock, cvar) = &**self.inner.shutdown_signal();
1492            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1493                log::warn!("Shutdown lock poisoned, recovering");
1494                poisoned.into_inner()
1495            });
1496            *shutdown = true;
1497            cvar.notify_all();
1498            log::debug!("Shutdown signal sent to keep-alive thread");
1499        } // Lock released here
1500
1501        // Step 3: Wait for keep-alive thread to exit
1502        if let Some(handle) = self.keep_alive_handle.take() {
1503            log::debug!("Waiting for keep-alive thread to exit...");
1504
1505            // Wrap thread join in spawn_blocking to make it async-friendly
1506            let join_task = tokio::task::spawn_blocking(move || handle.join());
1507
1508            // Give it 5 seconds to exit gracefully
1509            match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1510                Ok(Ok(Ok(_))) => {
1511                    log::info!("Keep-alive thread stopped cleanly");
1512                }
1513                Ok(Ok(Err(_))) => {
1514                    log::error!("Keep-alive thread panicked during shutdown");
1515                }
1516                Ok(Err(_)) => {
1517                    log::error!("Keep-alive join task panicked");
1518                }
1519                Err(_) => {
1520                    log::error!("Keep-alive thread didn't exit within 5s timeout");
1521                }
1522            }
1523        } else {
1524            log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1525        }
1526
1527        // Step 4: Abort all replacement creation tasks
1528        log::info!("Aborting replacement creation tasks...");
1529        let aborted_count = self.inner.abort_replacement_tasks();
1530        if aborted_count > 0 {
1531            log::info!("Aborted {} replacement tasks", aborted_count);
1532        } else {
1533            log::debug!("No replacement tasks to abort");
1534        }
1535
1536        // Step 5: Small delay to let aborted tasks clean up
1537        tokio::time::sleep(Duration::from_millis(100)).await;
1538
1539        // Step 6: Log final statistics
1540        let stats = self.stats();
1541        log::info!(
1542            "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1543            stats.available,
1544            stats.active,
1545            stats.total
1546        );
1547    }
1548
1549    /// Synchronously shutdown the pool (fallback method).
1550    ///
1551    /// This is a simplified shutdown for use in Drop or non-async contexts.
1552    /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
1553    ///
1554    /// # Note
1555    ///
1556    /// This method doesn't wait for replacement tasks to finish since
1557    /// there's no async runtime available. Tasks are aborted but may not
1558    /// have cleaned up yet.
1559    pub fn shutdown(&mut self) {
1560        log::debug!("Calling synchronous shutdown...");
1561        self.shutdown_sync();
1562    }
1563
1564    /// Internal synchronous shutdown implementation.
1565    fn shutdown_sync(&mut self) {
1566        log::info!("Shutting down browser pool (sync mode)...");
1567
1568        // Set shutdown flag
1569        self.inner.set_shutting_down(true);
1570        log::debug!("Shutdown flag set");
1571
1572        // Signal condvar (same as async version)
1573        {
1574            let (lock, cvar) = &**self.inner.shutdown_signal();
1575            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1576                log::warn!("Shutdown lock poisoned, recovering");
1577                poisoned.into_inner()
1578            });
1579            *shutdown = true;
1580            cvar.notify_all();
1581            log::debug!("Shutdown signal sent");
1582        }
1583
1584        // Wait for keep-alive thread
1585        if let Some(handle) = self.keep_alive_handle.take() {
1586            log::debug!("Joining keep-alive thread (sync)...");
1587
1588            match handle.join() {
1589                Ok(_) => log::info!("Keep-alive thread stopped"),
1590                Err(_) => log::error!("Keep-alive thread panicked"),
1591            }
1592        }
1593
1594        // Abort replacement tasks (best effort - they won't make progress without runtime)
1595        let aborted_count = self.inner.abort_replacement_tasks();
1596        if aborted_count > 0 {
1597            log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1598        }
1599
1600        let stats = self.stats();
1601        log::info!(
1602            "Sync shutdown complete - Available: {}, Active: {}",
1603            stats.available,
1604            stats.active
1605        );
1606    }
1607
1608    /// Get a reference to the inner pool state.
1609    ///
1610    /// This is primarily for internal use and testing.
1611    #[doc(hidden)]
1612    #[allow(dead_code)]
1613    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
1614        &self.inner
1615    }
1616}
1617
1618impl Drop for BrowserPool {
1619    /// Automatic cleanup when pool is dropped.
1620    ///
1621    /// This ensures resources are released even if shutdown wasn't called explicitly.
1622    /// Uses sync shutdown since Drop can't be async.
1623    fn drop(&mut self) {
1624        log::debug!("🛑 BrowserPool Drop triggered - running cleanup");
1625
1626        // Only shutdown if not already done
1627        if !self.inner.is_shutting_down() {
1628            log::warn!("⚠ BrowserPool dropped without explicit shutdown - cleaning up");
1629            self.shutdown();
1630        } else {
1631            log::debug!(" Pool already shutdown, Drop is no-op");
1632        }
1633    }
1634}
1635
1636// ============================================================================
1637// BrowserPoolBuilder
1638// ============================================================================
1639
1640/// Builder for constructing a [`BrowserPool`] with validation.
1641///
1642/// This is the recommended way to create a pool as it validates
1643/// configuration and provides sensible defaults.
1644///
1645/// # Example
1646///
1647/// ```rust,ignore
1648/// use std::time::Duration;
1649/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
1650///
1651/// let pool = BrowserPool::builder()
1652///     .config(
1653///         BrowserPoolConfigBuilder::new()
1654///             .max_pool_size(10)
1655///             .warmup_count(5)
1656///             .browser_ttl(Duration::from_secs(7200))
1657///             .build()?
1658///     )
1659///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1660///     .enable_keep_alive(true)
1661///     .build()?;
1662/// ```
1663pub struct BrowserPoolBuilder {
1664    /// Optional configuration (uses default if not provided).
1665    config: Option<BrowserPoolConfig>,
1666
1667    /// Browser factory (required).
1668    factory: Option<Box<dyn BrowserFactory>>,
1669
1670    /// Whether to enable keep-alive thread (default: true).
1671    enable_keep_alive: bool,
1672}
1673
1674impl BrowserPoolBuilder {
1675    /// Create a new builder with defaults.
1676    ///
1677    /// # Example
1678    ///
1679    /// ```rust,ignore
1680    /// let builder = BrowserPoolBuilder::new();
1681    /// ```
1682    pub fn new() -> Self {
1683        Self {
1684            config: None,
1685            factory: None,
1686            enable_keep_alive: true,
1687        }
1688    }
1689
1690    /// Set custom configuration.
1691    ///
1692    /// If not called, uses [`BrowserPoolConfig::default()`].
1693    ///
1694    /// # Parameters
1695    ///
1696    /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1697    ///
1698    /// # Example
1699    ///
1700    /// ```rust,ignore
1701    /// let config = BrowserPoolConfigBuilder::new()
1702    ///     .max_pool_size(10)
1703    ///     .build()?;
1704    ///
1705    /// let pool = BrowserPool::builder()
1706    ///     .config(config)
1707    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1708    ///     .build()?;
1709    /// ```
1710    pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1711        self.config = Some(config);
1712        self
1713    }
1714
1715    /// Set browser factory (required).
1716    ///
1717    /// The factory is responsible for creating browser instances.
1718    /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1719    ///
1720    /// # Parameters
1721    ///
1722    /// * `factory` - A boxed [`BrowserFactory`] implementation.
1723    ///
1724    /// # Example
1725    ///
1726    /// ```rust,ignore
1727    /// let pool = BrowserPool::builder()
1728    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1729    ///     .build()?;
1730    /// ```
1731    pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1732        self.factory = Some(factory);
1733        self
1734    }
1735
1736    /// Enable or disable keep-alive thread.
1737    ///
1738    /// Keep-alive should be disabled only for testing.
1739    /// Production use should always have it enabled.
1740    ///
1741    /// # Parameters
1742    ///
1743    /// * `enable` - Whether to enable the keep-alive thread.
1744    ///
1745    /// # Example
1746    ///
1747    /// ```rust,ignore
1748    /// // Disable for tests
1749    /// let pool = BrowserPool::builder()
1750    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1751    ///     .enable_keep_alive(false)
1752    ///     .build()?;
1753    /// ```
1754    pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1755        self.enable_keep_alive = enable;
1756        self
1757    }
1758
1759    /// Build the browser pool.
1760    ///
1761    /// # Errors
1762    ///
1763    /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1764    ///
1765    /// # Panics
1766    ///
1767    /// Panics if called outside a tokio runtime context.
1768    ///
1769    /// # Example
1770    ///
1771    /// ```rust,ignore
1772    /// let pool = BrowserPool::builder()
1773    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1774    ///     .build()?;
1775    /// ```
1776    pub fn build(self) -> Result<BrowserPool> {
1777        let config = self.config.unwrap_or_default();
1778        let factory = self.factory.ok_or_else(|| {
1779            BrowserPoolError::Configuration("No browser factory provided".to_string())
1780        })?;
1781
1782        log::info!("📦 Building browser pool with config: {:?}", config);
1783
1784        // Create inner state
1785        let inner = BrowserPoolInner::new(config, factory);
1786
1787        // Start keep-alive thread if enabled
1788        let keep_alive_handle = if self.enable_keep_alive {
1789            log::info!("🚀 Starting keep-alive monitoring thread");
1790            Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1791        } else {
1792            log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1793            None
1794        };
1795
1796        log::info!("✅ Browser pool built successfully");
1797
1798        Ok(BrowserPool {
1799            inner,
1800            keep_alive_handle,
1801        })
1802    }
1803}
1804
1805impl Default for BrowserPoolBuilder {
1806    fn default() -> Self {
1807        Self::new()
1808    }
1809}
1810
1811// ============================================================================
1812// Environment Initialization (feature-gated)
1813// ============================================================================
1814
/// Initialize a ready-to-use browser pool from environment variables.
///
/// Convenience wrapper for the common start-up sequence: read the
/// configuration from the environment, pick a Chrome factory, build the
/// pool with keep-alive enabled, warm it up and hand it back as a shared
/// handle.
///
/// # Feature Flag
///
/// This function is only available when the `env-config` feature is enabled.
///
/// # Environment Variables
///
/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
/// - `CHROME_PATH`: Custom Chrome binary path (optional)
///
/// # Returns
///
/// `Arc<BrowserPool>` ready for use in web handlers.
///
/// # Errors
///
/// - Returns an error if the environment configuration cannot be loaded.
/// - Returns an error if the pool cannot be built.
/// - Returns an error if warmup fails.
///
/// # Example
///
/// ```rust,ignore
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     env_logger::init();
///
///     let pool = init_browser_pool().await?;
///
///     // Use pool in handlers...
///
///     Ok(())
/// }
/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<BrowserPool>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Load configuration from environment
    let config = from_env()?;

    // Get optional Chrome path
    let chrome_path = chrome_path_from_env();

    // Only this one field is needed after `config` is handed to the builder,
    // so copy it out here instead of cloning the whole configuration.
    let warmup_count = config.warmup_count;

    log::info!("Pool configuration from environment:");
    log::info!("   - Max pool size: {}", config.max_pool_size);
    log::info!("   - Warmup count: {}", warmup_count);
    log::info!(
        "   - Browser TTL: {}s ({}min)",
        config.browser_ttl.as_secs(),
        config.browser_ttl.as_secs() / 60
    );
    log::info!("   - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        "   - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // Create factory based on whether custom path is provided
    let factory: Box<dyn BrowserFactory> = match chrome_path {
        Some(path) => {
            log::info!("Using custom Chrome path: {}", path);
            Box::new(ChromeBrowserFactory::with_path(path))
        }
        None => {
            log::info!("Using auto-detected Chrome browser");
            Box::new(ChromeBrowserFactory::with_defaults())
        }
    };

    // Create browser pool with Chrome factory; `config` is moved, not cloned.
    log::debug!("Building browser pool...");
    let pool = BrowserPool::builder()
        .config(config)
        .factory(factory)
        .enable_keep_alive(true)
        .build()
        .map_err(|e| {
            log::error!("❌ Failed to create browser pool: {}", e);
            e
        })?;

    log::info!("✅ Browser pool created successfully");

    // Warmup the pool
    log::info!(
        "Warming up browser pool with {} instances...",
        warmup_count
    );
    pool.warmup().await.map_err(|e| {
        log::error!("❌ Failed to warmup pool: {}", e);
        e
    })?;

    let stats = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        stats.available,
        stats.active,
        stats.total
    );

    Ok(pool.into_shared())
}
1928
1929// ============================================================================
1930// Unit Tests
1931// ============================================================================
1932
#[cfg(test)]
mod tests {
    use super::*;

    /// Building without a factory must fail with a `Configuration` error.
    ///
    /// The pool cannot create browsers without a factory, so `build()` has
    /// to reject the builder and name the missing piece in its message.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised inside a tokio runtime.
        let rt = tokio::runtime::Runtime::new().unwrap();

        rt.block_on(async {
            let config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // `.factory(...)` is deliberately left out.
            let result = BrowserPool::builder().config(config).build();

            assert!(result.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(msg)) = result {
                assert!(
                    msg.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    msg
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` yields an empty builder with keep-alive enabled.
    #[test]
    fn test_builder_default() {
        let builder = BrowserPoolBuilder::default();

        assert!(builder.config.is_none());
        assert!(builder.factory.is_none());
        assert!(builder.enable_keep_alive);
    }

    /// `enable_keep_alive(false)` clears the keep-alive switch.
    #[test]
    fn test_builder_disable_keep_alive() {
        let builder = BrowserPoolBuilder::new().enable_keep_alive(false);

        assert!(!builder.enable_keep_alive);
    }
}