Skip to main content

html2pdf_api/
pool.rs

1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//!   ├─ BrowserPoolInner (shared state)
21//!   │   ├─ available: Vec<TrackedBrowser>  (pooled, ready to use)
22//!   │   ├─ active: HashMap<id, TrackedBrowser>  (every live browser, pooled or checked out; tracked for health)
23//!   │   └─ replacement_tasks: Vec<JoinHandle>  (async replacement creators)
24//!   └─ keep_alive_handle: JoinHandle  (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//!     // Create pool
41//!     let mut pool = BrowserPool::builder()
42//!         .config(
43//!             BrowserPoolConfigBuilder::new()
44//!                 .max_pool_size(5)
45//!                 .warmup_count(3)
46//!                 .build()?
47//!         )
48//!         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//!         .build()?;
50//!
51//!     // Warmup
52//!     pool.warmup().await?;
53//!
54//!     // Use browsers
55//!     {
56//!         let browser = pool.get()?;
57//!         let tab = browser.new_tab()?;
58//!         // ... do work ...
59//!     } // browser returned to pool automatically
60//!
61//!     // Shutdown
62//!     pool.shutdown_async().await;
63//!
64//!     Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
/// Internal shared state for the browser pool.
///
/// This struct contains all shared state and is wrapped in Arc for thread-safe
/// sharing between the pool, handles, and background threads.
///
/// # Lock Ordering (CRITICAL)
///
/// Always acquire locks in this order to prevent deadlocks:
/// 1. `active` (every tracked browser: pooled *and* checked out)
/// 2. `available` (the subset of browsers idle in the pool, ready for use)
///
/// Never hold locks during I/O operations or browser creation.
///
/// # Thread Safety
///
/// All fields are protected by appropriate synchronization primitives:
/// - `Mutex` for mutable collections
/// - `AtomicBool` for shutdown flag
/// - `Arc` for shared ownership
pub(crate) struct BrowserPoolInner {
    /// Configuration (immutable after creation).
    config: BrowserPoolConfig,

    /// Browsers available for checkout (not currently in use).
    ///
    /// Protected by Mutex. Browsers are popped from here when checked out
    /// and pushed back when released, provided they have not exceeded their
    /// TTL and the pool holds fewer than `max_pool_size` entries.
    available: Mutex<Vec<Arc<TrackedBrowser>>>,

    /// All browsers that exist (both pooled and checked out).
    ///
    /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
    /// Maps browser ID -> TrackedBrowser for fast lookup. A browser is inserted
    /// as soon as it is created and stays here while it sits in `available`;
    /// it is removed only when retired (expired, unhealthy, or surplus).
    active: Mutex<HashMap<u64, Arc<TrackedBrowser>>>,

    /// Factory for creating new browser instances.
    factory: Box<dyn BrowserFactory>,

    /// Atomic flag indicating shutdown in progress.
    ///
    /// Checked before expensive operations. Written with `Release` and read
    /// with `Acquire` ordering.
    shutting_down: AtomicBool,

    /// Background tasks creating replacement browsers.
    ///
    /// Tracked so we can abort them during shutdown. Finished handles are
    /// pruned whenever a new task is registered.
    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,

    /// Handle to tokio runtime for spawning async tasks.
    ///
    /// Captured at creation time to allow spawning from any context.
    runtime_handle: tokio::runtime::Handle,

    /// Shutdown signaling mechanism for keep-alive thread.
    ///
    /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
    /// instead of waiting for full ping_interval.
    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
}
146
147impl BrowserPoolInner {
148    /// Create a new browser pool inner state.
149    ///
150    /// # Parameters
151    ///
152    /// * `config` - Validated configuration.
153    /// * `factory` - Browser factory for creating instances.
154    ///
155    /// # Panics
156    ///
157    /// Panics if called outside a tokio runtime context.
158    pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159        log::info!(
160            "🚀 Initializing browser pool with capacity {}",
161            config.max_pool_size
162        );
163        log::debug!(
164            "📋 Pool config: warmup={}, TTL={}s, ping_interval={}s",
165            config.warmup_count,
166            config.browser_ttl.as_secs(),
167            config.ping_interval.as_secs()
168        );
169
170        // Capture runtime handle for spawning async tasks
171        // This allows us to spawn from sync contexts (like Drop)
172        let runtime_handle = tokio::runtime::Handle::current();
173
174        Arc::new(Self {
175            config,
176            available: Mutex::new(Vec::new()),
177            active: Mutex::new(HashMap::new()),
178            factory,
179            shutting_down: AtomicBool::new(false),
180            replacement_tasks: Mutex::new(Vec::new()),
181            runtime_handle,
182            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183        })
184    }
185
186    /// Create a lightweight mock pool for testing without background threads.
187    #[cfg(test)]
188    pub(crate) fn new_for_test(
189        config: BrowserPoolConfig,
190        factory: Box<dyn BrowserFactory>,
191        runtime_handle: tokio::runtime::Handle,
192    ) -> Self {
193        Self {
194            config,
195            available: Mutex::new(Vec::new()),
196            active: Mutex::new(HashMap::new()),
197            factory,
198            shutting_down: AtomicBool::new(false),
199            replacement_tasks: Mutex::new(Vec::new()),
200            runtime_handle,
201            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
202        }
203    }
204
205    /// Create a browser directly without using the pool.
206    ///
207    /// Used for:
208    /// - Initial warmup
209    /// - Replacing failed browsers
210    /// - When pool is empty
211    ///
212    /// # Important
213    ///
214    /// Adds the browser to `active` tracking immediately for health monitoring.
215    ///
216    /// # Errors
217    ///
218    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
219    /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
220    pub(crate) fn create_browser_direct(&self) -> Result<Arc<TrackedBrowser>> {
221        // Early exit if shutting down (don't waste time creating browsers)
222        if self.shutting_down.load(Ordering::Acquire) {
223            log::debug!("🛑 Skipping browser creation - pool is shutting down");
224            return Err(BrowserPoolError::ShuttingDown);
225        }
226
227        log::debug!("📦 Creating new browser directly via factory...");
228
229        // Factory handles all Chrome launch complexity
230        let browser = self.factory.create()?;
231
232        // Wrap with tracking metadata and Arc immediately
233        let tracked = Arc::new(TrackedBrowser::new(browser)?);
234        let id = tracked.id();
235
236        // Add to active tracking immediately for health monitoring
237        // This ensures keep-alive thread will monitor it
238        if let Ok(mut active) = self.active.lock() {
239            active.insert(id, Arc::clone(&tracked));
240            log::debug!(
241                "📊 Browser {} added to active tracking (total active: {})",
242                id,
243                active.len()
244            );
245        } else {
246            log::warn!(
247                "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
248                id
249            );
250        }
251
252        log::info!("✅ Created new browser with ID {}", id);
253        Ok(tracked)
254    }
255
256    /// Get a browser from pool or create a new one.
257    ///
258    /// # Algorithm
259    ///
260    /// 1. Loop through pooled browsers
261    /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
262    ///    - If near expiry: Skip (drop) it immediately.
263    ///    - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
264    /// 3. For valid browsers, perform detailed health check (without holding locks)
265    /// 4. If healthy, return it
266    /// 5. If unhealthy, remove from active tracking and try next
267    /// 6. If pool empty or all skipped/unhealthy, create new browser
268    ///
269    /// # Critical: Lock-Free Health Checks
270    ///
271    /// Health checks are performed WITHOUT holding locks to avoid blocking
272    /// other threads. This is why we use a loop pattern instead of iterator.
273    ///
274    /// # Returns
275    ///
276    /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
277    ///
278    /// # Errors
279    ///
280    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
281    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
282    pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
283        log::debug!("🔍 Attempting to get browser from pool...");
284
285        // Try to get from pool - LOOP pattern to avoid holding lock during health checks
286        // This is critical for concurrency: we release the lock between attempts
287        loop {
288            // Acquire lock briefly to pop one browser
289            let tracked_opt = {
290                let mut available = self.available.lock().unwrap_or_else(|poisoned| {
291                    log::warn!("Pool available lock poisoned, recovering");
292                    poisoned.into_inner()
293                });
294                let popped = available.pop();
295                log::trace!("📊 Pool size after pop: {}", available.len());
296                popped
297            }; // Lock released here - critical for performance
298
299            if let Some(tracked) = tracked_opt {
300                // === LOGIC START: Grace Period Check ===
301                let age = tracked.created_at().elapsed();
302                let ttl = self.config.browser_ttl;
303
304                // Safety margin matching your stagger interval
305                let safety_margin = Duration::from_secs(30);
306
307                // If browser is about to expire, don't use it.
308                if age + safety_margin > ttl {
309                    log::debug!(
310                        "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
311                        tracked.id(),
312                        age.as_secs()
313                    );
314
315                    // CRITICAL: We do NOT remove/recreate here.
316                    // By simply 'continuing', we drop this 'tracked' instance.
317                    // 1. It is NOT returned to 'available' (so no user gets it).
318                    // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
319                    // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
320                    continue;
321                }
322                // === LOGIC END: Grace Period Check ===
323
324                log::debug!(
325                    "🔍 Testing browser {} from pool for health...",
326                    tracked.id()
327                );
328
329                // Detailed health check WITHOUT holding any locks
330                // This prevents blocking other threads during I/O
331                match tracked.browser().new_tab() {
332                    Ok(tab) => {
333                        log::trace!(
334                            "✅ Browser {} health check: new_tab() successful",
335                            tracked.id()
336                        );
337
338                        // Test navigation capability (full health check)
339                        match tab
340                            .navigate_to("data:text/html,<html><body>Health check</body></html>")
341                        {
342                            Ok(_) => {
343                                log::trace!(
344                                    "✅ Browser {} health check: navigation successful",
345                                    tracked.id()
346                                );
347
348                                // Test cleanup capability
349                                match tab.close(true) {
350                                    Ok(_) => {
351                                        log::debug!(
352                                            "✅ Browser {} passed full health check - ready for use",
353                                            tracked.id()
354                                        );
355
356                                        // Get pool size for logging (brief lock)
357                                        let pool_size = {
358                                            let available =
359                                                self.available.lock().unwrap_or_else(|poisoned| {
360                                                    log::warn!(
361                                                        "Pool available lock poisoned, recovering"
362                                                    );
363                                                    poisoned.into_inner()
364                                                });
365                                            available.len()
366                                        };
367
368                                        log::info!(
369                                            "♻️ Reusing healthy browser {} from pool (pool size: {})",
370                                            tracked.id(),
371                                            pool_size
372                                        );
373
374                                        // Return healthy browser wrapped in RAII handle
375                                        return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
376                                    }
377                                    Err(e) => {
378                                        log::warn!(
379                                            "❌ Browser {} health check: tab close failed: {}",
380                                            tracked.id(),
381                                            e
382                                        );
383                                    }
384                                }
385                            }
386                            Err(e) => {
387                                log::warn!(
388                                    "❌ Browser {} health check: navigation failed: {}",
389                                    tracked.id(),
390                                    e
391                                );
392                            }
393                        }
394                    }
395                    Err(e) => {
396                        log::warn!(
397                            "❌ Browser {} health check: new_tab() failed: {}",
398                            tracked.id(),
399                            e
400                        );
401                    }
402                }
403
404                // If we reach here, health check failed
405                // Remove from active tracking (browser is dead)
406                log::warn!(
407                    "🗑️ Removing unhealthy browser {} from active tracking",
408                    tracked.id()
409                );
410                {
411                    let mut active = self.active.lock().unwrap_or_else(|poisoned| {
412                        log::warn!("Pool active lock poisoned, recovering");
413                        poisoned.into_inner()
414                    });
415                    active.remove(&tracked.id());
416                    log::debug!("📊 Active browsers after removal: {}", active.len());
417                }
418
419                // Continue loop to try next browser in pool
420                log::debug!("🔍 Trying next browser from pool...");
421            } else {
422                // Pool is empty, break to create new browser
423                log::debug!("📥 Pool is empty, will create new browser");
424                break;
425            }
426        }
427
428        // Pool is empty or no healthy browsers found
429        log::info!("📦 Creating new browser (pool was empty or all browsers unhealthy)");
430
431        let tracked = self.create_browser_direct()?;
432
433        log::info!("✅ Returning newly created browser {}", tracked.id());
434        Ok(BrowserHandle::new(tracked, Arc::clone(self)))
435    }
436
437    /// Return a browser to the pool (called by BrowserHandle::drop).
438    ///
439    /// # Critical Lock Ordering
440    ///
441    /// Always acquires locks in order: active -> available.
442    /// Both locks are held together to prevent race conditions.
443    ///
444    /// # Algorithm
445    ///
446    /// 1. Acquire both locks (order: active, then available)
447    /// 2. Verify browser is in active tracking
448    /// 3. Check TTL - if expired, retire and trigger replacement
449    /// 4. If pool has space, add to available pool
450    /// 5. If pool full, remove from active (browser gets dropped)
451    ///
452    /// # Parameters
453    ///
454    /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
455    /// * `tracked` - The browser being returned.
456    pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: Arc<TrackedBrowser>) {
457        log::debug!("♻️ Returning browser {} to pool...", tracked.id());
458
459        // Early exit if shutting down (don't waste time managing pool)
460        if self_arc.shutting_down.load(Ordering::Acquire) {
461            log::debug!(
462                "🛑 Pool shutting down, not returning browser {}",
463                tracked.id()
464            );
465            return;
466        }
467
468        // CRITICAL: Always acquire in order: active -> pool
469        // Holding both locks prevents ALL race conditions:
470        // - Prevents concurrent modifications to browser state
471        // - Prevents duplicate returns
472        // - Ensures pool size limits are respected
473        let mut active = self_arc.active.lock().unwrap_or_else(|poisoned| {
474            log::warn!("Pool active lock poisoned, recovering");
475            poisoned.into_inner()
476        });
477        let mut pool = self_arc.available.lock().unwrap_or_else(|poisoned| {
478            log::warn!("Pool available lock poisoned, recovering");
479            poisoned.into_inner()
480        });
481
482        // Verify browser is actually tracked (sanity check)
483        if !active.contains_key(&tracked.id()) {
484            log::warn!(
485                "❌ Browser {} not in active tracking (probably already removed), skipping return",
486                tracked.id()
487            );
488            return;
489        }
490
491        // Check TTL before returning to pool
492        // Expired browsers should be retired to prevent memory leaks
493        if tracked.is_expired(self_arc.config.browser_ttl) {
494            log::info!(
495                "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
496                tracked.id(),
497                tracked.age_minutes(),
498                self_arc.config.browser_ttl.as_secs() / 60
499            );
500
501            // Remove from active tracking
502            active.remove(&tracked.id());
503            log::debug!("📊 Active browsers after TTL retirement: {}", active.len());
504
505            // Release locks before spawning replacement task
506            drop(active);
507            drop(pool);
508
509            // Trigger async replacement creation (non-blocking)
510            log::debug!("🔍 Triggering replacement browser creation for expired browser");
511            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
512            return;
513        }
514
515        // Prevent duplicate returns (defensive programming)
516        if pool.iter().any(|b| b.id() == tracked.id()) {
517            log::warn!(
518                "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
519                tracked.id()
520            );
521            return;
522        }
523
524        // Check if pool has space for this browser
525        if pool.len() < self_arc.config.max_pool_size {
526            // Add to pool for reuse
527            pool.push(tracked.clone());
528            log::info!(
529                "♻️ Browser {} returned to pool (pool size: {}/{})",
530                tracked.id(),
531                pool.len(),
532                self_arc.config.max_pool_size
533            );
534        } else {
535            // Pool is full, remove from tracking (browser will be dropped)
536            log::debug!(
537                "️ Pool full ({}/{}), removing browser {} from system",
538                pool.len(),
539                self_arc.config.max_pool_size,
540                tracked.id()
541            );
542            active.remove(&tracked.id());
543            log::debug!("📊 Active browsers after removal: {}", active.len());
544        }
545    }
546
547    /// Asynchronously create replacement browsers (internal helper).
548    ///
549    /// This is the async work function that actually creates browsers.
550    /// It's spawned as a tokio task by `spawn_replacement_creation`.
551    ///
552    /// # Algorithm
553    ///
554    /// 1. Check shutdown flag before each creation
555    /// 2. Check pool space before each creation
556    /// 3. Use spawn_blocking for CPU-bound browser creation
557    /// 4. Add successful browsers to pool
558    /// 5. Log detailed status
559    ///
560    /// # Parameters
561    ///
562    /// * `inner` - Arc reference to pool state.
563    /// * `count` - Number of browsers to attempt to create.
564    async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
565        log::info!(
566            "🔍 Starting async replacement creation for {} browsers",
567            count
568        );
569
570        let mut created_count = 0;
571        let mut failed_count = 0;
572
573        for i in 0..count {
574            // Check shutdown flag before each expensive operation
575            if inner.shutting_down.load(Ordering::Acquire) {
576                log::info!(
577                    "🛑 Shutdown detected during replacement creation, stopping at {}/{}",
578                    i,
579                    count
580                );
581                break;
582            }
583
584            // Check if pool has space BEFORE creating (avoid wasted work)
585            let pool_has_space = {
586                let pool = inner.available.lock().unwrap_or_else(|poisoned| {
587                    log::warn!("Pool available lock poisoned, recovering");
588                    poisoned.into_inner()
589                });
590                let has_space = pool.len() < inner.config.max_pool_size;
591                log::trace!(
592                    "📊 Pool space check: {}/{} (has space: {})",
593                    pool.len(),
594                    inner.config.max_pool_size,
595                    has_space
596                );
597                has_space
598            };
599
600            if !pool_has_space {
601                log::warn!(
602                    "⚠️ Pool is full, stopping replacement creation at {}/{}",
603                    i,
604                    count
605                );
606                break;
607            }
608
609            log::debug!("📦 Creating replacement browser {}/{}", i + 1, count);
610
611            // Use spawn_blocking for CPU-bound browser creation
612            // This prevents blocking the async runtime
613            let inner_clone = Arc::clone(&inner);
614            let result =
615                tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;
616
617            match result {
618                Ok(Ok(tracked)) => {
619                    let id = tracked.id();
620
621                    // Add to pool (with space check to handle race conditions)
622                    let mut pool = inner.available.lock().unwrap_or_else(|poisoned| {
623                        log::warn!("Pool available lock poisoned, recovering");
624                        poisoned.into_inner()
625                    });
626
627                    // Double-check space (another thread might have added browsers)
628                    if pool.len() < inner.config.max_pool_size {
629                        pool.push(tracked);
630                        created_count += 1;
631                        log::info!(
632                            "✅ Created replacement browser {} and added to pool ({}/{})",
633                            id,
634                            i + 1,
635                            count
636                        );
637                    } else {
638                        log::warn!(
639                            "⚠️ Pool became full during creation, replacement browser {} kept in active only",
640                            id
641                        );
642                        created_count += 1; // Still count as created (just not pooled)
643                    }
644                }
645                Ok(Err(e)) => {
646                    failed_count += 1;
647                    log::error!(
648                        "❌ Failed to create replacement browser {}/{}: {}",
649                        i + 1,
650                        count,
651                        e
652                    );
653                }
654                Err(e) => {
655                    failed_count += 1;
656                    log::error!(
657                        "❌ Replacement browser {}/{} task panicked: {:?}",
658                        i + 1,
659                        count,
660                        e
661                    );
662                }
663            }
664        }
665
666        // Final status report
667        let pool_size = inner
668            .available
669            .lock()
670            .unwrap_or_else(|poisoned| {
671                log::warn!("Pool available lock poisoned, recovering");
672                poisoned.into_inner()
673            })
674            .len();
675        let active_size = inner
676            .active
677            .lock()
678            .unwrap_or_else(|poisoned| {
679                log::warn!("Pool active lock poisoned, recovering");
680                poisoned.into_inner()
681            })
682            .len();
683
684        log::info!(
685            "🏁 Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
686            created_count,
687            count,
688            failed_count,
689            pool_size,
690            active_size
691        );
692    }
693
694    /// Spawn a background task to create replacement browsers.
695    ///
696    /// This is non-blocking and returns immediately. The actual browser
697    /// creation happens in a tokio task tracked in `replacement_tasks`.
698    ///
699    /// # Why Async
700    ///
701    /// Browser creation is slow (1-3 seconds per browser). Spawning async
702    /// tasks prevents blocking the caller.
703    ///
704    /// # Task Tracking
705    ///
706    /// Tasks are tracked so we can abort them during shutdown.
707    ///
708    /// # Parameters
709    ///
710    /// * `inner` - Arc reference to pool state.
711    /// * `count` - Number of replacement browsers to create.
712    pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
713        log::info!(
714            "📥 Spawning async task to create {} replacement browsers",
715            count
716        );
717
718        // Clone Arc for moving into async task
719        let inner_for_task = Arc::clone(&inner);
720
721        // Spawn async task on the captured runtime
722        let task_handle = inner.runtime_handle.spawn(async move {
723            Self::spawn_replacement_creation_async(inner_for_task, count).await;
724        });
725
726        // Track task handle for shutdown cleanup
727        if let Ok(mut tasks) = inner.replacement_tasks.lock() {
728            // Clean up finished tasks while we have the lock (housekeeping)
729            let original_count = tasks.len();
730            tasks.retain(|h| !h.is_finished());
731            let cleaned = original_count - tasks.len();
732
733            if cleaned > 0 {
734                log::trace!("🧹 Cleaned up {} finished replacement tasks", cleaned);
735            }
736
737            // Add new task
738            tasks.push(task_handle);
739
740            log::debug!("📋 Now tracking {} active replacement tasks", tasks.len());
741        } else {
742            log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
743        }
744    }
745
746    /// Get the pool configuration.
747    #[inline]
748    pub(crate) fn config(&self) -> &BrowserPoolConfig {
749        &self.config
750    }
751
752    /// Check if the pool is shutting down.
753    #[inline]
754    pub(crate) fn is_shutting_down(&self) -> bool {
755        self.shutting_down.load(Ordering::Acquire)
756    }
757
758    /// Set the shutdown flag.
759    #[inline]
760    pub(crate) fn set_shutting_down(&self, value: bool) {
761        self.shutting_down.store(value, Ordering::Release);
762    }
763
764    /// Get the shutdown signal for the keep-alive thread.
765    #[inline]
766    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
767        &self.shutdown_signal
768    }
769
770    /// Get the available browsers count.
771    pub(crate) fn available_count(&self) -> usize {
772        self.available.lock().map(|g| g.len()).unwrap_or(0)
773    }
774
775    /// Get the active browsers count.
776    pub(crate) fn active_count(&self) -> usize {
777        self.active.lock().map(|g| g.len()).unwrap_or(0)
778    }
779
780    /// Get a snapshot of active browsers for health checking.
781    ///
782    /// Returns a cloned list to avoid holding locks during I/O.
783    pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, Arc<TrackedBrowser>)> {
784        let active = self.active.lock().unwrap_or_else(|poisoned| {
785            log::warn!("Pool active lock poisoned, recovering");
786            poisoned.into_inner()
787        });
788        active
789            .iter()
790            .map(|(id, tracked)| (*id, Arc::clone(tracked)))
791            .collect()
792    }
793
794    /// Remove a browser from active tracking.
795    pub(crate) fn remove_from_active(&self, id: u64) -> Option<Arc<TrackedBrowser>> {
796        let mut active = self.active.lock().unwrap_or_else(|poisoned| {
797            log::warn!("Pool active lock poisoned, recovering");
798            poisoned.into_inner()
799        });
800        active.remove(&id)
801    }
802
803    /// Remove browsers from the available pool by ID.
804    pub(crate) fn remove_from_available(&self, ids: &[u64]) {
805        let mut pool = self.available.lock().unwrap_or_else(|poisoned| {
806            log::warn!("Pool available lock poisoned, recovering");
807            poisoned.into_inner()
808        });
809        let original_size = pool.len();
810        pool.retain(|b| !ids.contains(&b.id()));
811        let removed = original_size - pool.len();
812        if removed > 0 {
813            log::debug!("🗑️ Removed {} browsers from available pool", removed);
814        }
815    }
816
817    /// Abort all replacement tasks.
818    pub(crate) fn abort_replacement_tasks(&self) -> usize {
819        if let Ok(mut tasks) = self.replacement_tasks.lock() {
820            let count = tasks.len();
821            for handle in tasks.drain(..) {
822                handle.abort();
823            }
824            count
825        } else {
826            0
827        }
828    }
829}
830
831// ============================================================================
832// BrowserPool
833// ============================================================================
834
/// Main browser pool with lifecycle management.
///
/// This is the public-facing API for the browser pool. It wraps the shared
/// internal state (`BrowserPoolInner`) and owns the handle of the keep-alive
/// monitoring thread.
///
/// # Overview
///
/// `BrowserPool` provides:
/// - Browser checkout via [`get()`](Self::get)
/// - Pool warmup via [`warmup()`](Self::warmup)
/// - Statistics via [`stats()`](Self::stats)
/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
///
/// # Example
///
/// ```rust,no_run
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
/// use std::time::Duration;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create pool (`mut` because `shutdown_async` takes `&mut self`)
///     let mut pool = BrowserPool::builder()
///         .config(
///             BrowserPoolConfigBuilder::new()
///                 .max_pool_size(5)
///                 .warmup_count(3)
///                 .build()?
///         )
///         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///         .build()?;
///
///     // Warmup
///     pool.warmup().await?;
///
///     // Use browsers
///     {
///         let browser = pool.get()?;
///         let tab = browser.new_tab()?;
///         // ... do work ...
///     } // browser returned to pool automatically
///
///     // Shutdown
///     pool.shutdown_async().await;
///
///     Ok(())
/// }
/// ```
///
/// # Thread Safety
///
/// `BrowserPool` uses fine-grained internal locks (`Mutex<Vec>`, `Mutex<HashMap>`)
/// so it is safe to share as `Arc<BrowserPool>` without an outer `Mutex`.
/// Use [`into_shared()`](Self::into_shared) for convenience.
pub struct BrowserPool {
    /// Shared internal state.
    ///
    /// Held behind an `Arc` so it can be cloned into background work
    /// (blocking creation tasks, the keep-alive thread, browser handles).
    inner: Arc<BrowserPoolInner>,

    /// Handle to keep-alive monitoring thread.
    ///
    /// Stored as an `Option` so shutdown can `take()` the handle and join it.
    /// `None` means keep-alive is disabled or the thread was already stopped.
    keep_alive_handle: Option<JoinHandle<()>>,
}
898
899impl BrowserPool {
900    /// Convert pool into a shared `Arc<BrowserPool>` for use in web handlers.
901    ///
902    /// This is convenient for web frameworks that need shared state.
903    /// No outer `Mutex` is needed — the pool uses fine-grained internal locks.
904    ///
905    /// # Example
906    ///
907    /// ```rust,ignore
908    /// let pool = BrowserPool::builder()
909    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
910    ///     .build()?
911    ///     .into_shared();
912    ///
913    /// // Can now be cloned and shared across handlers
914    /// let pool_clone = Arc::clone(&pool);
915    /// ```
916    pub fn into_shared(self) -> Arc<BrowserPool> {
917        log::debug!("🔍 Converting BrowserPool into shared Arc<BrowserPool>");
918        Arc::new(self)
919    }
920
921    /// Create a new builder for constructing a BrowserPool.
922    ///
923    /// This is the recommended way to create a pool.
924    ///
925    /// # Example
926    ///
927    /// ```rust,ignore
928    /// let pool = BrowserPool::builder()
929    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
930    ///     .build()?;
931    /// ```
932    pub fn builder() -> BrowserPoolBuilder {
933        BrowserPoolBuilder::new()
934    }
935
936    /// Get a browser from the pool (or create one if empty).
937    ///
938    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
939    /// allowing transparent access to browser methods.
940    ///
941    /// # Automatic Return
942    ///
943    /// The browser is automatically returned to the pool when the handle
944    /// is dropped, even if your code panics (RAII pattern).
945    ///
946    /// # Errors
947    ///
948    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
949    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
950    /// - Returns [`BrowserPoolError::HealthCheckFailed`] if all pooled browsers are unhealthy.
951    ///
952    /// # Example
953    ///
954    /// ```rust,ignore
955    /// let browser = pool.get()?;
956    /// let tab = browser.new_tab()?;
957    /// tab.navigate_to("https://example.com")?;
958    /// // browser returned automatically when it goes out of scope
959    /// ```
960    pub fn get(&self) -> Result<BrowserHandle> {
961        log::trace!("🎯 BrowserPool::get() called");
962        self.inner.get_or_create_browser()
963    }
964
965    /// Get pool statistics snapshot.
966    ///
967    /// # Returns
968    ///
969    /// [`PoolStats`] containing:
970    /// - `available`: Browsers in pool ready for checkout
971    /// - `active`: All browsers (pooled + checked out)
972    /// - `total`: Currently same as `active` (for future expansion)
973    ///
974    /// # Example
975    ///
976    /// ```rust,ignore
977    /// let stats = pool.stats();
978    /// println!("Available: {}, Active: {}", stats.available, stats.active);
979    /// ```
980    pub fn stats(&self) -> PoolStats {
981        let available = self.inner.available_count();
982        let active = self.inner.active_count();
983
984        log::trace!("📊 Pool stats: available={}, active={}", available, active);
985
986        PoolStats {
987            available,
988            active,
989            total: active,
990        }
991    }
992
993    /// Get a reference to the pool configuration.
994    ///
995    /// Returns the configuration that was used to create this pool.
996    /// The configuration is immutable after pool creation.
997    ///
998    /// # Example
999    ///
1000    /// ```rust,ignore
1001    /// let pool = BrowserPool::builder()
1002    ///     .config(
1003    ///         BrowserPoolConfigBuilder::new()
1004    ///             .max_pool_size(10)
1005    ///             .build()?
1006    ///     )
1007    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1008    ///     .build()?;
1009    ///
1010    /// println!("Max pool size: {}", pool.config().max_pool_size);
1011    /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
1012    /// ```
1013    ///
1014    /// # Use Cases
1015    ///
1016    /// - Logging configuration at startup
1017    /// - Monitoring/metrics collection
1018    /// - Readiness checks (comparing active count vs max_pool_size)
1019    /// - Debugging pool behavior
1020    #[inline]
1021    pub fn config(&self) -> &BrowserPoolConfig {
1022        self.inner.config()
1023    }
1024
1025    /// Warmup the pool by pre-creating browsers.
1026    ///
1027    /// This is highly recommended to reduce first-request latency.
1028    /// Should be called during application startup.
1029    ///
1030    /// # Process
1031    ///
1032    /// 1. Creates `warmup_count` browsers sequentially with staggered timing
1033    /// 2. Tests each browser with navigation
1034    /// 3. Returns all browsers to pool
1035    /// 4. Entire process has timeout (configurable via `warmup_timeout`)
1036    ///
1037    /// # Staggered Creation
1038    ///
1039    /// Browsers are created with a 30-second delay between them to ensure
1040    /// their TTLs are offset. This prevents all browsers from expiring
1041    /// at the same time.
1042    ///
1043    /// # Errors
1044    ///
1045    /// - Returns error if warmup times out.
1046    /// - Returns error if browser creation fails.
1047    ///
1048    /// # Example
1049    ///
1050    /// ```rust,ignore
1051    /// let pool = BrowserPool::builder()
1052    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1053    ///     .build()?;
1054    ///
1055    /// // Warmup during startup
1056    /// pool.warmup().await?;
1057    /// ```
1058    pub async fn warmup(&self) -> Result<()> {
1059        let count = self.inner.config().warmup_count;
1060        let warmup_timeout = self.inner.config().warmup_timeout;
1061
1062        log::info!(
1063            "🔥 Starting browser pool warmup with {} instances (timeout: {}s)",
1064            count,
1065            warmup_timeout.as_secs()
1066        );
1067
1068        // Wrap entire warmup in timeout to prevent hanging forever
1069        let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
1070
1071        match warmup_result {
1072            Ok(Ok(())) => {
1073                let stats = self.stats();
1074                log::info!(
1075                    "✅ Warmup completed successfully - Available: {}, Active: {}",
1076                    stats.available,
1077                    stats.active
1078                );
1079                Ok(())
1080            }
1081            Ok(Err(e)) => {
1082                log::error!("❌ Warmup failed with error: {}", e);
1083                Err(e)
1084            }
1085            Err(_) => {
1086                log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1087                Err(BrowserPoolError::Configuration(format!(
1088                    "Warmup timed out after {}s",
1089                    warmup_timeout.as_secs()
1090                )))
1091            }
1092        }
1093    }
1094
1095    /// Internal warmup implementation (separated for cleaner timeout wrapping).
1096    ///
1097    /// Creates browsers sequentially with a delay between them.
1098    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
1099    async fn warmup_internal(&self, count: usize) -> Result<()> {
1100        log::debug!("🛠️ Starting internal warmup process for {} browsers", count);
1101
1102        // STAGGER CONFIGURATION
1103        // We wait this long between creations to distribute expiration times
1104        let stagger_interval = Duration::from_secs(30);
1105
1106        let mut handles = Vec::new();
1107        let mut created_count = 0;
1108        let mut failed_count = 0;
1109
1110        for i in 0..count {
1111            log::debug!("🌐 Creating startup browser instance {}/{}", i + 1, count);
1112
1113            // Per-browser timeout (15s per browser is reasonable)
1114            // This prevents one slow browser from blocking entire warmup
1115            let browser_result = tokio::time::timeout(
1116                Duration::from_secs(15),
1117                tokio::task::spawn_blocking({
1118                    let inner = Arc::clone(&self.inner);
1119                    move || inner.create_browser_direct()
1120                }),
1121            )
1122            .await;
1123
1124            match browser_result {
1125                Ok(Ok(Ok(tracked))) => {
1126                    log::debug!(
1127                        "✅ Browser {} created, performing validation test...",
1128                        tracked.id()
1129                    );
1130
1131                    // Test the browser with actual navigation
1132                    match tracked.browser().new_tab() {
1133                        Ok(tab) => {
1134                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());
1135
1136                            // Navigate to test page
1137                            let nav_result = tab.navigate_to(
1138                                "data:text/html,<html><body>Warmup test</body></html>",
1139                            );
1140                            if let Err(e) = nav_result {
1141                                log::warn!(
1142                                    "⚠️ Browser {} test navigation failed: {}",
1143                                    tracked.id(),
1144                                    e
1145                                );
1146                            } else {
1147                                log::trace!(
1148                                    "✅ Browser {} test: navigation successful",
1149                                    tracked.id()
1150                                );
1151                            }
1152
1153                            // Clean up test tab
1154                            let _ = tab.close(true);
1155
1156                            // Keep handle so browser stays alive
1157                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));
1158
1159                            created_count += 1;
1160                            log::info!(
1161                                "✅ Browser instance {}/{} ready and validated",
1162                                i + 1,
1163                                count
1164                            );
1165                        }
1166                        Err(e) => {
1167                            failed_count += 1;
1168                            log::error!(
1169                                "❌ Browser {} validation test failed: {}",
1170                                tracked.id(),
1171                                e
1172                            );
1173
1174                            // Remove from active tracking since it's broken
1175                            self.inner.remove_from_active(tracked.id());
1176                        }
1177                    }
1178                }
1179                Ok(Ok(Err(e))) => {
1180                    failed_count += 1;
1181                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
1182                }
1183                Ok(Err(e)) => {
1184                    failed_count += 1;
1185                    log::error!(
1186                        "❌ Browser {}/{} creation task panicked: {:?}",
1187                        i + 1,
1188                        count,
1189                        e
1190                    );
1191                }
1192                Err(_) => {
1193                    failed_count += 1;
1194                    log::error!(
1195                        "❌ Browser {}/{} creation timed out (15s limit)",
1196                        i + 1,
1197                        count
1198                    );
1199                }
1200            }
1201
1202            // === STAGGER LOGIC ===
1203            // If this is not the last browser, wait before creating the next one.
1204            // This ensures their TTLs are offset by `stagger_interval`.
1205            if i < count - 1 {
1206                log::info!(
1207                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
1208                    stagger_interval.as_secs()
1209                );
1210                tokio::time::sleep(stagger_interval).await;
1211            }
1212        }
1213
1214        log::info!(
1215            "📊 Warmup creation phase: {} created, {} failed",
1216            created_count,
1217            failed_count
1218        );
1219
1220        // Return all browsers to pool by dropping handles
1221        log::debug!("🔍 Returning {} warmup browsers to pool...", handles.len());
1222        drop(handles);
1223
1224        // No delay needed: return_browser() is synchronous in the happy path,
1225        // and warmup browsers are never TTL-expired (which is the only path
1226        // that spawns async work via spawn_replacement_creation).
1227
1228        let final_stats = self.stats();
1229        log::info!(
1230            "🏁 Warmup internal completed - Pool: {}, Active: {}",
1231            final_stats.available,
1232            final_stats.active
1233        );
1234
1235        Ok(())
1236    }
1237
1238    /// Start the keep-alive monitoring thread.
1239    ///
1240    /// This background thread:
1241    /// - Pings all active browsers periodically
1242    /// - Removes unresponsive browsers after max_ping_failures
1243    /// - Retires browsers that exceed TTL
1244    /// - Spawns replacement browsers as needed
1245    ///
1246    /// # Critical Design Notes
1247    ///
1248    /// - Uses condvar for immediate shutdown signaling
1249    /// - Never holds locks during I/O operations
1250    /// - Uses consistent lock ordering (active -> pool)
1251    ///
1252    /// # Parameters
1253    ///
1254    /// * `inner` - Arc reference to pool state.
1255    ///
1256    /// # Returns
1257    ///
1258    /// JoinHandle for the background thread.
1259    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
1260        let ping_interval = inner.config().ping_interval;
1261        let max_failures = inner.config().max_ping_failures;
1262        let browser_ttl = inner.config().browser_ttl;
1263        let shutdown_signal = Arc::clone(inner.shutdown_signal());
1264
1265        log::info!(
1266            "🚀 Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
1267            ping_interval.as_secs(),
1268            max_failures,
1269            browser_ttl.as_secs() / 60
1270        );
1271
1272        thread::spawn(move || {
1273            log::info!("🏁 Keep-alive thread started successfully");
1274
1275            // Track consecutive failures per browser ID
1276            let mut failure_counts: HashMap<u64, u32> = HashMap::new();
1277
1278            loop {
1279                // Wait for next ping interval OR shutdown signal (whichever comes first)
1280                // Using condvar instead of sleep allows immediate wake-up on shutdown
1281                let (lock, cvar) = &*shutdown_signal;
1282                let wait_result = {
1283                    let shutdown = lock.lock().unwrap_or_else(|poisoned| {
1284                        log::warn!("Shutdown lock poisoned, recovering");
1285                        poisoned.into_inner()
1286                    });
1287                    cvar.wait_timeout(shutdown, ping_interval)
1288                        .unwrap_or_else(|poisoned| {
1289                            log::warn!("Condvar wait_timeout lock poisoned, recovering");
1290                            poisoned.into_inner()
1291                        })
1292                };
1293
1294                let shutdown_flag = *wait_result.0;
1295                let timed_out = wait_result.1.timed_out();
1296
1297                // Check if we were signaled to shutdown
1298                if shutdown_flag {
1299                    log::info!("🛑 Keep-alive received shutdown signal via condvar");
1300                    break;
1301                }
1302
1303                // Double-check atomic shutdown flag (belt and suspenders)
1304                if inner.is_shutting_down() {
1305                    log::info!("🛑 Keep-alive detected shutdown via atomic flag");
1306                    break;
1307                }
1308
1309                // If spuriously woken (not timeout, not shutdown), continue waiting
1310                if !timed_out {
1311                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
1312                    continue;
1313                }
1314
1315                log::trace!("⚡ Keep-alive ping cycle starting...");
1316
1317                // Collect browsers to ping WITHOUT holding locks
1318                // This is critical: we clone the list and release the lock
1319                // before doing any I/O operations
1320                let browsers_to_ping = inner.get_active_browsers_snapshot();
1321                log::trace!(
1322                    "Keep-alive checking {} active browsers",
1323                    browsers_to_ping.len()
1324                );
1325
1326                // Now ping browsers without holding any locks
1327                let mut to_remove = Vec::new();
1328                let mut expired_browsers = Vec::new();
1329
1330                for (id, tracked) in browsers_to_ping {
1331                    // Check shutdown during ping loop (allows early exit)
1332                    if inner.is_shutting_down() {
1333                        log::info!("Shutdown detected during ping loop, exiting immediately");
1334                        return;
1335                    }
1336
1337                    // Check TTL before pinging (no point pinging expired browsers)
1338                    if tracked.is_expired(browser_ttl) {
1339                        log::info!(
1340                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
1341                            id,
1342                            tracked.age_minutes(),
1343                            browser_ttl.as_secs() / 60
1344                        );
1345                        expired_browsers.push(id);
1346                        continue; // Skip ping for expired browsers
1347                    }
1348
1349                    // Perform health check (this is I/O, no locks held)
1350                    use crate::traits::Healthcheck;
1351                    match tracked.ping() {
1352                        Ok(_) => {
1353                            // Reset failure count on success
1354                            if failure_counts.remove(&id).is_some() {
1355                                log::debug!("Browser {} ping successful, failure count reset", id);
1356                            }
1357                        }
1358                        Err(e) => {
1359                            // Only process failures if NOT shutting down
1360                            // (during shutdown, browsers may legitimately fail)
1361                            if !inner.is_shutting_down() {
1362                                let failures = failure_counts.entry(id).or_insert(0);
1363                                *failures += 1;
1364
1365                                log::warn!(
1366                                    "Browser {} ping failed (attempt {}/{}): {}",
1367                                    id,
1368                                    failures,
1369                                    max_failures,
1370                                    e
1371                                );
1372
1373                                // Remove if exceeded max failures
1374                                if *failures >= max_failures {
1375                                    log::error!(
1376                                        "Browser {} exceeded max ping failures ({}), marking for removal",
1377                                        id,
1378                                        max_failures
1379                                    );
1380                                    to_remove.push(id);
1381                                }
1382                            }
1383                        }
1384                    }
1385                }
1386
1387                // Check shutdown before cleanup (avoid work if shutting down)
1388                if inner.is_shutting_down() {
1389                    log::info!("Shutdown detected before cleanup, skipping and exiting");
1390                    break;
1391                }
1392
1393                // Handle TTL retirements first (they need replacement browsers)
1394                if !expired_browsers.is_empty() {
1395                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
1396                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
1397                }
1398
1399                // Handle failed browsers (remove from tracking and pool)
1400                if !to_remove.is_empty() {
1401                    log::warn!("Removing {} failed browsers from pool", to_remove.len());
1402
1403                    // Track how many were actually removed so we know how many to replace
1404                    let mut actual_removed_count = 0;
1405
1406                    // Remove dead browsers from active tracking
1407                    for id in &to_remove {
1408                        if inner.remove_from_active(*id).is_some() {
1409                            actual_removed_count += 1;
1410                            log::debug!("Removed failed browser {} from active tracking", id);
1411                        }
1412                        failure_counts.remove(id);
1413                    }
1414
1415                    log::debug!(
1416                        "Active browsers after failure cleanup: {}",
1417                        inner.active_count()
1418                    );
1419
1420                    // Clean up pool (remove dead browsers)
1421                    inner.remove_from_available(&to_remove);
1422
1423                    log::debug!("Pool size after cleanup: {}", inner.available_count());
1424
1425                    // Trigger replacement for the browsers we just removed
1426                    if actual_removed_count > 0 {
1427                        log::info!(
1428                            "Spawning {} replacement browsers for failed ones",
1429                            actual_removed_count
1430                        );
1431                        BrowserPoolInner::spawn_replacement_creation(
1432                            Arc::clone(&inner),
1433                            actual_removed_count,
1434                        );
1435                    }
1436                }
1437
1438                // Log keep-alive cycle summary
1439                log::debug!(
1440                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
1441                    inner.active_count(),
1442                    inner.available_count(),
1443                    failure_counts.len()
1444                );
1445            }
1446
1447            log::info!("Keep-alive thread exiting cleanly");
1448        })
1449    }
1450
1451    /// Handle browser retirement due to TTL expiration.
1452    ///
1453    /// This function:
1454    /// 1. Removes expired browsers from active and pool tracking
1455    /// 2. Spawns async tasks to create replacement browsers
1456    /// 3. Maintains pool target size
1457    ///
1458    /// # Critical Lock Ordering
1459    ///
1460    /// Acquires active -> pool locks together to prevent races.
1461    ///
1462    /// # Parameters
1463    ///
1464    /// * `inner` - Arc reference to pool state.
1465    /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1466    /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1467    fn handle_browser_retirement(
1468        inner: &Arc<BrowserPoolInner>,
1469        expired_ids: Vec<u64>,
1470        failure_counts: &mut HashMap<u64, u32>,
1471    ) {
1472        log::info!(
1473            "Retiring {} expired browsers (TTL enforcement)",
1474            expired_ids.len()
1475        );
1476
1477        // Remove expired browsers from active tracking
1478        let mut retired_count = 0;
1479        for id in &expired_ids {
1480            if inner.remove_from_active(*id).is_some() {
1481                retired_count += 1;
1482                log::debug!("Removed expired browser {} from active tracking", id);
1483            }
1484            // Clean up failure tracking
1485            failure_counts.remove(id);
1486        }
1487
1488        // Remove from pool as well
1489        inner.remove_from_available(&expired_ids);
1490
1491        log::debug!(
1492            "After retirement - Active: {}, Pooled: {}",
1493            inner.active_count(),
1494            inner.available_count()
1495        );
1496
1497        // Create replacement browsers to maintain target count
1498        if retired_count > 0 {
1499            log::info!(
1500                "Spawning {} replacement browsers for retired ones",
1501                retired_count
1502            );
1503            BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1504        } else {
1505            log::debug!("No browsers were actually retired (already removed)");
1506        }
1507    }
1508
1509    /// Asynchronously shutdown the pool (recommended method).
1510    ///
1511    /// This is the preferred shutdown method as it can properly await
1512    /// async task cancellation. Should be called during application shutdown.
1513    ///
1514    /// # Shutdown Process
1515    ///
1516    /// 1. Set atomic shutdown flag (stops new operations)
1517    /// 2. Signal condvar to wake keep-alive thread immediately
1518    /// 3. Wait for keep-alive thread to exit (with timeout)
1519    /// 4. Abort all replacement creation tasks
1520    /// 5. Wait briefly for cleanup
1521    /// 6. Log final statistics
1522    ///
1523    /// # Timeout
1524    ///
1525    /// Keep-alive thread is given 5 seconds to exit gracefully.
1526    /// If it doesn't exit, we log an error but continue shutdown.
1527    ///
1528    /// # Example
1529    ///
1530    /// ```rust,ignore
1531    /// let mut pool = /* ... */;
1532    ///
1533    /// // During application shutdown
1534    /// pool.shutdown_async().await;
1535    /// ```
1536    pub async fn shutdown_async(&mut self) {
1537        log::info!("Shutting down browser pool (async mode)...");
1538
1539        // Step 1: Set shutdown flag (prevents new operations)
1540        self.inner.set_shutting_down(true);
1541        log::debug!("Shutdown flag set");
1542
1543        // Step 2: Signal condvar to wake keep-alive thread immediately
1544        // This is critical - without this, keep-alive waits for full ping_interval
1545        {
1546            let (lock, cvar) = &**self.inner.shutdown_signal();
1547            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1548                log::warn!("Shutdown lock poisoned, recovering");
1549                poisoned.into_inner()
1550            });
1551            *shutdown = true;
1552            cvar.notify_all();
1553            log::debug!("Shutdown signal sent to keep-alive thread");
1554        } // Lock released here
1555
1556        // Step 3: Wait for keep-alive thread to exit
1557        if let Some(handle) = self.keep_alive_handle.take() {
1558            log::debug!("Waiting for keep-alive thread to exit...");
1559
1560            // Wrap thread join in spawn_blocking to make it async-friendly
1561            let join_task = tokio::task::spawn_blocking(move || handle.join());
1562
1563            // Give it 5 seconds to exit gracefully
1564            match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1565                Ok(Ok(Ok(_))) => {
1566                    log::info!("Keep-alive thread stopped cleanly");
1567                }
1568                Ok(Ok(Err(_))) => {
1569                    log::error!("Keep-alive thread panicked during shutdown");
1570                }
1571                Ok(Err(_)) => {
1572                    log::error!("Keep-alive join task panicked");
1573                }
1574                Err(_) => {
1575                    log::error!("Keep-alive thread didn't exit within 5s timeout");
1576                }
1577            }
1578        } else {
1579            log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1580        }
1581
1582        // Step 4: Abort all replacement creation tasks
1583        log::info!("Aborting replacement creation tasks...");
1584        let aborted_count = self.inner.abort_replacement_tasks();
1585        if aborted_count > 0 {
1586            log::info!("Aborted {} replacement tasks", aborted_count);
1587        } else {
1588            log::debug!("No replacement tasks to abort");
1589        }
1590
1591        // Step 5: Small delay to let aborted tasks clean up
1592        tokio::time::sleep(Duration::from_millis(100)).await;
1593
1594        // Step 6: Log final statistics
1595        let stats = self.stats();
1596        log::info!(
1597            "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1598            stats.available,
1599            stats.active,
1600            stats.total
1601        );
1602    }
1603
1604    /// Synchronously shutdown the pool (fallback method).
1605    ///
1606    /// This is a simplified shutdown for use in Drop or non-async contexts.
1607    /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
1608    ///
1609    /// # Note
1610    ///
1611    /// This method doesn't wait for replacement tasks to finish since
1612    /// there's no async runtime available. Tasks are aborted but may not
1613    /// have cleaned up yet.
1614    pub fn shutdown(&mut self) {
1615        log::debug!("Calling synchronous shutdown...");
1616        self.shutdown_sync();
1617    }
1618
1619    /// Internal synchronous shutdown implementation.
1620    fn shutdown_sync(&mut self) {
1621        log::info!("Shutting down browser pool (sync mode)...");
1622
1623        // Set shutdown flag
1624        self.inner.set_shutting_down(true);
1625        log::debug!("Shutdown flag set");
1626
1627        // Signal condvar (same as async version)
1628        {
1629            let (lock, cvar) = &**self.inner.shutdown_signal();
1630            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1631                log::warn!("Shutdown lock poisoned, recovering");
1632                poisoned.into_inner()
1633            });
1634            *shutdown = true;
1635            cvar.notify_all();
1636            log::debug!("Shutdown signal sent");
1637        }
1638
1639        // Wait for keep-alive thread
1640        if let Some(handle) = self.keep_alive_handle.take() {
1641            log::debug!("Joining keep-alive thread (sync)...");
1642
1643            match handle.join() {
1644                Ok(_) => log::info!("Keep-alive thread stopped"),
1645                Err(_) => log::error!("Keep-alive thread panicked"),
1646            }
1647        }
1648
1649        // Abort replacement tasks (best effort - they won't make progress without runtime)
1650        let aborted_count = self.inner.abort_replacement_tasks();
1651        if aborted_count > 0 {
1652            log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1653        }
1654
1655        let stats = self.stats();
1656        log::info!(
1657            "Sync shutdown complete - Available: {}, Active: {}",
1658            stats.available,
1659            stats.active
1660        );
1661    }
1662
1663    /// Get a reference to the inner pool state.
1664    ///
1665    /// This is primarily for internal use and testing.
1666    #[doc(hidden)]
1667    #[allow(dead_code)]
1668    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
1669        &self.inner
1670    }
1671}
1672
1673impl Drop for BrowserPool {
1674    /// Automatic cleanup when pool is dropped.
1675    ///
1676    /// This ensures resources are released even if shutdown wasn't called explicitly.
1677    /// Uses sync shutdown since Drop can't be async.
1678    fn drop(&mut self) {
1679        log::debug!("🛑 BrowserPool Drop triggered - running cleanup");
1680
1681        // Only shutdown if not already done
1682        if !self.inner.is_shutting_down() {
1683            log::warn!("⚠ BrowserPool dropped without explicit shutdown - cleaning up");
1684            self.shutdown();
1685        } else {
1686            log::debug!(" Pool already shutdown, Drop is no-op");
1687        }
1688    }
1689}
1690
1691// ============================================================================
1692// BrowserPoolBuilder
1693// ============================================================================
1694
1695/// Builder for constructing a [`BrowserPool`] with validation.
1696///
1697/// This is the recommended way to create a pool as it validates
1698/// configuration and provides sensible defaults.
1699///
1700/// # Example
1701///
1702/// ```rust,ignore
1703/// use std::time::Duration;
1704/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
1705///
1706/// let pool = BrowserPool::builder()
1707///     .config(
1708///         BrowserPoolConfigBuilder::new()
1709///             .max_pool_size(10)
1710///             .warmup_count(5)
1711///             .browser_ttl(Duration::from_secs(7200))
1712///             .build()?
1713///     )
1714///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1715///     .enable_keep_alive(true)
1716///     .build()?;
1717/// ```
1718pub struct BrowserPoolBuilder {
1719    /// Optional configuration (uses default if not provided).
1720    config: Option<BrowserPoolConfig>,
1721
1722    /// Browser factory (required).
1723    factory: Option<Box<dyn BrowserFactory>>,
1724
1725    /// Whether to enable keep-alive thread (default: true).
1726    enable_keep_alive: bool,
1727}
1728
1729impl BrowserPoolBuilder {
1730    /// Create a new builder with defaults.
1731    ///
1732    /// # Example
1733    ///
1734    /// ```rust,ignore
1735    /// let builder = BrowserPoolBuilder::new();
1736    /// ```
1737    pub fn new() -> Self {
1738        Self {
1739            config: None,
1740            factory: None,
1741            enable_keep_alive: true,
1742        }
1743    }
1744
1745    /// Set custom configuration.
1746    ///
1747    /// If not called, uses [`BrowserPoolConfig::default()`].
1748    ///
1749    /// # Parameters
1750    ///
1751    /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1752    ///
1753    /// # Example
1754    ///
1755    /// ```rust,ignore
1756    /// let config = BrowserPoolConfigBuilder::new()
1757    ///     .max_pool_size(10)
1758    ///     .build()?;
1759    ///
1760    /// let pool = BrowserPool::builder()
1761    ///     .config(config)
1762    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1763    ///     .build()?;
1764    /// ```
1765    pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1766        self.config = Some(config);
1767        self
1768    }
1769
1770    /// Set browser factory (required).
1771    ///
1772    /// The factory is responsible for creating browser instances.
1773    /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1774    ///
1775    /// # Parameters
1776    ///
1777    /// * `factory` - A boxed [`BrowserFactory`] implementation.
1778    ///
1779    /// # Example
1780    ///
1781    /// ```rust,ignore
1782    /// let pool = BrowserPool::builder()
1783    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1784    ///     .build()?;
1785    /// ```
1786    pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1787        self.factory = Some(factory);
1788        self
1789    }
1790
1791    /// Enable or disable keep-alive thread.
1792    ///
1793    /// Keep-alive should be disabled only for testing.
1794    /// Production use should always have it enabled.
1795    ///
1796    /// # Parameters
1797    ///
1798    /// * `enable` - Whether to enable the keep-alive thread.
1799    ///
1800    /// # Example
1801    ///
1802    /// ```rust,ignore
1803    /// // Disable for tests
1804    /// let pool = BrowserPool::builder()
1805    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1806    ///     .enable_keep_alive(false)
1807    ///     .build()?;
1808    /// ```
1809    pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1810        self.enable_keep_alive = enable;
1811        self
1812    }
1813
1814    /// Build the browser pool.
1815    ///
1816    /// # Errors
1817    ///
1818    /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1819    ///
1820    /// # Panics
1821    ///
1822    /// Panics if called outside a tokio runtime context.
1823    ///
1824    /// # Example
1825    ///
1826    /// ```rust,ignore
1827    /// let pool = BrowserPool::builder()
1828    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1829    ///     .build()?;
1830    /// ```
1831    pub fn build(self) -> Result<BrowserPool> {
1832        let config = self.config.unwrap_or_default();
1833        let factory = self.factory.ok_or_else(|| {
1834            BrowserPoolError::Configuration("No browser factory provided".to_string())
1835        })?;
1836
1837        log::info!("📦 Building browser pool with config: {:?}", config);
1838
1839        // Create inner state
1840        let inner = BrowserPoolInner::new(config, factory);
1841
1842        // Start keep-alive thread if enabled
1843        let keep_alive_handle = if self.enable_keep_alive {
1844            log::info!("🚀 Starting keep-alive monitoring thread");
1845            Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1846        } else {
1847            log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1848            None
1849        };
1850
1851        log::info!("✅ Browser pool built successfully");
1852
1853        Ok(BrowserPool {
1854            inner,
1855            keep_alive_handle,
1856        })
1857    }
1858}
1859
1860impl Default for BrowserPoolBuilder {
1861    fn default() -> Self {
1862        Self::new()
1863    }
1864}
1865
1866// ============================================================================
1867// Environment Initialization (feature-gated)
1868// ============================================================================
1869
/// Initialize browser pool from environment variables.
///
/// Convenience wrapper for the common startup sequence: load the pool
/// configuration from the environment, pick a Chrome factory (custom path or
/// auto-detect), build the pool with keep-alive enabled, warm it up, and
/// hand back a shared handle.
///
/// # Feature Flag
///
/// This function is only available when the `env-config` feature is enabled.
///
/// # Environment Variables
///
/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
/// - `CHROME_PATH`: Custom Chrome binary path (optional)
///
/// # Returns
///
/// `Arc<BrowserPool>` ready for use in web handlers.
///
/// # Errors
///
/// - Returns error if configuration is invalid.
/// - Returns error if the pool cannot be built.
/// - Returns error if warmup fails.
///
/// # Example
///
/// ```rust,ignore
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     env_logger::init();
///
///     let pool = init_browser_pool().await?;
///
///     // Use pool in handlers...
///
///     Ok(())
/// }
/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<BrowserPool>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Load configuration from environment
    let config = from_env()?;

    // Get optional Chrome path
    let chrome_path = chrome_path_from_env();

    // Read the values needed for logging once, up front. In particular,
    // keeping `warmup_count` lets `config` be moved into the builder below
    // instead of cloning the whole configuration just for a later log line.
    let warmup_count = config.warmup_count;
    let ttl_secs = config.browser_ttl.as_secs();

    log::info!("Pool configuration from environment:");
    log::info!("   - Max pool size: {}", config.max_pool_size);
    log::info!("   - Warmup count: {}", warmup_count);
    log::info!("   - Browser TTL: {}s ({}min)", ttl_secs, ttl_secs / 60);
    log::info!("   - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        "   - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // Create factory based on whether custom path is provided
    let factory: Box<dyn BrowserFactory> = match chrome_path {
        Some(path) => {
            log::info!("Using custom Chrome path: {}", path);
            Box::new(ChromeBrowserFactory::with_path(path))
        }
        None => {
            log::info!("Using auto-detected Chrome browser");
            Box::new(ChromeBrowserFactory::with_defaults())
        }
    };

    // Create browser pool with Chrome factory; `config` is moved, not cloned.
    log::debug!("Building browser pool...");
    let pool = BrowserPool::builder()
        .config(config)
        .factory(factory)
        .enable_keep_alive(true)
        .build()
        .map_err(|e| {
            log::error!("❌ Failed to create browser pool: {}", e);
            e
        })?;

    log::info!("✅ Browser pool created successfully");

    // Warmup the pool
    log::info!(
        "Warming up browser pool with {} instances...",
        warmup_count
    );
    pool.warmup().await.map_err(|e| {
        log::error!("❌ Failed to warmup pool: {}", e);
        e
    })?;

    let stats = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        stats.available,
        stats.active,
        stats.total
    );

    Ok(pool.into_shared())
}
1983
1984// ============================================================================
1985// Unit Tests
1986// ============================================================================
1987
#[cfg(test)]
mod tests {
    use super::*;

    /// Building without a factory must fail with a `Configuration` error.
    ///
    /// The pool cannot create browsers without a factory, so the builder is
    /// expected to reject the build and name the missing factory in the message.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised inside a tokio runtime.
        let runtime = tokio::runtime::Runtime::new().unwrap();

        runtime.block_on(async {
            let pool_config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // No `.factory(...)` call on purpose.
            let outcome = BrowserPool::builder().config(pool_config).build();

            assert!(outcome.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(msg)) = outcome {
                assert!(
                    msg.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    msg
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` yields an empty builder with keep-alive switched on.
    #[test]
    fn test_builder_default() {
        let fresh = BrowserPoolBuilder::default();
        assert!(fresh.config.is_none());
        assert!(fresh.factory.is_none());
        assert!(fresh.enable_keep_alive);
    }

    /// `enable_keep_alive(false)` turns the keep-alive flag off.
    #[test]
    fn test_builder_disable_keep_alive() {
        let without_keep_alive = BrowserPoolBuilder::new().enable_keep_alive(false);
        assert!(!without_keep_alive.enable_keep_alive);
    }
}