html2pdf_api/
pool.rs

1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//!   ├─ BrowserPoolInner (shared state)
21//!   │   ├─ available: Vec<TrackedBrowser>  (pooled, ready to use)
22//!   │   ├─ active: HashMap<id, TrackedBrowser>  (every live browser, pooled or checked out; tracked for health)
23//!   │   └─ replacement_tasks: Vec<JoinHandle>  (async replacement creators)
24//!   └─ keep_alive_handle: JoinHandle  (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//!     // Create pool
41//!     let mut pool = BrowserPool::builder()
42//!         .config(
43//!             BrowserPoolConfigBuilder::new()
44//!                 .max_pool_size(5)
45//!                 .warmup_count(3)
46//!                 .build()?
47//!         )
48//!         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//!         .build()?;
50//!
51//!     // Warmup
52//!     pool.warmup().await?;
53//!
54//!     // Use browsers
55//!     {
56//!         let browser = pool.get()?;
57//!         let tab = browser.new_tab()?;
58//!         // ... do work ...
59//!     } // browser returned to pool automatically
60//!
61//!     // Shutdown
62//!     pool.shutdown_async().await;
63//!
64//!     Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
87/// Internal shared state for the browser pool.
88///
89/// This struct contains all shared state and is wrapped in Arc for thread-safe
90/// sharing between the pool, handles, and background threads.
91///
92/// # Lock Ordering (CRITICAL)
93///
94/// Always acquire locks in this order to prevent deadlocks:
95/// 1. `active` (browsers currently in use)
96/// 2. `available` (browsers in pool ready for use)
97///
98/// Never hold locks during I/O operations or browser creation.
99///
100/// # Thread Safety
101///
102/// All fields are protected by appropriate synchronization primitives:
103/// - `Mutex` for mutable collections
104/// - `AtomicBool` for shutdown flag
105/// - `Arc` for shared ownership
106pub(crate) struct BrowserPoolInner {
107    /// Configuration (immutable after creation).
108    config: BrowserPoolConfig,
109
110    /// Browsers available for checkout (not currently in use).
111    ///
112    /// Protected by Mutex. Browsers are moved from here when checked out
113    /// and returned here when released (if pool not full).
114    available: Mutex<Vec<TrackedBrowser>>,
115
116    /// All browsers that exist (both pooled and checked out).
117    ///
118    /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
119    /// Maps browser ID -> TrackedBrowser for fast lookup.
120    active: Mutex<HashMap<u64, TrackedBrowser>>,
121
122    /// Factory for creating new browser instances.
123    factory: Box<dyn BrowserFactory>,
124
125    /// Atomic flag indicating shutdown in progress.
126    ///
127    /// Checked before expensive operations. Once set, no new operations start.
128    shutting_down: AtomicBool,
129
130    /// Background tasks creating replacement browsers.
131    ///
132    /// Tracked so we can abort them during shutdown.
133    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,
134
135    /// Handle to tokio runtime for spawning async tasks.
136    ///
137    /// Captured at creation time to allow spawning from any context.
138    runtime_handle: tokio::runtime::Handle,
139
140    /// Shutdown signaling mechanism for keep-alive thread.
141    ///
142    /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
143    /// instead of waiting for full ping_interval.
144    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
145}
146
147impl BrowserPoolInner {
148    /// Create a new browser pool inner state.
149    ///
150    /// # Parameters
151    ///
152    /// * `config` - Validated configuration.
153    /// * `factory` - Browser factory for creating instances.
154    ///
155    /// # Panics
156    ///
157    /// Panics if called outside a tokio runtime context.
158    pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159        log::info!(
160            " Initializing browser pool with capacity {}",
161            config.max_pool_size
162        );
163        log::debug!(
164            " Pool config: warmup={}, TTL={}s, ping_interval={}s",
165            config.warmup_count,
166            config.browser_ttl.as_secs(),
167            config.ping_interval.as_secs()
168        );
169
170        // Capture runtime handle for spawning async tasks
171        // This allows us to spawn from sync contexts (like Drop)
172        let runtime_handle = tokio::runtime::Handle::current();
173
174        Arc::new(Self {
175            config,
176            available: Mutex::new(Vec::new()),
177            active: Mutex::new(HashMap::new()),
178            factory,
179            shutting_down: AtomicBool::new(false),
180            replacement_tasks: Mutex::new(Vec::new()),
181            runtime_handle,
182            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183        })
184    }
185
186    /// Create a browser directly without using the pool.
187    ///
188    /// Used for:
189    /// - Initial warmup
190    /// - Replacing failed browsers
191    /// - When pool is empty
192    ///
193    /// # Important
194    ///
195    /// Adds the browser to `active` tracking immediately for health monitoring.
196    ///
197    /// # Errors
198    ///
199    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
200    /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
201    pub(crate) fn create_browser_direct(&self) -> Result<TrackedBrowser> {
202        // Early exit if shutting down (don't waste time creating browsers)
203        if self.shutting_down.load(Ordering::Acquire) {
204            log::debug!(" Skipping browser creation - pool is shutting down");
205            return Err(BrowserPoolError::ShuttingDown);
206        }
207
208        log::debug!("️ Creating new browser directly via factory...");
209
210        // Factory handles all Chrome launch complexity
211        let browser = self.factory.create()?;
212
213        // Wrap with tracking metadata
214        let tracked = TrackedBrowser::new(browser)?;
215        let id = tracked.id();
216
217        // Add to active tracking immediately for health monitoring
218        // This ensures keep-alive thread will monitor it
219        if let Ok(mut active) = self.active.lock() {
220            active.insert(id, tracked.clone());
221            log::debug!(
222                " Browser {} added to active tracking (total active: {})",
223                id,
224                active.len()
225            );
226        } else {
227            log::warn!(
228                "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
229                id
230            );
231        }
232
233        log::info!("✅ Created new browser with ID {}", id);
234        Ok(tracked)
235    }
236
237    /// Get a browser from pool or create a new one.
238    ///
239    /// # Algorithm
240    ///
241    /// 1. Loop through pooled browsers
242    /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
243    ///    - If near expiry: Skip (drop) it immediately.
244    ///    - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
245    /// 3. For valid browsers, perform detailed health check (without holding locks)
246    /// 4. If healthy, return it
247    /// 5. If unhealthy, remove from active tracking and try next
248    /// 6. If pool empty or all skipped/unhealthy, create new browser
249    ///
250    /// # Critical: Lock-Free Health Checks
251    ///
252    /// Health checks are performed WITHOUT holding locks to avoid blocking
253    /// other threads. This is why we use a loop pattern instead of iterator.
254    ///
255    /// # Returns
256    ///
257    /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
258    ///
259    /// # Errors
260    ///
261    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
262    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
263    pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
264        log::debug!(" Attempting to get browser from pool...");
265
266        // Try to get from pool - LOOP pattern to avoid holding lock during health checks
267        // This is critical for concurrency: we release the lock between attempts
268        loop {
269            // Acquire lock briefly to pop one browser
270            let tracked_opt = {
271                let mut available = self.available.lock().unwrap();
272                let popped = available.pop();
273                log::trace!(" Pool size after pop: {}", available.len());
274                popped
275            }; // Lock released here - critical for performance
276
277            if let Some(tracked) = tracked_opt {
278                // === LOGIC START: Grace Period Check ===
279                let age = tracked.created_at().elapsed();
280                let ttl = self.config.browser_ttl;
281
282                // Safety margin matching your stagger interval
283                let safety_margin = Duration::from_secs(30);
284
285                // If browser is about to expire, don't use it.
286                if age + safety_margin > ttl {
287                    log::debug!(
288                        "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
289                        tracked.id(),
290                        age.as_secs()
291                    );
292
293                    // CRITICAL: We do NOT remove/recreate here.
294                    // By simply 'continuing', we drop this 'tracked' instance.
295                    // 1. It is NOT returned to 'available' (so no user gets it).
296                    // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
297                    // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
298                    continue;
299                }
300                // === LOGIC END: Grace Period Check ===
301
302                log::debug!(" Testing browser {} from pool for health...", tracked.id());
303
304                // Detailed health check WITHOUT holding any locks
305                // This prevents blocking other threads during I/O
306                match tracked.browser().new_tab() {
307                    Ok(tab) => {
308                        log::trace!(
309                            "✅ Browser {} health check: new_tab() successful",
310                            tracked.id()
311                        );
312
313                        // Test navigation capability (full health check)
314                        match tab
315                            .navigate_to("data:text/html,<html><body>Health check</body></html>")
316                        {
317                            Ok(_) => {
318                                log::trace!(
319                                    "✅ Browser {} health check: navigation successful",
320                                    tracked.id()
321                                );
322
323                                // Test cleanup capability
324                                match tab.close(true) {
325                                    Ok(_) => {
326                                        log::debug!(
327                                            "✅ Browser {} passed full health check - ready for use",
328                                            tracked.id()
329                                        );
330
331                                        // Get pool size for logging (brief lock)
332                                        let pool_size = {
333                                            let available = self.available.lock().unwrap();
334                                            available.len()
335                                        };
336
337                                        log::info!(
338                                            "♻️ Reusing healthy browser {} from pool (pool size: {})",
339                                            tracked.id(),
340                                            pool_size
341                                        );
342
343                                        // Return healthy browser wrapped in RAII handle
344                                        return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
345                                    }
346                                    Err(e) => {
347                                        log::warn!(
348                                            "❌ Browser {} health check: tab close failed: {}",
349                                            tracked.id(),
350                                            e
351                                        );
352                                    }
353                                }
354                            }
355                            Err(e) => {
356                                log::warn!(
357                                    "❌ Browser {} health check: navigation failed: {}",
358                                    tracked.id(),
359                                    e
360                                );
361                            }
362                        }
363                    }
364                    Err(e) => {
365                        log::warn!(
366                            "❌ Browser {} health check: new_tab() failed: {}",
367                            tracked.id(),
368                            e
369                        );
370                    }
371                }
372
373                // If we reach here, health check failed
374                // Remove from active tracking (browser is dead)
375                log::warn!(
376                    "️ Removing unhealthy browser {} from active tracking",
377                    tracked.id()
378                );
379                {
380                    let mut active = self.active.lock().unwrap();
381                    active.remove(&tracked.id());
382                    log::debug!(" Active browsers after removal: {}", active.len());
383                }
384
385                // Continue loop to try next browser in pool
386                log::debug!(" Trying next browser from pool...");
387            } else {
388                // Pool is empty, break to create new browser
389                log::debug!(" Pool is empty, will create new browser");
390                break;
391            }
392        }
393
394        // Pool is empty or no healthy browsers found
395        log::info!("️ Creating new browser (pool was empty or all browsers unhealthy)");
396
397        let tracked = self.create_browser_direct()?;
398
399        log::info!("✅ Returning newly created browser {}", tracked.id());
400        Ok(BrowserHandle::new(tracked, Arc::clone(self)))
401    }
402
403    /// Return a browser to the pool (called by BrowserHandle::drop).
404    ///
405    /// # Critical Lock Ordering
406    ///
407    /// Always acquires locks in order: active -> available.
408    /// Both locks are held together to prevent race conditions.
409    ///
410    /// # Algorithm
411    ///
412    /// 1. Acquire both locks (order: active, then available)
413    /// 2. Verify browser is in active tracking
414    /// 3. Check TTL - if expired, retire and trigger replacement
415    /// 4. If pool has space, add to available pool
416    /// 5. If pool full, remove from active (browser gets dropped)
417    ///
418    /// # Parameters
419    ///
420    /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
421    /// * `tracked` - The browser being returned.
422    pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: TrackedBrowser) {
423        log::debug!(" Returning browser {} to pool...", tracked.id());
424
425        // Early exit if shutting down (don't waste time managing pool)
426        if self_arc.shutting_down.load(Ordering::Acquire) {
427            log::debug!(
428                " Pool shutting down, not returning browser {}",
429                tracked.id()
430            );
431            return;
432        }
433
434        // CRITICAL: Always acquire in order: active -> pool
435        // Holding both locks prevents ALL race conditions:
436        // - Prevents concurrent modifications to browser state
437        // - Prevents duplicate returns
438        // - Ensures pool size limits are respected
439        let mut active = self_arc.active.lock().unwrap();
440        let mut pool = self_arc.available.lock().unwrap();
441
442        // Verify browser is actually tracked (sanity check)
443        if !active.contains_key(&tracked.id()) {
444            log::warn!(
445                "❌ Browser {} not in active tracking (probably already removed), skipping return",
446                tracked.id()
447            );
448            return;
449        }
450
451        // Check TTL before returning to pool
452        // Expired browsers should be retired to prevent memory leaks
453        if tracked.is_expired(self_arc.config.browser_ttl) {
454            log::info!(
455                "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
456                tracked.id(),
457                tracked.age_minutes(),
458                self_arc.config.browser_ttl.as_secs() / 60
459            );
460
461            // Remove from active tracking
462            active.remove(&tracked.id());
463            log::debug!(" Active browsers after TTL retirement: {}", active.len());
464
465            // Release locks before spawning replacement task
466            drop(active);
467            drop(pool);
468
469            // Trigger async replacement creation (non-blocking)
470            log::debug!(" Triggering replacement browser creation for expired browser");
471            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
472            return;
473        }
474
475        // Prevent duplicate returns (defensive programming)
476        if pool.iter().any(|b| b.id() == tracked.id()) {
477            log::warn!(
478                "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
479                tracked.id()
480            );
481            return;
482        }
483
484        // Check if pool has space for this browser
485        if pool.len() < self_arc.config.max_pool_size {
486            // Add to pool for reuse
487            pool.push(tracked.clone());
488            log::info!(
489                "♻️ Browser {} returned to pool (pool size: {}/{})",
490                tracked.id(),
491                pool.len(),
492                self_arc.config.max_pool_size
493            );
494        } else {
495            // Pool is full, remove from tracking (browser will be dropped)
496            log::debug!(
497                "️ Pool full ({}/{}), removing browser {} from system",
498                pool.len(),
499                self_arc.config.max_pool_size,
500                tracked.id()
501            );
502            active.remove(&tracked.id());
503            log::debug!(" Active browsers after removal: {}", active.len());
504        }
505    }
506
507    /// Asynchronously create replacement browsers (internal helper).
508    ///
509    /// This is the async work function that actually creates browsers.
510    /// It's spawned as a tokio task by `spawn_replacement_creation`.
511    ///
512    /// # Algorithm
513    ///
514    /// 1. Check shutdown flag before each creation
515    /// 2. Check pool space before each creation
516    /// 3. Use spawn_blocking for CPU-bound browser creation
517    /// 4. Add successful browsers to pool
518    /// 5. Log detailed status
519    ///
520    /// # Parameters
521    ///
522    /// * `inner` - Arc reference to pool state.
523    /// * `count` - Number of browsers to attempt to create.
524    async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
525        log::info!(
526            " Starting async replacement creation for {} browsers",
527            count
528        );
529
530        let mut created_count = 0;
531        let mut failed_count = 0;
532
533        for i in 0..count {
534            // Check shutdown flag before each expensive operation
535            if inner.shutting_down.load(Ordering::Acquire) {
536                log::info!(
537                    " Shutdown detected during replacement creation, stopping at {}/{}",
538                    i,
539                    count
540                );
541                break;
542            }
543
544            // Check if pool has space BEFORE creating (avoid wasted work)
545            let pool_has_space = {
546                let pool = inner.available.lock().unwrap();
547                let has_space = pool.len() < inner.config.max_pool_size;
548                log::trace!(
549                    " Pool space check: {}/{} (has space: {})",
550                    pool.len(),
551                    inner.config.max_pool_size,
552                    has_space
553                );
554                has_space
555            };
556
557            if !pool_has_space {
558                log::warn!(
559                    "⚠️ Pool is full, stopping replacement creation at {}/{}",
560                    i,
561                    count
562                );
563                break;
564            }
565
566            log::debug!("️ Creating replacement browser {}/{}", i + 1, count);
567
568            // Use spawn_blocking for CPU-bound browser creation
569            // This prevents blocking the async runtime
570            let inner_clone = Arc::clone(&inner);
571            let result =
572                tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;
573
574            match result {
575                Ok(Ok(tracked)) => {
576                    let id = tracked.id();
577
578                    // Add to pool (with space check to handle race conditions)
579                    let mut pool = inner.available.lock().unwrap();
580
581                    // Double-check space (another thread might have added browsers)
582                    if pool.len() < inner.config.max_pool_size {
583                        pool.push(tracked);
584                        created_count += 1;
585                        log::info!(
586                            "✅ Created replacement browser {} and added to pool ({}/{})",
587                            id,
588                            i + 1,
589                            count
590                        );
591                    } else {
592                        log::warn!(
593                            "⚠️ Pool became full during creation, replacement browser {} kept in active only",
594                            id
595                        );
596                        created_count += 1; // Still count as created (just not pooled)
597                    }
598                }
599                Ok(Err(e)) => {
600                    failed_count += 1;
601                    log::error!(
602                        "❌ Failed to create replacement browser {}/{}: {}",
603                        i + 1,
604                        count,
605                        e
606                    );
607                }
608                Err(e) => {
609                    failed_count += 1;
610                    log::error!(
611                        "❌ Replacement browser {}/{} task panicked: {:?}",
612                        i + 1,
613                        count,
614                        e
615                    );
616                }
617            }
618        }
619
620        // Final status report
621        let pool_size = inner.available.lock().unwrap().len();
622        let active_size = inner.active.lock().unwrap().len();
623
624        log::info!(
625            " Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
626            created_count,
627            count,
628            failed_count,
629            pool_size,
630            active_size
631        );
632    }
633
634    /// Spawn a background task to create replacement browsers.
635    ///
636    /// This is non-blocking and returns immediately. The actual browser
637    /// creation happens in a tokio task tracked in `replacement_tasks`.
638    ///
639    /// # Why Async
640    ///
641    /// Browser creation is slow (1-3 seconds per browser). Spawning async
642    /// tasks prevents blocking the caller.
643    ///
644    /// # Task Tracking
645    ///
646    /// Tasks are tracked so we can abort them during shutdown.
647    ///
648    /// # Parameters
649    ///
650    /// * `inner` - Arc reference to pool state.
651    /// * `count` - Number of replacement browsers to create.
652    pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
653        log::info!(
654            " Spawning async task to create {} replacement browsers",
655            count
656        );
657
658        // Clone Arc for moving into async task
659        let inner_for_task = Arc::clone(&inner);
660
661        // Spawn async task on the captured runtime
662        let task_handle = inner.runtime_handle.spawn(async move {
663            Self::spawn_replacement_creation_async(inner_for_task, count).await;
664        });
665
666        // Track task handle for shutdown cleanup
667        if let Ok(mut tasks) = inner.replacement_tasks.lock() {
668            // Clean up finished tasks while we have the lock (housekeeping)
669            let original_count = tasks.len();
670            tasks.retain(|h| !h.is_finished());
671            let cleaned = original_count - tasks.len();
672
673            if cleaned > 0 {
674                log::trace!("粒 Cleaned up {} finished replacement tasks", cleaned);
675            }
676
677            // Add new task
678            tasks.push(task_handle);
679
680            log::debug!(" Now tracking {} active replacement tasks", tasks.len());
681        } else {
682            log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
683        }
684    }
685
686    /// Get the pool configuration.
687    #[inline]
688    pub(crate) fn config(&self) -> &BrowserPoolConfig {
689        &self.config
690    }
691
692    /// Check if the pool is shutting down.
693    #[inline]
694    pub(crate) fn is_shutting_down(&self) -> bool {
695        self.shutting_down.load(Ordering::Acquire)
696    }
697
698    /// Set the shutdown flag.
699    #[inline]
700    pub(crate) fn set_shutting_down(&self, value: bool) {
701        self.shutting_down.store(value, Ordering::Release);
702    }
703
704    /// Get the shutdown signal for the keep-alive thread.
705    #[inline]
706    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
707        &self.shutdown_signal
708    }
709
710    /// Get the available browsers count.
711    pub(crate) fn available_count(&self) -> usize {
712        self.available.lock().map(|g| g.len()).unwrap_or(0)
713    }
714
715    /// Get the active browsers count.
716    pub(crate) fn active_count(&self) -> usize {
717        self.active.lock().map(|g| g.len()).unwrap_or(0)
718    }
719
720    /// Get a snapshot of active browsers for health checking.
721    ///
722    /// Returns a cloned list to avoid holding locks during I/O.
723    pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, TrackedBrowser)> {
724        let active = self.active.lock().unwrap();
725        active
726            .iter()
727            .map(|(id, tracked)| (*id, tracked.clone()))
728            .collect()
729    }
730
731    /// Remove a browser from active tracking.
732    pub(crate) fn remove_from_active(&self, id: u64) -> Option<TrackedBrowser> {
733        let mut active = self.active.lock().unwrap();
734        active.remove(&id)
735    }
736
737    /// Remove browsers from the available pool by ID.
738    pub(crate) fn remove_from_available(&self, ids: &[u64]) {
739        let mut pool = self.available.lock().unwrap();
740        let original_size = pool.len();
741        pool.retain(|b| !ids.contains(&b.id()));
742        let removed = original_size - pool.len();
743        if removed > 0 {
744            log::debug!("️ Removed {} browsers from available pool", removed);
745        }
746    }
747
748    /// Abort all replacement tasks.
749    pub(crate) fn abort_replacement_tasks(&self) -> usize {
750        if let Ok(mut tasks) = self.replacement_tasks.lock() {
751            let count = tasks.len();
752            for handle in tasks.drain(..) {
753                handle.abort();
754            }
755            count
756        } else {
757            0
758        }
759    }
760}
761
762// ============================================================================
763// BrowserPool
764// ============================================================================
765
/// Main browser pool with lifecycle management.
///
/// This is the public-facing API for the browser pool. It owns a reference to
/// the shared internal state and the handle of the keep-alive thread.
///
/// # Overview
///
/// `BrowserPool` provides:
/// - Browser checkout via [`get()`](Self::get)
/// - Pool warmup via [`warmup()`](Self::warmup)
/// - Statistics via [`stats()`](Self::stats)
/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
///   (or [`shutdown()`](Self::shutdown) outside an async context)
///
/// # Example
///
/// ```rust,no_run
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
/// use std::time::Duration;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create pool
///     let mut pool = BrowserPool::builder()
///         .config(
///             BrowserPoolConfigBuilder::new()
///                 .max_pool_size(5)
///                 .warmup_count(3)
///                 .build()?
///         )
///         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///         .build()?;
///
///     // Warmup
///     pool.warmup().await?;
///
///     // Use browsers
///     {
///         let browser = pool.get()?;
///         let tab = browser.new_tab()?;
///         // ... do work ...
///     } // browser returned to pool automatically
///
///     // Shutdown
///     pool.shutdown_async().await;
///
///     Ok(())
/// }
/// ```
///
/// # Thread Safety
///
/// `BrowserPool` is `Send` and can be wrapped in `Arc<Mutex<>>` for sharing
/// across threads. Use [`into_shared()`](Self::into_shared) for convenience.
pub struct BrowserPool {
    /// Shared internal state (pooled and active browsers, config, shutdown flag).
    inner: Arc<BrowserPoolInner>,

    /// Handle to the keep-alive monitoring thread.
    ///
    /// Wrapped in `Option` so the shutdown paths can `take()` it in order to
    /// join the thread. `None` means keep-alive was disabled or the thread has
    /// already been stopped.
    keep_alive_handle: Option<JoinHandle<()>>,
}
828
829impl BrowserPool {
830    /// Convert pool into a shared `Arc<Mutex<>>` for use in web handlers.
831    ///
832    /// This is convenient for web frameworks that need shared state.
833    ///
834    /// # Example
835    ///
836    /// ```rust,ignore
837    /// let pool = BrowserPool::builder()
838    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
839    ///     .build()?
840    ///     .into_shared();
841    ///
842    /// // Can now be cloned and shared across handlers
843    /// let pool_clone = Arc::clone(&pool);
844    /// ```
845    pub fn into_shared(self) -> Arc<Mutex<BrowserPool>> {
846        log::debug!(" Converting BrowserPool into shared Arc<Mutex<>>");
847        Arc::new(Mutex::new(self))
848    }
849
850    /// Create a new builder for constructing a BrowserPool.
851    ///
852    /// This is the recommended way to create a pool.
853    ///
854    /// # Example
855    ///
856    /// ```rust,ignore
857    /// let pool = BrowserPool::builder()
858    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
859    ///     .build()?;
860    /// ```
861    pub fn builder() -> BrowserPoolBuilder {
862        BrowserPoolBuilder::new()
863    }
864
865    /// Get a browser from the pool (or create one if empty).
866    ///
867    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
868    /// allowing transparent access to browser methods.
869    ///
870    /// # Automatic Return
871    ///
872    /// The browser is automatically returned to the pool when the handle
873    /// is dropped, even if your code panics (RAII pattern).
874    ///
875    /// # Errors
876    ///
877    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
878    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
879    /// - Returns [`BrowserPoolError::HealthCheckFailed`] if all pooled browsers are unhealthy.
880    ///
881    /// # Example
882    ///
883    /// ```rust,ignore
884    /// let browser = pool.get()?;
885    /// let tab = browser.new_tab()?;
886    /// tab.navigate_to("https://example.com")?;
887    /// // browser returned automatically when it goes out of scope
888    /// ```
889    pub fn get(&self) -> Result<BrowserHandle> {
890        log::trace!(" BrowserPool::get() called");
891        self.inner.get_or_create_browser()
892    }
893
894    /// Get pool statistics snapshot.
895    ///
896    /// # Returns
897    ///
898    /// [`PoolStats`] containing:
899    /// - `available`: Browsers in pool ready for checkout
900    /// - `active`: All browsers (pooled + checked out)
901    /// - `total`: Currently same as `active` (for future expansion)
902    ///
903    /// # Example
904    ///
905    /// ```rust,ignore
906    /// let stats = pool.stats();
907    /// println!("Available: {}, Active: {}", stats.available, stats.active);
908    /// ```
909    pub fn stats(&self) -> PoolStats {
910        let available = self.inner.available_count();
911        let active = self.inner.active_count();
912
913        log::trace!(" Pool stats: available={}, active={}", available, active);
914
915        PoolStats {
916            available,
917            active,
918            total: active,
919        }
920    }
921
922    /// Warmup the pool by pre-creating browsers.
923    ///
924    /// This is highly recommended to reduce first-request latency.
925    /// Should be called during application startup.
926    ///
927    /// # Process
928    ///
929    /// 1. Creates `warmup_count` browsers sequentially with staggered timing
930    /// 2. Tests each browser with navigation
931    /// 3. Returns all browsers to pool
932    /// 4. Entire process has timeout (configurable via `warmup_timeout`)
933    ///
934    /// # Staggered Creation
935    ///
936    /// Browsers are created with a 30-second delay between them to ensure
937    /// their TTLs are offset. This prevents all browsers from expiring
938    /// at the same time.
939    ///
940    /// # Errors
941    ///
942    /// - Returns error if warmup times out.
943    /// - Returns error if browser creation fails.
944    ///
945    /// # Example
946    ///
947    /// ```rust,ignore
948    /// let pool = BrowserPool::builder()
949    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
950    ///     .build()?;
951    ///
952    /// // Warmup during startup
953    /// pool.warmup().await?;
954    /// ```
955    pub async fn warmup(&self) -> Result<()> {
956        let count = self.inner.config().warmup_count;
957        let warmup_timeout = self.inner.config().warmup_timeout;
958
959        log::info!(
960            " Starting browser pool warmup with {} instances (timeout: {}s)",
961            count,
962            warmup_timeout.as_secs()
963        );
964
965        // Wrap entire warmup in timeout to prevent hanging forever
966        let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
967
968        match warmup_result {
969            Ok(Ok(())) => {
970                let stats = self.stats();
971                log::info!(
972                    "✅ Warmup completed successfully - Available: {}, Active: {}",
973                    stats.available,
974                    stats.active
975                );
976                Ok(())
977            }
978            Ok(Err(e)) => {
979                log::error!("❌ Warmup failed with error: {}", e);
980                Err(e)
981            }
982            Err(_) => {
983                log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
984                Err(BrowserPoolError::Configuration(format!(
985                    "Warmup timed out after {}s",
986                    warmup_timeout.as_secs()
987                )))
988            }
989        }
990    }
991
992    /// Internal warmup implementation (separated for cleaner timeout wrapping).
993    ///
994    /// Creates browsers sequentially with a delay between them.
995    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
996    async fn warmup_internal(&self, count: usize) -> Result<()> {
997        log::debug!(" Starting internal warmup process for {} browsers", count);
998
999        // STAGGER CONFIGURATION
1000        // We wait this long between creations to distribute expiration times
1001        let stagger_interval = Duration::from_secs(30);
1002
1003        let mut handles = Vec::new();
1004        let mut created_count = 0;
1005        let mut failed_count = 0;
1006
1007        for i in 0..count {
1008            log::debug!(" Creating startup browser instance {}/{}", i + 1, count);
1009
1010            // Per-browser timeout (15s per browser is reasonable)
1011            // This prevents one slow browser from blocking entire warmup
1012            let browser_result = tokio::time::timeout(
1013                Duration::from_secs(15),
1014                tokio::task::spawn_blocking({
1015                    let inner = Arc::clone(&self.inner);
1016                    move || inner.create_browser_direct()
1017                }),
1018            )
1019            .await;
1020
1021            match browser_result {
1022                Ok(Ok(Ok(tracked))) => {
1023                    log::debug!(
1024                        "✅ Browser {} created, performing validation test...",
1025                        tracked.id()
1026                    );
1027
1028                    // Test the browser with actual navigation
1029                    match tracked.browser().new_tab() {
1030                        Ok(tab) => {
1031                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());
1032
1033                            // Navigate to test page
1034                            let nav_result = tab.navigate_to(
1035                                "data:text/html,<html><body>Warmup test</body></html>",
1036                            );
1037                            if let Err(e) = nav_result {
1038                                log::warn!(
1039                                    "⚠️ Browser {} test navigation failed: {}",
1040                                    tracked.id(),
1041                                    e
1042                                );
1043                            } else {
1044                                log::trace!(
1045                                    "✅ Browser {} test: navigation successful",
1046                                    tracked.id()
1047                                );
1048                            }
1049
1050                            // Clean up test tab
1051                            let _ = tab.close(true);
1052
1053                            // Keep handle so browser stays alive
1054                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));
1055
1056                            created_count += 1;
1057                            log::info!(
1058                                "✅ Browser instance {}/{} ready and validated",
1059                                i + 1,
1060                                count
1061                            );
1062                        }
1063                        Err(e) => {
1064                            failed_count += 1;
1065                            log::error!(
1066                                "❌ Browser {} validation test failed: {}",
1067                                tracked.id(),
1068                                e
1069                            );
1070
1071                            // Remove from active tracking since it's broken
1072                            self.inner.remove_from_active(tracked.id());
1073                        }
1074                    }
1075                }
1076                Ok(Ok(Err(e))) => {
1077                    failed_count += 1;
1078                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
1079                }
1080                Ok(Err(e)) => {
1081                    failed_count += 1;
1082                    log::error!(
1083                        "❌ Browser {}/{} creation task panicked: {:?}",
1084                        i + 1,
1085                        count,
1086                        e
1087                    );
1088                }
1089                Err(_) => {
1090                    failed_count += 1;
1091                    log::error!(
1092                        "❌ Browser {}/{} creation timed out (15s limit)",
1093                        i + 1,
1094                        count
1095                    );
1096                }
1097            }
1098
1099            // === STAGGER LOGIC ===
1100            // If this is not the last browser, wait before creating the next one.
1101            // This ensures their TTLs are offset by `stagger_interval`.
1102            if i < count - 1 {
1103                log::info!(
1104                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
1105                    stagger_interval.as_secs()
1106                );
1107                tokio::time::sleep(stagger_interval).await;
1108            }
1109        }
1110
1111        log::info!(
1112            " Warmup creation phase: {} created, {} failed",
1113            created_count,
1114            failed_count
1115        );
1116
1117        // Return all browsers to pool by dropping handles
1118        log::debug!(" Returning {} warmup browsers to pool...", handles.len());
1119        drop(handles);
1120
1121        // Small delay to ensure Drop handlers complete
1122        tokio::time::sleep(Duration::from_millis(300)).await;
1123
1124        let final_stats = self.stats();
1125        log::info!(
1126            " Warmup internal completed - Pool: {}, Active: {}",
1127            final_stats.available,
1128            final_stats.active
1129        );
1130
1131        Ok(())
1132    }
1133
1134    /// Start the keep-alive monitoring thread.
1135    ///
1136    /// This background thread:
1137    /// - Pings all active browsers periodically
1138    /// - Removes unresponsive browsers after max_ping_failures
1139    /// - Retires browsers that exceed TTL
1140    /// - Spawns replacement browsers as needed
1141    ///
1142    /// # Critical Design Notes
1143    ///
1144    /// - Uses condvar for immediate shutdown signaling
1145    /// - Never holds locks during I/O operations
1146    /// - Uses consistent lock ordering (active -> pool)
1147    ///
1148    /// # Parameters
1149    ///
1150    /// * `inner` - Arc reference to pool state.
1151    ///
1152    /// # Returns
1153    ///
1154    /// JoinHandle for the background thread.
1155    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
1156        let ping_interval = inner.config().ping_interval;
1157        let max_failures = inner.config().max_ping_failures;
1158        let browser_ttl = inner.config().browser_ttl;
1159        let shutdown_signal = Arc::clone(inner.shutdown_signal());
1160
1161        log::info!(
1162            " Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
1163            ping_interval.as_secs(),
1164            max_failures,
1165            browser_ttl.as_secs() / 60
1166        );
1167
1168        thread::spawn(move || {
1169            log::info!(" Keep-alive thread started successfully");
1170
1171            // Track consecutive failures per browser ID
1172            let mut failure_counts: HashMap<u64, u32> = HashMap::new();
1173
1174            loop {
1175                // Wait for next ping interval OR shutdown signal (whichever comes first)
1176                // Using condvar instead of sleep allows immediate wake-up on shutdown
1177                let (lock, cvar) = &*shutdown_signal;
1178                let wait_result = {
1179                    let shutdown = lock.lock().unwrap();
1180                    cvar.wait_timeout(shutdown, ping_interval).unwrap()
1181                };
1182
1183                let shutdown_flag = *wait_result.0;
1184                let timed_out = wait_result.1.timed_out();
1185
1186                // Check if we were signaled to shutdown
1187                if shutdown_flag {
1188                    log::info!(" Keep-alive received shutdown signal via condvar");
1189                    break;
1190                }
1191
1192                // Double-check atomic shutdown flag (belt and suspenders)
1193                if inner.is_shutting_down() {
1194                    log::info!(" Keep-alive detected shutdown via atomic flag");
1195                    break;
1196                }
1197
1198                // If spuriously woken (not timeout, not shutdown), continue waiting
1199                if !timed_out {
1200                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
1201                    continue;
1202                }
1203
1204                log::trace!(" Keep-alive ping cycle starting...");
1205
1206                // Collect browsers to ping WITHOUT holding locks
1207                // This is critical: we clone the list and release the lock
1208                // before doing any I/O operations
1209                let browsers_to_ping = inner.get_active_browsers_snapshot();
1210                log::trace!(
1211                    "Keep-alive checking {} active browsers",
1212                    browsers_to_ping.len()
1213                );
1214
1215                // Now ping browsers without holding any locks
1216                let mut to_remove = Vec::new();
1217                let mut expired_browsers = Vec::new();
1218
1219                for (id, tracked) in browsers_to_ping {
1220                    // Check shutdown during ping loop (allows early exit)
1221                    if inner.is_shutting_down() {
1222                        log::info!("Shutdown detected during ping loop, exiting immediately");
1223                        return;
1224                    }
1225
1226                    // Check TTL before pinging (no point pinging expired browsers)
1227                    if tracked.is_expired(browser_ttl) {
1228                        log::info!(
1229                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
1230                            id,
1231                            tracked.age_minutes(),
1232                            browser_ttl.as_secs() / 60
1233                        );
1234                        expired_browsers.push(id);
1235                        continue; // Skip ping for expired browsers
1236                    }
1237
1238                    // Perform health check (this is I/O, no locks held)
1239                    use crate::traits::Healthcheck;
1240                    match tracked.ping() {
1241                        Ok(_) => {
1242                            // Reset failure count on success
1243                            if failure_counts.remove(&id).is_some() {
1244                                log::debug!("Browser {} ping successful, failure count reset", id);
1245                            }
1246                        }
1247                        Err(e) => {
1248                            // Only process failures if NOT shutting down
1249                            // (during shutdown, browsers may legitimately fail)
1250                            if !inner.is_shutting_down() {
1251                                let failures = failure_counts.entry(id).or_insert(0);
1252                                *failures += 1;
1253
1254                                log::warn!(
1255                                    "Browser {} ping failed (attempt {}/{}): {}",
1256                                    id,
1257                                    failures,
1258                                    max_failures,
1259                                    e
1260                                );
1261
1262                                // Remove if exceeded max failures
1263                                if *failures >= max_failures {
1264                                    log::error!(
1265                                        "Browser {} exceeded max ping failures ({}), marking for removal",
1266                                        id,
1267                                        max_failures
1268                                    );
1269                                    to_remove.push(id);
1270                                }
1271                            }
1272                        }
1273                    }
1274                }
1275
1276                // Check shutdown before cleanup (avoid work if shutting down)
1277                if inner.is_shutting_down() {
1278                    log::info!("Shutdown detected before cleanup, skipping and exiting");
1279                    break;
1280                }
1281
1282                // Handle TTL retirements first (they need replacement browsers)
1283                if !expired_browsers.is_empty() {
1284                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
1285                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
1286                }
1287
1288                // Handle failed browsers (remove from tracking and pool)
1289                if !to_remove.is_empty() {
1290                    log::warn!("Removing {} failed browsers from pool", to_remove.len());
1291
1292                    // Track how many were actually removed so we know how many to replace
1293                    let mut actual_removed_count = 0;
1294
1295                    // Remove dead browsers from active tracking
1296                    for id in &to_remove {
1297                        if inner.remove_from_active(*id).is_some() {
1298                            actual_removed_count += 1;
1299                            log::debug!("Removed failed browser {} from active tracking", id);
1300                        }
1301                        failure_counts.remove(id);
1302                    }
1303
1304                    log::debug!(
1305                        "Active browsers after failure cleanup: {}",
1306                        inner.active_count()
1307                    );
1308
1309                    // Clean up pool (remove dead browsers)
1310                    inner.remove_from_available(&to_remove);
1311
1312                    log::debug!("Pool size after cleanup: {}", inner.available_count());
1313
1314                    // Trigger replacement for the browsers we just removed
1315                    if actual_removed_count > 0 {
1316                        log::info!(
1317                            "Spawning {} replacement browsers for failed ones",
1318                            actual_removed_count
1319                        );
1320                        BrowserPoolInner::spawn_replacement_creation(
1321                            Arc::clone(&inner),
1322                            actual_removed_count,
1323                        );
1324                    }
1325                }
1326
1327                // Log keep-alive cycle summary
1328                log::debug!(
1329                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
1330                    inner.active_count(),
1331                    inner.available_count(),
1332                    failure_counts.len()
1333                );
1334            }
1335
1336            log::info!("Keep-alive thread exiting cleanly");
1337        })
1338    }
1339
1340    /// Handle browser retirement due to TTL expiration.
1341    ///
1342    /// This function:
1343    /// 1. Removes expired browsers from active and pool tracking
1344    /// 2. Spawns async tasks to create replacement browsers
1345    /// 3. Maintains pool target size
1346    ///
1347    /// # Critical Lock Ordering
1348    ///
1349    /// Acquires active -> pool locks together to prevent races.
1350    ///
1351    /// # Parameters
1352    ///
1353    /// * `inner` - Arc reference to pool state.
1354    /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1355    /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1356    fn handle_browser_retirement(
1357        inner: &Arc<BrowserPoolInner>,
1358        expired_ids: Vec<u64>,
1359        failure_counts: &mut HashMap<u64, u32>,
1360    ) {
1361        log::info!(
1362            "Retiring {} expired browsers (TTL enforcement)",
1363            expired_ids.len()
1364        );
1365
1366        // Remove expired browsers from active tracking
1367        let mut retired_count = 0;
1368        for id in &expired_ids {
1369            if inner.remove_from_active(*id).is_some() {
1370                retired_count += 1;
1371                log::debug!("Removed expired browser {} from active tracking", id);
1372            }
1373            // Clean up failure tracking
1374            failure_counts.remove(id);
1375        }
1376
1377        // Remove from pool as well
1378        inner.remove_from_available(&expired_ids);
1379
1380        log::debug!(
1381            "After retirement - Active: {}, Pooled: {}",
1382            inner.active_count(),
1383            inner.available_count()
1384        );
1385
1386        // Create replacement browsers to maintain target count
1387        if retired_count > 0 {
1388            log::info!(
1389                "Spawning {} replacement browsers for retired ones",
1390                retired_count
1391            );
1392            BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1393        } else {
1394            log::debug!("No browsers were actually retired (already removed)");
1395        }
1396    }
1397
1398    /// Asynchronously shutdown the pool (recommended method).
1399    ///
1400    /// This is the preferred shutdown method as it can properly await
1401    /// async task cancellation. Should be called during application shutdown.
1402    ///
1403    /// # Shutdown Process
1404    ///
1405    /// 1. Set atomic shutdown flag (stops new operations)
1406    /// 2. Signal condvar to wake keep-alive thread immediately
1407    /// 3. Wait for keep-alive thread to exit (with timeout)
1408    /// 4. Abort all replacement creation tasks
1409    /// 5. Wait briefly for cleanup
1410    /// 6. Log final statistics
1411    ///
1412    /// # Timeout
1413    ///
1414    /// Keep-alive thread is given 5 seconds to exit gracefully.
1415    /// If it doesn't exit, we log an error but continue shutdown.
1416    ///
1417    /// # Example
1418    ///
1419    /// ```rust,ignore
1420    /// let mut pool = /* ... */;
1421    ///
1422    /// // During application shutdown
1423    /// pool.shutdown_async().await;
1424    /// ```
1425    pub async fn shutdown_async(&mut self) {
1426        log::info!("Shutting down browser pool (async mode)...");
1427
1428        // Step 1: Set shutdown flag (prevents new operations)
1429        self.inner.set_shutting_down(true);
1430        log::debug!("Shutdown flag set");
1431
1432        // Step 2: Signal condvar to wake keep-alive thread immediately
1433        // This is critical - without this, keep-alive waits for full ping_interval
1434        {
1435            let (lock, cvar) = &**self.inner.shutdown_signal();
1436            let mut shutdown = lock.lock().unwrap();
1437            *shutdown = true;
1438            cvar.notify_all();
1439            log::debug!("Shutdown signal sent to keep-alive thread");
1440        } // Lock released here
1441
1442        // Step 3: Wait for keep-alive thread to exit
1443        if let Some(handle) = self.keep_alive_handle.take() {
1444            log::debug!("Waiting for keep-alive thread to exit...");
1445
1446            // Wrap thread join in spawn_blocking to make it async-friendly
1447            let join_task = tokio::task::spawn_blocking(move || handle.join());
1448
1449            // Give it 5 seconds to exit gracefully
1450            match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1451                Ok(Ok(Ok(_))) => {
1452                    log::info!("Keep-alive thread stopped cleanly");
1453                }
1454                Ok(Ok(Err(_))) => {
1455                    log::error!("Keep-alive thread panicked during shutdown");
1456                }
1457                Ok(Err(_)) => {
1458                    log::error!("Keep-alive join task panicked");
1459                }
1460                Err(_) => {
1461                    log::error!("Keep-alive thread didn't exit within 5s timeout");
1462                }
1463            }
1464        } else {
1465            log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1466        }
1467
1468        // Step 4: Abort all replacement creation tasks
1469        log::info!("Aborting replacement creation tasks...");
1470        let aborted_count = self.inner.abort_replacement_tasks();
1471        if aborted_count > 0 {
1472            log::info!("Aborted {} replacement tasks", aborted_count);
1473        } else {
1474            log::debug!("No replacement tasks to abort");
1475        }
1476
1477        // Step 5: Small delay to let aborted tasks clean up
1478        tokio::time::sleep(Duration::from_millis(100)).await;
1479
1480        // Step 6: Log final statistics
1481        let stats = self.stats();
1482        log::info!(
1483            "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1484            stats.available,
1485            stats.active,
1486            stats.total
1487        );
1488    }
1489
1490    /// Synchronously shutdown the pool (fallback method).
1491    ///
1492    /// This is a simplified shutdown for use in Drop or non-async contexts.
1493    /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
1494    ///
1495    /// # Note
1496    ///
1497    /// This method doesn't wait for replacement tasks to finish since
1498    /// there's no async runtime available. Tasks are aborted but may not
1499    /// have cleaned up yet.
1500    pub fn shutdown(&mut self) {
1501        log::debug!("Calling synchronous shutdown...");
1502        self.shutdown_sync();
1503    }
1504
1505    /// Internal synchronous shutdown implementation.
1506    fn shutdown_sync(&mut self) {
1507        log::info!("Shutting down browser pool (sync mode)...");
1508
1509        // Set shutdown flag
1510        self.inner.set_shutting_down(true);
1511        log::debug!("Shutdown flag set");
1512
1513        // Signal condvar (same as async version)
1514        {
1515            let (lock, cvar) = &**self.inner.shutdown_signal();
1516            let mut shutdown = lock.lock().unwrap();
1517            *shutdown = true;
1518            cvar.notify_all();
1519            log::debug!("Shutdown signal sent");
1520        }
1521
1522        // Wait for keep-alive thread
1523        if let Some(handle) = self.keep_alive_handle.take() {
1524            log::debug!("Joining keep-alive thread (sync)...");
1525
1526            match handle.join() {
1527                Ok(_) => log::info!("Keep-alive thread stopped"),
1528                Err(_) => log::error!("Keep-alive thread panicked"),
1529            }
1530        }
1531
1532        // Abort replacement tasks (best effort - they won't make progress without runtime)
1533        let aborted_count = self.inner.abort_replacement_tasks();
1534        if aborted_count > 0 {
1535            log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1536        }
1537
1538        let stats = self.stats();
1539        log::info!(
1540            "Sync shutdown complete - Available: {}, Active: {}",
1541            stats.available,
1542            stats.active
1543        );
1544    }
1545
1546    /// Get a reference to the inner pool state.
1547    ///
1548    /// This is primarily for internal use and testing.
1549    #[doc(hidden)]
1550    #[allow(dead_code)]
1551    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
1552        &self.inner
1553    }
1554}
1555
1556impl Drop for BrowserPool {
1557    /// Automatic cleanup when pool is dropped.
1558    ///
1559    /// This ensures resources are released even if shutdown wasn't called explicitly.
1560    /// Uses sync shutdown since Drop can't be async.
1561    fn drop(&mut self) {
1562        log::debug!("� BrowserPool Drop triggered - running cleanup");
1563
1564        // Only shutdown if not already done
1565        if !self.inner.is_shutting_down() {
1566            log::warn!("� BrowserPool dropped without explicit shutdown - cleaning up");
1567            self.shutdown();
1568        } else {
1569            log::debug!(" Pool already shutdown, Drop is no-op");
1570        }
1571    }
1572}
1573
1574// ============================================================================
1575// BrowserPoolBuilder
1576// ============================================================================
1577
1578/// Builder for constructing a [`BrowserPool`] with validation.
1579///
1580/// This is the recommended way to create a pool as it validates
1581/// configuration and provides sensible defaults.
1582///
1583/// # Example
1584///
1585/// ```rust,ignore
1586/// use std::time::Duration;
1587/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
1588///
1589/// let pool = BrowserPool::builder()
1590///     .config(
1591///         BrowserPoolConfigBuilder::new()
1592///             .max_pool_size(10)
1593///             .warmup_count(5)
1594///             .browser_ttl(Duration::from_secs(7200))
1595///             .build()?
1596///     )
1597///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1598///     .enable_keep_alive(true)
1599///     .build()?;
1600/// ```
1601pub struct BrowserPoolBuilder {
1602    /// Optional configuration (uses default if not provided).
1603    config: Option<BrowserPoolConfig>,
1604
1605    /// Browser factory (required).
1606    factory: Option<Box<dyn BrowserFactory>>,
1607
1608    /// Whether to enable keep-alive thread (default: true).
1609    enable_keep_alive: bool,
1610}
1611
1612impl BrowserPoolBuilder {
1613    /// Create a new builder with defaults.
1614    ///
1615    /// # Example
1616    ///
1617    /// ```rust,ignore
1618    /// let builder = BrowserPoolBuilder::new();
1619    /// ```
1620    pub fn new() -> Self {
1621        Self {
1622            config: None,
1623            factory: None,
1624            enable_keep_alive: true,
1625        }
1626    }
1627
1628    /// Set custom configuration.
1629    ///
1630    /// If not called, uses [`BrowserPoolConfig::default()`].
1631    ///
1632    /// # Parameters
1633    ///
1634    /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1635    ///
1636    /// # Example
1637    ///
1638    /// ```rust,ignore
1639    /// let config = BrowserPoolConfigBuilder::new()
1640    ///     .max_pool_size(10)
1641    ///     .build()?;
1642    ///
1643    /// let pool = BrowserPool::builder()
1644    ///     .config(config)
1645    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1646    ///     .build()?;
1647    /// ```
1648    pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1649        self.config = Some(config);
1650        self
1651    }
1652
1653    /// Set browser factory (required).
1654    ///
1655    /// The factory is responsible for creating browser instances.
1656    /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1657    ///
1658    /// # Parameters
1659    ///
1660    /// * `factory` - A boxed [`BrowserFactory`] implementation.
1661    ///
1662    /// # Example
1663    ///
1664    /// ```rust,ignore
1665    /// let pool = BrowserPool::builder()
1666    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1667    ///     .build()?;
1668    /// ```
1669    pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1670        self.factory = Some(factory);
1671        self
1672    }
1673
1674    /// Enable or disable keep-alive thread.
1675    ///
1676    /// Keep-alive should be disabled only for testing.
1677    /// Production use should always have it enabled.
1678    ///
1679    /// # Parameters
1680    ///
1681    /// * `enable` - Whether to enable the keep-alive thread.
1682    ///
1683    /// # Example
1684    ///
1685    /// ```rust,ignore
1686    /// // Disable for tests
1687    /// let pool = BrowserPool::builder()
1688    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1689    ///     .enable_keep_alive(false)
1690    ///     .build()?;
1691    /// ```
1692    pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1693        self.enable_keep_alive = enable;
1694        self
1695    }
1696
1697    /// Build the browser pool.
1698    ///
1699    /// # Errors
1700    ///
1701    /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1702    ///
1703    /// # Panics
1704    ///
1705    /// Panics if called outside a tokio runtime context.
1706    ///
1707    /// # Example
1708    ///
1709    /// ```rust,ignore
1710    /// let pool = BrowserPool::builder()
1711    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1712    ///     .build()?;
1713    /// ```
1714    pub fn build(self) -> Result<BrowserPool> {
1715        let config = self.config.unwrap_or_default();
1716        let factory = self.factory.ok_or_else(|| {
1717            BrowserPoolError::Configuration("No browser factory provided".to_string())
1718        })?;
1719
1720        log::info!("️ Building browser pool with config: {:?}", config);
1721
1722        // Create inner state
1723        let inner = BrowserPoolInner::new(config, factory);
1724
1725        // Start keep-alive thread if enabled
1726        let keep_alive_handle = if self.enable_keep_alive {
1727            log::info!(" Starting keep-alive monitoring thread");
1728            Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1729        } else {
1730            log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1731            None
1732        };
1733
1734        log::info!("✅ Browser pool built successfully");
1735
1736        Ok(BrowserPool {
1737            inner,
1738            keep_alive_handle,
1739        })
1740    }
1741}
1742
1743impl Default for BrowserPoolBuilder {
1744    fn default() -> Self {
1745        Self::new()
1746    }
1747}
1748
1749// ============================================================================
1750// Environment Initialization (feature-gated)
1751// ============================================================================
1752
/// Initialize browser pool from environment variables.
///
/// This is a convenience function for common initialization patterns.
/// It loads the pool configuration and an optional Chrome path from the
/// environment, builds a pool with the keep-alive thread enabled, warms it
/// up, and returns it wrapped for shared use.
///
/// # Feature Flag
///
/// This function is only available when the `env-config` feature is enabled.
///
/// # Environment Variables
///
/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
/// - `CHROME_PATH`: Custom Chrome binary path (optional)
///
/// # Returns
///
/// `Arc<Mutex<BrowserPool>>` ready for use in web handlers.
///
/// # Errors
///
/// - Returns error if configuration is invalid.
/// - Returns error if the pool cannot be built.
/// - Returns error if warmup fails.
///
/// # Example
///
/// ```rust,ignore
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     env_logger::init();
///
///     let pool = init_browser_pool().await?;
///
///     // Use pool in handlers...
///
///     Ok(())
/// }
/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<Mutex<BrowserPool>>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Load configuration from environment
    let config = from_env()?;

    // Get optional Chrome path
    let chrome_path = chrome_path_from_env();

    log::info!("Pool configuration from environment:");
    log::info!("   - Max pool size: {}", config.max_pool_size);
    log::info!("   - Warmup count: {}", config.warmup_count);
    log::info!(
        "   - Browser TTL: {}s ({}min)",
        config.browser_ttl.as_secs(),
        config.browser_ttl.as_secs() / 60
    );
    log::info!("   - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        "   - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // The warmup count is the only config value needed after the config is
    // handed to the builder; copying it here avoids cloning the whole config.
    let warmup_count = config.warmup_count;

    // Create factory based on whether custom path is provided
    let factory: Box<dyn BrowserFactory> = match chrome_path {
        Some(path) => {
            log::info!("Using custom Chrome path: {}", path);
            Box::new(ChromeBrowserFactory::with_path(path))
        }
        None => {
            log::info!("Using auto-detected Chrome browser");
            Box::new(ChromeBrowserFactory::with_defaults())
        }
    };

    // Create browser pool with Chrome factory
    log::debug!("Building browser pool...");
    let pool = BrowserPool::builder()
        .config(config)
        .factory(factory)
        .enable_keep_alive(true)
        .build()
        .map_err(|e| {
            log::error!("❌ Failed to create browser pool: {}", e);
            e
        })?;

    log::info!("✅ Browser pool created successfully");

    // Warmup the pool
    log::info!(
        "Warming up browser pool with {} instances...",
        warmup_count
    );
    pool.warmup().await.map_err(|e| {
        log::error!("❌ Failed to warmup pool: {}", e);
        e
    })?;

    let stats = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        stats.available,
        stats.active,
        stats.total
    );

    Ok(pool.into_shared())
}
1866
1867// ============================================================================
1868// Unit Tests
1869// ============================================================================
1870
#[cfg(test)]
mod tests {
    use super::*;

    /// Building a pool without a factory must fail.
    ///
    /// The pool cannot create browser instances without a factory, so
    /// `build()` is expected to return a `Configuration` error that names
    /// the missing factory.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised from inside a tokio runtime
        let runtime = tokio::runtime::Runtime::new().unwrap();

        runtime.block_on(async {
            let pool_config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // No `.factory(...)` call here, on purpose
            let outcome = BrowserPool::builder().config(pool_config).build();

            assert!(outcome.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(message)) = outcome {
                assert!(
                    message.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    message
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` yields an empty builder with keep-alive switched on.
    #[test]
    fn test_builder_default() {
        let BrowserPoolBuilder {
            config,
            factory,
            enable_keep_alive,
        } = BrowserPoolBuilder::default();

        assert!(config.is_none());
        assert!(factory.is_none());
        assert!(enable_keep_alive);
    }

    /// `enable_keep_alive(false)` turns the keep-alive flag off.
    #[test]
    fn test_builder_disable_keep_alive() {
        let without_keep_alive = BrowserPoolBuilder::new().enable_keep_alive(false);
        assert!(!without_keep_alive.enable_keep_alive);
    }
}