Skip to main content

html2pdf_api/
pool.rs

1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//!   ├─ BrowserPoolInner (shared state)
21//!   │   ├─ available: Vec<TrackedBrowser>  (pooled, ready to use)
22//!   │   ├─ active: HashMap<id, TrackedBrowser>  (every live browser, pooled or checked out; tracked for health)
23//!   │   └─ replacement_tasks: Vec<JoinHandle>  (async replacement creators)
24//!   └─ keep_alive_handle: JoinHandle  (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//!     // Create pool
41//!     let mut pool = BrowserPool::builder()
42//!         .config(
43//!             BrowserPoolConfigBuilder::new()
44//!                 .max_pool_size(5)
45//!                 .warmup_count(3)
46//!                 .build()?
47//!         )
48//!         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//!         .build()?;
50//!
51//!     // Warmup
52//!     pool.warmup().await?;
53//!
54//!     // Use browsers
55//!     {
56//!         let browser = pool.get()?;
57//!         let tab = browser.new_tab()?;
58//!         // ... do work ...
59//!     } // browser returned to pool automatically
60//!
61//!     // Shutdown
62//!     pool.shutdown_async().await;
63//!
64//!     Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
87/// Internal shared state for the browser pool.
88///
89/// This struct contains all shared state and is wrapped in Arc for thread-safe
90/// sharing between the pool, handles, and background threads.
91///
92/// # Lock Ordering (CRITICAL)
93///
94/// Always acquire locks in this order to prevent deadlocks:
95/// 1. `active` (browsers currently in use)
96/// 2. `available` (browsers in pool ready for use)
97///
98/// Never hold locks during I/O operations or browser creation.
99///
100/// # Thread Safety
101///
102/// All fields are protected by appropriate synchronization primitives:
103/// - `Mutex` for mutable collections
104/// - `AtomicBool` for shutdown flag
105/// - `Arc` for shared ownership
106pub(crate) struct BrowserPoolInner {
107    /// Configuration (immutable after creation).
108    config: BrowserPoolConfig,
109
110    /// Browsers available for checkout (not currently in use).
111    ///
112    /// Protected by Mutex. Browsers are moved from here when checked out
113    /// and returned here when released (if pool not full).
114    available: Mutex<Vec<TrackedBrowser>>,
115
116    /// All browsers that exist (both pooled and checked out).
117    ///
118    /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
119    /// Maps browser ID -> TrackedBrowser for fast lookup.
120    active: Mutex<HashMap<u64, TrackedBrowser>>,
121
122    /// Factory for creating new browser instances.
123    factory: Box<dyn BrowserFactory>,
124
125    /// Atomic flag indicating shutdown in progress.
126    ///
127    /// Checked before expensive operations. Once set, no new operations start.
128    shutting_down: AtomicBool,
129
130    /// Background tasks creating replacement browsers.
131    ///
132    /// Tracked so we can abort them during shutdown.
133    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,
134
135    /// Handle to tokio runtime for spawning async tasks.
136    ///
137    /// Captured at creation time to allow spawning from any context.
138    runtime_handle: tokio::runtime::Handle,
139
140    /// Shutdown signaling mechanism for keep-alive thread.
141    ///
142    /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
143    /// instead of waiting for full ping_interval.
144    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
145}
146
147impl BrowserPoolInner {
148    /// Create a new browser pool inner state.
149    ///
150    /// # Parameters
151    ///
152    /// * `config` - Validated configuration.
153    /// * `factory` - Browser factory for creating instances.
154    ///
155    /// # Panics
156    ///
157    /// Panics if called outside a tokio runtime context.
158    pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159        log::info!(
160            "🚀 Initializing browser pool with capacity {}",
161            config.max_pool_size
162        );
163        log::debug!(
164            "📋 Pool config: warmup={}, TTL={}s, ping_interval={}s",
165            config.warmup_count,
166            config.browser_ttl.as_secs(),
167            config.ping_interval.as_secs()
168        );
169
170        // Capture runtime handle for spawning async tasks
171        // This allows us to spawn from sync contexts (like Drop)
172        let runtime_handle = tokio::runtime::Handle::current();
173
174        Arc::new(Self {
175            config,
176            available: Mutex::new(Vec::new()),
177            active: Mutex::new(HashMap::new()),
178            factory,
179            shutting_down: AtomicBool::new(false),
180            replacement_tasks: Mutex::new(Vec::new()),
181            runtime_handle,
182            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183        })
184    }
185
186    /// Create a browser directly without using the pool.
187    ///
188    /// Used for:
189    /// - Initial warmup
190    /// - Replacing failed browsers
191    /// - When pool is empty
192    ///
193    /// # Important
194    ///
195    /// Adds the browser to `active` tracking immediately for health monitoring.
196    ///
197    /// # Errors
198    ///
199    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
200    /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
201    pub(crate) fn create_browser_direct(&self) -> Result<TrackedBrowser> {
202        // Early exit if shutting down (don't waste time creating browsers)
203        if self.shutting_down.load(Ordering::Acquire) {
204            log::debug!("🛑 Skipping browser creation - pool is shutting down");
205            return Err(BrowserPoolError::ShuttingDown);
206        }
207
208        log::debug!("📦 Creating new browser directly via factory...");
209
210        // Factory handles all Chrome launch complexity
211        let browser = self.factory.create()?;
212
213        // Wrap with tracking metadata
214        let tracked = TrackedBrowser::new(browser)?;
215        let id = tracked.id();
216
217        // Add to active tracking immediately for health monitoring
218        // This ensures keep-alive thread will monitor it
219        if let Ok(mut active) = self.active.lock() {
220            active.insert(id, tracked.clone());
221            log::debug!(
222                "📊 Browser {} added to active tracking (total active: {})",
223                id,
224                active.len()
225            );
226        } else {
227            log::warn!(
228                "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
229                id
230            );
231        }
232
233        log::info!("✅ Created new browser with ID {}", id);
234        Ok(tracked)
235    }
236
237    /// Get a browser from pool or create a new one.
238    ///
239    /// # Algorithm
240    ///
241    /// 1. Loop through pooled browsers
242    /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
243    ///    - If near expiry: Skip (drop) it immediately.
244    ///    - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
245    /// 3. For valid browsers, perform detailed health check (without holding locks)
246    /// 4. If healthy, return it
247    /// 5. If unhealthy, remove from active tracking and try next
248    /// 6. If pool empty or all skipped/unhealthy, create new browser
249    ///
250    /// # Critical: Lock-Free Health Checks
251    ///
252    /// Health checks are performed WITHOUT holding locks to avoid blocking
253    /// other threads. This is why we use a loop pattern instead of iterator.
254    ///
255    /// # Returns
256    ///
257    /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
258    ///
259    /// # Errors
260    ///
261    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
262    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
263    pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
264        log::debug!("🔍 Attempting to get browser from pool...");
265
266        // Try to get from pool - LOOP pattern to avoid holding lock during health checks
267        // This is critical for concurrency: we release the lock between attempts
268        loop {
269            // Acquire lock briefly to pop one browser
270            let tracked_opt = {
271                let mut available = self.available.lock().unwrap_or_else(|poisoned| {
272                    log::warn!("Pool available lock poisoned, recovering");
273                    poisoned.into_inner()
274                });
275                let popped = available.pop();
276                log::trace!("📊 Pool size after pop: {}", available.len());
277                popped
278            }; // Lock released here - critical for performance
279
280            if let Some(tracked) = tracked_opt {
281                // === LOGIC START: Grace Period Check ===
282                let age = tracked.created_at().elapsed();
283                let ttl = self.config.browser_ttl;
284
285                // Safety margin matching your stagger interval
286                let safety_margin = Duration::from_secs(30);
287
288                // If browser is about to expire, don't use it.
289                if age + safety_margin > ttl {
290                    log::debug!(
291                        "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
292                        tracked.id(),
293                        age.as_secs()
294                    );
295
296                    // CRITICAL: We do NOT remove/recreate here.
297                    // By simply 'continuing', we drop this 'tracked' instance.
298                    // 1. It is NOT returned to 'available' (so no user gets it).
299                    // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
300                    // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
301                    continue;
302                }
303                // === LOGIC END: Grace Period Check ===
304
305                log::debug!("🔍 Testing browser {} from pool for health...", tracked.id());
306
307                // Detailed health check WITHOUT holding any locks
308                // This prevents blocking other threads during I/O
309                match tracked.browser().new_tab() {
310                    Ok(tab) => {
311                        log::trace!(
312                            "✅ Browser {} health check: new_tab() successful",
313                            tracked.id()
314                        );
315
316                        // Test navigation capability (full health check)
317                        match tab
318                            .navigate_to("data:text/html,<html><body>Health check</body></html>")
319                        {
320                            Ok(_) => {
321                                log::trace!(
322                                    "✅ Browser {} health check: navigation successful",
323                                    tracked.id()
324                                );
325
326                                // Test cleanup capability
327                                match tab.close(true) {
328                                    Ok(_) => {
329                                        log::debug!(
330                                            "✅ Browser {} passed full health check - ready for use",
331                                            tracked.id()
332                                        );
333
334                                        // Get pool size for logging (brief lock)
335                                        let pool_size = {
336                                            let available = self.available.lock().unwrap_or_else(|poisoned| {
337                    log::warn!("Pool available lock poisoned, recovering");
338                    poisoned.into_inner()
339                });
340                                            available.len()
341                                        };
342
343                                        log::info!(
344                                            "♻️ Reusing healthy browser {} from pool (pool size: {})",
345                                            tracked.id(),
346                                            pool_size
347                                        );
348
349                                        // Return healthy browser wrapped in RAII handle
350                                        return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
351                                    }
352                                    Err(e) => {
353                                        log::warn!(
354                                            "❌ Browser {} health check: tab close failed: {}",
355                                            tracked.id(),
356                                            e
357                                        );
358                                    }
359                                }
360                            }
361                            Err(e) => {
362                                log::warn!(
363                                    "❌ Browser {} health check: navigation failed: {}",
364                                    tracked.id(),
365                                    e
366                                );
367                            }
368                        }
369                    }
370                    Err(e) => {
371                        log::warn!(
372                            "❌ Browser {} health check: new_tab() failed: {}",
373                            tracked.id(),
374                            e
375                        );
376                    }
377                }
378
379                // If we reach here, health check failed
380                // Remove from active tracking (browser is dead)
381                log::warn!(
382                    "🗑️ Removing unhealthy browser {} from active tracking",
383                    tracked.id()
384                );
385                {
386                    let mut active = self.active.lock().unwrap_or_else(|poisoned| {
387                    log::warn!("Pool active lock poisoned, recovering");
388                    poisoned.into_inner()
389                });
390                    active.remove(&tracked.id());
391                    log::debug!("📊 Active browsers after removal: {}", active.len());
392                }
393
394                // Continue loop to try next browser in pool
395                log::debug!("🔍 Trying next browser from pool...");
396            } else {
397                // Pool is empty, break to create new browser
398                log::debug!("📥 Pool is empty, will create new browser");
399                break;
400            }
401        }
402
403        // Pool is empty or no healthy browsers found
404        log::info!("📦 Creating new browser (pool was empty or all browsers unhealthy)");
405
406        let tracked = self.create_browser_direct()?;
407
408        log::info!("✅ Returning newly created browser {}", tracked.id());
409        Ok(BrowserHandle::new(tracked, Arc::clone(self)))
410    }
411
412    /// Return a browser to the pool (called by BrowserHandle::drop).
413    ///
414    /// # Critical Lock Ordering
415    ///
416    /// Always acquires locks in order: active -> available.
417    /// Both locks are held together to prevent race conditions.
418    ///
419    /// # Algorithm
420    ///
421    /// 1. Acquire both locks (order: active, then available)
422    /// 2. Verify browser is in active tracking
423    /// 3. Check TTL - if expired, retire and trigger replacement
424    /// 4. If pool has space, add to available pool
425    /// 5. If pool full, remove from active (browser gets dropped)
426    ///
427    /// # Parameters
428    ///
429    /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
430    /// * `tracked` - The browser being returned.
431    pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: TrackedBrowser) {
432        log::debug!("♻️ Returning browser {} to pool...", tracked.id());
433
434        // Early exit if shutting down (don't waste time managing pool)
435        if self_arc.shutting_down.load(Ordering::Acquire) {
436            log::debug!(
437                "🛑 Pool shutting down, not returning browser {}",
438                tracked.id()
439            );
440            return;
441        }
442
443        // CRITICAL: Always acquire in order: active -> pool
444        // Holding both locks prevents ALL race conditions:
445        // - Prevents concurrent modifications to browser state
446        // - Prevents duplicate returns
447        // - Ensures pool size limits are respected
448        let mut active = self_arc.active.lock().unwrap_or_else(|poisoned| {
449            log::warn!("Pool active lock poisoned, recovering");
450            poisoned.into_inner()
451        });
452        let mut pool = self_arc.available.lock().unwrap_or_else(|poisoned| {
453            log::warn!("Pool available lock poisoned, recovering");
454            poisoned.into_inner()
455        });
456
457        // Verify browser is actually tracked (sanity check)
458        if !active.contains_key(&tracked.id()) {
459            log::warn!(
460                "❌ Browser {} not in active tracking (probably already removed), skipping return",
461                tracked.id()
462            );
463            return;
464        }
465
466        // Check TTL before returning to pool
467        // Expired browsers should be retired to prevent memory leaks
468        if tracked.is_expired(self_arc.config.browser_ttl) {
469            log::info!(
470                "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
471                tracked.id(),
472                tracked.age_minutes(),
473                self_arc.config.browser_ttl.as_secs() / 60
474            );
475
476            // Remove from active tracking
477            active.remove(&tracked.id());
478            log::debug!("📊 Active browsers after TTL retirement: {}", active.len());
479
480            // Release locks before spawning replacement task
481            drop(active);
482            drop(pool);
483
484            // Trigger async replacement creation (non-blocking)
485            log::debug!("🔍 Triggering replacement browser creation for expired browser");
486            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
487            return;
488        }
489
490        // Prevent duplicate returns (defensive programming)
491        if pool.iter().any(|b| b.id() == tracked.id()) {
492            log::warn!(
493                "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
494                tracked.id()
495            );
496            return;
497        }
498
499        // Check if pool has space for this browser
500        if pool.len() < self_arc.config.max_pool_size {
501            // Add to pool for reuse
502            pool.push(tracked.clone());
503            log::info!(
504                "♻️ Browser {} returned to pool (pool size: {}/{})",
505                tracked.id(),
506                pool.len(),
507                self_arc.config.max_pool_size
508            );
509        } else {
510            // Pool is full, remove from tracking (browser will be dropped)
511            log::debug!(
512                "️ Pool full ({}/{}), removing browser {} from system",
513                pool.len(),
514                self_arc.config.max_pool_size,
515                tracked.id()
516            );
517            active.remove(&tracked.id());
518            log::debug!("📊 Active browsers after removal: {}", active.len());
519        }
520    }
521
522    /// Asynchronously create replacement browsers (internal helper).
523    ///
524    /// This is the async work function that actually creates browsers.
525    /// It's spawned as a tokio task by `spawn_replacement_creation`.
526    ///
527    /// # Algorithm
528    ///
529    /// 1. Check shutdown flag before each creation
530    /// 2. Check pool space before each creation
531    /// 3. Use spawn_blocking for CPU-bound browser creation
532    /// 4. Add successful browsers to pool
533    /// 5. Log detailed status
534    ///
535    /// # Parameters
536    ///
537    /// * `inner` - Arc reference to pool state.
538    /// * `count` - Number of browsers to attempt to create.
539    async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
540        log::info!(
541            "🔍 Starting async replacement creation for {} browsers",
542            count
543        );
544
545        let mut created_count = 0;
546        let mut failed_count = 0;
547
548        for i in 0..count {
549            // Check shutdown flag before each expensive operation
550            if inner.shutting_down.load(Ordering::Acquire) {
551                log::info!(
552                    "🛑 Shutdown detected during replacement creation, stopping at {}/{}",
553                    i,
554                    count
555                );
556                break;
557            }
558
559            // Check if pool has space BEFORE creating (avoid wasted work)
560            let pool_has_space = {
561                let pool = inner.available.lock().unwrap_or_else(|poisoned| {
562                    log::warn!("Pool available lock poisoned, recovering");
563                    poisoned.into_inner()
564                });
565                let has_space = pool.len() < inner.config.max_pool_size;
566                log::trace!(
567                    "📊 Pool space check: {}/{} (has space: {})",
568                    pool.len(),
569                    inner.config.max_pool_size,
570                    has_space
571                );
572                has_space
573            };
574
575            if !pool_has_space {
576                log::warn!(
577                    "⚠️ Pool is full, stopping replacement creation at {}/{}",
578                    i,
579                    count
580                );
581                break;
582            }
583
584            log::debug!("📦 Creating replacement browser {}/{}", i + 1, count);
585
586            // Use spawn_blocking for CPU-bound browser creation
587            // This prevents blocking the async runtime
588            let inner_clone = Arc::clone(&inner);
589            let result =
590                tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;
591
592            match result {
593                Ok(Ok(tracked)) => {
594                    let id = tracked.id();
595
596                    // Add to pool (with space check to handle race conditions)
597                    let mut pool = inner.available.lock().unwrap_or_else(|poisoned| {
598                        log::warn!("Pool available lock poisoned, recovering");
599                        poisoned.into_inner()
600                    });
601
602                    // Double-check space (another thread might have added browsers)
603                    if pool.len() < inner.config.max_pool_size {
604                        pool.push(tracked);
605                        created_count += 1;
606                        log::info!(
607                            "✅ Created replacement browser {} and added to pool ({}/{})",
608                            id,
609                            i + 1,
610                            count
611                        );
612                    } else {
613                        log::warn!(
614                            "⚠️ Pool became full during creation, replacement browser {} kept in active only",
615                            id
616                        );
617                        created_count += 1; // Still count as created (just not pooled)
618                    }
619                }
620                Ok(Err(e)) => {
621                    failed_count += 1;
622                    log::error!(
623                        "❌ Failed to create replacement browser {}/{}: {}",
624                        i + 1,
625                        count,
626                        e
627                    );
628                }
629                Err(e) => {
630                    failed_count += 1;
631                    log::error!(
632                        "❌ Replacement browser {}/{} task panicked: {:?}",
633                        i + 1,
634                        count,
635                        e
636                    );
637                }
638            }
639        }
640
641        // Final status report
642        let pool_size = inner.available.lock().unwrap_or_else(|poisoned| {
643            log::warn!("Pool available lock poisoned, recovering");
644            poisoned.into_inner()
645        }).len();
646        let active_size = inner.active.lock().unwrap_or_else(|poisoned| {
647            log::warn!("Pool active lock poisoned, recovering");
648            poisoned.into_inner()
649        }).len();
650
651        log::info!(
652            "🏁 Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
653            created_count,
654            count,
655            failed_count,
656            pool_size,
657            active_size
658        );
659    }
660
661    /// Spawn a background task to create replacement browsers.
662    ///
663    /// This is non-blocking and returns immediately. The actual browser
664    /// creation happens in a tokio task tracked in `replacement_tasks`.
665    ///
666    /// # Why Async
667    ///
668    /// Browser creation is slow (1-3 seconds per browser). Spawning async
669    /// tasks prevents blocking the caller.
670    ///
671    /// # Task Tracking
672    ///
673    /// Tasks are tracked so we can abort them during shutdown.
674    ///
675    /// # Parameters
676    ///
677    /// * `inner` - Arc reference to pool state.
678    /// * `count` - Number of replacement browsers to create.
679    pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
680        log::info!(
681            "📥 Spawning async task to create {} replacement browsers",
682            count
683        );
684
685        // Clone Arc for moving into async task
686        let inner_for_task = Arc::clone(&inner);
687
688        // Spawn async task on the captured runtime
689        let task_handle = inner.runtime_handle.spawn(async move {
690            Self::spawn_replacement_creation_async(inner_for_task, count).await;
691        });
692
693        // Track task handle for shutdown cleanup
694        if let Ok(mut tasks) = inner.replacement_tasks.lock() {
695            // Clean up finished tasks while we have the lock (housekeeping)
696            let original_count = tasks.len();
697            tasks.retain(|h| !h.is_finished());
698            let cleaned = original_count - tasks.len();
699
700            if cleaned > 0 {
701                log::trace!("🧹 Cleaned up {} finished replacement tasks", cleaned);
702            }
703
704            // Add new task
705            tasks.push(task_handle);
706
707            log::debug!("📋 Now tracking {} active replacement tasks", tasks.len());
708        } else {
709            log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
710        }
711    }
712
713    /// Get the pool configuration.
714    #[inline]
715    pub(crate) fn config(&self) -> &BrowserPoolConfig {
716        &self.config
717    }
718
719    /// Check if the pool is shutting down.
720    #[inline]
721    pub(crate) fn is_shutting_down(&self) -> bool {
722        self.shutting_down.load(Ordering::Acquire)
723    }
724
725    /// Set the shutdown flag.
726    #[inline]
727    pub(crate) fn set_shutting_down(&self, value: bool) {
728        self.shutting_down.store(value, Ordering::Release);
729    }
730
731    /// Get the shutdown signal for the keep-alive thread.
732    #[inline]
733    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
734        &self.shutdown_signal
735    }
736
737    /// Get the available browsers count.
738    pub(crate) fn available_count(&self) -> usize {
739        self.available.lock().map(|g| g.len()).unwrap_or(0)
740    }
741
742    /// Get the active browsers count.
743    pub(crate) fn active_count(&self) -> usize {
744        self.active.lock().map(|g| g.len()).unwrap_or(0)
745    }
746
747    /// Get a snapshot of active browsers for health checking.
748    ///
749    /// Returns a cloned list to avoid holding locks during I/O.
750    pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, TrackedBrowser)> {
751        let active = self.active.lock().unwrap_or_else(|poisoned| {
752                    log::warn!("Pool active lock poisoned, recovering");
753                    poisoned.into_inner()
754                });
755        active
756            .iter()
757            .map(|(id, tracked)| (*id, tracked.clone()))
758            .collect()
759    }
760
761    /// Remove a browser from active tracking.
762    pub(crate) fn remove_from_active(&self, id: u64) -> Option<TrackedBrowser> {
763        let mut active = self.active.lock().unwrap_or_else(|poisoned| {
764            log::warn!("Pool active lock poisoned, recovering");
765            poisoned.into_inner()
766        });
767        active.remove(&id)
768    }
769
770    /// Remove browsers from the available pool by ID.
771    pub(crate) fn remove_from_available(&self, ids: &[u64]) {
772        let mut pool = self.available.lock().unwrap_or_else(|poisoned| {
773                    log::warn!("Pool available lock poisoned, recovering");
774                    poisoned.into_inner()
775                });
776        let original_size = pool.len();
777        pool.retain(|b| !ids.contains(&b.id()));
778        let removed = original_size - pool.len();
779        if removed > 0 {
780            log::debug!("🗑️ Removed {} browsers from available pool", removed);
781        }
782    }
783
784    /// Abort all replacement tasks.
785    pub(crate) fn abort_replacement_tasks(&self) -> usize {
786        if let Ok(mut tasks) = self.replacement_tasks.lock() {
787            let count = tasks.len();
788            for handle in tasks.drain(..) {
789                handle.abort();
790            }
791            count
792        } else {
793            0
794        }
795    }
796}
797
798// ============================================================================
799// BrowserPool
800// ============================================================================
801
/// Main browser pool with lifecycle management.
///
/// This is the public-facing API for the browser pool. It wraps the shared
/// internal state (`BrowserPoolInner`) and owns the handle of the keep-alive
/// monitoring thread.
///
/// # Overview
///
/// `BrowserPool` provides:
/// - Browser checkout via [`get()`](Self::get)
/// - Pool warmup via [`warmup()`](Self::warmup)
/// - Statistics via [`stats()`](Self::stats)
/// - Configuration access via [`config()`](Self::config)
/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
///
/// # Example
///
/// ```rust,no_run
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
/// use std::time::Duration;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create pool
///     let mut pool = BrowserPool::builder()
///         .config(
///             BrowserPoolConfigBuilder::new()
///                 .max_pool_size(5)
///                 .warmup_count(3)
///                 .build()?
///         )
///         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///         .build()?;
///
///     // Warmup
///     pool.warmup().await?;
///
///     // Use browsers
///     {
///         let browser = pool.get()?;
///         let tab = browser.new_tab()?;
///         // ... do work ...
///     } // browser returned to pool automatically
///
///     // Shutdown
///     pool.shutdown_async().await;
///
///     Ok(())
/// }
/// ```
///
/// # Thread Safety
///
/// `BrowserPool` uses fine-grained internal locks (`Mutex<Vec>`, `Mutex<HashMap>`)
/// so it is safe to share as `Arc<BrowserPool>` without an outer `Mutex`.
/// Use [`into_shared()`](Self::into_shared) for convenience.
///
/// Note that [`shutdown_async()`](Self::shutdown_async) and
/// [`shutdown()`](Self::shutdown) take `&mut self`, so they need exclusive
/// access to the pool.
pub struct BrowserPool {
    /// Shared internal state.
    ///
    /// Held in an `Arc` because the keep-alive thread and the warmup
    /// creation tasks each hold their own clone of it.
    inner: Arc<BrowserPoolInner>,

    /// Handle to keep-alive monitoring thread.
    ///
    /// Wrapped in `Option` so the shutdown paths can `take()` the handle and
    /// join the thread. `None` means keep-alive is disabled or the thread has
    /// already been stopped.
    keep_alive_handle: Option<JoinHandle<()>>,
}
865
866impl BrowserPool {
867    /// Convert pool into a shared `Arc<BrowserPool>` for use in web handlers.
868    ///
869    /// This is convenient for web frameworks that need shared state.
870    /// No outer `Mutex` is needed — the pool uses fine-grained internal locks.
871    ///
872    /// # Example
873    ///
874    /// ```rust,ignore
875    /// let pool = BrowserPool::builder()
876    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
877    ///     .build()?
878    ///     .into_shared();
879    ///
880    /// // Can now be cloned and shared across handlers
881    /// let pool_clone = Arc::clone(&pool);
882    /// ```
883    pub fn into_shared(self) -> Arc<BrowserPool> {
884        log::debug!("🔍 Converting BrowserPool into shared Arc<BrowserPool>");
885        Arc::new(self)
886    }
887
888    /// Create a new builder for constructing a BrowserPool.
889    ///
890    /// This is the recommended way to create a pool.
891    ///
892    /// # Example
893    ///
894    /// ```rust,ignore
895    /// let pool = BrowserPool::builder()
896    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
897    ///     .build()?;
898    /// ```
899    pub fn builder() -> BrowserPoolBuilder {
900        BrowserPoolBuilder::new()
901    }
902
903    /// Get a browser from the pool (or create one if empty).
904    ///
905    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
906    /// allowing transparent access to browser methods.
907    ///
908    /// # Automatic Return
909    ///
910    /// The browser is automatically returned to the pool when the handle
911    /// is dropped, even if your code panics (RAII pattern).
912    ///
913    /// # Errors
914    ///
915    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
916    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
917    /// - Returns [`BrowserPoolError::HealthCheckFailed`] if all pooled browsers are unhealthy.
918    ///
919    /// # Example
920    ///
921    /// ```rust,ignore
922    /// let browser = pool.get()?;
923    /// let tab = browser.new_tab()?;
924    /// tab.navigate_to("https://example.com")?;
925    /// // browser returned automatically when it goes out of scope
926    /// ```
927    pub fn get(&self) -> Result<BrowserHandle> {
928        log::trace!("🎯 BrowserPool::get() called");
929        self.inner.get_or_create_browser()
930    }
931
932    /// Get pool statistics snapshot.
933    ///
934    /// # Returns
935    ///
936    /// [`PoolStats`] containing:
937    /// - `available`: Browsers in pool ready for checkout
938    /// - `active`: All browsers (pooled + checked out)
939    /// - `total`: Currently same as `active` (for future expansion)
940    ///
941    /// # Example
942    ///
943    /// ```rust,ignore
944    /// let stats = pool.stats();
945    /// println!("Available: {}, Active: {}", stats.available, stats.active);
946    /// ```
947    pub fn stats(&self) -> PoolStats {
948        let available = self.inner.available_count();
949        let active = self.inner.active_count();
950
951        log::trace!("📊 Pool stats: available={}, active={}", available, active);
952
953        PoolStats {
954            available,
955            active,
956            total: active,
957        }
958    }
959
960    /// Get a reference to the pool configuration.
961    ///
962    /// Returns the configuration that was used to create this pool.
963    /// The configuration is immutable after pool creation.
964    ///
965    /// # Example
966    ///
967    /// ```rust,ignore
968    /// let pool = BrowserPool::builder()
969    ///     .config(
970    ///         BrowserPoolConfigBuilder::new()
971    ///             .max_pool_size(10)
972    ///             .build()?
973    ///     )
974    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
975    ///     .build()?;
976    ///
977    /// println!("Max pool size: {}", pool.config().max_pool_size);
978    /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
979    /// ```
980    ///
981    /// # Use Cases
982    ///
983    /// - Logging configuration at startup
984    /// - Monitoring/metrics collection
985    /// - Readiness checks (comparing active count vs max_pool_size)
986    /// - Debugging pool behavior
987    #[inline]
988    pub fn config(&self) -> &BrowserPoolConfig {
989        self.inner.config()
990    }
991
992    /// Warmup the pool by pre-creating browsers.
993    ///
994    /// This is highly recommended to reduce first-request latency.
995    /// Should be called during application startup.
996    ///
997    /// # Process
998    ///
999    /// 1. Creates `warmup_count` browsers sequentially with staggered timing
1000    /// 2. Tests each browser with navigation
1001    /// 3. Returns all browsers to pool
1002    /// 4. Entire process has timeout (configurable via `warmup_timeout`)
1003    ///
1004    /// # Staggered Creation
1005    ///
1006    /// Browsers are created with a 30-second delay between them to ensure
1007    /// their TTLs are offset. This prevents all browsers from expiring
1008    /// at the same time.
1009    ///
1010    /// # Errors
1011    ///
1012    /// - Returns error if warmup times out.
1013    /// - Returns error if browser creation fails.
1014    ///
1015    /// # Example
1016    ///
1017    /// ```rust,ignore
1018    /// let pool = BrowserPool::builder()
1019    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1020    ///     .build()?;
1021    ///
1022    /// // Warmup during startup
1023    /// pool.warmup().await?;
1024    /// ```
1025    pub async fn warmup(&self) -> Result<()> {
1026        let count = self.inner.config().warmup_count;
1027        let warmup_timeout = self.inner.config().warmup_timeout;
1028
1029        log::info!(
1030            "🔥 Starting browser pool warmup with {} instances (timeout: {}s)",
1031            count,
1032            warmup_timeout.as_secs()
1033        );
1034
1035        // Wrap entire warmup in timeout to prevent hanging forever
1036        let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
1037
1038        match warmup_result {
1039            Ok(Ok(())) => {
1040                let stats = self.stats();
1041                log::info!(
1042                    "✅ Warmup completed successfully - Available: {}, Active: {}",
1043                    stats.available,
1044                    stats.active
1045                );
1046                Ok(())
1047            }
1048            Ok(Err(e)) => {
1049                log::error!("❌ Warmup failed with error: {}", e);
1050                Err(e)
1051            }
1052            Err(_) => {
1053                log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1054                Err(BrowserPoolError::Configuration(format!(
1055                    "Warmup timed out after {}s",
1056                    warmup_timeout.as_secs()
1057                )))
1058            }
1059        }
1060    }
1061
1062    /// Internal warmup implementation (separated for cleaner timeout wrapping).
1063    ///
1064    /// Creates browsers sequentially with a delay between them.
1065    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
1066    async fn warmup_internal(&self, count: usize) -> Result<()> {
1067        log::debug!("🛠️ Starting internal warmup process for {} browsers", count);
1068
1069        // STAGGER CONFIGURATION
1070        // We wait this long between creations to distribute expiration times
1071        let stagger_interval = Duration::from_secs(30);
1072
1073        let mut handles = Vec::new();
1074        let mut created_count = 0;
1075        let mut failed_count = 0;
1076
1077        for i in 0..count {
1078            log::debug!("🌐 Creating startup browser instance {}/{}", i + 1, count);
1079
1080            // Per-browser timeout (15s per browser is reasonable)
1081            // This prevents one slow browser from blocking entire warmup
1082            let browser_result = tokio::time::timeout(
1083                Duration::from_secs(15),
1084                tokio::task::spawn_blocking({
1085                    let inner = Arc::clone(&self.inner);
1086                    move || inner.create_browser_direct()
1087                }),
1088            )
1089            .await;
1090
1091            match browser_result {
1092                Ok(Ok(Ok(tracked))) => {
1093                    log::debug!(
1094                        "✅ Browser {} created, performing validation test...",
1095                        tracked.id()
1096                    );
1097
1098                    // Test the browser with actual navigation
1099                    match tracked.browser().new_tab() {
1100                        Ok(tab) => {
1101                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());
1102
1103                            // Navigate to test page
1104                            let nav_result = tab.navigate_to(
1105                                "data:text/html,<html><body>Warmup test</body></html>",
1106                            );
1107                            if let Err(e) = nav_result {
1108                                log::warn!(
1109                                    "⚠️ Browser {} test navigation failed: {}",
1110                                    tracked.id(),
1111                                    e
1112                                );
1113                            } else {
1114                                log::trace!(
1115                                    "✅ Browser {} test: navigation successful",
1116                                    tracked.id()
1117                                );
1118                            }
1119
1120                            // Clean up test tab
1121                            let _ = tab.close(true);
1122
1123                            // Keep handle so browser stays alive
1124                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));
1125
1126                            created_count += 1;
1127                            log::info!(
1128                                "✅ Browser instance {}/{} ready and validated",
1129                                i + 1,
1130                                count
1131                            );
1132                        }
1133                        Err(e) => {
1134                            failed_count += 1;
1135                            log::error!(
1136                                "❌ Browser {} validation test failed: {}",
1137                                tracked.id(),
1138                                e
1139                            );
1140
1141                            // Remove from active tracking since it's broken
1142                            self.inner.remove_from_active(tracked.id());
1143                        }
1144                    }
1145                }
1146                Ok(Ok(Err(e))) => {
1147                    failed_count += 1;
1148                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
1149                }
1150                Ok(Err(e)) => {
1151                    failed_count += 1;
1152                    log::error!(
1153                        "❌ Browser {}/{} creation task panicked: {:?}",
1154                        i + 1,
1155                        count,
1156                        e
1157                    );
1158                }
1159                Err(_) => {
1160                    failed_count += 1;
1161                    log::error!(
1162                        "❌ Browser {}/{} creation timed out (15s limit)",
1163                        i + 1,
1164                        count
1165                    );
1166                }
1167            }
1168
1169            // === STAGGER LOGIC ===
1170            // If this is not the last browser, wait before creating the next one.
1171            // This ensures their TTLs are offset by `stagger_interval`.
1172            if i < count - 1 {
1173                log::info!(
1174                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
1175                    stagger_interval.as_secs()
1176                );
1177                tokio::time::sleep(stagger_interval).await;
1178            }
1179        }
1180
1181        log::info!(
1182            "📊 Warmup creation phase: {} created, {} failed",
1183            created_count,
1184            failed_count
1185        );
1186
1187        // Return all browsers to pool by dropping handles
1188        log::debug!("🔍 Returning {} warmup browsers to pool...", handles.len());
1189        drop(handles);
1190
1191        // Small delay to ensure Drop handlers complete
1192        tokio::time::sleep(Duration::from_millis(300)).await;
1193
1194        let final_stats = self.stats();
1195        log::info!(
1196            "🏁 Warmup internal completed - Pool: {}, Active: {}",
1197            final_stats.available,
1198            final_stats.active
1199        );
1200
1201        Ok(())
1202    }
1203
1204    /// Start the keep-alive monitoring thread.
1205    ///
1206    /// This background thread:
1207    /// - Pings all active browsers periodically
1208    /// - Removes unresponsive browsers after max_ping_failures
1209    /// - Retires browsers that exceed TTL
1210    /// - Spawns replacement browsers as needed
1211    ///
1212    /// # Critical Design Notes
1213    ///
1214    /// - Uses condvar for immediate shutdown signaling
1215    /// - Never holds locks during I/O operations
1216    /// - Uses consistent lock ordering (active -> pool)
1217    ///
1218    /// # Parameters
1219    ///
1220    /// * `inner` - Arc reference to pool state.
1221    ///
1222    /// # Returns
1223    ///
1224    /// JoinHandle for the background thread.
1225    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
1226        let ping_interval = inner.config().ping_interval;
1227        let max_failures = inner.config().max_ping_failures;
1228        let browser_ttl = inner.config().browser_ttl;
1229        let shutdown_signal = Arc::clone(inner.shutdown_signal());
1230
1231        log::info!(
1232            "🚀 Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
1233            ping_interval.as_secs(),
1234            max_failures,
1235            browser_ttl.as_secs() / 60
1236        );
1237
1238        thread::spawn(move || {
1239            log::info!("🏁 Keep-alive thread started successfully");
1240
1241            // Track consecutive failures per browser ID
1242            let mut failure_counts: HashMap<u64, u32> = HashMap::new();
1243
1244            loop {
1245                // Wait for next ping interval OR shutdown signal (whichever comes first)
1246                // Using condvar instead of sleep allows immediate wake-up on shutdown
1247                let (lock, cvar) = &*shutdown_signal;
1248                let wait_result = {
1249                    let shutdown = lock.lock().unwrap_or_else(|poisoned| {
1250                        log::warn!("Shutdown lock poisoned, recovering");
1251                        poisoned.into_inner()
1252                    });
1253                    cvar.wait_timeout(shutdown, ping_interval).unwrap_or_else(|poisoned| {
1254                        log::warn!("Condvar wait_timeout lock poisoned, recovering");
1255                        poisoned.into_inner()
1256                    })
1257                };
1258
1259                let shutdown_flag = *wait_result.0;
1260                let timed_out = wait_result.1.timed_out();
1261
1262                // Check if we were signaled to shutdown
1263                if shutdown_flag {
1264                    log::info!("🛑 Keep-alive received shutdown signal via condvar");
1265                    break;
1266                }
1267
1268                // Double-check atomic shutdown flag (belt and suspenders)
1269                if inner.is_shutting_down() {
1270                    log::info!("🛑 Keep-alive detected shutdown via atomic flag");
1271                    break;
1272                }
1273
1274                // If spuriously woken (not timeout, not shutdown), continue waiting
1275                if !timed_out {
1276                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
1277                    continue;
1278                }
1279
1280                log::trace!("⚡ Keep-alive ping cycle starting...");
1281
1282                // Collect browsers to ping WITHOUT holding locks
1283                // This is critical: we clone the list and release the lock
1284                // before doing any I/O operations
1285                let browsers_to_ping = inner.get_active_browsers_snapshot();
1286                log::trace!(
1287                    "Keep-alive checking {} active browsers",
1288                    browsers_to_ping.len()
1289                );
1290
1291                // Now ping browsers without holding any locks
1292                let mut to_remove = Vec::new();
1293                let mut expired_browsers = Vec::new();
1294
1295                for (id, tracked) in browsers_to_ping {
1296                    // Check shutdown during ping loop (allows early exit)
1297                    if inner.is_shutting_down() {
1298                        log::info!("Shutdown detected during ping loop, exiting immediately");
1299                        return;
1300                    }
1301
1302                    // Check TTL before pinging (no point pinging expired browsers)
1303                    if tracked.is_expired(browser_ttl) {
1304                        log::info!(
1305                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
1306                            id,
1307                            tracked.age_minutes(),
1308                            browser_ttl.as_secs() / 60
1309                        );
1310                        expired_browsers.push(id);
1311                        continue; // Skip ping for expired browsers
1312                    }
1313
1314                    // Perform health check (this is I/O, no locks held)
1315                    use crate::traits::Healthcheck;
1316                    match tracked.ping() {
1317                        Ok(_) => {
1318                            // Reset failure count on success
1319                            if failure_counts.remove(&id).is_some() {
1320                                log::debug!("Browser {} ping successful, failure count reset", id);
1321                            }
1322                        }
1323                        Err(e) => {
1324                            // Only process failures if NOT shutting down
1325                            // (during shutdown, browsers may legitimately fail)
1326                            if !inner.is_shutting_down() {
1327                                let failures = failure_counts.entry(id).or_insert(0);
1328                                *failures += 1;
1329
1330                                log::warn!(
1331                                    "Browser {} ping failed (attempt {}/{}): {}",
1332                                    id,
1333                                    failures,
1334                                    max_failures,
1335                                    e
1336                                );
1337
1338                                // Remove if exceeded max failures
1339                                if *failures >= max_failures {
1340                                    log::error!(
1341                                        "Browser {} exceeded max ping failures ({}), marking for removal",
1342                                        id,
1343                                        max_failures
1344                                    );
1345                                    to_remove.push(id);
1346                                }
1347                            }
1348                        }
1349                    }
1350                }
1351
1352                // Check shutdown before cleanup (avoid work if shutting down)
1353                if inner.is_shutting_down() {
1354                    log::info!("Shutdown detected before cleanup, skipping and exiting");
1355                    break;
1356                }
1357
1358                // Handle TTL retirements first (they need replacement browsers)
1359                if !expired_browsers.is_empty() {
1360                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
1361                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
1362                }
1363
1364                // Handle failed browsers (remove from tracking and pool)
1365                if !to_remove.is_empty() {
1366                    log::warn!("Removing {} failed browsers from pool", to_remove.len());
1367
1368                    // Track how many were actually removed so we know how many to replace
1369                    let mut actual_removed_count = 0;
1370
1371                    // Remove dead browsers from active tracking
1372                    for id in &to_remove {
1373                        if inner.remove_from_active(*id).is_some() {
1374                            actual_removed_count += 1;
1375                            log::debug!("Removed failed browser {} from active tracking", id);
1376                        }
1377                        failure_counts.remove(id);
1378                    }
1379
1380                    log::debug!(
1381                        "Active browsers after failure cleanup: {}",
1382                        inner.active_count()
1383                    );
1384
1385                    // Clean up pool (remove dead browsers)
1386                    inner.remove_from_available(&to_remove);
1387
1388                    log::debug!("Pool size after cleanup: {}", inner.available_count());
1389
1390                    // Trigger replacement for the browsers we just removed
1391                    if actual_removed_count > 0 {
1392                        log::info!(
1393                            "Spawning {} replacement browsers for failed ones",
1394                            actual_removed_count
1395                        );
1396                        BrowserPoolInner::spawn_replacement_creation(
1397                            Arc::clone(&inner),
1398                            actual_removed_count,
1399                        );
1400                    }
1401                }
1402
1403                // Log keep-alive cycle summary
1404                log::debug!(
1405                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
1406                    inner.active_count(),
1407                    inner.available_count(),
1408                    failure_counts.len()
1409                );
1410            }
1411
1412            log::info!("Keep-alive thread exiting cleanly");
1413        })
1414    }
1415
1416    /// Handle browser retirement due to TTL expiration.
1417    ///
1418    /// This function:
1419    /// 1. Removes expired browsers from active and pool tracking
1420    /// 2. Spawns async tasks to create replacement browsers
1421    /// 3. Maintains pool target size
1422    ///
1423    /// # Critical Lock Ordering
1424    ///
1425    /// Acquires active -> pool locks together to prevent races.
1426    ///
1427    /// # Parameters
1428    ///
1429    /// * `inner` - Arc reference to pool state.
1430    /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1431    /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1432    fn handle_browser_retirement(
1433        inner: &Arc<BrowserPoolInner>,
1434        expired_ids: Vec<u64>,
1435        failure_counts: &mut HashMap<u64, u32>,
1436    ) {
1437        log::info!(
1438            "Retiring {} expired browsers (TTL enforcement)",
1439            expired_ids.len()
1440        );
1441
1442        // Remove expired browsers from active tracking
1443        let mut retired_count = 0;
1444        for id in &expired_ids {
1445            if inner.remove_from_active(*id).is_some() {
1446                retired_count += 1;
1447                log::debug!("Removed expired browser {} from active tracking", id);
1448            }
1449            // Clean up failure tracking
1450            failure_counts.remove(id);
1451        }
1452
1453        // Remove from pool as well
1454        inner.remove_from_available(&expired_ids);
1455
1456        log::debug!(
1457            "After retirement - Active: {}, Pooled: {}",
1458            inner.active_count(),
1459            inner.available_count()
1460        );
1461
1462        // Create replacement browsers to maintain target count
1463        if retired_count > 0 {
1464            log::info!(
1465                "Spawning {} replacement browsers for retired ones",
1466                retired_count
1467            );
1468            BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1469        } else {
1470            log::debug!("No browsers were actually retired (already removed)");
1471        }
1472    }
1473
1474    /// Asynchronously shutdown the pool (recommended method).
1475    ///
1476    /// This is the preferred shutdown method as it can properly await
1477    /// async task cancellation. Should be called during application shutdown.
1478    ///
1479    /// # Shutdown Process
1480    ///
1481    /// 1. Set atomic shutdown flag (stops new operations)
1482    /// 2. Signal condvar to wake keep-alive thread immediately
1483    /// 3. Wait for keep-alive thread to exit (with timeout)
1484    /// 4. Abort all replacement creation tasks
1485    /// 5. Wait briefly for cleanup
1486    /// 6. Log final statistics
1487    ///
1488    /// # Timeout
1489    ///
1490    /// Keep-alive thread is given 5 seconds to exit gracefully.
1491    /// If it doesn't exit, we log an error but continue shutdown.
1492    ///
1493    /// # Example
1494    ///
1495    /// ```rust,ignore
1496    /// let mut pool = /* ... */;
1497    ///
1498    /// // During application shutdown
1499    /// pool.shutdown_async().await;
1500    /// ```
1501    pub async fn shutdown_async(&mut self) {
1502        log::info!("Shutting down browser pool (async mode)...");
1503
1504        // Step 1: Set shutdown flag (prevents new operations)
1505        self.inner.set_shutting_down(true);
1506        log::debug!("Shutdown flag set");
1507
1508        // Step 2: Signal condvar to wake keep-alive thread immediately
1509        // This is critical - without this, keep-alive waits for full ping_interval
1510        {
1511            let (lock, cvar) = &**self.inner.shutdown_signal();
1512            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1513                log::warn!("Shutdown lock poisoned, recovering");
1514                poisoned.into_inner()
1515            });
1516            *shutdown = true;
1517            cvar.notify_all();
1518            log::debug!("Shutdown signal sent to keep-alive thread");
1519        } // Lock released here
1520
1521        // Step 3: Wait for keep-alive thread to exit
1522        if let Some(handle) = self.keep_alive_handle.take() {
1523            log::debug!("Waiting for keep-alive thread to exit...");
1524
1525            // Wrap thread join in spawn_blocking to make it async-friendly
1526            let join_task = tokio::task::spawn_blocking(move || handle.join());
1527
1528            // Give it 5 seconds to exit gracefully
1529            match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1530                Ok(Ok(Ok(_))) => {
1531                    log::info!("Keep-alive thread stopped cleanly");
1532                }
1533                Ok(Ok(Err(_))) => {
1534                    log::error!("Keep-alive thread panicked during shutdown");
1535                }
1536                Ok(Err(_)) => {
1537                    log::error!("Keep-alive join task panicked");
1538                }
1539                Err(_) => {
1540                    log::error!("Keep-alive thread didn't exit within 5s timeout");
1541                }
1542            }
1543        } else {
1544            log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1545        }
1546
1547        // Step 4: Abort all replacement creation tasks
1548        log::info!("Aborting replacement creation tasks...");
1549        let aborted_count = self.inner.abort_replacement_tasks();
1550        if aborted_count > 0 {
1551            log::info!("Aborted {} replacement tasks", aborted_count);
1552        } else {
1553            log::debug!("No replacement tasks to abort");
1554        }
1555
1556        // Step 5: Small delay to let aborted tasks clean up
1557        tokio::time::sleep(Duration::from_millis(100)).await;
1558
1559        // Step 6: Log final statistics
1560        let stats = self.stats();
1561        log::info!(
1562            "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1563            stats.available,
1564            stats.active,
1565            stats.total
1566        );
1567    }
1568
1569    /// Synchronously shutdown the pool (fallback method).
1570    ///
1571    /// This is a simplified shutdown for use in Drop or non-async contexts.
1572    /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
1573    ///
1574    /// # Note
1575    ///
1576    /// This method doesn't wait for replacement tasks to finish since
1577    /// there's no async runtime available. Tasks are aborted but may not
1578    /// have cleaned up yet.
1579    pub fn shutdown(&mut self) {
1580        log::debug!("Calling synchronous shutdown...");
1581        self.shutdown_sync();
1582    }
1583
1584    /// Internal synchronous shutdown implementation.
1585    fn shutdown_sync(&mut self) {
1586        log::info!("Shutting down browser pool (sync mode)...");
1587
1588        // Set shutdown flag
1589        self.inner.set_shutting_down(true);
1590        log::debug!("Shutdown flag set");
1591
1592        // Signal condvar (same as async version)
1593        {
1594            let (lock, cvar) = &**self.inner.shutdown_signal();
1595            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1596                log::warn!("Shutdown lock poisoned, recovering");
1597                poisoned.into_inner()
1598            });
1599            *shutdown = true;
1600            cvar.notify_all();
1601            log::debug!("Shutdown signal sent");
1602        }
1603
1604        // Wait for keep-alive thread
1605        if let Some(handle) = self.keep_alive_handle.take() {
1606            log::debug!("Joining keep-alive thread (sync)...");
1607
1608            match handle.join() {
1609                Ok(_) => log::info!("Keep-alive thread stopped"),
1610                Err(_) => log::error!("Keep-alive thread panicked"),
1611            }
1612        }
1613
1614        // Abort replacement tasks (best effort - they won't make progress without runtime)
1615        let aborted_count = self.inner.abort_replacement_tasks();
1616        if aborted_count > 0 {
1617            log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1618        }
1619
1620        let stats = self.stats();
1621        log::info!(
1622            "Sync shutdown complete - Available: {}, Active: {}",
1623            stats.available,
1624            stats.active
1625        );
1626    }
1627
1628    /// Get a reference to the inner pool state.
1629    ///
1630    /// This is primarily for internal use and testing.
1631    #[doc(hidden)]
1632    #[allow(dead_code)]
1633    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
1634        &self.inner
1635    }
1636}
1637
1638impl Drop for BrowserPool {
1639    /// Automatic cleanup when pool is dropped.
1640    ///
1641    /// This ensures resources are released even if shutdown wasn't called explicitly.
1642    /// Uses sync shutdown since Drop can't be async.
1643    fn drop(&mut self) {
1644        log::debug!("🛑 BrowserPool Drop triggered - running cleanup");
1645
1646        // Only shutdown if not already done
1647        if !self.inner.is_shutting_down() {
1648            log::warn!("⚠ BrowserPool dropped without explicit shutdown - cleaning up");
1649            self.shutdown();
1650        } else {
1651            log::debug!(" Pool already shutdown, Drop is no-op");
1652        }
1653    }
1654}
1655
1656// ============================================================================
1657// BrowserPoolBuilder
1658// ============================================================================
1659
1660/// Builder for constructing a [`BrowserPool`] with validation.
1661///
1662/// This is the recommended way to create a pool as it validates
1663/// configuration and provides sensible defaults.
1664///
1665/// # Example
1666///
1667/// ```rust,ignore
1668/// use std::time::Duration;
1669/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
1670///
1671/// let pool = BrowserPool::builder()
1672///     .config(
1673///         BrowserPoolConfigBuilder::new()
1674///             .max_pool_size(10)
1675///             .warmup_count(5)
1676///             .browser_ttl(Duration::from_secs(7200))
1677///             .build()?
1678///     )
1679///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1680///     .enable_keep_alive(true)
1681///     .build()?;
1682/// ```
1683pub struct BrowserPoolBuilder {
1684    /// Optional configuration (uses default if not provided).
1685    config: Option<BrowserPoolConfig>,
1686
1687    /// Browser factory (required).
1688    factory: Option<Box<dyn BrowserFactory>>,
1689
1690    /// Whether to enable keep-alive thread (default: true).
1691    enable_keep_alive: bool,
1692}
1693
1694impl BrowserPoolBuilder {
1695    /// Create a new builder with defaults.
1696    ///
1697    /// # Example
1698    ///
1699    /// ```rust,ignore
1700    /// let builder = BrowserPoolBuilder::new();
1701    /// ```
1702    pub fn new() -> Self {
1703        Self {
1704            config: None,
1705            factory: None,
1706            enable_keep_alive: true,
1707        }
1708    }
1709
1710    /// Set custom configuration.
1711    ///
1712    /// If not called, uses [`BrowserPoolConfig::default()`].
1713    ///
1714    /// # Parameters
1715    ///
1716    /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1717    ///
1718    /// # Example
1719    ///
1720    /// ```rust,ignore
1721    /// let config = BrowserPoolConfigBuilder::new()
1722    ///     .max_pool_size(10)
1723    ///     .build()?;
1724    ///
1725    /// let pool = BrowserPool::builder()
1726    ///     .config(config)
1727    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1728    ///     .build()?;
1729    /// ```
1730    pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1731        self.config = Some(config);
1732        self
1733    }
1734
1735    /// Set browser factory (required).
1736    ///
1737    /// The factory is responsible for creating browser instances.
1738    /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1739    ///
1740    /// # Parameters
1741    ///
1742    /// * `factory` - A boxed [`BrowserFactory`] implementation.
1743    ///
1744    /// # Example
1745    ///
1746    /// ```rust,ignore
1747    /// let pool = BrowserPool::builder()
1748    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1749    ///     .build()?;
1750    /// ```
1751    pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1752        self.factory = Some(factory);
1753        self
1754    }
1755
1756    /// Enable or disable keep-alive thread.
1757    ///
1758    /// Keep-alive should be disabled only for testing.
1759    /// Production use should always have it enabled.
1760    ///
1761    /// # Parameters
1762    ///
1763    /// * `enable` - Whether to enable the keep-alive thread.
1764    ///
1765    /// # Example
1766    ///
1767    /// ```rust,ignore
1768    /// // Disable for tests
1769    /// let pool = BrowserPool::builder()
1770    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1771    ///     .enable_keep_alive(false)
1772    ///     .build()?;
1773    /// ```
1774    pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1775        self.enable_keep_alive = enable;
1776        self
1777    }
1778
1779    /// Build the browser pool.
1780    ///
1781    /// # Errors
1782    ///
1783    /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1784    ///
1785    /// # Panics
1786    ///
1787    /// Panics if called outside a tokio runtime context.
1788    ///
1789    /// # Example
1790    ///
1791    /// ```rust,ignore
1792    /// let pool = BrowserPool::builder()
1793    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1794    ///     .build()?;
1795    /// ```
1796    pub fn build(self) -> Result<BrowserPool> {
1797        let config = self.config.unwrap_or_default();
1798        let factory = self.factory.ok_or_else(|| {
1799            BrowserPoolError::Configuration("No browser factory provided".to_string())
1800        })?;
1801
1802        log::info!("📦 Building browser pool with config: {:?}", config);
1803
1804        // Create inner state
1805        let inner = BrowserPoolInner::new(config, factory);
1806
1807        // Start keep-alive thread if enabled
1808        let keep_alive_handle = if self.enable_keep_alive {
1809            log::info!("🚀 Starting keep-alive monitoring thread");
1810            Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1811        } else {
1812            log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1813            None
1814        };
1815
1816        log::info!("✅ Browser pool built successfully");
1817
1818        Ok(BrowserPool {
1819            inner,
1820            keep_alive_handle,
1821        })
1822    }
1823}
1824
1825impl Default for BrowserPoolBuilder {
1826    fn default() -> Self {
1827        Self::new()
1828    }
1829}
1830
1831// ============================================================================
1832// Environment Initialization (feature-gated)
1833// ============================================================================
1834
/// Build, warm up and share a browser pool configured from the environment.
///
/// Convenience entry point for the common start-up sequence: load the
/// configuration from environment variables, pick a Chrome factory, build
/// the pool with keep-alive enabled, warm it up and wrap it for sharing.
///
/// # Feature Flag
///
/// Only compiled when the `env-config` feature is enabled.
///
/// # Environment Variables
///
/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
/// - `CHROME_PATH`: Custom Chrome binary path (optional)
///
/// # Returns
///
/// An `Arc<BrowserPool>` ready to be used from web handlers.
///
/// # Errors
///
/// - Loading the configuration from the environment fails.
/// - Building the pool fails.
/// - Warming up the pool fails.
///
/// # Example
///
/// ```rust,ignore
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     env_logger::init();
///
///     let pool = init_browser_pool().await?;
///
///     // Use pool in handlers...
///
///     Ok(())
/// }
/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<BrowserPool>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Configuration and optional Chrome binary override.
    let config = from_env()?;
    let chrome_path = chrome_path_from_env();

    // Echo the effective settings.
    let ttl_secs = config.browser_ttl.as_secs();
    log::info!("Pool configuration from environment:");
    log::info!("   - Max pool size: {}", config.max_pool_size);
    log::info!("   - Warmup count: {}", config.warmup_count);
    log::info!("   - Browser TTL: {}s ({}min)", ttl_secs, ttl_secs / 60);
    log::info!("   - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        "   - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // An explicit path wins; otherwise the factory auto-detects Chrome.
    let factory: Box<dyn BrowserFactory> = if let Some(path) = chrome_path {
        log::info!("Using custom Chrome path: {}", path);
        Box::new(ChromeBrowserFactory::with_path(path))
    } else {
        log::info!("Using auto-detected Chrome browser");
        Box::new(ChromeBrowserFactory::with_defaults())
    };

    log::debug!("Building browser pool...");
    let built = BrowserPool::builder()
        .config(config.clone())
        .factory(factory)
        .enable_keep_alive(true)
        .build();
    let pool = match built {
        Ok(pool) => pool,
        Err(err) => {
            log::error!("❌ Failed to create browser pool: {}", err);
            return Err(err);
        }
    };

    log::info!("✅ Browser pool created successfully");

    // Pre-create browsers so the first requests do not pay the startup cost.
    log::info!(
        "Warming up browser pool with {} instances...",
        config.warmup_count
    );
    pool.warmup().await.map_err(|err| {
        log::error!("❌ Failed to warmup pool: {}", err);
        err
    })?;

    let snapshot = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        snapshot.available,
        snapshot.active,
        snapshot.total
    );

    Ok(pool.into_shared())
}
1948
1949// ============================================================================
1950// Unit Tests
1951// ============================================================================
1952
#[cfg(test)]
mod tests {
    use super::*;

    /// Building without a factory must fail with a `Configuration` error.
    ///
    /// The pool cannot create browsers without a factory, so the builder
    /// has to reject this case with a descriptive message.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised inside a tokio runtime.
        let runtime = tokio::runtime::Runtime::new().unwrap();

        runtime.block_on(async {
            let config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // No `.factory(...)` call on purpose.
            let outcome = BrowserPool::builder().config(config).build();

            assert!(outcome.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(msg)) = outcome {
                assert!(
                    msg.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    msg
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` yields an empty builder with keep-alive enabled.
    #[test]
    fn test_builder_default() {
        let defaulted = BrowserPoolBuilder::default();
        assert!(defaulted.config.is_none());
        assert!(defaulted.factory.is_none());
        assert!(defaulted.enable_keep_alive);
    }

    /// `enable_keep_alive(false)` clears the keep-alive flag.
    #[test]
    fn test_builder_disable_keep_alive() {
        let without_keep_alive = BrowserPoolBuilder::new().enable_keep_alive(false);
        assert!(!without_keep_alive.enable_keep_alive);
    }
}