html2pdf_api/pool.rs
1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//! ├─ BrowserPoolInner (shared state)
21//! │ ├─ available: Vec<TrackedBrowser> (pooled, ready to use)
22//! │ ├─ active: HashMap<id, TrackedBrowser> (in-use, tracked for health)
23//! │ └─ replacement_tasks: Vec<JoinHandle> (async replacement creators)
24//! └─ keep_alive_handle: JoinHandle (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//! // Create pool
41//! let mut pool = BrowserPool::builder()
42//! .config(
43//! BrowserPoolConfigBuilder::new()
44//! .max_pool_size(5)
45//! .warmup_count(3)
46//! .build()?
47//! )
48//! .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//! .build()?;
50//!
51//! // Warmup
52//! pool.warmup().await?;
53//!
54//! // Use browsers
55//! {
56//! let browser = pool.get()?;
57//! let tab = browser.new_tab()?;
58//! // ... do work ...
59//! } // browser returned to pool automatically
60//!
61//! // Shutdown
62//! pool.shutdown_async().await;
63//!
64//! Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
/// Internal shared state for the browser pool.
///
/// This struct contains all shared state and is wrapped in Arc for thread-safe
/// sharing between the pool, handles, and background threads.
///
/// # Lock Ordering (CRITICAL)
///
/// Always acquire locks in this order to prevent deadlocks:
/// 1. `active` (browsers currently in use)
/// 2. `available` (browsers in pool ready for use)
///
/// Never hold locks during I/O operations or browser creation.
///
/// # Thread Safety
///
/// All fields are protected by appropriate synchronization primitives:
/// - `Mutex` for mutable collections
/// - `AtomicBool` for shutdown flag
/// - `Arc` for shared ownership
pub(crate) struct BrowserPoolInner {
    /// Configuration (immutable after creation).
    config: BrowserPoolConfig,

    /// Browsers available for checkout (not currently in use).
    ///
    /// Protected by Mutex. Browsers are moved from here when checked out
    /// and returned here when released (if pool not full).
    available: Mutex<Vec<Arc<TrackedBrowser>>>,

    /// All browsers that exist (both pooled and checked out).
    ///
    /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
    /// Maps browser ID -> TrackedBrowser for fast lookup.
    ///
    /// NOTE(review): despite the name, this holds *every* live browser —
    /// `create_browser_direct` inserts immediately and `return_browser`
    /// keeps entries for pooled browsers — not only checked-out ones.
    active: Mutex<HashMap<u64, Arc<TrackedBrowser>>>,

    /// Factory for creating new browser instances.
    factory: Box<dyn BrowserFactory>,

    /// Atomic flag indicating shutdown in progress.
    ///
    /// Checked before expensive operations. Once set, no new operations start.
    shutting_down: AtomicBool,

    /// Background tasks creating replacement browsers.
    ///
    /// Tracked so we can abort them during shutdown.
    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,

    /// Handle to tokio runtime for spawning async tasks.
    ///
    /// Captured at creation time to allow spawning from any context
    /// (including sync contexts such as Drop).
    runtime_handle: tokio::runtime::Handle,

    /// Shutdown signaling mechanism for keep-alive thread.
    ///
    /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
    /// instead of waiting for full ping_interval.
    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
}
146
147impl BrowserPoolInner {
148 /// Create a new browser pool inner state.
149 ///
150 /// # Parameters
151 ///
152 /// * `config` - Validated configuration.
153 /// * `factory` - Browser factory for creating instances.
154 ///
155 /// # Panics
156 ///
157 /// Panics if called outside a tokio runtime context.
158 pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159 log::info!(
160 "🚀 Initializing browser pool with capacity {}",
161 config.max_pool_size
162 );
163 log::debug!(
164 "📋 Pool config: warmup={}, TTL={}s, ping_interval={}s",
165 config.warmup_count,
166 config.browser_ttl.as_secs(),
167 config.ping_interval.as_secs()
168 );
169
170 // Capture runtime handle for spawning async tasks
171 // This allows us to spawn from sync contexts (like Drop)
172 let runtime_handle = tokio::runtime::Handle::current();
173
174 Arc::new(Self {
175 config,
176 available: Mutex::new(Vec::new()),
177 active: Mutex::new(HashMap::new()),
178 factory,
179 shutting_down: AtomicBool::new(false),
180 replacement_tasks: Mutex::new(Vec::new()),
181 runtime_handle,
182 shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183 })
184 }
185
    /// Create a lightweight mock pool for testing without background threads.
    ///
    /// Unlike [`new`](Self::new), the runtime handle is supplied by the caller
    /// (so no ambient tokio context is required) and the value is returned
    /// bare, without `Arc` wrapping.
    #[cfg(test)]
    pub(crate) fn new_for_test(
        config: BrowserPoolConfig,
        factory: Box<dyn BrowserFactory>,
        runtime_handle: tokio::runtime::Handle,
    ) -> Self {
        Self {
            config,
            available: Mutex::new(Vec::new()),
            active: Mutex::new(HashMap::new()),
            factory,
            shutting_down: AtomicBool::new(false),
            replacement_tasks: Mutex::new(Vec::new()),
            runtime_handle,
            shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
        }
    }
204
205 /// Create a browser directly without using the pool.
206 ///
207 /// Used for:
208 /// - Initial warmup
209 /// - Replacing failed browsers
210 /// - When pool is empty
211 ///
212 /// # Important
213 ///
214 /// Adds the browser to `active` tracking immediately for health monitoring.
215 ///
216 /// # Errors
217 ///
218 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
219 /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
220 pub(crate) fn create_browser_direct(&self) -> Result<Arc<TrackedBrowser>> {
221 // Early exit if shutting down (don't waste time creating browsers)
222 if self.shutting_down.load(Ordering::Acquire) {
223 log::debug!("🛑 Skipping browser creation - pool is shutting down");
224 return Err(BrowserPoolError::ShuttingDown);
225 }
226
227 log::debug!("📦 Creating new browser directly via factory...");
228
229 // Factory handles all Chrome launch complexity
230 let browser = self.factory.create()?;
231
232 // Wrap with tracking metadata and Arc immediately
233 let tracked = Arc::new(TrackedBrowser::new(browser)?);
234 let id = tracked.id();
235
236 // Add to active tracking immediately for health monitoring
237 // This ensures keep-alive thread will monitor it
238 if let Ok(mut active) = self.active.lock() {
239 active.insert(id, Arc::clone(&tracked));
240 log::debug!(
241 "📊 Browser {} added to active tracking (total active: {})",
242 id,
243 active.len()
244 );
245 } else {
246 log::warn!(
247 "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
248 id
249 );
250 }
251
252 log::info!("✅ Created new browser with ID {}", id);
253 Ok(tracked)
254 }
255
256 /// Get a browser from pool or create a new one.
257 ///
258 /// # Algorithm
259 ///
260 /// 1. Loop through pooled browsers
261 /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
262 /// - If near expiry: Skip (drop) it immediately.
263 /// - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
264 /// 3. For valid browsers, perform detailed health check (without holding locks)
265 /// 4. If healthy, return it
266 /// 5. If unhealthy, remove from active tracking and try next
267 /// 6. If pool empty or all skipped/unhealthy, create new browser
268 ///
269 /// # Critical: Lock-Free Health Checks
270 ///
271 /// Health checks are performed WITHOUT holding locks to avoid blocking
272 /// other threads. This is why we use a loop pattern instead of iterator.
273 ///
274 /// # Returns
275 ///
276 /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
277 ///
278 /// # Errors
279 ///
280 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
281 /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
282 pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
283 log::debug!("🔍 Attempting to get browser from pool...");
284
285 // Try to get from pool - LOOP pattern to avoid holding lock during health checks
286 // This is critical for concurrency: we release the lock between attempts
287 loop {
288 // Acquire lock briefly to pop one browser
289 let tracked_opt = {
290 let mut available = self.available.lock().unwrap_or_else(|poisoned| {
291 log::warn!("Pool available lock poisoned, recovering");
292 poisoned.into_inner()
293 });
294 let popped = available.pop();
295 log::trace!("📊 Pool size after pop: {}", available.len());
296 popped
297 }; // Lock released here - critical for performance
298
299 if let Some(tracked) = tracked_opt {
300 // === LOGIC START: Grace Period Check ===
301 let age = tracked.created_at().elapsed();
302 let ttl = self.config.browser_ttl;
303
304 // Safety margin matching your stagger interval
305 let safety_margin = Duration::from_secs(30);
306
307 // If browser is about to expire, don't use it.
308 if age + safety_margin > ttl {
309 log::debug!(
310 "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
311 tracked.id(),
312 age.as_secs()
313 );
314
315 // CRITICAL: We do NOT remove/recreate here.
316 // By simply 'continuing', we drop this 'tracked' instance.
317 // 1. It is NOT returned to 'available' (so no user gets it).
318 // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
319 // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
320 continue;
321 }
322 // === LOGIC END: Grace Period Check ===
323
324 // Get pool size for logging (brief lock)
325 let pool_size = {
326 let available = self.available.lock().unwrap_or_else(|poisoned| {
327 log::warn!("Pool available lock poisoned, recovering");
328 poisoned.into_inner()
329 });
330 available.len()
331 };
332
333 log::info!(
334 "♻️ Reusing healthy browser {} from pool (pool size: {})",
335 tracked.id(),
336 pool_size
337 );
338
339 // Return healthy browser wrapped in RAII handle
340 return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
341 } else {
342 // Pool is empty, break to create new browser
343 log::debug!("📥 Pool is empty, will create new browser");
344 break;
345 }
346 }
347
348 // Pool is empty or no healthy browsers found
349 log::info!("📦 Creating new browser (pool was empty or all browsers unhealthy)");
350
351 let tracked = self.create_browser_direct()?;
352
353 log::info!("✅ Returning newly created browser {}", tracked.id());
354 Ok(BrowserHandle::new(tracked, Arc::clone(self)))
355 }
356
    /// Return a browser to the pool (called by BrowserHandle::drop).
    ///
    /// # Critical Lock Ordering
    ///
    /// Always acquires locks in order: active -> available.
    /// Both locks are held together to prevent race conditions.
    ///
    /// # Algorithm
    ///
    /// 1. Acquire both locks (order: active, then available)
    /// 2. Verify browser is in active tracking
    /// 3. Check TTL - if expired, retire and trigger replacement
    /// 4. Check health marker - if unhealthy, retire and trigger replacement
    /// 5. If pool has space, add to available pool
    /// 6. If pool full, remove from active (browser gets dropped)
    ///
    /// # Parameters
    ///
    /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
    /// * `tracked` - The browser being returned.
    pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: Arc<TrackedBrowser>) {
        log::debug!("♻️ Returning browser {} to pool...", tracked.id());

        // Early exit if shutting down (don't waste time managing pool)
        if self_arc.shutting_down.load(Ordering::Acquire) {
            log::debug!(
                "🛑 Pool shutting down, not returning browser {}",
                tracked.id()
            );
            return;
        }

        // CRITICAL: Always acquire in order: active -> pool
        // Holding both locks prevents ALL race conditions:
        // - Prevents concurrent modifications to browser state
        // - Prevents duplicate returns
        // - Ensures pool size limits are respected
        let mut active = self_arc.active.lock().unwrap_or_else(|poisoned| {
            log::warn!("Pool active lock poisoned, recovering");
            poisoned.into_inner()
        });
        let mut pool = self_arc.available.lock().unwrap_or_else(|poisoned| {
            log::warn!("Pool available lock poisoned, recovering");
            poisoned.into_inner()
        });

        // Verify browser is actually tracked (sanity check).
        // Returning here simply drops `tracked`, closing the browser.
        if !active.contains_key(&tracked.id()) {
            log::warn!(
                "❌ Browser {} not in active tracking (probably already removed), skipping return",
                tracked.id()
            );
            return;
        }

        // Check TTL before returning to pool
        // Expired browsers should be retired to prevent memory leaks
        if tracked.is_expired(self_arc.config.browser_ttl) {
            log::info!(
                "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
                tracked.id(),
                tracked.age_minutes(),
                self_arc.config.browser_ttl.as_secs() / 60
            );

            // Remove from active tracking
            active.remove(&tracked.id());
            log::debug!("📊 Active browsers after TTL retirement: {}", active.len());

            // Release locks before spawning replacement task
            // (never hold locks across task spawning / I/O)
            drop(active);
            drop(pool);

            // Trigger async replacement creation (non-blocking)
            log::debug!("🔍 Triggering replacement browser creation for expired browser");
            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
            return;
        }

        // Check health marker before returning to pool
        // Crashed browsers must be retired to prevent poison pill loops
        if !tracked.is_healthy() {
            log::warn!(
                "⚕️ Browser {} marked unhealthy, retiring instead of returning",
                tracked.id()
            );

            // Remove from active tracking
            active.remove(&tracked.id());
            log::debug!(
                "📊 Active browsers after health retirement: {}",
                active.len()
            );

            // Release locks before spawning replacement task
            drop(active);
            drop(pool);

            // Trigger async replacement creation (non-blocking)
            log::debug!("🔍 Triggering replacement browser creation for unhealthy browser");
            Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
            return;
        }

        // Prevent duplicate returns (defensive programming).
        // The browser is already pooled, so it stays in `active` too.
        if pool.iter().any(|b| b.id() == tracked.id()) {
            log::warn!(
                "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
                tracked.id()
            );
            return;
        }

        // Check if pool has space for this browser
        if pool.len() < self_arc.config.max_pool_size {
            // Add to pool for reuse (it also stays in `active` tracking)
            pool.push(tracked.clone());
            log::info!(
                "♻️ Browser {} returned to pool (pool size: {}/{})",
                tracked.id(),
                pool.len(),
                self_arc.config.max_pool_size
            );
        } else {
            // Pool is full, remove from tracking (browser will be dropped)
            log::debug!(
                "️ Pool full ({}/{}), removing browser {} from system",
                pool.len(),
                self_arc.config.max_pool_size,
                tracked.id()
            );
            active.remove(&tracked.id());
            log::debug!("📊 Active browsers after removal: {}", active.len());
        }
    }
491
    /// Asynchronously create replacement browsers (internal helper).
    ///
    /// This is the async work function that actually creates browsers.
    /// It's spawned as a tokio task by `spawn_replacement_creation`.
    ///
    /// # Algorithm
    ///
    /// 1. Check shutdown flag before each creation
    /// 2. Check pool space before each creation
    /// 3. Use spawn_blocking for the blocking browser creation
    /// 4. Add successful browsers to pool
    /// 5. Log detailed status
    ///
    /// # Parameters
    ///
    /// * `inner` - Arc reference to pool state.
    /// * `count` - Number of browsers to attempt to create.
    async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
        log::info!(
            "🔍 Starting async replacement creation for {} browsers",
            count
        );

        let mut created_count = 0;
        let mut failed_count = 0;

        for i in 0..count {
            // Check shutdown flag before each expensive operation
            if inner.shutting_down.load(Ordering::Acquire) {
                log::info!(
                    "🛑 Shutdown detected during replacement creation, stopping at {}/{}",
                    i,
                    count
                );
                break;
            }

            // Check if pool has space BEFORE creating (avoid wasted work).
            // The lock is scoped to this block so it is not held across
            // the slow creation below.
            let pool_has_space = {
                let pool = inner.available.lock().unwrap_or_else(|poisoned| {
                    log::warn!("Pool available lock poisoned, recovering");
                    poisoned.into_inner()
                });
                let has_space = pool.len() < inner.config.max_pool_size;
                log::trace!(
                    "📊 Pool space check: {}/{} (has space: {})",
                    pool.len(),
                    inner.config.max_pool_size,
                    has_space
                );
                has_space
            };

            if !pool_has_space {
                log::warn!(
                    "⚠️ Pool is full, stopping replacement creation at {}/{}",
                    i,
                    count
                );
                break;
            }

            log::debug!("📦 Creating replacement browser {}/{}", i + 1, count);

            // Use spawn_blocking for the blocking browser creation
            // This prevents blocking the async runtime
            let inner_clone = Arc::clone(&inner);
            let result =
                tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;

            // Outer Result: task join (panic) outcome.
            // Inner Result: browser creation outcome.
            match result {
                Ok(Ok(tracked)) => {
                    let id = tracked.id();

                    // Add to pool (with space check to handle race conditions)
                    let mut pool = inner.available.lock().unwrap_or_else(|poisoned| {
                        log::warn!("Pool available lock poisoned, recovering");
                        poisoned.into_inner()
                    });

                    // Double-check space (another thread might have added browsers)
                    if pool.len() < inner.config.max_pool_size {
                        pool.push(tracked);
                        created_count += 1;
                        log::info!(
                            "✅ Created replacement browser {} and added to pool ({}/{})",
                            id,
                            i + 1,
                            count
                        );
                    } else {
                        // NOTE(review): this browser stays only in `active`;
                        // it is never handed out, so it presumably lingers
                        // until the keep-alive thread retires it at TTL —
                        // confirm keep-alive handles this case.
                        log::warn!(
                            "⚠️ Pool became full during creation, replacement browser {} kept in active only",
                            id
                        );
                        created_count += 1; // Still count as created (just not pooled)
                    }
                }
                Ok(Err(e)) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Failed to create replacement browser {}/{}: {}",
                        i + 1,
                        count,
                        e
                    );
                }
                Err(e) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Replacement browser {}/{} task panicked: {:?}",
                        i + 1,
                        count,
                        e
                    );
                }
            }
        }

        // Final status report (locks taken one at a time, briefly)
        let pool_size = inner
            .available
            .lock()
            .unwrap_or_else(|poisoned| {
                log::warn!("Pool available lock poisoned, recovering");
                poisoned.into_inner()
            })
            .len();
        let active_size = inner
            .active
            .lock()
            .unwrap_or_else(|poisoned| {
                log::warn!("Pool active lock poisoned, recovering");
                poisoned.into_inner()
            })
            .len();

        log::info!(
            "🏁 Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
            created_count,
            count,
            failed_count,
            pool_size,
            active_size
        );
    }
638
639 /// Spawn a background task to create replacement browsers.
640 ///
641 /// This is non-blocking and returns immediately. The actual browser
642 /// creation happens in a tokio task tracked in `replacement_tasks`.
643 ///
644 /// # Why Async
645 ///
646 /// Browser creation is slow (1-3 seconds per browser). Spawning async
647 /// tasks prevents blocking the caller.
648 ///
649 /// # Task Tracking
650 ///
651 /// Tasks are tracked so we can abort them during shutdown.
652 ///
653 /// # Parameters
654 ///
655 /// * `inner` - Arc reference to pool state.
656 /// * `count` - Number of replacement browsers to create.
657 pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
658 log::info!(
659 "📥 Spawning async task to create {} replacement browsers",
660 count
661 );
662
663 // Clone Arc for moving into async task
664 let inner_for_task = Arc::clone(&inner);
665
666 // Spawn async task on the captured runtime
667 let task_handle = inner.runtime_handle.spawn(async move {
668 Self::spawn_replacement_creation_async(inner_for_task, count).await;
669 });
670
671 // Track task handle for shutdown cleanup
672 if let Ok(mut tasks) = inner.replacement_tasks.lock() {
673 // Clean up finished tasks while we have the lock (housekeeping)
674 let original_count = tasks.len();
675 tasks.retain(|h| !h.is_finished());
676 let cleaned = original_count - tasks.len();
677
678 if cleaned > 0 {
679 log::trace!("🧹 Cleaned up {} finished replacement tasks", cleaned);
680 }
681
682 // Add new task
683 tasks.push(task_handle);
684
685 log::debug!("📋 Now tracking {} active replacement tasks", tasks.len());
686 } else {
687 log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
688 }
689 }
690
    /// Get the pool configuration (immutable after creation).
    #[inline]
    pub(crate) fn config(&self) -> &BrowserPoolConfig {
        &self.config
    }

    /// Check if the pool is shutting down.
    ///
    /// `Acquire` load pairs with the `Release` store in
    /// [`set_shutting_down`](Self::set_shutting_down).
    #[inline]
    pub(crate) fn is_shutting_down(&self) -> bool {
        self.shutting_down.load(Ordering::Acquire)
    }

    /// Set the shutdown flag. Once set, new operations refuse to start.
    #[inline]
    pub(crate) fn set_shutting_down(&self, value: bool) {
        self.shutting_down.store(value, Ordering::Release);
    }

    /// Get the shutdown signal for the keep-alive thread.
    ///
    /// The `(flag, condvar)` pair lets shutdown wake the keep-alive thread
    /// immediately instead of waiting out a full `ping_interval`.
    #[inline]
    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
        &self.shutdown_signal
    }
714
715 /// Get the available browsers count.
716 pub(crate) fn available_count(&self) -> usize {
717 self.available.lock().map(|g| g.len()).unwrap_or(0)
718 }
719
720 /// Get the active browsers count.
721 pub(crate) fn active_count(&self) -> usize {
722 self.active.lock().map(|g| g.len()).unwrap_or(0)
723 }
724
725 /// Get a snapshot of active browsers for health checking.
726 ///
727 /// Returns a cloned list to avoid holding locks during I/O.
728 pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, Arc<TrackedBrowser>)> {
729 let active = self.active.lock().unwrap_or_else(|poisoned| {
730 log::warn!("Pool active lock poisoned, recovering");
731 poisoned.into_inner()
732 });
733 active
734 .iter()
735 .map(|(id, tracked)| (*id, Arc::clone(tracked)))
736 .collect()
737 }
738
739 /// Remove a browser from active tracking.
740 pub(crate) fn remove_from_active(&self, id: u64) -> Option<Arc<TrackedBrowser>> {
741 let mut active = self.active.lock().unwrap_or_else(|poisoned| {
742 log::warn!("Pool active lock poisoned, recovering");
743 poisoned.into_inner()
744 });
745 active.remove(&id)
746 }
747
748 /// Remove browsers from the available pool by ID.
749 pub(crate) fn remove_from_available(&self, ids: &[u64]) {
750 let mut pool = self.available.lock().unwrap_or_else(|poisoned| {
751 log::warn!("Pool available lock poisoned, recovering");
752 poisoned.into_inner()
753 });
754 let original_size = pool.len();
755 pool.retain(|b| !ids.contains(&b.id()));
756 let removed = original_size - pool.len();
757 if removed > 0 {
758 log::debug!("🗑️ Removed {} browsers from available pool", removed);
759 }
760 }
761
762 /// Abort all replacement tasks.
763 pub(crate) fn abort_replacement_tasks(&self) -> usize {
764 if let Ok(mut tasks) = self.replacement_tasks.lock() {
765 let count = tasks.len();
766 for handle in tasks.drain(..) {
767 handle.abort();
768 }
769 count
770 } else {
771 0
772 }
773 }
774}
775
776// ============================================================================
777// BrowserPool
778// ============================================================================
779
780/// Main browser pool with lifecycle management.
781///
782/// This is the public-facing API for the browser pool. It wraps the internal
783/// state and manages the keep-alive thread.
784///
785/// # Overview
786///
787/// `BrowserPool` provides:
788/// - Browser checkout via [`get()`](Self::get)
789/// - Pool warmup via [`warmup()`](Self::warmup)
790/// - Statistics via [`stats()`](Self::stats)
791/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
792///
793/// # Example
794///
795/// ```rust,no_run
796/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
797/// use std::time::Duration;
798///
799/// #[tokio::main]
800/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
801/// // Create pool
802/// let mut pool = BrowserPool::builder()
803/// .config(
804/// BrowserPoolConfigBuilder::new()
805/// .max_pool_size(5)
806/// .warmup_count(3)
807/// .build()?
808/// )
809/// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
810/// .build()?;
811///
812/// // Warmup
813/// pool.warmup().await?;
814///
815/// // Use browsers
816/// {
817/// let browser = pool.get()?;
818/// let tab = browser.new_tab()?;
819/// // ... do work ...
820/// } // browser returned to pool automatically
821///
822/// // Shutdown
823/// pool.shutdown_async().await;
824///
825/// Ok(())
826/// }
827/// ```
828///
829/// # Thread Safety
830///
831/// `BrowserPool` uses fine-grained internal locks (`Mutex<Vec>`, `Mutex<HashMap>`)
832/// so it is safe to share as `Arc<BrowserPool>` without an outer `Mutex`.
833/// Use [`into_shared()`](Self::into_shared) for convenience.
pub struct BrowserPool {
    /// Shared internal state (also held by handles and background threads).
    inner: Arc<BrowserPoolInner>,

    /// Handle to keep-alive monitoring thread.
    ///
    /// Option allows taking during shutdown. None means keep-alive disabled.
    keep_alive_handle: Option<JoinHandle<()>>,
}
843
844impl BrowserPool {
    /// Convert pool into a shared `Arc<BrowserPool>` for use in web handlers.
    ///
    /// This is convenient for web frameworks that need shared state.
    /// No outer `Mutex` is needed — the pool uses fine-grained internal locks.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?
    ///     .into_shared();
    ///
    /// // Can now be cloned and shared across handlers
    /// let pool_clone = Arc::clone(&pool);
    /// ```
    pub fn into_shared(self) -> Arc<BrowserPool> {
        log::debug!("🔍 Converting BrowserPool into shared Arc<BrowserPool>");
        Arc::new(self)
    }

    /// Create a new builder for constructing a BrowserPool.
    ///
    /// This is the recommended way to create a pool.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?;
    /// ```
    pub fn builder() -> BrowserPoolBuilder {
        BrowserPoolBuilder::new()
    }
880
    /// Get a browser from the pool (or create one if empty).
    ///
    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
    /// allowing transparent access to browser methods.
    ///
    /// # Automatic Return
    ///
    /// The browser is automatically returned to the pool when the handle
    /// is dropped, even if your code panics (RAII pattern).
    ///
    /// # Errors
    ///
    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
    /// - Returns [`BrowserPoolError::HealthCheckFailed`] if all pooled browsers are unhealthy.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let browser = pool.get()?;
    /// let tab = browser.new_tab()?;
    /// tab.navigate_to("https://example.com")?;
    /// // browser returned automatically when it goes out of scope
    /// ```
    pub fn get(&self) -> Result<BrowserHandle> {
        log::trace!("🎯 BrowserPool::get() called");
        // Delegates to the inner state; all pooling logic lives there.
        self.inner.get_or_create_browser()
    }
909
910 /// Get pool statistics snapshot.
911 ///
912 /// # Returns
913 ///
914 /// [`PoolStats`] containing:
915 /// - `available`: Browsers in pool ready for checkout
916 /// - `active`: All browsers (pooled + checked out)
917 /// - `total`: Currently same as `active` (for future expansion)
918 ///
919 /// # Example
920 ///
921 /// ```rust,ignore
922 /// let stats = pool.stats();
923 /// println!("Available: {}, Active: {}", stats.available, stats.active);
924 /// ```
925 pub fn stats(&self) -> PoolStats {
926 let available = self.inner.available_count();
927 let active = self.inner.active_count();
928
929 log::trace!("📊 Pool stats: available={}, active={}", available, active);
930
931 PoolStats {
932 available,
933 active,
934 total: active,
935 }
936 }
937
    /// Get a reference to the pool configuration.
    ///
    /// Returns the configuration that was used to create this pool.
    /// The configuration is immutable after pool creation.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .config(
    ///         BrowserPoolConfigBuilder::new()
    ///             .max_pool_size(10)
    ///             .build()?
    ///     )
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?;
    ///
    /// println!("Max pool size: {}", pool.config().max_pool_size);
    /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
    /// ```
    ///
    /// # Use Cases
    ///
    /// - Logging configuration at startup
    /// - Monitoring/metrics collection
    /// - Readiness checks (comparing active count vs max_pool_size)
    /// - Debugging pool behavior
    #[inline]
    pub fn config(&self) -> &BrowserPoolConfig {
        self.inner.config()
    }
969
970 /// Warmup the pool by pre-creating browsers.
971 ///
972 /// This is highly recommended to reduce first-request latency.
973 /// Should be called during application startup.
974 ///
975 /// # Process
976 ///
977 /// 1. Creates `warmup_count` browsers sequentially with staggered timing
978 /// 2. Tests each browser with navigation
979 /// 3. Returns all browsers to pool
980 /// 4. Entire process has timeout (configurable via `warmup_timeout`)
981 ///
982 /// # Staggered Creation
983 ///
984 /// Browsers are created with a 30-second delay between them to ensure
985 /// their TTLs are offset. This prevents all browsers from expiring
986 /// at the same time.
987 ///
988 /// # Errors
989 ///
990 /// - Returns error if warmup times out.
991 /// - Returns error if browser creation fails.
992 ///
993 /// # Example
994 ///
995 /// ```rust,ignore
996 /// let pool = BrowserPool::builder()
997 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
998 /// .build()?;
999 ///
1000 /// // Warmup during startup
1001 /// pool.warmup().await?;
1002 /// ```
1003 pub async fn warmup(&self) -> Result<()> {
1004 let count = self.inner.config().warmup_count;
1005 let warmup_timeout = self.inner.config().warmup_timeout;
1006
1007 log::info!(
1008 "🔥 Starting browser pool warmup with {} instances (timeout: {}s)",
1009 count,
1010 warmup_timeout.as_secs()
1011 );
1012
1013 // Wrap entire warmup in timeout to prevent hanging forever
1014 let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
1015
1016 match warmup_result {
1017 Ok(Ok(())) => {
1018 let stats = self.stats();
1019 log::info!(
1020 "✅ Warmup completed successfully - Available: {}, Active: {}",
1021 stats.available,
1022 stats.active
1023 );
1024 Ok(())
1025 }
1026 Ok(Err(e)) => {
1027 log::error!("❌ Warmup failed with error: {}", e);
1028 Err(e)
1029 }
1030 Err(_) => {
1031 log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1032 Err(BrowserPoolError::Configuration(format!(
1033 "Warmup timed out after {}s",
1034 warmup_timeout.as_secs()
1035 )))
1036 }
1037 }
1038 }
1039
    /// Internal warmup implementation (separated for cleaner timeout wrapping).
    ///
    /// Creates browsers sequentially with a delay between them.
    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
    ///
    /// # Parameters
    ///
    /// * `count` - Number of browsers to create (from `warmup_count`).
    ///
    /// # Best-Effort Semantics
    ///
    /// Creation or validation failures are logged and counted but do not
    /// abort warmup; this function always returns `Ok(())`. The caller's
    /// overall `warmup_timeout` is the only hard failure path.
    async fn warmup_internal(&self, count: usize) -> Result<()> {
        log::debug!("🛠️ Starting internal warmup process for {} browsers", count);

        // STAGGER CONFIGURATION
        // We wait this long between creations to distribute expiration times.
        // Taken from config (`warmup_stagger`), not hard-coded.
        let stagger_interval = self.config().warmup_stagger;

        // Handles keep warmup browsers checked out (alive) until the end of
        // this function; dropping them returns the browsers to the pool (RAII).
        let mut handles = Vec::new();
        let mut created_count = 0;
        let mut failed_count = 0;

        for i in 0..count {
            log::debug!("🌐 Creating startup browser instance {}/{}", i + 1, count);

            // Per-browser timeout (15s per browser is reasonable)
            // This prevents one slow browser from blocking entire warmup.
            // Creation is blocking I/O, so it runs on the blocking thread pool.
            let browser_result = tokio::time::timeout(
                Duration::from_secs(15),
                tokio::task::spawn_blocking({
                    let inner = Arc::clone(&self.inner);
                    move || inner.create_browser_direct()
                }),
            )
            .await;

            // Triple-nested result: timeout -> task join -> browser creation.
            match browser_result {
                Ok(Ok(Ok(tracked))) => {
                    log::debug!(
                        "✅ Browser {} created, performing validation test...",
                        tracked.id()
                    );

                    // Test the browser with actual navigation
                    match tracked.browser().new_tab() {
                        Ok(tab) => {
                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());

                            // Navigate to test page
                            let nav_result = tab.navigate_to(
                                "data:text/html,<html><body>Warmup test</body></html>",
                            );
                            if let Err(e) = nav_result {
                                // NOTE(review): a navigation failure is only logged; the
                                // browser is still pooled below. Presumably intentional
                                // best-effort behavior (tab opened fine) — confirm.
                                log::warn!(
                                    "⚠️ Browser {} test navigation failed: {}",
                                    tracked.id(),
                                    e
                                );
                            } else {
                                log::trace!(
                                    "✅ Browser {} test: navigation successful",
                                    tracked.id()
                                );
                            }

                            // Clean up test tab
                            let _ = tab.close(true);

                            // Keep handle so browser stays alive
                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));

                            created_count += 1;
                            log::info!(
                                "✅ Browser instance {}/{} ready and validated",
                                i + 1,
                                count
                            );
                        }
                        Err(e) => {
                            failed_count += 1;
                            log::error!(
                                "❌ Browser {} validation test failed: {}",
                                tracked.id(),
                                e
                            );

                            // Remove from active tracking since it's broken
                            self.inner.remove_from_active(tracked.id());
                        }
                    }
                }
                Ok(Ok(Err(e))) => {
                    // Factory returned an error.
                    failed_count += 1;
                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
                }
                Ok(Err(e)) => {
                    // spawn_blocking task panicked.
                    failed_count += 1;
                    log::error!(
                        "❌ Browser {}/{} creation task panicked: {:?}",
                        i + 1,
                        count,
                        e
                    );
                }
                Err(_) => {
                    // Per-browser 15s timeout elapsed.
                    failed_count += 1;
                    log::error!(
                        "❌ Browser {}/{} creation timed out (15s limit)",
                        i + 1,
                        count
                    );
                }
            }

            // === STAGGER LOGIC ===
            // If this is not the last browser, wait before creating the next one.
            // This ensures their TTLs are offset by `stagger_interval`.
            // (Safe: the loop body never runs when count == 0, so `count - 1`
            // cannot underflow here.)
            if i < count - 1 {
                log::info!(
                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
                    stagger_interval.as_secs()
                );
                tokio::time::sleep(stagger_interval).await;
            }
        }

        log::info!(
            "📊 Warmup creation phase: {} created, {} failed",
            created_count,
            failed_count
        );

        // Return all browsers to pool by dropping handles
        log::debug!("🔍 Returning {} warmup browsers to pool...", handles.len());
        drop(handles);

        // No delay needed: return_browser() is synchronous in the happy path,
        // and warmup browsers are never TTL-expired (which is the only path
        // that spawns async work via spawn_replacement_creation).

        let final_stats = self.stats();
        log::info!(
            "🏁 Warmup internal completed - Pool: {}, Active: {}",
            final_stats.available,
            final_stats.active
        );

        Ok(())
    }
1182
    /// Start the keep-alive monitoring thread.
    ///
    /// This background thread:
    /// - Pings all active browsers periodically
    /// - Removes unresponsive browsers after max_ping_failures
    /// - Retires browsers that exceed TTL
    /// - Spawns replacement browsers as needed
    ///
    /// # Critical Design Notes
    ///
    /// - Uses condvar for immediate shutdown signaling (no sleep-polling)
    /// - Never holds locks during I/O operations (pings run on a snapshot)
    /// - Uses consistent lock ordering (active -> pool)
    ///
    /// # Parameters
    ///
    /// * `inner` - Arc reference to pool state.
    ///
    /// # Returns
    ///
    /// JoinHandle for the background thread.
    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
        // Snapshot config values once; they are immutable after pool creation.
        let ping_interval = inner.config().ping_interval;
        let max_failures = inner.config().max_ping_failures;
        let browser_ttl = inner.config().browser_ttl;
        let shutdown_signal = Arc::clone(inner.shutdown_signal());

        log::info!(
            "🚀 Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
            ping_interval.as_secs(),
            max_failures,
            browser_ttl.as_secs() / 60
        );

        thread::spawn(move || {
            log::info!("🏁 Keep-alive thread started successfully");

            // Track consecutive failures per browser ID
            let mut failure_counts: HashMap<u64, u32> = HashMap::new();

            loop {
                // Wait for next ping interval OR shutdown signal (whichever comes first)
                // Using condvar instead of sleep allows immediate wake-up on shutdown
                let (lock, cvar) = &*shutdown_signal;
                let wait_result = {
                    // Poisoned locks are recovered rather than propagated: a
                    // panic elsewhere must not kill health monitoring.
                    let shutdown = lock.lock().unwrap_or_else(|poisoned| {
                        log::warn!("Shutdown lock poisoned, recovering");
                        poisoned.into_inner()
                    });
                    cvar.wait_timeout(shutdown, ping_interval)
                        .unwrap_or_else(|poisoned| {
                            log::warn!("Condvar wait_timeout lock poisoned, recovering");
                            poisoned.into_inner()
                        })
                };

                // Destructure the (guard, WaitTimeoutResult) pair before dropping the guard.
                let shutdown_flag = *wait_result.0;
                let timed_out = wait_result.1.timed_out();

                // Check if we were signaled to shutdown
                if shutdown_flag {
                    log::info!("🛑 Keep-alive received shutdown signal via condvar");
                    break;
                }

                // Double-check atomic shutdown flag (belt and suspenders)
                if inner.is_shutting_down() {
                    log::info!("🛑 Keep-alive detected shutdown via atomic flag");
                    break;
                }

                // If spuriously woken (not timeout, not shutdown), continue waiting
                if !timed_out {
                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
                    continue;
                }

                log::trace!("⚡ Keep-alive ping cycle starting...");

                // Collect browsers to ping WITHOUT holding locks
                // This is critical: we clone the list and release the lock
                // before doing any I/O operations
                let browsers_to_ping = inner.get_active_browsers_snapshot();
                log::trace!(
                    "Keep-alive checking {} active browsers",
                    browsers_to_ping.len()
                );

                // Now ping browsers without holding any locks
                let mut to_remove = Vec::new();
                let mut expired_browsers = Vec::new();

                for (id, tracked) in browsers_to_ping {
                    // Check shutdown during ping loop (allows early exit)
                    if inner.is_shutting_down() {
                        log::info!("Shutdown detected during ping loop, exiting immediately");
                        return;
                    }

                    // Check TTL before pinging (no point pinging expired browsers)
                    if tracked.is_expired(browser_ttl) {
                        log::info!(
                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
                            id,
                            tracked.age_minutes(),
                            browser_ttl.as_secs() / 60
                        );
                        expired_browsers.push(id);
                        continue; // Skip ping for expired browsers
                    }

                    // Perform health check (this is I/O, no locks held)
                    use crate::traits::Healthcheck;
                    match tracked.ping() {
                        Ok(_) => {
                            // Reset failure count on success
                            if failure_counts.remove(&id).is_some() {
                                log::debug!("Browser {} ping successful, failure count reset", id);
                            }
                        }
                        Err(e) => {
                            // Only process failures if NOT shutting down
                            // (during shutdown, browsers may legitimately fail)
                            if !inner.is_shutting_down() {
                                let failures = failure_counts.entry(id).or_insert(0);
                                *failures += 1;

                                log::warn!(
                                    "Browser {} ping failed (attempt {}/{}): {}",
                                    id,
                                    failures,
                                    max_failures,
                                    e
                                );

                                // Remove if exceeded max failures
                                if *failures >= max_failures {
                                    log::error!(
                                        "Browser {} exceeded max ping failures ({}), marking for removal",
                                        id,
                                        max_failures
                                    );
                                    to_remove.push(id);
                                }
                            }
                        }
                    }
                }

                // Check shutdown before cleanup (avoid work if shutting down)
                if inner.is_shutting_down() {
                    log::info!("Shutdown detected before cleanup, skipping and exiting");
                    break;
                }

                // Handle TTL retirements first (they need replacement browsers)
                if !expired_browsers.is_empty() {
                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
                }

                // Handle failed browsers (remove from tracking and pool)
                if !to_remove.is_empty() {
                    log::warn!("Removing {} failed browsers from pool", to_remove.len());

                    // Track how many were actually removed so we know how many to replace
                    let mut actual_removed_count = 0;

                    // Remove dead browsers from active tracking
                    for id in &to_remove {
                        if inner.remove_from_active(*id).is_some() {
                            actual_removed_count += 1;
                            log::debug!("Removed failed browser {} from active tracking", id);
                        }
                        failure_counts.remove(id);
                    }

                    log::debug!(
                        "Active browsers after failure cleanup: {}",
                        inner.active_count()
                    );

                    // Clean up pool (remove dead browsers)
                    inner.remove_from_available(&to_remove);

                    log::debug!("Pool size after cleanup: {}", inner.available_count());

                    // Trigger replacement for the browsers we just removed
                    if actual_removed_count > 0 {
                        log::info!(
                            "Spawning {} replacement browsers for failed ones",
                            actual_removed_count
                        );
                        BrowserPoolInner::spawn_replacement_creation(
                            Arc::clone(&inner),
                            actual_removed_count,
                        );
                    }
                }

                // Log keep-alive cycle summary
                log::debug!(
                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
                    inner.active_count(),
                    inner.available_count(),
                    failure_counts.len()
                );
            }

            log::info!("Keep-alive thread exiting cleanly");
        })
    }
1395
1396 /// Handle browser retirement due to TTL expiration.
1397 ///
1398 /// This function:
1399 /// 1. Removes expired browsers from active and pool tracking
1400 /// 2. Spawns async tasks to create replacement browsers
1401 /// 3. Maintains pool target size
1402 ///
1403 /// # Critical Lock Ordering
1404 ///
1405 /// Acquires active -> pool locks together to prevent races.
1406 ///
1407 /// # Parameters
1408 ///
1409 /// * `inner` - Arc reference to pool state.
1410 /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1411 /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1412 fn handle_browser_retirement(
1413 inner: &Arc<BrowserPoolInner>,
1414 expired_ids: Vec<u64>,
1415 failure_counts: &mut HashMap<u64, u32>,
1416 ) {
1417 log::info!(
1418 "Retiring {} expired browsers (TTL enforcement)",
1419 expired_ids.len()
1420 );
1421
1422 // Remove expired browsers from active tracking
1423 let mut retired_count = 0;
1424 for id in &expired_ids {
1425 if inner.remove_from_active(*id).is_some() {
1426 retired_count += 1;
1427 log::debug!("Removed expired browser {} from active tracking", id);
1428 }
1429 // Clean up failure tracking
1430 failure_counts.remove(id);
1431 }
1432
1433 // Remove from pool as well
1434 inner.remove_from_available(&expired_ids);
1435
1436 log::debug!(
1437 "After retirement - Active: {}, Pooled: {}",
1438 inner.active_count(),
1439 inner.available_count()
1440 );
1441
1442 // Create replacement browsers to maintain target count
1443 if retired_count > 0 {
1444 log::info!(
1445 "Spawning {} replacement browsers for retired ones",
1446 retired_count
1447 );
1448 BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1449 } else {
1450 log::debug!("No browsers were actually retired (already removed)");
1451 }
1452 }
1453
1454 /// Asynchronously shutdown the pool (recommended method).
1455 ///
1456 /// This is the preferred shutdown method as it can properly await
1457 /// async task cancellation. Should be called during application shutdown.
1458 ///
1459 /// # Shutdown Process
1460 ///
1461 /// 1. Set atomic shutdown flag (stops new operations)
1462 /// 2. Signal condvar to wake keep-alive thread immediately
1463 /// 3. Wait for keep-alive thread to exit (with timeout)
1464 /// 4. Abort all replacement creation tasks
1465 /// 5. Wait briefly for cleanup
1466 /// 6. Log final statistics
1467 ///
1468 /// # Timeout
1469 ///
1470 /// Keep-alive thread is given 5 seconds to exit gracefully.
1471 /// If it doesn't exit, we log an error but continue shutdown.
1472 ///
1473 /// # Example
1474 ///
1475 /// ```rust,ignore
1476 /// let mut pool = /* ... */;
1477 ///
1478 /// // During application shutdown
1479 /// pool.shutdown_async().await;
1480 /// ```
1481 pub async fn shutdown_async(&mut self) {
1482 log::info!("Shutting down browser pool (async mode)...");
1483
1484 // Step 1: Set shutdown flag (prevents new operations)
1485 self.inner.set_shutting_down(true);
1486 log::debug!("Shutdown flag set");
1487
1488 // Step 2: Signal condvar to wake keep-alive thread immediately
1489 // This is critical - without this, keep-alive waits for full ping_interval
1490 {
1491 let (lock, cvar) = &**self.inner.shutdown_signal();
1492 let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1493 log::warn!("Shutdown lock poisoned, recovering");
1494 poisoned.into_inner()
1495 });
1496 *shutdown = true;
1497 cvar.notify_all();
1498 log::debug!("Shutdown signal sent to keep-alive thread");
1499 } // Lock released here
1500
1501 // Step 3: Wait for keep-alive thread to exit
1502 if let Some(handle) = self.keep_alive_handle.take() {
1503 log::debug!("Waiting for keep-alive thread to exit...");
1504
1505 // Wrap thread join in spawn_blocking to make it async-friendly
1506 let join_task = tokio::task::spawn_blocking(move || handle.join());
1507
1508 // Give it 5 seconds to exit gracefully
1509 match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1510 Ok(Ok(Ok(_))) => {
1511 log::info!("Keep-alive thread stopped cleanly");
1512 }
1513 Ok(Ok(Err(_))) => {
1514 log::error!("Keep-alive thread panicked during shutdown");
1515 }
1516 Ok(Err(_)) => {
1517 log::error!("Keep-alive join task panicked");
1518 }
1519 Err(_) => {
1520 log::error!("Keep-alive thread didn't exit within 5s timeout");
1521 }
1522 }
1523 } else {
1524 log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1525 }
1526
1527 // Step 4: Abort all replacement creation tasks
1528 log::info!("Aborting replacement creation tasks...");
1529 let aborted_count = self.inner.abort_replacement_tasks();
1530 if aborted_count > 0 {
1531 log::info!("Aborted {} replacement tasks", aborted_count);
1532 } else {
1533 log::debug!("No replacement tasks to abort");
1534 }
1535
1536 // Step 5: Small delay to let aborted tasks clean up
1537 tokio::time::sleep(Duration::from_millis(100)).await;
1538
1539 // Step 6: Log final statistics
1540 let stats = self.stats();
1541 log::info!(
1542 "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1543 stats.available,
1544 stats.active,
1545 stats.total
1546 );
1547 }
1548
1549 /// Synchronously shutdown the pool (fallback method).
1550 ///
1551 /// This is a simplified shutdown for use in Drop or non-async contexts.
1552 /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
1553 ///
1554 /// # Note
1555 ///
1556 /// This method doesn't wait for replacement tasks to finish since
1557 /// there's no async runtime available. Tasks are aborted but may not
1558 /// have cleaned up yet.
1559 pub fn shutdown(&mut self) {
1560 log::debug!("Calling synchronous shutdown...");
1561 self.shutdown_sync();
1562 }
1563
1564 /// Internal synchronous shutdown implementation.
1565 fn shutdown_sync(&mut self) {
1566 log::info!("Shutting down browser pool (sync mode)...");
1567
1568 // Set shutdown flag
1569 self.inner.set_shutting_down(true);
1570 log::debug!("Shutdown flag set");
1571
1572 // Signal condvar (same as async version)
1573 {
1574 let (lock, cvar) = &**self.inner.shutdown_signal();
1575 let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1576 log::warn!("Shutdown lock poisoned, recovering");
1577 poisoned.into_inner()
1578 });
1579 *shutdown = true;
1580 cvar.notify_all();
1581 log::debug!("Shutdown signal sent");
1582 }
1583
1584 // Wait for keep-alive thread
1585 if let Some(handle) = self.keep_alive_handle.take() {
1586 log::debug!("Joining keep-alive thread (sync)...");
1587
1588 match handle.join() {
1589 Ok(_) => log::info!("Keep-alive thread stopped"),
1590 Err(_) => log::error!("Keep-alive thread panicked"),
1591 }
1592 }
1593
1594 // Abort replacement tasks (best effort - they won't make progress without runtime)
1595 let aborted_count = self.inner.abort_replacement_tasks();
1596 if aborted_count > 0 {
1597 log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1598 }
1599
1600 let stats = self.stats();
1601 log::info!(
1602 "Sync shutdown complete - Available: {}, Active: {}",
1603 stats.available,
1604 stats.active
1605 );
1606 }
1607
    /// Get a reference to the inner pool state.
    ///
    /// This is primarily for internal use and testing; it is hidden from
    /// rustdoc and crate-private, so it is not part of the public API.
    #[doc(hidden)]
    #[allow(dead_code)]
    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
        &self.inner
    }
1616}
1617
1618impl Drop for BrowserPool {
1619 /// Automatic cleanup when pool is dropped.
1620 ///
1621 /// This ensures resources are released even if shutdown wasn't called explicitly.
1622 /// Uses sync shutdown since Drop can't be async.
1623 fn drop(&mut self) {
1624 log::debug!("🛑 BrowserPool Drop triggered - running cleanup");
1625
1626 // Only shutdown if not already done
1627 if !self.inner.is_shutting_down() {
1628 log::warn!("⚠ BrowserPool dropped without explicit shutdown - cleaning up");
1629 self.shutdown();
1630 } else {
1631 log::debug!(" Pool already shutdown, Drop is no-op");
1632 }
1633 }
1634}
1635
1636// ============================================================================
1637// BrowserPoolBuilder
1638// ============================================================================
1639
1640/// Builder for constructing a [`BrowserPool`] with validation.
1641///
1642/// This is the recommended way to create a pool as it validates
1643/// configuration and provides sensible defaults.
1644///
1645/// # Example
1646///
1647/// ```rust,ignore
1648/// use std::time::Duration;
1649/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
1650///
1651/// let pool = BrowserPool::builder()
1652/// .config(
1653/// BrowserPoolConfigBuilder::new()
1654/// .max_pool_size(10)
1655/// .warmup_count(5)
1656/// .browser_ttl(Duration::from_secs(7200))
1657/// .build()?
1658/// )
1659/// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1660/// .enable_keep_alive(true)
1661/// .build()?;
1662/// ```
1663pub struct BrowserPoolBuilder {
1664 /// Optional configuration (uses default if not provided).
1665 config: Option<BrowserPoolConfig>,
1666
1667 /// Browser factory (required).
1668 factory: Option<Box<dyn BrowserFactory>>,
1669
1670 /// Whether to enable keep-alive thread (default: true).
1671 enable_keep_alive: bool,
1672}
1673
1674impl BrowserPoolBuilder {
1675 /// Create a new builder with defaults.
1676 ///
1677 /// # Example
1678 ///
1679 /// ```rust,ignore
1680 /// let builder = BrowserPoolBuilder::new();
1681 /// ```
1682 pub fn new() -> Self {
1683 Self {
1684 config: None,
1685 factory: None,
1686 enable_keep_alive: true,
1687 }
1688 }
1689
1690 /// Set custom configuration.
1691 ///
1692 /// If not called, uses [`BrowserPoolConfig::default()`].
1693 ///
1694 /// # Parameters
1695 ///
1696 /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1697 ///
1698 /// # Example
1699 ///
1700 /// ```rust,ignore
1701 /// let config = BrowserPoolConfigBuilder::new()
1702 /// .max_pool_size(10)
1703 /// .build()?;
1704 ///
1705 /// let pool = BrowserPool::builder()
1706 /// .config(config)
1707 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1708 /// .build()?;
1709 /// ```
1710 pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1711 self.config = Some(config);
1712 self
1713 }
1714
1715 /// Set browser factory (required).
1716 ///
1717 /// The factory is responsible for creating browser instances.
1718 /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1719 ///
1720 /// # Parameters
1721 ///
1722 /// * `factory` - A boxed [`BrowserFactory`] implementation.
1723 ///
1724 /// # Example
1725 ///
1726 /// ```rust,ignore
1727 /// let pool = BrowserPool::builder()
1728 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1729 /// .build()?;
1730 /// ```
1731 pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1732 self.factory = Some(factory);
1733 self
1734 }
1735
1736 /// Enable or disable keep-alive thread.
1737 ///
1738 /// Keep-alive should be disabled only for testing.
1739 /// Production use should always have it enabled.
1740 ///
1741 /// # Parameters
1742 ///
1743 /// * `enable` - Whether to enable the keep-alive thread.
1744 ///
1745 /// # Example
1746 ///
1747 /// ```rust,ignore
1748 /// // Disable for tests
1749 /// let pool = BrowserPool::builder()
1750 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1751 /// .enable_keep_alive(false)
1752 /// .build()?;
1753 /// ```
1754 pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1755 self.enable_keep_alive = enable;
1756 self
1757 }
1758
1759 /// Build the browser pool.
1760 ///
1761 /// # Errors
1762 ///
1763 /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1764 ///
1765 /// # Panics
1766 ///
1767 /// Panics if called outside a tokio runtime context.
1768 ///
1769 /// # Example
1770 ///
1771 /// ```rust,ignore
1772 /// let pool = BrowserPool::builder()
1773 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1774 /// .build()?;
1775 /// ```
1776 pub fn build(self) -> Result<BrowserPool> {
1777 let config = self.config.unwrap_or_default();
1778 let factory = self.factory.ok_or_else(|| {
1779 BrowserPoolError::Configuration("No browser factory provided".to_string())
1780 })?;
1781
1782 log::info!("📦 Building browser pool with config: {:?}", config);
1783
1784 // Create inner state
1785 let inner = BrowserPoolInner::new(config, factory);
1786
1787 // Start keep-alive thread if enabled
1788 let keep_alive_handle = if self.enable_keep_alive {
1789 log::info!("🚀 Starting keep-alive monitoring thread");
1790 Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1791 } else {
1792 log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1793 None
1794 };
1795
1796 log::info!("✅ Browser pool built successfully");
1797
1798 Ok(BrowserPool {
1799 inner,
1800 keep_alive_handle,
1801 })
1802 }
1803}
1804
1805impl Default for BrowserPoolBuilder {
1806 fn default() -> Self {
1807 Self::new()
1808 }
1809}
1810
1811// ============================================================================
1812// Environment Initialization (feature-gated)
1813// ============================================================================
1814
/// Initialize browser pool from environment variables.
///
/// Convenience bootstrap for the common case: read pool settings from
/// the process environment, build the pool, warm it up, and hand back a
/// shareable handle.
///
/// # Feature Flag
///
/// Only compiled when the `env-config` feature is enabled.
///
/// # Environment Variables
///
/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
/// - `CHROME_PATH`: Custom Chrome binary path (optional)
///
/// # Returns
///
/// `Arc<BrowserPool>` ready to be shared with web handlers.
///
/// # Errors
///
/// - Returns error if the environment yields an invalid configuration.
/// - Returns error if warmup fails.
///
/// # Example
///
/// ```rust,ignore
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     env_logger::init();
///
///     let pool = init_browser_pool().await?;
///
///     // Use pool in handlers...
///
///     Ok(())
/// }
/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<BrowserPool>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Environment-derived settings.
    let config = from_env()?;
    let chrome_path = chrome_path_from_env();

    log::info!("Pool configuration from environment:");
    log::info!(" - Max pool size: {}", config.max_pool_size);
    log::info!(" - Warmup count: {}", config.warmup_count);
    log::info!(
        " - Browser TTL: {}s ({}min)",
        config.browser_ttl.as_secs(),
        config.browser_ttl.as_secs() / 60
    );
    log::info!(" - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        " - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // Pick the factory variant depending on whether a custom path was given.
    let factory: Box<dyn BrowserFactory> = if let Some(path) = chrome_path {
        log::info!("Using custom Chrome path: {}", path);
        Box::new(ChromeBrowserFactory::with_path(path))
    } else {
        log::info!("Using auto-detected Chrome browser");
        Box::new(ChromeBrowserFactory::with_defaults())
    };

    // Assemble the pool with health monitoring enabled.
    log::debug!("Building browser pool...");
    let pool = BrowserPool::builder()
        .config(config.clone())
        .factory(factory)
        .enable_keep_alive(true)
        .build()
        .map_err(|e| {
            log::error!("❌ Failed to create browser pool: {}", e);
            e
        })?;

    log::info!("✅ Browser pool created successfully");

    // Pre-create browsers so the first request doesn't pay startup latency.
    log::info!(
        "Warming up browser pool with {} instances...",
        config.warmup_count
    );
    pool.warmup().await.map_err(|e| {
        log::error!("❌ Failed to warmup pool: {}", e);
        e
    })?;

    let stats = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        stats.available,
        stats.active,
        stats.total
    );

    Ok(pool.into_shared())
}
1928
1929// ============================================================================
1930// Unit Tests
1931// ============================================================================
1932
#[cfg(test)]
mod tests {
    use super::*;

    /// Building without a factory must fail.
    ///
    /// The factory is mandatory: without it the pool has no way to
    /// create browser instances, so the builder returns a
    /// `Configuration` error instead of a pool.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised inside a tokio runtime.
        let rt = tokio::runtime::Runtime::new().unwrap();

        rt.block_on(async {
            let config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // No .factory(...) call on purpose.
            let result = BrowserPool::builder().config(config).build();

            assert!(result.is_err(), "Build should fail without factory");

            match result {
                Err(BrowserPoolError::Configuration(msg)) => {
                    assert!(
                        msg.contains("No browser factory provided"),
                        "Expected factory error, got: {}",
                        msg
                    );
                }
                _ => panic!("Expected Configuration error for missing factory"),
            }
        });
    }

    /// `Default` must produce the same state as `new()`.
    #[test]
    fn test_builder_default() {
        let builder: BrowserPoolBuilder = Default::default();
        assert!(builder.config.is_none());
        assert!(builder.factory.is_none());
        assert!(builder.enable_keep_alive);
    }

    /// The keep-alive thread can be switched off (tests only).
    #[test]
    fn test_builder_disable_keep_alive() {
        let builder = BrowserPoolBuilder::new().enable_keep_alive(false);
        assert!(!builder.enable_keep_alive);
    }
}
1987}