html2pdf_api/pool.rs
1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//! ├─ BrowserPoolInner (shared state)
21//! │ ├─ available: Vec<TrackedBrowser> (pooled, ready to use)
22//! │ ├─ active: HashMap<id, TrackedBrowser> (all live browsers, pooled + in-use, tracked for health)
23//! │ └─ replacement_tasks: Vec<JoinHandle> (async replacement creators)
24//! └─ keep_alive_handle: JoinHandle (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//! // Create pool
41//! let mut pool = BrowserPool::builder()
42//! .config(
43//! BrowserPoolConfigBuilder::new()
44//! .max_pool_size(5)
45//! .warmup_count(3)
46//! .build()?
47//! )
48//! .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//! .build()?;
50//!
51//! // Warmup
52//! pool.warmup().await?;
53//!
54//! // Use browsers
55//! {
56//! let browser = pool.get()?;
57//! let tab = browser.new_tab()?;
58//! // ... do work ...
59//! } // browser returned to pool automatically
60//!
61//! // Shutdown
62//! pool.shutdown_async().await;
63//!
64//! Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
87/// Internal shared state for the browser pool.
88///
89/// This struct contains all shared state and is wrapped in Arc for thread-safe
90/// sharing between the pool, handles, and background threads.
91///
92/// # Lock Ordering (CRITICAL)
93///
94/// Always acquire locks in this order to prevent deadlocks:
95/// 1. `active` (browsers currently in use)
96/// 2. `available` (browsers in pool ready for use)
97///
98/// Never hold locks during I/O operations or browser creation.
99///
100/// # Thread Safety
101///
102/// All fields are protected by appropriate synchronization primitives:
103/// - `Mutex` for mutable collections
104/// - `AtomicBool` for shutdown flag
105/// - `Arc` for shared ownership
106pub(crate) struct BrowserPoolInner {
107 /// Configuration (immutable after creation).
108 config: BrowserPoolConfig,
109
110 /// Browsers available for checkout (not currently in use).
111 ///
112 /// Protected by Mutex. Browsers are moved from here when checked out
113 /// and returned here when released (if pool not full).
114 available: Mutex<Vec<Arc<TrackedBrowser>>>,
115
116 /// All browsers that exist (both pooled and checked out).
117 ///
118 /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
119 /// Maps browser ID -> TrackedBrowser for fast lookup.
120 active: Mutex<HashMap<u64, Arc<TrackedBrowser>>>,
121
122 /// Factory for creating new browser instances.
123 factory: Box<dyn BrowserFactory>,
124
125 /// Atomic flag indicating shutdown in progress.
126 ///
127 /// Checked before expensive operations. Once set, no new operations start.
128 shutting_down: AtomicBool,
129
130 /// Background tasks creating replacement browsers.
131 ///
132 /// Tracked so we can abort them during shutdown.
133 replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,
134
135 /// Handle to tokio runtime for spawning async tasks.
136 ///
137 /// Captured at creation time to allow spawning from any context.
138 runtime_handle: tokio::runtime::Handle,
139
140 /// Shutdown signaling mechanism for keep-alive thread.
141 ///
142 /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
143 /// instead of waiting for full ping_interval.
144 shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
145}
146
147impl BrowserPoolInner {
148 /// Create a new browser pool inner state.
149 ///
150 /// # Parameters
151 ///
152 /// * `config` - Validated configuration.
153 /// * `factory` - Browser factory for creating instances.
154 ///
155 /// # Panics
156 ///
157 /// Panics if called outside a tokio runtime context.
158 pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159 log::info!(
160 "🚀 Initializing browser pool with capacity {}",
161 config.max_pool_size
162 );
163 log::debug!(
164 "📋 Pool config: warmup={}, TTL={}s, ping_interval={}s",
165 config.warmup_count,
166 config.browser_ttl.as_secs(),
167 config.ping_interval.as_secs()
168 );
169
170 // Capture runtime handle for spawning async tasks
171 // This allows us to spawn from sync contexts (like Drop)
172 let runtime_handle = tokio::runtime::Handle::current();
173
174 Arc::new(Self {
175 config,
176 available: Mutex::new(Vec::new()),
177 active: Mutex::new(HashMap::new()),
178 factory,
179 shutting_down: AtomicBool::new(false),
180 replacement_tasks: Mutex::new(Vec::new()),
181 runtime_handle,
182 shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183 })
184 }
185
186 /// Create a lightweight mock pool for testing without background threads.
187 #[cfg(test)]
188 pub(crate) fn new_for_test(
189 config: BrowserPoolConfig,
190 factory: Box<dyn BrowserFactory>,
191 runtime_handle: tokio::runtime::Handle,
192 ) -> Self {
193 Self {
194 config,
195 available: Mutex::new(Vec::new()),
196 active: Mutex::new(HashMap::new()),
197 factory,
198 shutting_down: AtomicBool::new(false),
199 replacement_tasks: Mutex::new(Vec::new()),
200 runtime_handle,
201 shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
202 }
203 }
204
205 /// Create a browser directly without using the pool.
206 ///
207 /// Used for:
208 /// - Initial warmup
209 /// - Replacing failed browsers
210 /// - When pool is empty
211 ///
212 /// # Important
213 ///
214 /// Adds the browser to `active` tracking immediately for health monitoring.
215 ///
216 /// # Errors
217 ///
218 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
219 /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
220 pub(crate) fn create_browser_direct(&self) -> Result<Arc<TrackedBrowser>> {
221 // Early exit if shutting down (don't waste time creating browsers)
222 if self.shutting_down.load(Ordering::Acquire) {
223 log::debug!("🛑 Skipping browser creation - pool is shutting down");
224 return Err(BrowserPoolError::ShuttingDown);
225 }
226
227 log::debug!("📦 Creating new browser directly via factory...");
228
229 // Factory handles all Chrome launch complexity
230 let browser = self.factory.create()?;
231
232 // Wrap with tracking metadata and Arc immediately
233 let tracked = Arc::new(TrackedBrowser::new(browser)?);
234 let id = tracked.id();
235
236 // Add to active tracking immediately for health monitoring
237 // This ensures keep-alive thread will monitor it
238 if let Ok(mut active) = self.active.lock() {
239 active.insert(id, Arc::clone(&tracked));
240 log::debug!(
241 "📊 Browser {} added to active tracking (total active: {})",
242 id,
243 active.len()
244 );
245 } else {
246 log::warn!(
247 "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
248 id
249 );
250 }
251
252 log::info!("✅ Created new browser with ID {}", id);
253 Ok(tracked)
254 }
255
256 /// Get a browser from pool or create a new one.
257 ///
258 /// # Algorithm
259 ///
260 /// 1. Loop through pooled browsers
261 /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
262 /// - If near expiry: Skip (drop) it immediately.
263 /// - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
264 /// 3. For valid browsers, perform detailed health check (without holding locks)
265 /// 4. If healthy, return it
266 /// 5. If unhealthy, remove from active tracking and try next
267 /// 6. If pool empty or all skipped/unhealthy, create new browser
268 ///
269 /// # Critical: Lock-Free Health Checks
270 ///
271 /// Health checks are performed WITHOUT holding locks to avoid blocking
272 /// other threads. This is why we use a loop pattern instead of iterator.
273 ///
274 /// # Returns
275 ///
276 /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
277 ///
278 /// # Errors
279 ///
280 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
281 /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
282 pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
283 log::debug!("🔍 Attempting to get browser from pool...");
284
285 // Try to get from pool - LOOP pattern to avoid holding lock during health checks
286 // This is critical for concurrency: we release the lock between attempts
287 loop {
288 // Acquire lock briefly to pop one browser
289 let tracked_opt = {
290 let mut available = self.available.lock().unwrap_or_else(|poisoned| {
291 log::warn!("Pool available lock poisoned, recovering");
292 poisoned.into_inner()
293 });
294 let popped = available.pop();
295 log::trace!("📊 Pool size after pop: {}", available.len());
296 popped
297 }; // Lock released here - critical for performance
298
299 if let Some(tracked) = tracked_opt {
300 // === LOGIC START: Grace Period Check ===
301 let age = tracked.created_at().elapsed();
302 let ttl = self.config.browser_ttl;
303
304 // Safety margin matching your stagger interval
305 let safety_margin = Duration::from_secs(30);
306
307 // If browser is about to expire, don't use it.
308 if age + safety_margin > ttl {
309 log::debug!(
310 "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
311 tracked.id(),
312 age.as_secs()
313 );
314
315 // CRITICAL: We do NOT remove/recreate here.
316 // By simply 'continuing', we drop this 'tracked' instance.
317 // 1. It is NOT returned to 'available' (so no user gets it).
318 // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
319 // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
320 continue;
321 }
322 // === LOGIC END: Grace Period Check ===
323
324 log::debug!(
325 "🔍 Testing browser {} from pool for health...",
326 tracked.id()
327 );
328
329 // Detailed health check WITHOUT holding any locks
330 // This prevents blocking other threads during I/O
331 match tracked.browser().new_tab() {
332 Ok(tab) => {
333 log::trace!(
334 "✅ Browser {} health check: new_tab() successful",
335 tracked.id()
336 );
337
338 // Test navigation capability (full health check)
339 match tab
340 .navigate_to("data:text/html,<html><body>Health check</body></html>")
341 {
342 Ok(_) => {
343 log::trace!(
344 "✅ Browser {} health check: navigation successful",
345 tracked.id()
346 );
347
348 // Test cleanup capability
349 match tab.close(true) {
350 Ok(_) => {
351 log::debug!(
352 "✅ Browser {} passed full health check - ready for use",
353 tracked.id()
354 );
355
356 // Get pool size for logging (brief lock)
357 let pool_size = {
358 let available =
359 self.available.lock().unwrap_or_else(|poisoned| {
360 log::warn!(
361 "Pool available lock poisoned, recovering"
362 );
363 poisoned.into_inner()
364 });
365 available.len()
366 };
367
368 log::info!(
369 "♻️ Reusing healthy browser {} from pool (pool size: {})",
370 tracked.id(),
371 pool_size
372 );
373
374 // Return healthy browser wrapped in RAII handle
375 return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
376 }
377 Err(e) => {
378 log::warn!(
379 "❌ Browser {} health check: tab close failed: {}",
380 tracked.id(),
381 e
382 );
383 }
384 }
385 }
386 Err(e) => {
387 log::warn!(
388 "❌ Browser {} health check: navigation failed: {}",
389 tracked.id(),
390 e
391 );
392 }
393 }
394 }
395 Err(e) => {
396 log::warn!(
397 "❌ Browser {} health check: new_tab() failed: {}",
398 tracked.id(),
399 e
400 );
401 }
402 }
403
404 // If we reach here, health check failed
405 // Remove from active tracking (browser is dead)
406 log::warn!(
407 "🗑️ Removing unhealthy browser {} from active tracking",
408 tracked.id()
409 );
410 {
411 let mut active = self.active.lock().unwrap_or_else(|poisoned| {
412 log::warn!("Pool active lock poisoned, recovering");
413 poisoned.into_inner()
414 });
415 active.remove(&tracked.id());
416 log::debug!("📊 Active browsers after removal: {}", active.len());
417 }
418
419 // Continue loop to try next browser in pool
420 log::debug!("🔍 Trying next browser from pool...");
421 } else {
422 // Pool is empty, break to create new browser
423 log::debug!("📥 Pool is empty, will create new browser");
424 break;
425 }
426 }
427
428 // Pool is empty or no healthy browsers found
429 log::info!("📦 Creating new browser (pool was empty or all browsers unhealthy)");
430
431 let tracked = self.create_browser_direct()?;
432
433 log::info!("✅ Returning newly created browser {}", tracked.id());
434 Ok(BrowserHandle::new(tracked, Arc::clone(self)))
435 }
436
437 /// Return a browser to the pool (called by BrowserHandle::drop).
438 ///
439 /// # Critical Lock Ordering
440 ///
441 /// Always acquires locks in order: active -> available.
442 /// Both locks are held together to prevent race conditions.
443 ///
444 /// # Algorithm
445 ///
446 /// 1. Acquire both locks (order: active, then available)
447 /// 2. Verify browser is in active tracking
448 /// 3. Check TTL - if expired, retire and trigger replacement
449 /// 4. If pool has space, add to available pool
450 /// 5. If pool full, remove from active (browser gets dropped)
451 ///
452 /// # Parameters
453 ///
454 /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
455 /// * `tracked` - The browser being returned.
456 pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: Arc<TrackedBrowser>) {
457 log::debug!("♻️ Returning browser {} to pool...", tracked.id());
458
459 // Early exit if shutting down (don't waste time managing pool)
460 if self_arc.shutting_down.load(Ordering::Acquire) {
461 log::debug!(
462 "🛑 Pool shutting down, not returning browser {}",
463 tracked.id()
464 );
465 return;
466 }
467
468 // CRITICAL: Always acquire in order: active -> pool
469 // Holding both locks prevents ALL race conditions:
470 // - Prevents concurrent modifications to browser state
471 // - Prevents duplicate returns
472 // - Ensures pool size limits are respected
473 let mut active = self_arc.active.lock().unwrap_or_else(|poisoned| {
474 log::warn!("Pool active lock poisoned, recovering");
475 poisoned.into_inner()
476 });
477 let mut pool = self_arc.available.lock().unwrap_or_else(|poisoned| {
478 log::warn!("Pool available lock poisoned, recovering");
479 poisoned.into_inner()
480 });
481
482 // Verify browser is actually tracked (sanity check)
483 if !active.contains_key(&tracked.id()) {
484 log::warn!(
485 "❌ Browser {} not in active tracking (probably already removed), skipping return",
486 tracked.id()
487 );
488 return;
489 }
490
491 // Check TTL before returning to pool
492 // Expired browsers should be retired to prevent memory leaks
493 if tracked.is_expired(self_arc.config.browser_ttl) {
494 log::info!(
495 "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
496 tracked.id(),
497 tracked.age_minutes(),
498 self_arc.config.browser_ttl.as_secs() / 60
499 );
500
501 // Remove from active tracking
502 active.remove(&tracked.id());
503 log::debug!("📊 Active browsers after TTL retirement: {}", active.len());
504
505 // Release locks before spawning replacement task
506 drop(active);
507 drop(pool);
508
509 // Trigger async replacement creation (non-blocking)
510 log::debug!("🔍 Triggering replacement browser creation for expired browser");
511 Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
512 return;
513 }
514
515 // Prevent duplicate returns (defensive programming)
516 if pool.iter().any(|b| b.id() == tracked.id()) {
517 log::warn!(
518 "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
519 tracked.id()
520 );
521 return;
522 }
523
524 // Check if pool has space for this browser
525 if pool.len() < self_arc.config.max_pool_size {
526 // Add to pool for reuse
527 pool.push(tracked.clone());
528 log::info!(
529 "♻️ Browser {} returned to pool (pool size: {}/{})",
530 tracked.id(),
531 pool.len(),
532 self_arc.config.max_pool_size
533 );
534 } else {
535 // Pool is full, remove from tracking (browser will be dropped)
536 log::debug!(
537 "️ Pool full ({}/{}), removing browser {} from system",
538 pool.len(),
539 self_arc.config.max_pool_size,
540 tracked.id()
541 );
542 active.remove(&tracked.id());
543 log::debug!("📊 Active browsers after removal: {}", active.len());
544 }
545 }
546
547 /// Asynchronously create replacement browsers (internal helper).
548 ///
549 /// This is the async work function that actually creates browsers.
550 /// It's spawned as a tokio task by `spawn_replacement_creation`.
551 ///
552 /// # Algorithm
553 ///
554 /// 1. Check shutdown flag before each creation
555 /// 2. Check pool space before each creation
556 /// 3. Use spawn_blocking for the blocking (synchronous) browser creation call
557 /// 4. Add successful browsers to pool
558 /// 5. Log detailed status
559 ///
560 /// # Parameters
561 ///
562 /// * `inner` - Arc reference to pool state.
563 /// * `count` - Number of browsers to attempt to create.
564 async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
565 log::info!(
566 "🔍 Starting async replacement creation for {} browsers",
567 count
568 );
569
570 let mut created_count = 0;
571 let mut failed_count = 0;
572
573 for i in 0..count {
574 // Check shutdown flag before each expensive operation
575 if inner.shutting_down.load(Ordering::Acquire) {
576 log::info!(
577 "🛑 Shutdown detected during replacement creation, stopping at {}/{}",
578 i,
579 count
580 );
581 break;
582 }
583
584 // Check if pool has space BEFORE creating (avoid wasted work)
585 let pool_has_space = {
586 let pool = inner.available.lock().unwrap_or_else(|poisoned| {
587 log::warn!("Pool available lock poisoned, recovering");
588 poisoned.into_inner()
589 });
590 let has_space = pool.len() < inner.config.max_pool_size;
591 log::trace!(
592 "📊 Pool space check: {}/{} (has space: {})",
593 pool.len(),
594 inner.config.max_pool_size,
595 has_space
596 );
597 has_space
598 };
599
600 if !pool_has_space {
601 log::warn!(
602 "⚠️ Pool is full, stopping replacement creation at {}/{}",
603 i,
604 count
605 );
606 break;
607 }
608
609 log::debug!("📦 Creating replacement browser {}/{}", i + 1, count);
610
611 // Use spawn_blocking for CPU-bound browser creation
612 // This prevents blocking the async runtime
613 let inner_clone = Arc::clone(&inner);
614 let result =
615 tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;
616
617 match result {
618 Ok(Ok(tracked)) => {
619 let id = tracked.id();
620
621 // Add to pool (with space check to handle race conditions)
622 let mut pool = inner.available.lock().unwrap_or_else(|poisoned| {
623 log::warn!("Pool available lock poisoned, recovering");
624 poisoned.into_inner()
625 });
626
627 // Double-check space (another thread might have added browsers)
628 if pool.len() < inner.config.max_pool_size {
629 pool.push(tracked);
630 created_count += 1;
631 log::info!(
632 "✅ Created replacement browser {} and added to pool ({}/{})",
633 id,
634 i + 1,
635 count
636 );
637 } else {
638 log::warn!(
639 "⚠️ Pool became full during creation, replacement browser {} kept in active only",
640 id
641 );
642 created_count += 1; // Still count as created (just not pooled)
643 }
644 }
645 Ok(Err(e)) => {
646 failed_count += 1;
647 log::error!(
648 "❌ Failed to create replacement browser {}/{}: {}",
649 i + 1,
650 count,
651 e
652 );
653 }
654 Err(e) => {
655 failed_count += 1;
656 log::error!(
657 "❌ Replacement browser {}/{} task panicked: {:?}",
658 i + 1,
659 count,
660 e
661 );
662 }
663 }
664 }
665
666 // Final status report
667 let pool_size = inner
668 .available
669 .lock()
670 .unwrap_or_else(|poisoned| {
671 log::warn!("Pool available lock poisoned, recovering");
672 poisoned.into_inner()
673 })
674 .len();
675 let active_size = inner
676 .active
677 .lock()
678 .unwrap_or_else(|poisoned| {
679 log::warn!("Pool active lock poisoned, recovering");
680 poisoned.into_inner()
681 })
682 .len();
683
684 log::info!(
685 "🏁 Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
686 created_count,
687 count,
688 failed_count,
689 pool_size,
690 active_size
691 );
692 }
693
694 /// Spawn a background task to create replacement browsers.
695 ///
696 /// This is non-blocking and returns immediately. The actual browser
697 /// creation happens in a tokio task tracked in `replacement_tasks`.
698 ///
699 /// # Why Async
700 ///
701 /// Browser creation is slow (1-3 seconds per browser). Spawning async
702 /// tasks prevents blocking the caller.
703 ///
704 /// # Task Tracking
705 ///
706 /// Tasks are tracked so we can abort them during shutdown.
707 ///
708 /// # Parameters
709 ///
710 /// * `inner` - Arc reference to pool state.
711 /// * `count` - Number of replacement browsers to create.
712 pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
713 log::info!(
714 "📥 Spawning async task to create {} replacement browsers",
715 count
716 );
717
718 // Clone Arc for moving into async task
719 let inner_for_task = Arc::clone(&inner);
720
721 // Spawn async task on the captured runtime
722 let task_handle = inner.runtime_handle.spawn(async move {
723 Self::spawn_replacement_creation_async(inner_for_task, count).await;
724 });
725
726 // Track task handle for shutdown cleanup
727 if let Ok(mut tasks) = inner.replacement_tasks.lock() {
728 // Clean up finished tasks while we have the lock (housekeeping)
729 let original_count = tasks.len();
730 tasks.retain(|h| !h.is_finished());
731 let cleaned = original_count - tasks.len();
732
733 if cleaned > 0 {
734 log::trace!("🧹 Cleaned up {} finished replacement tasks", cleaned);
735 }
736
737 // Add new task
738 tasks.push(task_handle);
739
740 log::debug!("📋 Now tracking {} active replacement tasks", tasks.len());
741 } else {
742 log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
743 }
744 }
745
746 /// Get the pool configuration.
747 #[inline]
748 pub(crate) fn config(&self) -> &BrowserPoolConfig {
749 &self.config
750 }
751
752 /// Check if the pool is shutting down.
753 #[inline]
754 pub(crate) fn is_shutting_down(&self) -> bool {
755 self.shutting_down.load(Ordering::Acquire)
756 }
757
758 /// Set the shutdown flag.
759 #[inline]
760 pub(crate) fn set_shutting_down(&self, value: bool) {
761 self.shutting_down.store(value, Ordering::Release);
762 }
763
764 /// Get the shutdown signal for the keep-alive thread.
765 #[inline]
766 pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
767 &self.shutdown_signal
768 }
769
770 /// Get the available browsers count.
771 pub(crate) fn available_count(&self) -> usize {
772 self.available.lock().map(|g| g.len()).unwrap_or(0)
773 }
774
775 /// Get the active browsers count.
776 pub(crate) fn active_count(&self) -> usize {
777 self.active.lock().map(|g| g.len()).unwrap_or(0)
778 }
779
780 /// Get a snapshot of active browsers for health checking.
781 ///
782 /// Returns a cloned list to avoid holding locks during I/O.
783 pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, Arc<TrackedBrowser>)> {
784 let active = self.active.lock().unwrap_or_else(|poisoned| {
785 log::warn!("Pool active lock poisoned, recovering");
786 poisoned.into_inner()
787 });
788 active
789 .iter()
790 .map(|(id, tracked)| (*id, Arc::clone(tracked)))
791 .collect()
792 }
793
794 /// Remove a browser from active tracking.
795 pub(crate) fn remove_from_active(&self, id: u64) -> Option<Arc<TrackedBrowser>> {
796 let mut active = self.active.lock().unwrap_or_else(|poisoned| {
797 log::warn!("Pool active lock poisoned, recovering");
798 poisoned.into_inner()
799 });
800 active.remove(&id)
801 }
802
803 /// Remove browsers from the available pool by ID.
804 pub(crate) fn remove_from_available(&self, ids: &[u64]) {
805 let mut pool = self.available.lock().unwrap_or_else(|poisoned| {
806 log::warn!("Pool available lock poisoned, recovering");
807 poisoned.into_inner()
808 });
809 let original_size = pool.len();
810 pool.retain(|b| !ids.contains(&b.id()));
811 let removed = original_size - pool.len();
812 if removed > 0 {
813 log::debug!("🗑️ Removed {} browsers from available pool", removed);
814 }
815 }
816
817 /// Abort all replacement tasks.
818 pub(crate) fn abort_replacement_tasks(&self) -> usize {
819 if let Ok(mut tasks) = self.replacement_tasks.lock() {
820 let count = tasks.len();
821 for handle in tasks.drain(..) {
822 handle.abort();
823 }
824 count
825 } else {
826 0
827 }
828 }
829}
830
831// ============================================================================
832// BrowserPool
833// ============================================================================
834
835/// Main browser pool with lifecycle management.
836///
837/// This is the public-facing API for the browser pool. It wraps the internal
838/// state and manages the keep-alive thread.
839///
840/// # Overview
841///
842/// `BrowserPool` provides:
843/// - Browser checkout via [`get()`](Self::get)
844/// - Pool warmup via [`warmup()`](Self::warmup)
845/// - Statistics via [`stats()`](Self::stats)
846/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
847///
848/// # Example
849///
850/// ```rust,no_run
851/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
852/// use std::time::Duration;
853///
854/// #[tokio::main]
855/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
856/// // Create pool
857/// let mut pool = BrowserPool::builder()
858/// .config(
859/// BrowserPoolConfigBuilder::new()
860/// .max_pool_size(5)
861/// .warmup_count(3)
862/// .build()?
863/// )
864/// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
865/// .build()?;
866///
867/// // Warmup
868/// pool.warmup().await?;
869///
870/// // Use browsers
871/// {
872/// let browser = pool.get()?;
873/// let tab = browser.new_tab()?;
874/// // ... do work ...
875/// } // browser returned to pool automatically
876///
877/// // Shutdown
878/// pool.shutdown_async().await;
879///
880/// Ok(())
881/// }
882/// ```
883///
884/// # Thread Safety
885///
886/// `BrowserPool` uses fine-grained internal locks (`Mutex<Vec>`, `Mutex<HashMap>`)
887/// so it is safe to share as `Arc<BrowserPool>` without an outer `Mutex`.
888/// Use [`into_shared()`](Self::into_shared) for convenience.
889pub struct BrowserPool {
890 /// Shared internal state.
891 inner: Arc<BrowserPoolInner>,
892
893 /// Handle to keep-alive monitoring thread.
894 ///
895 /// Option allows taking during shutdown. None means keep-alive disabled.
896 keep_alive_handle: Option<JoinHandle<()>>,
897}
898
899impl BrowserPool {
900 /// Convert pool into a shared `Arc<BrowserPool>` for use in web handlers.
901 ///
902 /// This is convenient for web frameworks that need shared state.
903 /// No outer `Mutex` is needed — the pool uses fine-grained internal locks.
904 ///
905 /// # Example
906 ///
907 /// ```rust,ignore
908 /// let pool = BrowserPool::builder()
909 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
910 /// .build()?
911 /// .into_shared();
912 ///
913 /// // Can now be cloned and shared across handlers
914 /// let pool_clone = Arc::clone(&pool);
915 /// ```
916 pub fn into_shared(self) -> Arc<BrowserPool> {
917 log::debug!("🔍 Converting BrowserPool into shared Arc<BrowserPool>");
918 Arc::new(self)
919 }
920
921 /// Create a new builder for constructing a BrowserPool.
922 ///
923 /// This is the recommended way to create a pool.
924 ///
925 /// # Example
926 ///
927 /// ```rust,ignore
928 /// let pool = BrowserPool::builder()
929 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
930 /// .build()?;
931 /// ```
932 pub fn builder() -> BrowserPoolBuilder {
933 BrowserPoolBuilder::new()
934 }
935
936 /// Get a browser from the pool (or create one if empty).
937 ///
938 /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
939 /// allowing transparent access to browser methods.
940 ///
941 /// # Automatic Return
942 ///
943 /// The browser is automatically returned to the pool when the handle
944 /// is dropped, even if your code panics (RAII pattern).
945 ///
946 /// # Errors
947 ///
948 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
949 /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
950 /// - Does not return [`BrowserPoolError::HealthCheckFailed`] merely because pooled browsers are unhealthy: those are discarded and a new browser is created instead.
951 ///
952 /// # Example
953 ///
954 /// ```rust,ignore
955 /// let browser = pool.get()?;
956 /// let tab = browser.new_tab()?;
957 /// tab.navigate_to("https://example.com")?;
958 /// // browser returned automatically when it goes out of scope
959 /// ```
960 pub fn get(&self) -> Result<BrowserHandle> {
961 log::trace!("🎯 BrowserPool::get() called");
962 self.inner.get_or_create_browser()
963 }
964
965 /// Get pool statistics snapshot.
966 ///
967 /// # Returns
968 ///
969 /// [`PoolStats`] containing:
970 /// - `available`: Browsers in pool ready for checkout
971 /// - `active`: All browsers (pooled + checked out)
972 /// - `total`: Currently same as `active` (for future expansion)
973 ///
974 /// # Example
975 ///
976 /// ```rust,ignore
977 /// let stats = pool.stats();
978 /// println!("Available: {}, Active: {}", stats.available, stats.active);
979 /// ```
980 pub fn stats(&self) -> PoolStats {
981 let available = self.inner.available_count();
982 let active = self.inner.active_count();
983
984 log::trace!("📊 Pool stats: available={}, active={}", available, active);
985
986 PoolStats {
987 available,
988 active,
989 total: active,
990 }
991 }
992
993 /// Get a reference to the pool configuration.
994 ///
995 /// Returns the configuration that was used to create this pool.
996 /// The configuration is immutable after pool creation.
997 ///
998 /// # Example
999 ///
1000 /// ```rust,ignore
1001 /// let pool = BrowserPool::builder()
1002 /// .config(
1003 /// BrowserPoolConfigBuilder::new()
1004 /// .max_pool_size(10)
1005 /// .build()?
1006 /// )
1007 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1008 /// .build()?;
1009 ///
1010 /// println!("Max pool size: {}", pool.config().max_pool_size);
1011 /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
1012 /// ```
1013 ///
1014 /// # Use Cases
1015 ///
1016 /// - Logging configuration at startup
1017 /// - Monitoring/metrics collection
1018 /// - Readiness checks (comparing active count vs max_pool_size)
1019 /// - Debugging pool behavior
1020 #[inline]
1021 pub fn config(&self) -> &BrowserPoolConfig {
1022 self.inner.config()
1023 }
1024
1025 /// Warmup the pool by pre-creating browsers.
1026 ///
1027 /// This is highly recommended to reduce first-request latency.
1028 /// Should be called during application startup.
1029 ///
1030 /// # Process
1031 ///
1032 /// 1. Creates `warmup_count` browsers sequentially with staggered timing
1033 /// 2. Tests each browser with navigation
1034 /// 3. Returns all browsers to pool
1035 /// 4. Entire process has timeout (configurable via `warmup_timeout`)
1036 ///
1037 /// # Staggered Creation
1038 ///
1039 /// Browsers are created with a 30-second delay between them to ensure
1040 /// their TTLs are offset. This prevents all browsers from expiring
1041 /// at the same time.
1042 ///
1043 /// # Errors
1044 ///
1045 /// - Returns error if warmup times out.
    /// - Individual browser creation or validation failures are logged and skipped; they do not make warmup return an error.
1047 ///
1048 /// # Example
1049 ///
1050 /// ```rust,ignore
1051 /// let pool = BrowserPool::builder()
1052 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1053 /// .build()?;
1054 ///
1055 /// // Warmup during startup
1056 /// pool.warmup().await?;
1057 /// ```
1058 pub async fn warmup(&self) -> Result<()> {
1059 let count = self.inner.config().warmup_count;
1060 let warmup_timeout = self.inner.config().warmup_timeout;
1061
1062 log::info!(
1063 "🔥 Starting browser pool warmup with {} instances (timeout: {}s)",
1064 count,
1065 warmup_timeout.as_secs()
1066 );
1067
1068 // Wrap entire warmup in timeout to prevent hanging forever
1069 let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
1070
1071 match warmup_result {
1072 Ok(Ok(())) => {
1073 let stats = self.stats();
1074 log::info!(
1075 "✅ Warmup completed successfully - Available: {}, Active: {}",
1076 stats.available,
1077 stats.active
1078 );
1079 Ok(())
1080 }
1081 Ok(Err(e)) => {
1082 log::error!("❌ Warmup failed with error: {}", e);
1083 Err(e)
1084 }
1085 Err(_) => {
1086 log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1087 Err(BrowserPoolError::Configuration(format!(
1088 "Warmup timed out after {}s",
1089 warmup_timeout.as_secs()
1090 )))
1091 }
1092 }
1093 }
1094
1095 /// Internal warmup implementation (separated for cleaner timeout wrapping).
1096 ///
1097 /// Creates browsers sequentially with a delay between them.
1098 /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
1099 async fn warmup_internal(&self, count: usize) -> Result<()> {
1100 log::debug!("🛠️ Starting internal warmup process for {} browsers", count);
1101
1102 // STAGGER CONFIGURATION
1103 // We wait this long between creations to distribute expiration times
1104 let stagger_interval = Duration::from_secs(30);
1105
1106 let mut handles = Vec::new();
1107 let mut created_count = 0;
1108 let mut failed_count = 0;
1109
1110 for i in 0..count {
1111 log::debug!("🌐 Creating startup browser instance {}/{}", i + 1, count);
1112
1113 // Per-browser timeout (15s per browser is reasonable)
1114 // This prevents one slow browser from blocking entire warmup
1115 let browser_result = tokio::time::timeout(
1116 Duration::from_secs(15),
1117 tokio::task::spawn_blocking({
1118 let inner = Arc::clone(&self.inner);
1119 move || inner.create_browser_direct()
1120 }),
1121 )
1122 .await;
1123
1124 match browser_result {
1125 Ok(Ok(Ok(tracked))) => {
1126 log::debug!(
1127 "✅ Browser {} created, performing validation test...",
1128 tracked.id()
1129 );
1130
1131 // Test the browser with actual navigation
1132 match tracked.browser().new_tab() {
1133 Ok(tab) => {
1134 log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());
1135
1136 // Navigate to test page
1137 let nav_result = tab.navigate_to(
1138 "data:text/html,<html><body>Warmup test</body></html>",
1139 );
1140 if let Err(e) = nav_result {
1141 log::warn!(
1142 "⚠️ Browser {} test navigation failed: {}",
1143 tracked.id(),
1144 e
1145 );
1146 } else {
1147 log::trace!(
1148 "✅ Browser {} test: navigation successful",
1149 tracked.id()
1150 );
1151 }
1152
1153 // Clean up test tab
1154 let _ = tab.close(true);
1155
1156 // Keep handle so browser stays alive
1157 handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));
1158
1159 created_count += 1;
1160 log::info!(
1161 "✅ Browser instance {}/{} ready and validated",
1162 i + 1,
1163 count
1164 );
1165 }
1166 Err(e) => {
1167 failed_count += 1;
1168 log::error!(
1169 "❌ Browser {} validation test failed: {}",
1170 tracked.id(),
1171 e
1172 );
1173
1174 // Remove from active tracking since it's broken
1175 self.inner.remove_from_active(tracked.id());
1176 }
1177 }
1178 }
1179 Ok(Ok(Err(e))) => {
1180 failed_count += 1;
1181 log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
1182 }
1183 Ok(Err(e)) => {
1184 failed_count += 1;
1185 log::error!(
1186 "❌ Browser {}/{} creation task panicked: {:?}",
1187 i + 1,
1188 count,
1189 e
1190 );
1191 }
1192 Err(_) => {
1193 failed_count += 1;
1194 log::error!(
1195 "❌ Browser {}/{} creation timed out (15s limit)",
1196 i + 1,
1197 count
1198 );
1199 }
1200 }
1201
1202 // === STAGGER LOGIC ===
1203 // If this is not the last browser, wait before creating the next one.
1204 // This ensures their TTLs are offset by `stagger_interval`.
1205 if i < count - 1 {
1206 log::info!(
1207 "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
1208 stagger_interval.as_secs()
1209 );
1210 tokio::time::sleep(stagger_interval).await;
1211 }
1212 }
1213
1214 log::info!(
1215 "📊 Warmup creation phase: {} created, {} failed",
1216 created_count,
1217 failed_count
1218 );
1219
1220 // Return all browsers to pool by dropping handles
1221 log::debug!("🔍 Returning {} warmup browsers to pool...", handles.len());
1222 drop(handles);
1223
1224 // No delay needed: return_browser() is synchronous in the happy path,
1225 // and warmup browsers are never TTL-expired (which is the only path
1226 // that spawns async work via spawn_replacement_creation).
1227
1228 let final_stats = self.stats();
1229 log::info!(
1230 "🏁 Warmup internal completed - Pool: {}, Active: {}",
1231 final_stats.available,
1232 final_stats.active
1233 );
1234
1235 Ok(())
1236 }
1237
1238 /// Start the keep-alive monitoring thread.
1239 ///
1240 /// This background thread:
1241 /// - Pings all active browsers periodically
1242 /// - Removes unresponsive browsers after max_ping_failures
1243 /// - Retires browsers that exceed TTL
1244 /// - Spawns replacement browsers as needed
1245 ///
1246 /// # Critical Design Notes
1247 ///
1248 /// - Uses condvar for immediate shutdown signaling
1249 /// - Never holds locks during I/O operations
1250 /// - Uses consistent lock ordering (active -> pool)
1251 ///
1252 /// # Parameters
1253 ///
1254 /// * `inner` - Arc reference to pool state.
1255 ///
1256 /// # Returns
1257 ///
1258 /// JoinHandle for the background thread.
1259 fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
1260 let ping_interval = inner.config().ping_interval;
1261 let max_failures = inner.config().max_ping_failures;
1262 let browser_ttl = inner.config().browser_ttl;
1263 let shutdown_signal = Arc::clone(inner.shutdown_signal());
1264
1265 log::info!(
1266 "🚀 Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
1267 ping_interval.as_secs(),
1268 max_failures,
1269 browser_ttl.as_secs() / 60
1270 );
1271
1272 thread::spawn(move || {
1273 log::info!("🏁 Keep-alive thread started successfully");
1274
1275 // Track consecutive failures per browser ID
1276 let mut failure_counts: HashMap<u64, u32> = HashMap::new();
1277
1278 loop {
1279 // Wait for next ping interval OR shutdown signal (whichever comes first)
1280 // Using condvar instead of sleep allows immediate wake-up on shutdown
1281 let (lock, cvar) = &*shutdown_signal;
1282 let wait_result = {
1283 let shutdown = lock.lock().unwrap_or_else(|poisoned| {
1284 log::warn!("Shutdown lock poisoned, recovering");
1285 poisoned.into_inner()
1286 });
1287 cvar.wait_timeout(shutdown, ping_interval)
1288 .unwrap_or_else(|poisoned| {
1289 log::warn!("Condvar wait_timeout lock poisoned, recovering");
1290 poisoned.into_inner()
1291 })
1292 };
1293
1294 let shutdown_flag = *wait_result.0;
1295 let timed_out = wait_result.1.timed_out();
1296
1297 // Check if we were signaled to shutdown
1298 if shutdown_flag {
1299 log::info!("🛑 Keep-alive received shutdown signal via condvar");
1300 break;
1301 }
1302
1303 // Double-check atomic shutdown flag (belt and suspenders)
1304 if inner.is_shutting_down() {
1305 log::info!("🛑 Keep-alive detected shutdown via atomic flag");
1306 break;
1307 }
1308
1309 // If spuriously woken (not timeout, not shutdown), continue waiting
1310 if !timed_out {
1311 log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
1312 continue;
1313 }
1314
1315 log::trace!("⚡ Keep-alive ping cycle starting...");
1316
1317 // Collect browsers to ping WITHOUT holding locks
1318 // This is critical: we clone the list and release the lock
1319 // before doing any I/O operations
1320 let browsers_to_ping = inner.get_active_browsers_snapshot();
1321 log::trace!(
1322 "Keep-alive checking {} active browsers",
1323 browsers_to_ping.len()
1324 );
1325
1326 // Now ping browsers without holding any locks
1327 let mut to_remove = Vec::new();
1328 let mut expired_browsers = Vec::new();
1329
1330 for (id, tracked) in browsers_to_ping {
1331 // Check shutdown during ping loop (allows early exit)
1332 if inner.is_shutting_down() {
1333 log::info!("Shutdown detected during ping loop, exiting immediately");
1334 return;
1335 }
1336
1337 // Check TTL before pinging (no point pinging expired browsers)
1338 if tracked.is_expired(browser_ttl) {
1339 log::info!(
1340 "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
1341 id,
1342 tracked.age_minutes(),
1343 browser_ttl.as_secs() / 60
1344 );
1345 expired_browsers.push(id);
1346 continue; // Skip ping for expired browsers
1347 }
1348
1349 // Perform health check (this is I/O, no locks held)
1350 use crate::traits::Healthcheck;
1351 match tracked.ping() {
1352 Ok(_) => {
1353 // Reset failure count on success
1354 if failure_counts.remove(&id).is_some() {
1355 log::debug!("Browser {} ping successful, failure count reset", id);
1356 }
1357 }
1358 Err(e) => {
1359 // Only process failures if NOT shutting down
1360 // (during shutdown, browsers may legitimately fail)
1361 if !inner.is_shutting_down() {
1362 let failures = failure_counts.entry(id).or_insert(0);
1363 *failures += 1;
1364
1365 log::warn!(
1366 "Browser {} ping failed (attempt {}/{}): {}",
1367 id,
1368 failures,
1369 max_failures,
1370 e
1371 );
1372
1373 // Remove if exceeded max failures
1374 if *failures >= max_failures {
1375 log::error!(
1376 "Browser {} exceeded max ping failures ({}), marking for removal",
1377 id,
1378 max_failures
1379 );
1380 to_remove.push(id);
1381 }
1382 }
1383 }
1384 }
1385 }
1386
1387 // Check shutdown before cleanup (avoid work if shutting down)
1388 if inner.is_shutting_down() {
1389 log::info!("Shutdown detected before cleanup, skipping and exiting");
1390 break;
1391 }
1392
1393 // Handle TTL retirements first (they need replacement browsers)
1394 if !expired_browsers.is_empty() {
1395 log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
1396 Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
1397 }
1398
1399 // Handle failed browsers (remove from tracking and pool)
1400 if !to_remove.is_empty() {
1401 log::warn!("Removing {} failed browsers from pool", to_remove.len());
1402
1403 // Track how many were actually removed so we know how many to replace
1404 let mut actual_removed_count = 0;
1405
1406 // Remove dead browsers from active tracking
1407 for id in &to_remove {
1408 if inner.remove_from_active(*id).is_some() {
1409 actual_removed_count += 1;
1410 log::debug!("Removed failed browser {} from active tracking", id);
1411 }
1412 failure_counts.remove(id);
1413 }
1414
1415 log::debug!(
1416 "Active browsers after failure cleanup: {}",
1417 inner.active_count()
1418 );
1419
1420 // Clean up pool (remove dead browsers)
1421 inner.remove_from_available(&to_remove);
1422
1423 log::debug!("Pool size after cleanup: {}", inner.available_count());
1424
1425 // Trigger replacement for the browsers we just removed
1426 if actual_removed_count > 0 {
1427 log::info!(
1428 "Spawning {} replacement browsers for failed ones",
1429 actual_removed_count
1430 );
1431 BrowserPoolInner::spawn_replacement_creation(
1432 Arc::clone(&inner),
1433 actual_removed_count,
1434 );
1435 }
1436 }
1437
1438 // Log keep-alive cycle summary
1439 log::debug!(
1440 "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
1441 inner.active_count(),
1442 inner.available_count(),
1443 failure_counts.len()
1444 );
1445 }
1446
1447 log::info!("Keep-alive thread exiting cleanly");
1448 })
1449 }
1450
1451 /// Handle browser retirement due to TTL expiration.
1452 ///
1453 /// This function:
1454 /// 1. Removes expired browsers from active and pool tracking
1455 /// 2. Spawns async tasks to create replacement browsers
1456 /// 3. Maintains pool target size
1457 ///
1458 /// # Critical Lock Ordering
1459 ///
1460 /// Acquires active -> pool locks together to prevent races.
1461 ///
1462 /// # Parameters
1463 ///
1464 /// * `inner` - Arc reference to pool state.
1465 /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1466 /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1467 fn handle_browser_retirement(
1468 inner: &Arc<BrowserPoolInner>,
1469 expired_ids: Vec<u64>,
1470 failure_counts: &mut HashMap<u64, u32>,
1471 ) {
1472 log::info!(
1473 "Retiring {} expired browsers (TTL enforcement)",
1474 expired_ids.len()
1475 );
1476
1477 // Remove expired browsers from active tracking
1478 let mut retired_count = 0;
1479 for id in &expired_ids {
1480 if inner.remove_from_active(*id).is_some() {
1481 retired_count += 1;
1482 log::debug!("Removed expired browser {} from active tracking", id);
1483 }
1484 // Clean up failure tracking
1485 failure_counts.remove(id);
1486 }
1487
1488 // Remove from pool as well
1489 inner.remove_from_available(&expired_ids);
1490
1491 log::debug!(
1492 "After retirement - Active: {}, Pooled: {}",
1493 inner.active_count(),
1494 inner.available_count()
1495 );
1496
1497 // Create replacement browsers to maintain target count
1498 if retired_count > 0 {
1499 log::info!(
1500 "Spawning {} replacement browsers for retired ones",
1501 retired_count
1502 );
1503 BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1504 } else {
1505 log::debug!("No browsers were actually retired (already removed)");
1506 }
1507 }
1508
1509 /// Asynchronously shutdown the pool (recommended method).
1510 ///
1511 /// This is the preferred shutdown method as it can properly await
1512 /// async task cancellation. Should be called during application shutdown.
1513 ///
1514 /// # Shutdown Process
1515 ///
1516 /// 1. Set atomic shutdown flag (stops new operations)
1517 /// 2. Signal condvar to wake keep-alive thread immediately
1518 /// 3. Wait for keep-alive thread to exit (with timeout)
1519 /// 4. Abort all replacement creation tasks
1520 /// 5. Wait briefly for cleanup
1521 /// 6. Log final statistics
1522 ///
1523 /// # Timeout
1524 ///
1525 /// Keep-alive thread is given 5 seconds to exit gracefully.
1526 /// If it doesn't exit, we log an error but continue shutdown.
1527 ///
1528 /// # Example
1529 ///
1530 /// ```rust,ignore
1531 /// let mut pool = /* ... */;
1532 ///
1533 /// // During application shutdown
1534 /// pool.shutdown_async().await;
1535 /// ```
1536 pub async fn shutdown_async(&mut self) {
1537 log::info!("Shutting down browser pool (async mode)...");
1538
1539 // Step 1: Set shutdown flag (prevents new operations)
1540 self.inner.set_shutting_down(true);
1541 log::debug!("Shutdown flag set");
1542
1543 // Step 2: Signal condvar to wake keep-alive thread immediately
1544 // This is critical - without this, keep-alive waits for full ping_interval
1545 {
1546 let (lock, cvar) = &**self.inner.shutdown_signal();
1547 let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1548 log::warn!("Shutdown lock poisoned, recovering");
1549 poisoned.into_inner()
1550 });
1551 *shutdown = true;
1552 cvar.notify_all();
1553 log::debug!("Shutdown signal sent to keep-alive thread");
1554 } // Lock released here
1555
1556 // Step 3: Wait for keep-alive thread to exit
1557 if let Some(handle) = self.keep_alive_handle.take() {
1558 log::debug!("Waiting for keep-alive thread to exit...");
1559
1560 // Wrap thread join in spawn_blocking to make it async-friendly
1561 let join_task = tokio::task::spawn_blocking(move || handle.join());
1562
1563 // Give it 5 seconds to exit gracefully
1564 match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1565 Ok(Ok(Ok(_))) => {
1566 log::info!("Keep-alive thread stopped cleanly");
1567 }
1568 Ok(Ok(Err(_))) => {
1569 log::error!("Keep-alive thread panicked during shutdown");
1570 }
1571 Ok(Err(_)) => {
1572 log::error!("Keep-alive join task panicked");
1573 }
1574 Err(_) => {
1575 log::error!("Keep-alive thread didn't exit within 5s timeout");
1576 }
1577 }
1578 } else {
1579 log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1580 }
1581
1582 // Step 4: Abort all replacement creation tasks
1583 log::info!("Aborting replacement creation tasks...");
1584 let aborted_count = self.inner.abort_replacement_tasks();
1585 if aborted_count > 0 {
1586 log::info!("Aborted {} replacement tasks", aborted_count);
1587 } else {
1588 log::debug!("No replacement tasks to abort");
1589 }
1590
1591 // Step 5: Small delay to let aborted tasks clean up
1592 tokio::time::sleep(Duration::from_millis(100)).await;
1593
1594 // Step 6: Log final statistics
1595 let stats = self.stats();
1596 log::info!(
1597 "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1598 stats.available,
1599 stats.active,
1600 stats.total
1601 );
1602 }
1603
1604 /// Synchronously shutdown the pool (fallback method).
1605 ///
1606 /// This is a simplified shutdown for use in Drop or non-async contexts.
1607 /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
1608 ///
1609 /// # Note
1610 ///
1611 /// This method doesn't wait for replacement tasks to finish since
1612 /// there's no async runtime available. Tasks are aborted but may not
1613 /// have cleaned up yet.
1614 pub fn shutdown(&mut self) {
1615 log::debug!("Calling synchronous shutdown...");
1616 self.shutdown_sync();
1617 }
1618
1619 /// Internal synchronous shutdown implementation.
1620 fn shutdown_sync(&mut self) {
1621 log::info!("Shutting down browser pool (sync mode)...");
1622
1623 // Set shutdown flag
1624 self.inner.set_shutting_down(true);
1625 log::debug!("Shutdown flag set");
1626
1627 // Signal condvar (same as async version)
1628 {
1629 let (lock, cvar) = &**self.inner.shutdown_signal();
1630 let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1631 log::warn!("Shutdown lock poisoned, recovering");
1632 poisoned.into_inner()
1633 });
1634 *shutdown = true;
1635 cvar.notify_all();
1636 log::debug!("Shutdown signal sent");
1637 }
1638
1639 // Wait for keep-alive thread
1640 if let Some(handle) = self.keep_alive_handle.take() {
1641 log::debug!("Joining keep-alive thread (sync)...");
1642
1643 match handle.join() {
1644 Ok(_) => log::info!("Keep-alive thread stopped"),
1645 Err(_) => log::error!("Keep-alive thread panicked"),
1646 }
1647 }
1648
1649 // Abort replacement tasks (best effort - they won't make progress without runtime)
1650 let aborted_count = self.inner.abort_replacement_tasks();
1651 if aborted_count > 0 {
1652 log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1653 }
1654
1655 let stats = self.stats();
1656 log::info!(
1657 "Sync shutdown complete - Available: {}, Active: {}",
1658 stats.available,
1659 stats.active
1660 );
1661 }
1662
1663 /// Get a reference to the inner pool state.
1664 ///
1665 /// This is primarily for internal use and testing.
1666 #[doc(hidden)]
1667 #[allow(dead_code)]
1668 pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
1669 &self.inner
1670 }
1671}
1672
1673impl Drop for BrowserPool {
1674 /// Automatic cleanup when pool is dropped.
1675 ///
1676 /// This ensures resources are released even if shutdown wasn't called explicitly.
1677 /// Uses sync shutdown since Drop can't be async.
1678 fn drop(&mut self) {
1679 log::debug!("🛑 BrowserPool Drop triggered - running cleanup");
1680
1681 // Only shutdown if not already done
1682 if !self.inner.is_shutting_down() {
1683 log::warn!("⚠ BrowserPool dropped without explicit shutdown - cleaning up");
1684 self.shutdown();
1685 } else {
1686 log::debug!(" Pool already shutdown, Drop is no-op");
1687 }
1688 }
1689}
1690
1691// ============================================================================
1692// BrowserPoolBuilder
1693// ============================================================================
1694
1695/// Builder for constructing a [`BrowserPool`] with validation.
1696///
1697/// This is the recommended way to create a pool as it validates
1698/// configuration and provides sensible defaults.
1699///
1700/// # Example
1701///
1702/// ```rust,ignore
1703/// use std::time::Duration;
1704/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
1705///
1706/// let pool = BrowserPool::builder()
1707/// .config(
1708/// BrowserPoolConfigBuilder::new()
1709/// .max_pool_size(10)
1710/// .warmup_count(5)
1711/// .browser_ttl(Duration::from_secs(7200))
1712/// .build()?
1713/// )
1714/// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1715/// .enable_keep_alive(true)
1716/// .build()?;
1717/// ```
1718pub struct BrowserPoolBuilder {
1719 /// Optional configuration (uses default if not provided).
1720 config: Option<BrowserPoolConfig>,
1721
1722 /// Browser factory (required).
1723 factory: Option<Box<dyn BrowserFactory>>,
1724
1725 /// Whether to enable keep-alive thread (default: true).
1726 enable_keep_alive: bool,
1727}
1728
1729impl BrowserPoolBuilder {
1730 /// Create a new builder with defaults.
1731 ///
1732 /// # Example
1733 ///
1734 /// ```rust,ignore
1735 /// let builder = BrowserPoolBuilder::new();
1736 /// ```
1737 pub fn new() -> Self {
1738 Self {
1739 config: None,
1740 factory: None,
1741 enable_keep_alive: true,
1742 }
1743 }
1744
1745 /// Set custom configuration.
1746 ///
1747 /// If not called, uses [`BrowserPoolConfig::default()`].
1748 ///
1749 /// # Parameters
1750 ///
1751 /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1752 ///
1753 /// # Example
1754 ///
1755 /// ```rust,ignore
1756 /// let config = BrowserPoolConfigBuilder::new()
1757 /// .max_pool_size(10)
1758 /// .build()?;
1759 ///
1760 /// let pool = BrowserPool::builder()
1761 /// .config(config)
1762 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1763 /// .build()?;
1764 /// ```
1765 pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1766 self.config = Some(config);
1767 self
1768 }
1769
1770 /// Set browser factory (required).
1771 ///
1772 /// The factory is responsible for creating browser instances.
1773 /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1774 ///
1775 /// # Parameters
1776 ///
1777 /// * `factory` - A boxed [`BrowserFactory`] implementation.
1778 ///
1779 /// # Example
1780 ///
1781 /// ```rust,ignore
1782 /// let pool = BrowserPool::builder()
1783 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1784 /// .build()?;
1785 /// ```
1786 pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1787 self.factory = Some(factory);
1788 self
1789 }
1790
1791 /// Enable or disable keep-alive thread.
1792 ///
1793 /// Keep-alive should be disabled only for testing.
1794 /// Production use should always have it enabled.
1795 ///
1796 /// # Parameters
1797 ///
1798 /// * `enable` - Whether to enable the keep-alive thread.
1799 ///
1800 /// # Example
1801 ///
1802 /// ```rust,ignore
1803 /// // Disable for tests
1804 /// let pool = BrowserPool::builder()
1805 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1806 /// .enable_keep_alive(false)
1807 /// .build()?;
1808 /// ```
1809 pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1810 self.enable_keep_alive = enable;
1811 self
1812 }
1813
1814 /// Build the browser pool.
1815 ///
1816 /// # Errors
1817 ///
1818 /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1819 ///
1820 /// # Panics
1821 ///
1822 /// Panics if called outside a tokio runtime context.
1823 ///
1824 /// # Example
1825 ///
1826 /// ```rust,ignore
1827 /// let pool = BrowserPool::builder()
1828 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1829 /// .build()?;
1830 /// ```
1831 pub fn build(self) -> Result<BrowserPool> {
1832 let config = self.config.unwrap_or_default();
1833 let factory = self.factory.ok_or_else(|| {
1834 BrowserPoolError::Configuration("No browser factory provided".to_string())
1835 })?;
1836
1837 log::info!("📦 Building browser pool with config: {:?}", config);
1838
1839 // Create inner state
1840 let inner = BrowserPoolInner::new(config, factory);
1841
1842 // Start keep-alive thread if enabled
1843 let keep_alive_handle = if self.enable_keep_alive {
1844 log::info!("🚀 Starting keep-alive monitoring thread");
1845 Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1846 } else {
1847 log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1848 None
1849 };
1850
1851 log::info!("✅ Browser pool built successfully");
1852
1853 Ok(BrowserPool {
1854 inner,
1855 keep_alive_handle,
1856 })
1857 }
1858}
1859
1860impl Default for BrowserPoolBuilder {
1861 fn default() -> Self {
1862 Self::new()
1863 }
1864}
1865
1866// ============================================================================
1867// Environment Initialization (feature-gated)
1868// ============================================================================
1869
1870/// Initialize browser pool from environment variables.
1871///
1872/// This is a convenience function for common initialization patterns.
1873/// It reads configuration from environment variables with sensible defaults.
1874///
1875/// # Feature Flag
1876///
1877/// This function is only available when the `env-config` feature is enabled.
1878///
1879/// # Environment Variables
1880///
1881/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
1882/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
1883/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
1884/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
1885/// - `CHROME_PATH`: Custom Chrome binary path (optional)
1886///
1887/// # Returns
1888///
1889/// `Arc<BrowserPool>` ready for use in web handlers.
1890///
1891/// # Errors
1892///
1893/// - Returns error if configuration is invalid.
1894/// - Returns error if warmup fails.
1895///
1896/// # Example
1897///
1898/// ```rust,ignore
1899/// #[tokio::main]
1900/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
1901/// env_logger::init();
1902///
1903/// let pool = init_browser_pool().await?;
1904///
1905/// // Use pool in handlers...
1906///
1907/// Ok(())
1908/// }
1909/// ```
1910#[cfg(feature = "env-config")]
1911pub async fn init_browser_pool() -> Result<Arc<BrowserPool>> {
1912 use crate::config::env::{chrome_path_from_env, from_env};
1913 use crate::factory::ChromeBrowserFactory;
1914
1915 log::info!("Initializing browser pool from environment...");
1916
1917 // Load configuration from environment
1918 let config = from_env()?;
1919
1920 // Get optional Chrome path
1921 let chrome_path = chrome_path_from_env();
1922
1923 log::info!("Pool configuration from environment:");
1924 log::info!(" - Max pool size: {}", config.max_pool_size);
1925 log::info!(" - Warmup count: {}", config.warmup_count);
1926 log::info!(
1927 " - Browser TTL: {}s ({}min)",
1928 config.browser_ttl.as_secs(),
1929 config.browser_ttl.as_secs() / 60
1930 );
1931 log::info!(" - Warmup timeout: {}s", config.warmup_timeout.as_secs());
1932 log::info!(
1933 " - Chrome path: {}",
1934 chrome_path.as_deref().unwrap_or("auto-detect")
1935 );
1936
1937 // Create factory based on whether custom path is provided
1938 let factory: Box<dyn BrowserFactory> = match chrome_path {
1939 Some(path) => {
1940 log::info!("Using custom Chrome path: {}", path);
1941 Box::new(ChromeBrowserFactory::with_path(path))
1942 }
1943 None => {
1944 log::info!("Using auto-detected Chrome browser");
1945 Box::new(ChromeBrowserFactory::with_defaults())
1946 }
1947 };
1948
1949 // Create browser pool with Chrome factory
1950 log::debug!("Building browser pool...");
1951 let pool = BrowserPool::builder()
1952 .config(config.clone())
1953 .factory(factory)
1954 .enable_keep_alive(true)
1955 .build()
1956 .map_err(|e| {
1957 log::error!("❌ Failed to create browser pool: {}", e);
1958 e
1959 })?;
1960
1961 log::info!("✅ Browser pool created successfully");
1962
1963 // Warmup the pool
1964 log::info!(
1965 "Warming up browser pool with {} instances...",
1966 config.warmup_count
1967 );
1968 pool.warmup().await.map_err(|e| {
1969 log::error!("❌ Failed to warmup pool: {}", e);
1970 e
1971 })?;
1972
1973 let stats = pool.stats();
1974 log::info!(
1975 "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
1976 stats.available,
1977 stats.active,
1978 stats.total
1979 );
1980
1981 Ok(pool.into_shared())
1982}
1983
1984// ============================================================================
1985// Unit Tests
1986// ============================================================================
1987
1988#[cfg(test)]
mod tests {
    use super::*;

    /// Building a pool without a factory must fail with a configuration error.
    ///
    /// The builder has no way to create browsers without a factory, so
    /// `build()` is expected to reject the request with the dedicated message.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised inside a tokio runtime.
        let runtime = tokio::runtime::Runtime::new().unwrap();

        runtime.block_on(async {
            let three_browser_config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // No `.factory(...)` call on purpose.
            let outcome = BrowserPool::builder().config(three_browser_config).build();

            assert!(outcome.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(msg)) = outcome {
                assert!(
                    msg.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    msg
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` must produce the same starting state as `new()`:
    /// no config, no factory, keep-alive enabled.
    #[test]
    fn test_builder_default() {
        let builder = BrowserPoolBuilder::default();

        assert!(builder.config.is_none());
        assert!(builder.factory.is_none());
        assert!(builder.enable_keep_alive);
    }

    /// The keep-alive switch can be turned off through the builder.
    #[test]
    fn test_builder_disable_keep_alive() {
        let without_keep_alive = BrowserPoolBuilder::new().enable_keep_alive(false);

        assert!(!without_keep_alive.enable_keep_alive);
    }
}